From 2e64b43d7789bed26e97cc06498bfd701931777c Mon Sep 17 00:00:00 2001
From: joyguoguo <160556976+joyguoguo@users.noreply.github.com>
Date: Sun, 20 Jul 2025 00:48:48 -0700
Subject: [PATCH 001/134] Upload the python project Fuzz test script

valid_projects.txt: Python project list
script_fuzz_py_final.sh: Single project test script
script_fuzz_py_batch_final.sh: Batch projects test script
---
 script_fuzz_py_batch_final.sh | 201 +++++++++++++++++++++++++++++
 script_fuzz_py_final.sh       | 126 ++++++++++++++++++
 valid_projects.txt            | 234 ++++++++++++++++++++++++++++++++++
 3 files changed, 561 insertions(+)
 create mode 100644 script_fuzz_py_batch_final.sh
 create mode 100644 script_fuzz_py_final.sh
 create mode 100644 valid_projects.txt

diff --git a/script_fuzz_py_batch_final.sh b/script_fuzz_py_batch_final.sh
new file mode 100644
index 0000000..1d7d9af
--- /dev/null
+++ b/script_fuzz_py_batch_final.sh
@@ -0,0 +1,201 @@
+#!/bin/bash
+# script_fuzz_py_batch_final.sh
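+# Runs the full local OSS-Fuzz workflow in batch: reads the project list file and, for each project, builds the Docker image -> builds the fuzzers -> discovers the fuzz targets -> runs them (coverage report generation is not implemented yet).
+# Usage: ./script_fuzz_py_batch_final.sh [project list file] [sanitizer]
+# Example: ./script_fuzz_py_batch_final.sh valid_projects.txt address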
+
+# --- Global configuration ---
+PROJECT_LIST_FILE="${1:-valid_projects.txt}" # project list file ($1, defaults to valid_projects.txt)
+SANITIZER="${2:-address}"                   # sanitizer passed to build_fuzzers ($2, defaults to address)
+OSS_FUZZ_DIR="$HOME/oss-fuzz-py/oss-fuzz"      # OSS-Fuzz checkout; check_environment cd's here before helper.py is run
+LOG_DIR="$OSS_FUZZ_DIR/script_lz4_batch_logs"        # directory that receives one log file per project
+FAILED_PROJECTS=()                          # names of the projects whose processing failed
+
+# --- Environment check: verifies the OSS-Fuzz checkout and the project list exist, creates the log directory, then cd's into the checkout. Returns 1 on failure. ---
+check_environment() {
+  if [ ! -d "$OSS_FUZZ_DIR" ]; then
+    echo "❌ 错误: OSS-Fuzz 目录 '$OSS_FUZZ_DIR' 不存在！"
+    return 1
+  fi
+  if [ ! -f "$PROJECT_LIST_FILE" ]; then
+    echo "❌ 错误: 项目列表文件 '$PROJECT_LIST_FILE' 不存在！"; return 1
+  fi
+  [[ "$PROJECT_LIST_FILE" == /* ]] || PROJECT_LIST_FILE="$PWD/$PROJECT_LIST_FILE" # absolutize before the cd below; main reads the file after it
+  mkdir -p "$LOG_DIR"
+  chmod 777 "$LOG_DIR" 2>/dev/null || true # best-effort permissive mode; failures are ignored on purpose
+  cd "$OSS_FUZZ_DIR" || return 1
+  echo "✅ 环境检查通过。OSS-Fuzz 目录: $OSS_FUZZ_DIR"
+}
+
+# --- Run a command with logging. Args: $1 command string (eval'd, stdin fed by `yes`), $2 log message, $3 log file, $4 optional comma-separated exit codes treated as success. Returns 0 or 1. ---
+run_command() {
+  local cmd="$1"
+  local log_msg="$2"
+  local log_file="$3" # the log file is supplied by the caller
+  local allowed_exit="${4:-}"
+  local errexit_was_on=0; [[ $- == *e* ]] && errexit_was_on=1 # remember the caller's errexit state
+  echo "▶️ $log_msg..." | tee -a "$log_file"
+  set +e
+  { yes | eval "$cmd" ; } 2>&1 | tee -a "$log_file"
+  local exit_code=${PIPESTATUS[0]} # status of the { ...; } group, i.e. of the eval'd command
+  (( errexit_was_on )) && set -e # restore errexit only if it was on; never switch it on for the caller
+
+  if [[ -n "$allowed_exit" && ",$allowed_exit," =~ ",$exit_code," ]]; then
+    echo "ℹ️ 命令以预期状态退出: $exit_code" | tee -a "$log_file"
+    return 0
+  elif [ $exit_code -ne 0 ]; then
+    echo "❌ 命令执行失败: $cmd (退出码: $exit_code)" | tee -a "$log_file"
+    return 1  # report the failure to the caller instead of exiting the script
+  fi
+}
+
+# --- Auto-discover fuzz targets for project $1; prints the target names space-separated on stdout. Falls back to scanning the project sources when the build output yields nothing. ---
+discover_fuzz_targets() {
+    local project_name="$1"
+    local project_dir="$OSS_FUZZ_DIR/build/out/$project_name"
+    local project_src="$OSS_FUZZ_DIR/projects/$project_name"
+    local targets=() file filename
+
+    if [ -d "$project_dir" ]; then
+        while IFS= read -r -d $'\0' file; do
+            filename=$(basename "$file")
+            if [[ -x "$file" && "$filename" =~ ^fuzz_ && ! "$filename" =~ \. ]]; then # test the name only: a dot in a parent directory must not reject it
+                targets+=("$filename")
+            fi
+        done < <(find "$project_dir" -maxdepth 1 -type f -print0)
+    fi
+
+    if [ ${#targets[@]} -eq 0 ] && [ -d "$project_src" ]; then
+        while IFS= read -r -d $'\0' file; do
+            if grep -q "atheris.Setup" "$file"; then
+                targets+=("$(basename "${file%.*}")")
+            fi
+        done < <(find "$project_src" -name 'fuzz_*.py' -print0)
+    fi
+
+    echo "${targets[@]}"
+}
+
+# --- Full processing flow for one project ($1): build image -> build fuzzers -> discover targets -> run each target. Returns 0, or 1 when the image or fuzzer build failed. ---
+process_project() {
+  local project_name="$1"
+  local log_file="$LOG_DIR/oss_fuzz_${project_name}_$(date +%Y%m%d%H%M%S).log"
+  local project_failed=0
+
+  echo "============================================================" | tee -a "$log_file"
+  echo "🚀 开始处理项目: $project_name" | tee -a "$log_file"
+  echo "📝 日志文件: $log_file" | tee -a "$log_file"
+  echo "============================================================" | tee -a "$log_file"
+
+  # 1. Build the Docker image
+  if ! run_command \
+    "python3 infra/helper.py build_image $project_name" \
+    "步骤1/5: 构建 $project_name 的Docker镜像" \
+    "$log_file"; then
+    echo "❌ 项目 $project_name 构建镜像失败，跳过后续步骤" | tee -a "$log_file"
+    project_failed=1
+  fi
+
+  # 2. Build the fuzzers with the selected sanitizer (only when the image build succeeded)
+  if [ $project_failed -eq 0 ]; then
+    if ! run_command \
+      "python3 infra/helper.py build_fuzzers --sanitizer $SANITIZER $project_name" \
+      "步骤2/5: 编译 $project_name 的fuzzer (sanitizer=$SANITIZER)" \
+      "$log_file"; then
+      echo "❌ 项目 $project_name 编译fuzzer失败，跳过后续步骤" | tee -a "$log_file"
+      project_failed=1
+    fi
+  fi
+
+  # 3. Discover the fuzz targets (only when the fuzzer build succeeded)
+  if [ $project_failed -eq 0 ]; then
+    echo "🔍 正在为 $project_name 自动发现fuzz目标..." | tee -a "$log_file"
+    FUZZ_TARGETS=($(discover_fuzz_targets "$project_name"))
+
+    if [ ${#FUZZ_TARGETS[@]} -eq 0 ]; then
+      echo "⚠️  警告: 项目 $project_name 未找到任何fuzz目标！跳过运行步骤。" | tee -a "$log_file"
+    else
+      echo "✅ 发现目标: ${FUZZ_TARGETS[*]}" | tee -a "$log_file"
+      
+      # 4. Run every discovered target with -max_total_time=60; a failing target only logs a warning
+      for target in "${FUZZ_TARGETS[@]}"; do
+        if ! run_command \
+          "python3 infra/helper.py run_fuzzer $project_name $target -- -max_total_time=60" \
+          "步骤3/5: 运行目标 [$target] (60秒超时)" \
+          "$log_file" \
+          "124,1"; then  # exit codes 124 and 1 are tolerated (intended as: timeout / crash found)
+          echo "⚠️  警告: 目标 [$target] 运行失败，继续下一个目标" | tee -a "$log_file"
+        fi
+      done
+    fi
+  fi
+
+  # 5. Coverage report generation (disabled here, as in the single-project script)
+  # [the commented-out coverage commands live in the single-project script]
+
+  if [ $project_failed -eq 0 ]; then
+    echo "✅ 项目 $project_name 处理完成！" | tee -a "$log_file"
+   
+  else
+    echo "❌ 项目 $project_name 处理失败！" | tee -a "$log_file"
+    
+  fi
+  
+  echo "------------------------------------------------------------"
+  return $project_failed
+}
+
+# --- Main flow: check the environment, load the project list, process every project in order and print a success/failure summary. ---
+main() {
+  if ! check_environment; then
+    echo "❌ 环境检查失败，脚本终止"
+    exit 1
+  fi
+
+  # Load the project list into an array, skipping blank lines and lines starting with '#'.
+  local PROJECTS=() line project_name
+  while IFS= read -r line || [[ -n "$line" ]]; do
+    if [[ -z "$line" || "$line" =~ ^# ]]; then
+      continue
+    fi
+    PROJECTS+=("$line")
+  done < "$PROJECT_LIST_FILE"
+
+  # The total comes from the filtered array, not from the raw line count of the file.
+  local total_projects=${#PROJECTS[@]}
+  local current_project_num=0
+  local success_count=0
+  local fail_count=0
+
+  # Process the projects one after another.
+  for project_name in "${PROJECTS[@]}"; do
+
+    current_project_num=$((current_project_num + 1))
+    echo ">>> [ $current_project_num / $total_projects ] 开始处理项目: $project_name <<<"
+
+    if process_project "$project_name"; then
+      echo "✅ [$current_project_num/$total_projects] 项目 $project_name 成功完成"
+      success_count=$((success_count + 1)) # must run in this shell: a ( ... ) subshell would discard the update
+    else
+      echo "❌ [$current_project_num/$total_projects] 项目 $project_name 处理失败"
+      FAILED_PROJECTS+=("$project_name")
+      fail_count=$((fail_count + 1))
+    fi
+
+  done
+
+  echo "============================================================"
+  echo "🎉 批量处理完成！"
+  echo "📊 总计: $total_projects 个项目"
+  echo "✅ 成功: $success_count"
+  echo "❌ 失败: $fail_count"
+
+  if [ ${#FAILED_PROJECTS[@]} -gt 0 ]; then
+    echo "📛 失败项目列表:"
+    printf '  • %s\n' "${FAILED_PROJECTS[@]}"
+    echo "💡 提示: 可以重新运行失败项目，检查日志获取详细信息"
+    echo "      日志目录: $LOG_DIR"
+  fi
+}
+
+main "$@"
\ No newline at end of file
diff --git a/script_fuzz_py_final.sh b/script_fuzz_py_final.sh
new file mode 100644
index 0000000..c2bf00a
--- /dev/null
+++ b/script_fuzz_py_final.sh
@@ -0,0 +1,126 @@
+#!/bin/bash
+# script_fuzz_py_final.sh
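+# Runs the full local OSS-Fuzz workflow for one project: build the Docker image -> build the fuzzers -> discover the fuzz targets -> run them (the coverage report steps are currently commented out).
+# Usage: script_fuzz_py_final.sh [project name] [sanitizer]   (the project defaults to abseil-py, the sanitizer to address)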
+
+set -e  # exit immediately when a command fails
+
+PROJECT_NAME="${1:-abseil-py}"      # project to test ($1, defaults to abseil-py)
+SANITIZER="${2:-address}"           # sanitizer passed to build_fuzzers ($2, defaults to address)
+OSS_FUZZ_DIR="$HOME/oss-fuzz-py/oss-fuzz"        # OSS-Fuzz checkout
+LOG_DIR="$OSS_FUZZ_DIR/script_lz4_logs" # directory that receives the log files
+LOG_FILE="$LOG_DIR/oss_fuzz_${PROJECT_NAME}_$(date +%Y%m%d%H%M%S).log" # log path stamped with project name and start time
+# Validates the OSS-Fuzz directory, creates the log directory and cd's into the checkout; exits with status 1 on failure.
+check_environment() {
+  if [ ! -d "$OSS_FUZZ_DIR" ]; then
+    echo "❌ 错误: $OSS_FUZZ_DIR 目录不存在！"
+    exit 1
+  fi
+  mkdir -p "$LOG_DIR"  # the log directory must exist before anything is tee'd into LOG_FILE
+  chmod 777 "$LOG_DIR" 2>/dev/null || true  # best-effort permissive mode; failures are ignored
+  cd "$OSS_FUZZ_DIR" || exit 1
+}
+
+# Runs a command with logging. Args: $1 command string (eval'd), $2 log message, $3 optional comma-separated exit codes treated as success. Exits the script with 1 on any other non-zero status.
+run_command() {
+  local cmd="$1"
+  local log_msg="$2"
+  local allowed_exit="${3:-}"  # optional: allowed exit codes, comma-separated
+  
+  echo "▶️ $log_msg..." | tee -a "$LOG_FILE"
+  set +e  # temporarily disable errexit so the exit status can be inspected
+  eval "$cmd" 2>&1 | tee -a "$LOG_FILE"
+  local exit_code=${PIPESTATUS[0]}
+  set -e  # re-enable errexit
+  
+  # Check whether the exit code is one of the allowed ones
+  if [[ -n "$allowed_exit" && ",$allowed_exit," =~ ",$exit_code," ]]; then
+    echo "ℹ️ 命令以预期状态退出: $exit_code" | tee -a "$LOG_FILE"
+    return 0
+  elif [ $exit_code -ne 0 ]; then
+    echo "❌ 命令执行失败: $cmd (退出码: $exit_code)" | tee -a "$LOG_FILE"
+    exit 1
+  fi
+}
+
+# Auto-discovers the fuzz targets of $PROJECT_NAME and prints their names space-separated on stdout.
+discover_fuzz_targets() {
+    local project_dir="$OSS_FUZZ_DIR/build/out/$PROJECT_NAME"
+    local project_src="$OSS_FUZZ_DIR/projects/$PROJECT_NAME"
+    local targets=() file filename
+
+    # Build output scan: executables named fuzz_* whose file name contains no dot
+    if [ -d "$project_dir" ]; then
+        while IFS= read -r -d $'\0' file; do
+            filename=$(basename "$file")
+            if [[ -x "$file" && "$filename" =~ ^fuzz_ && ! "$filename" =~ \. ]]; then # test the name only: a dot in a parent directory must not reject it
+                targets+=("$filename")
+            fi
+        done < <(find "$project_dir" -maxdepth 1 -type f -print0)
+    fi
+
+    # Source scan (fallback when nothing was found above): fuzz_*.py files that contain "atheris.Setup"
+    if [ ${#targets[@]} -eq 0 ] && [ -d "$project_src" ]; then
+        while IFS= read -r -d $'\0' file; do
+            if grep -q "atheris.Setup" "$file"; then
+                targets+=("$(basename "${file%.*}")")
+            fi
+        done < <(find "$project_src" -name 'fuzz_*.py' -print0)
+    fi
+
+    echo "${targets[@]}"
+}
+
+# Main flow: check the environment, build the image and the fuzzers, discover the targets and run each of them; any unexpected failure exits the script.
+main() {
+  check_environment
+  echo "=============================="
+  echo "🚀 开始OSS-Fuzz测试 - 项目: $PROJECT_NAME"
+  echo "📝 日志文件: $LOG_FILE"
+  echo "=============================="
+
+  # 1. Build the Docker image
+  run_command \
+    "python3 infra/helper.py build_image $PROJECT_NAME" \
+    "步骤1/5: 构建Docker镜像"
+
+  # 2. Build the fuzzers with the selected sanitizer
+  run_command \
+    "python3 infra/helper.py build_fuzzers --sanitizer $SANITIZER $PROJECT_NAME" \
+    "步骤2/5: 编译fuzzer (sanitizer=$SANITIZER)"
+
+  # 3. Discover the fuzz targets
+  echo "🔍 自动发现fuzz目标..."
+  FUZZ_TARGETS=($(discover_fuzz_targets))
+
+  if [ ${#FUZZ_TARGETS[@]} -eq 0 ]; then
+    echo "❌ 未找到任何fuzz目标！检查项目配置" | tee -a "$LOG_FILE"
+    exit 1
+  fi
+
+  echo "✅ 发现目标: ${FUZZ_TARGETS[*]}" | tee -a "$LOG_FILE"
+
+  # 4. Run every discovered target; the logged duration must match -max_total_time
+  for target in "${FUZZ_TARGETS[@]}"; do
+    run_command \
+      "python3 infra/helper.py run_fuzzer $PROJECT_NAME $target -- -max_total_time=180" \
+      "步骤3/5: 运行目标 [$target] (180秒超时)" \
+      "124,1"  # exit codes 124 and 1 are tolerated (intended as: timeout / crash found)
+  done
+
+  # 5. Coverage report generation (currently disabled)
+  # run_command \
+  #   "python3 infra/helper.py build_fuzzers --sanitizer coverage $PROJECT_NAME" \
+  #   "步骤4/5: 编译覆盖率版本"
+
+  # run_command \
+  #   "python3 infra/helper.py coverage --no-serve $PROJECT_NAME" \
+  #   "步骤5/5: 生成覆盖率报告"
+
+  echo "✅ 所有步骤完成！结果查看:"
+  echo "🔍 测试日志: $LOG_FILE"
+  echo "📊 覆盖率报告(暂无): $OSS_FUZZ_DIR/build/out/$PROJECT_NAME/report/coverage/index.html"
+  echo "💥 崩溃报告: $OSS_FUZZ_DIR/build/out/$PROJECT_NAME/crashes/"
+}
+
+main "$@"
\ No newline at end of file
diff --git a/valid_projects.txt b/valid_projects.txt
new file mode 100644
index 0000000..94fdcd5
--- /dev/null
+++ b/valid_projects.txt
@@ -0,0 +1,234 @@
+abseil-py
+adal
+aiohttp
+aniso8601
+ansible
+argcomplete
+arrow-py
+asn1crypto
+asteval
+astroid
+asttokens
+attrs
+autoflake
+autopep8
+azure-sdk-for-python
+babel
+black
+botocore
+bottleneck
+bz2file
+cachetools
+cffi
+chardet
+charset_normalizer
+click
+cloud-custodian
+configparser
+connexion
+coveragepy
+croniter
+cryptography
+cssselect
+dask
+decorator
+defusedxml
+digest
+dill
+distlib
+dnspython
+docutils
+ecdsa-python
+et-xmlfile
+face
+filelock
+filesystem_spec
+flask
+flask-jwt-extended
+flask-restx
+flask-wtf
+fonttools
+ftfy
+g-api-auth-httplib2
+g-api-auth-library-python
+g-api-pubsub
+g-api-py-api-common-protos
+g-api-py-oauthlib
+g-api-python-bigquery-storage
+g-api-python-client
+g-api-python-cloud-core
+g-api-python-firestore
+g-api-python-tasks
+g-api-resource-manager
+g-api-resumable-media-python
+g-api-secret-manager
+g-apis-py-api-core
+gast
+gc-iam
+gcloud-error-py
+g-cloud-logging-py
+gcp-python-cloud-storage
+genshi
+gitdb
+glom
+gprof2dot
+g-py-bigquery
+g-py-crc32c
+grpc-py
+gunicorn
+h11
+h5py
+hiredis-py
+html2text
+html5lib-python
+httpcore
+httpretty
+httpx
+idna
+ijson
+importlib_metadata
+iniconfig
+ipaddress
+ipykernel
+ipython
+isodate
+itsdangerous
+jedi
+jinja2
+jmespathpy
+joblib
+jsmin
+jupyter-nbconvert
+jupyter_server
+kafka
+keras
+kiwisolver
+lark-parser
+libcst
+looker-sdk
+lxml
+mako
+markupsafe
+matplotlib
+mccabe
+mdit-py-plugins
+mdurl
+more-itertools
+mrab-regex
+msal
+msgpack-python
+multidict
+mutagen
+nbclassic
+nbformat
+netaddr-py
+networkx
+ntlm2
+ntlm-auth
+numexpr
+numpy
+oauth2
+oauthlib
+olefile
+openapi-schema-validator
+opencensus-python
+openpyxl
+opt_einsum
+oracle-py-cx
+orjson
+oscrypto
+packaging
+pandas
+paramiko
+parse
+parsimonious
+pasta
+pathlib2
+pdoc
+pem
+pendulum
+pip
+ply
+protobuf-python
+proto-plus-python
+psqlparse
+psutil
+psycopg2
+pyasn1
+pyasn1-modules
+pycparser
+pycrypto
+pydantic
+pydateutil
+pygments
+pyjson5
+pyjwt
+pymysql
+pynacl
+pyodbc
+pyparsing
+pyrsistent
+py-serde
+pytables
+pytest-py
+python3-openid
+python-ecdsa
+python-email-validator
+python-fastjsonschema
+python-future
+python-graphviz
+python-hyperlink
+python-jose
+python-lz4
+python-markdown
+python-markdownify
+python-nameparser
+python-nvd3
+python-pathspec
+python-prompt-toolkit
+python-pypdf
+python-rison
+python-rsa
+python-tabulate
+pytz
+pyxdg
+pyyaml
+pyzmq
+redis-py
+requests
+retry
+rfc3967
+rich
+sacremoses
+scikit-learn
+scipy
+setuptools
+sigstore-python
+simplejson
+six
+smart_open
+soupsieve
+sqlalchemy_jsonfield
+sqlalchemy-utils
+sqlparse
+stack_data
+tensorflow-addons
+tinycss2
+toml
+tomlkit
+toolbelt
+toolz
+tqdm
+typing_extensions
+underscore
+uritemplate
+urlextract
+urllib3
+validators
+w3lib
+websocket-client
+wheel
+wtforms
+xlrd
+yarl
+zipp

From 16338dcbe8c04209badcf123d23eef86a64517fe Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Sun, 20 Jul 2025 23:03:36 +0000
Subject: [PATCH 002/134] feat: Add OSS-Fuzz submodule tracking main branch

---
 .gitmodules   | 4 ++++
 fuzz/oss-fuzz | 1 +
 2 files changed, 5 insertions(+)
 create mode 160000 fuzz/oss-fuzz

diff --git a/.gitmodules b/.gitmodules
index 5b761d3..1683a3c 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +1,7 @@
 [submodule "UniTSyn"]
 	path = UniTSyn
 	url = https://github.com/SecurityLab-UCD/UniTSyn.git
+[submodule "fuzz/oss-fuzz"]
+	path = fuzz/oss-fuzz
+	url = https://github.com/google/oss-fuzz.git
+	branch = main
diff --git a/fuzz/oss-fuzz b/fuzz/oss-fuzz
new file mode 160000
index 0000000..1351fe0
--- /dev/null
+++ b/fuzz/oss-fuzz
@@ -0,0 +1 @@
+Subproject commit 1351fe0fbefb2965b64d51411562fa6df86bcc86

From ca103e9263e14c51cff07edece6da79b638a683e Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Mon, 21 Jul 2025 06:46:24 +0000
Subject: [PATCH 003/134] chore: Switch oss-fuzz submodule to personal fork

---
 fuzz/oss-fuzz | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fuzz/oss-fuzz b/fuzz/oss-fuzz
index 1351fe0..c3d3fb7 160000
--- a/fuzz/oss-fuzz
+++ b/fuzz/oss-fuzz
@@ -1 +1 @@
-Subproject commit 1351fe0fbefb2965b64d51411562fa6df86bcc86
+Subproject commit c3d3fb70f9ece4d31e623ce0f29bc0dfd34e0af6

From f39e72744df2d08f3c3349bb8ee80520dd87c77d Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Mon, 21 Jul 2025 06:47:24 +0000
Subject: [PATCH 004/134]  Switch oss-fuzz submodule to personal fork

---
 .gitmodules | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.gitmodules b/.gitmodules
index 1683a3c..82926c7 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -3,5 +3,5 @@
 	url = https://github.com/SecurityLab-UCD/UniTSyn.git
 [submodule "fuzz/oss-fuzz"]
 	path = fuzz/oss-fuzz
-	url = https://github.com/google/oss-fuzz.git
+	url = https://github.com/joyguoguo/oss-fuzz.git
 	branch = main

From d691eeabc9996e62824814d22caee45ef8eed2ff Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Mon, 21 Jul 2025 23:48:08 +0000
Subject: [PATCH 005/134] move the valid_project file

---
 data/valid_projects.txt | 234 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 234 insertions(+)
 create mode 100644 data/valid_projects.txt

diff --git a/data/valid_projects.txt b/data/valid_projects.txt
new file mode 100644
index 0000000..94fdcd5
--- /dev/null
+++ b/data/valid_projects.txt
@@ -0,0 +1,234 @@
+abseil-py
+adal
+aiohttp
+aniso8601
+ansible
+argcomplete
+arrow-py
+asn1crypto
+asteval
+astroid
+asttokens
+attrs
+autoflake
+autopep8
+azure-sdk-for-python
+babel
+black
+botocore
+bottleneck
+bz2file
+cachetools
+cffi
+chardet
+charset_normalizer
+click
+cloud-custodian
+configparser
+connexion
+coveragepy
+croniter
+cryptography
+cssselect
+dask
+decorator
+defusedxml
+digest
+dill
+distlib
+dnspython
+docutils
+ecdsa-python
+et-xmlfile
+face
+filelock
+filesystem_spec
+flask
+flask-jwt-extended
+flask-restx
+flask-wtf
+fonttools
+ftfy
+g-api-auth-httplib2
+g-api-auth-library-python
+g-api-pubsub
+g-api-py-api-common-protos
+g-api-py-oauthlib
+g-api-python-bigquery-storage
+g-api-python-client
+g-api-python-cloud-core
+g-api-python-firestore
+g-api-python-tasks
+g-api-resource-manager
+g-api-resumable-media-python
+g-api-secret-manager
+g-apis-py-api-core
+gast
+gc-iam
+gcloud-error-py
+g-cloud-logging-py
+gcp-python-cloud-storage
+genshi
+gitdb
+glom
+gprof2dot
+g-py-bigquery
+g-py-crc32c
+grpc-py
+gunicorn
+h11
+h5py
+hiredis-py
+html2text
+html5lib-python
+httpcore
+httpretty
+httpx
+idna
+ijson
+importlib_metadata
+iniconfig
+ipaddress
+ipykernel
+ipython
+isodate
+itsdangerous
+jedi
+jinja2
+jmespathpy
+joblib
+jsmin
+jupyter-nbconvert
+jupyter_server
+kafka
+keras
+kiwisolver
+lark-parser
+libcst
+looker-sdk
+lxml
+mako
+markupsafe
+matplotlib
+mccabe
+mdit-py-plugins
+mdurl
+more-itertools
+mrab-regex
+msal
+msgpack-python
+multidict
+mutagen
+nbclassic
+nbformat
+netaddr-py
+networkx
+ntlm2
+ntlm-auth
+numexpr
+numpy
+oauth2
+oauthlib
+olefile
+openapi-schema-validator
+opencensus-python
+openpyxl
+opt_einsum
+oracle-py-cx
+orjson
+oscrypto
+packaging
+pandas
+paramiko
+parse
+parsimonious
+pasta
+pathlib2
+pdoc
+pem
+pendulum
+pip
+ply
+protobuf-python
+proto-plus-python
+psqlparse
+psutil
+psycopg2
+pyasn1
+pyasn1-modules
+pycparser
+pycrypto
+pydantic
+pydateutil
+pygments
+pyjson5
+pyjwt
+pymysql
+pynacl
+pyodbc
+pyparsing
+pyrsistent
+py-serde
+pytables
+pytest-py
+python3-openid
+python-ecdsa
+python-email-validator
+python-fastjsonschema
+python-future
+python-graphviz
+python-hyperlink
+python-jose
+python-lz4
+python-markdown
+python-markdownify
+python-nameparser
+python-nvd3
+python-pathspec
+python-prompt-toolkit
+python-pypdf
+python-rison
+python-rsa
+python-tabulate
+pytz
+pyxdg
+pyyaml
+pyzmq
+redis-py
+requests
+retry
+rfc3967
+rich
+sacremoses
+scikit-learn
+scipy
+setuptools
+sigstore-python
+simplejson
+six
+smart_open
+soupsieve
+sqlalchemy_jsonfield
+sqlalchemy-utils
+sqlparse
+stack_data
+tensorflow-addons
+tinycss2
+toml
+tomlkit
+toolbelt
+toolz
+tqdm
+typing_extensions
+underscore
+uritemplate
+urlextract
+urllib3
+validators
+w3lib
+websocket-client
+wheel
+wtforms
+xlrd
+yarl
+zipp

From 25b01915000c45531be9e07a71cfe6d45963441f Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Mon, 21 Jul 2025 23:48:36 +0000
Subject: [PATCH 006/134] move the .py file

---
 fuzz/fuzz_runner_pool.py | 270 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 270 insertions(+)
 create mode 100644 fuzz/fuzz_runner_pool.py

diff --git a/fuzz/fuzz_runner_pool.py b/fuzz/fuzz_runner_pool.py
new file mode 100644
index 0000000..abd4b30
--- /dev/null
+++ b/fuzz/fuzz_runner_pool.py
@@ -0,0 +1,270 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""
+fuzz_runner_pool.py
+
+并行批量执行 OSS-Fuzz 本地测试全流程。使用 multiprocessing.Pool 将项目
+分发到多个 CPU核心上同时处理。
+
+用法: python3 fuzz_runner_pool.py [项目列表文件] [--sanitizer 类型] [--workers N]
+示例: python3 fuzz_runner_pool.py /home/jiayiguo/FuzzAug/data/valid_projects.txt --workers 4
+"""
+
+import os
+import sys
+import subprocess
+import argparse
+from datetime import datetime
+from pathlib import Path
+from typing import List, Optional, Tuple
+from multiprocessing import Pool, cpu_count
+
+# --- Global configuration (hard-coded paths; no command-line option in this script overrides them) ---
+HOME_DIR = Path.home()
+OSS_FUZZ_DIR = HOME_DIR / "FuzzAug" / "fuzz"/"oss-fuzz"
+LOG_DIR = OSS_FUZZ_DIR / "script_pool_batch_logs"
+
+def setup_logging(project_name: str) -> Path:
+    """Ensure LOG_DIR exists and return a timestamped log file path for one project (the file itself is not created here)."""
+    LOG_DIR.mkdir(parents=True, exist_ok=True)
+    try:
+        LOG_DIR.chmod(0o777)
+    except PermissionError:
+        # chmod is best-effort: several workers may attempt it; a PermissionError is ignored silently (no warning is emitted)
+        pass
+        
+    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
+    log_file_path = LOG_DIR / f"oss_fuzz_{project_name}_{timestamp}.log"
+    return log_file_path
+
+def log_and_print(message: str, log_file: Path, to_stdout: bool = True):
+    """Append the message to the log file and, when to_stdout is true, also print it to the console."""
+    if to_stdout:
+        # Prefix the PID so that interleaved output from parallel workers can be told apart
+        print(f"[PID:{os.getpid()}] {message}")
+    with open(log_file, "a", encoding="utf-8") as f:
+        f.write(message + "\n")
+
+def run_command(
+    cmd: str, 
+    log_msg: str, 
+    log_file: Path, 
+    allowed_exit_codes: Optional[List[int]] = None,
+    auto_confirm: bool = True  # when true, the command's stdin is fed by `yes`
+) -> bool:
+    """Run a shell command, appending its combined stdout/stderr to the log file line by line; return True on exit code 0 or an allowed code, False otherwise (including on exceptions)."""
+    if allowed_exit_codes is None:
+        allowed_exit_codes = []
+
+    log_and_print(f"▶️  {log_msg}...", log_file, to_stdout=False)
+    log_and_print(f"   $ {cmd}", log_file, to_stdout=False)
+
+    try:
+        # Auto-confirm: pipe `yes` into the command (the command line logged above is the one without this prefix)
+        if auto_confirm:
+            cmd = f"yes | {cmd}"
+
+        process = subprocess.Popen(
+            cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+            text=True, encoding='utf-8', errors='replace', bufsize=1
+        )
+        
+        with open(log_file, "a", encoding="utf-8") as f:
+            for line in iter(process.stdout.readline, ''):
+                f.write(line) # written to the log only, to keep the console output of parallel workers readable
+
+        process.wait()
+        exit_code = process.returncode
+
+        if exit_code == 0:
+            log_and_print(f"✅ 命令成功完成。", log_file, to_stdout=False)
+            return True
+        elif exit_code in allowed_exit_codes:
+            log_and_print(f"ℹ️  命令以预期状态退出: {exit_code}", log_file, to_stdout=False)
+            return True
+        else:
+            log_and_print(f"❌ 命令执行失败 (退出码: {exit_code})", log_file)
+            return False
+    except Exception as e:
+        log_and_print(f"💥 执行命令时发生异常: {e}", log_file)
+        return False
+
+def discover_fuzz_targets(project_name: str) -> List[str]:
+    """Return the fuzz target names of a project: fuzz_* executables without a dot in build/out, else fuzz_*.py sources containing "atheris.Setup"."""
+    project_out_dir = OSS_FUZZ_DIR / "build" / "out" / project_name
+    project_src_dir = OSS_FUZZ_DIR / "projects" / project_name
+    targets = []
+
+    if project_out_dir.is_dir():
+        for f in project_out_dir.iterdir():
+            if (f.is_file() and os.access(f, os.X_OK) and 
+                f.name.startswith("fuzz_") and '.' not in f.name):
+                targets.append(f.name)
+
+    if not targets and project_src_dir.is_dir():
+        for py_file in project_src_dir.glob("fuzz_*.py"):
+            try:
+                with open(py_file, "r", encoding="utf-8") as f:
+                    if "atheris.Setup" in f.read():
+                        targets.append(py_file.stem)
+            except Exception as e:
+                # NOTE: this warning goes to the worker's stdout, not to the per-project log file
+                print(f"⚠️  警告: 读取文件 {py_file} 失败: {e}")
+    return targets
+
+def run_project_workflow(project_name: str, sanitizer: str) -> Tuple[bool, str]:
+    """
+    Run the complete workflow for one project (pool worker function).
+    Steps: build the Docker image, build the fuzzers, discover the targets, run each target.
+
+    Returns:
+        A tuple (is_success: bool, project_name: str); False only when a build step or the chdir failed.
+    """
+    log_file = setup_logging(project_name)
+
+    # Announce the project at the start of the worker so its progress can be traced
+    log_and_print("=" * 60, log_file)
+    log_and_print(f"🚀 开始处理项目: {project_name}", log_file)
+    log_and_print(f"📝 日志文件: {log_file}", log_file)
+    log_and_print("=" * 60, log_file)
+
+    # Every worker process sets its own working directory
+    try:
+        os.chdir(OSS_FUZZ_DIR)
+    except FileNotFoundError:
+        log_and_print(f"❌ 严重错误: OSS-Fuzz 目录 '{OSS_FUZZ_DIR}' 不存在！", log_file)
+        return (False, project_name)
+
+    # Step 1: build the Docker image (prompts are auto-confirmed)
+    if not run_command(
+        f"python3 infra/helper.py build_image {project_name}",
+        f"步骤1/5: 构建 {project_name} 的Docker镜像", log_file,
+        auto_confirm=True  # answer every prompt automatically
+    ):
+        log_and_print(f"❌ 项目 {project_name} 构建镜像失败", log_file)
+        return (False, project_name)
+
+    # Step 2: build the fuzzers with the selected sanitizer (prompts are auto-confirmed)
+    if not run_command(
+        f"python3 infra/helper.py build_fuzzers --sanitizer {sanitizer} {project_name}",
+        f"步骤2/5: 编译 {project_name} 的fuzzer (sanitizer={sanitizer})", log_file,
+        auto_confirm=True  # answer every prompt automatically
+    ):
+        log_and_print(f"❌ 项目 {project_name} 编译fuzzer失败", log_file)
+        return (False, project_name)
+
+    # Step 3: discover the fuzz targets
+    log_and_print(f"🔍 正在为 {project_name} 自动发现fuzz目标...", log_file)
+    fuzz_targets = discover_fuzz_targets(project_name)
+
+    if not fuzz_targets:
+        log_and_print(f"⚠️  警告: {project_name} 未找到任何fuzz目标！跳过运行步骤。", log_file)
+        return (True, project_name)
+
+    log_and_print(f"✅ 发现目标: {', '.join(fuzz_targets)}", log_file)
+
+    # Step 4: run every target; a failing target does not fail the project
+    for i, target in enumerate(fuzz_targets, 1):
+        run_command(
+            f"python3 infra/helper.py run_fuzzer {project_name} {target} -- -max_total_time=60",
+            f"步骤4/5: 运行目标 {i}/{len(fuzz_targets)} [{target}] (60秒)", log_file,
+            allowed_exit_codes=[1, 124],
+            auto_confirm=True  # answer every prompt automatically
+        )
+
+    # Step 5: coverage report (not implemented yet)
+    log_and_print("步骤5/5: 生成覆盖率报告 (当前版本暂未实现)", log_file)
+    log_and_print(f"✅ 项目 {project_name} 处理完成！", log_file)
+    return (True, project_name)
+
+def main():
+    """
+    Entry point: parse the arguments, load the project list, fan the projects out to a process pool and print a summary.
+    """
+    parser = argparse.ArgumentParser(
+        description="OSS-Fuzz 并行批量测试工具",
+        formatter_class=argparse.RawTextHelpFormatter,
+        epilog="示例:\n  python3 fuzz_runner_pool.py valid_projects.txt --workers 4\n  python3 fuzz_runner_pool.py my_projects.txt --sanitizer undefined"
+    )
+    parser.add_argument(
+        "project_list_file", nargs="?", default="valid_projects.txt",
+        help="包含待测试项目列表的文本文件。(默认: valid_projects.txt)"
+    )
+    parser.add_argument(
+        "--sanitizer", default="address", choices=["address", "memory", "undefined", "coverage"],
+        help="要使用的 sanitizer 类型。(默认: address)"
+    )
+    parser.add_argument(
+        "--workers", type=int, default=cpu_count(),
+        help=f"并发执行的工作进程数。(默认: 系统CPU核心数, 即 {cpu_count()})"
+    )
+    args = parser.parse_args()
+
+    # --- Environment check ---
+    if not OSS_FUZZ_DIR.is_dir():
+        print(f"❌ 错误: OSS-Fuzz 目录 '{OSS_FUZZ_DIR}' 不存在！")
+        sys.exit(1)
+
+    project_list_path = Path(args.project_list_file)
+    if not project_list_path.is_file():
+        print(f"❌ 错误: 项目列表文件 '{project_list_path}' 不存在！")
+        sys.exit(1)
+
+    print(f"✅ 环境检查通过。将使用 {args.workers} 个并行工作进程。")
+
+    # --- Read the project list (blank lines and lines starting with '#' are skipped) ---
+    try:
+        with open(project_list_path, "r", encoding="utf-8") as f:
+            projects = [line.strip() for line in f if line.strip() and not line.strip().startswith("#")]
+    except Exception as e:
+        print(f"❌ 读取项目列表文件时出错: {e}")
+        sys.exit(1)
+
+    if not projects:
+        print("⚠️  警告: 项目列表为空，无可执行任务。")
+        sys.exit(0)
+
+    # Build the starmap argument list: one (project_name, sanitizer) tuple per project
+    tasks = [(project, args.sanitizer) for project in projects]
+    total_projects = len(tasks)
+
+    print(f"\n🚀 即将并行处理 {total_projects} 个项目...")
+
+    # --- Run the projects in parallel ---
+    # The with statement makes sure the pool is shut down when the block is left
+    with Pool(processes=args.workers) as pool:
+        # starmap blocks until every task has finished and
+        # unpacks each tuple of tasks into the worker's positional arguments
+        results = pool.starmap(run_project_workflow, tasks)
+
+    # --- Collect and print the per-project results ---
+    failed_projects = []
+    for success, project_name in results:
+        if success:
+            print(f"✅ 项目 {project_name} 成功完成")
+        else:
+            print(f"❌ 项目 {project_name} 处理失败")
+            failed_projects.append(project_name)
+
+    # --- Final summary ---
+    fail_count = len(failed_projects)
+    success_count = total_projects - fail_count
+
+    print("\n" + "=" * 60)
+    print("🎉 批量处理完成！")
+    print(f"📊 总计: {total_projects} 个项目")
+    print(f"✅ 成功: {success_count}")
+    print(f"❌ 失败: {fail_count}")
+
+    if failed_projects:
+        print("📛 失败项目列表:")
+        for proj in sorted(failed_projects):
+            print(f"  • {proj}")
+        print("\n💡 提示: 失败项目的详细信息请查看对应的日志文件。")
+        print(f"   日志目录: {LOG_DIR}")
+
+if __name__ == "__main__":
+    # Keep the main() call inside this guard: on platforms where multiprocessing re-imports the main module
+    # in child processes, an unguarded call would run main() again in every worker.
+    main()
\ No newline at end of file

From fcf80a94403de8b1f995b56907b410ebc4537cfd Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Mon, 21 Jul 2025 23:50:42 +0000
Subject: [PATCH 007/134] create build_oss_fuzz.py

---
 fuzz/build_oss_fuzz.py | 131 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 131 insertions(+)
 create mode 100644 fuzz/build_oss_fuzz.py

diff --git a/fuzz/build_oss_fuzz.py b/fuzz/build_oss_fuzz.py
new file mode 100644
index 0000000..525c556
--- /dev/null
+++ b/fuzz/build_oss_fuzz.py
@@ -0,0 +1,131 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""
+build_oss_fuzz.py
+
+并行构建 OSS-Fuzz 项目（Docker 镜像和 Fuzzer 编译）。
+使用 multiprocessing.Pool 将项目分发到多个 CPU 核心同时处理。
+
+用法: python3 build_oss_fuzz.py [项目列表文件] [--sanitizer 类型] [--workers N]
+示例: python3 build_oss_fuzz.py valid_projects.txt --sanitizer address --workers 4
+"""
+
+import os
+import sys
+import subprocess
+import argparse
+from datetime import datetime
+from pathlib import Path
+from typing import List, Optional, Tuple
+from multiprocessing import Pool, cpu_count
+
+# --- 全局配置 ---
+HOME_DIR = Path.home()
+OSS_FUZZ_DIR = HOME_DIR / "FuzzAug" / "fuzz" / "oss-fuzz"
+LOG_DIR = OSS_FUZZ_DIR / "build_logs"
+
+def setup_logging(project_name: str) -> Path:
+    """为单个项目创建带时间戳的日志文件"""
+    LOG_DIR.mkdir(parents=True, exist_ok=True)
+    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
+    return LOG_DIR / f"build_{project_name}_{timestamp}.log"
+
+def log_and_print(message: str, log_file: Path, to_stdout: bool = True):
+    """将消息写入日志并打印到控制台"""
+    if to_stdout:
+        print(f"[PID:{os.getpid()}] {message}")
+    with open(log_file, "a", encoding="utf-8") as f:
+        f.write(f"{datetime.now().isoformat()} {message}\n")
+
+def run_command(
+    cmd: str, 
+    log_msg: str, 
+    log_file: Path,
+    allowed_exit_codes: Optional[List[int]] = None
+) -> bool:
+    """执行 shell 命令并实时记录输出"""
+    allowed_exit_codes = allowed_exit_codes or []
+    log_and_print(f"▶️ {log_msg}...", log_file, to_stdout=False)
+    log_and_print(f"   $ {cmd}", log_file, to_stdout=False)
+
+    try:
+        process = subprocess.Popen(
+            f"yes | {cmd}",  # 自动确认所有提示
+            shell=True,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT,
+            text=True,
+            encoding="utf-8",
+            errors="replace"
+        )
+        with open(log_file, "a", encoding="utf-8") as f:
+            for line in iter(process.stdout.readline, ""):
+                f.write(line)
+        process.wait()
+        exit_code = process.returncode
+        if exit_code in [0, *allowed_exit_codes]:
+            log_and_print(f"✅ 命令成功完成", log_file, to_stdout=False)
+            return True
+        log_and_print(f"❌ 命令失败 (退出码: {exit_code})", log_file)
+        return False
+    except Exception as e:
+        log_and_print(f"💥 执行异常: {e}", log_file)
+        return False
+
+def build_project(project_name: str, sanitizer: str) -> Tuple[bool, str]:
+    """单个项目的构建工作流"""
+    log_file = setup_logging(project_name)
+    os.chdir(OSS_FUZZ_DIR)
+    
+    log_and_print("="*60, log_file)
+    log_and_print(f"🔨 开始构建项目: {project_name}", log_file)
+    log_and_print(f"📝 日志路径: {log_file}", log_file)
+    log_and_print("="*60, log_file)
+
+    # 1. 构建 Docker 镜像
+    if not run_command(
+        f"python3 infra/helper.py build_image {project_name}",
+        "步骤1/2: 构建 Docker 镜像",
+        log_file
+    ):
+        return (False, project_name)
+
+    # 2. 编译 Fuzzer
+    if not run_command(
+        f"python3 infra/helper.py build_fuzzers --sanitizer {sanitizer} {project_name}",
+        f"步骤2/2: 编译 Fuzzer (sanitizer={sanitizer})",
+        log_file
+    ):
+        return (False, project_name)
+
+    log_and_print(f"✅ 项目 {project_name} 构建完成", log_file)
+    return (True, project_name)
+
+def main():
+    parser = argparse.ArgumentParser(description="OSS-Fuzz 并行构建工具")
+    parser.add_argument("project_list", help="项目列表文件路径")
+    parser.add_argument("--sanitizer", default="address", choices=["address", "memory", "undefined"])
+    parser.add_argument("--workers", type=int, default=cpu_count())
+    args = parser.parse_args()
+
+    # 读取项目列表
+    try:
+        with open(args.project_list, "r") as f:
+            projects = [line.strip() for line in f if line.strip()]
+    except Exception as e:
+        print(f"❌ 读取项目列表失败: {e}")
+        sys.exit(1)
+
+    # 并行构建
+    with Pool(args.workers) as pool:
+        results = pool.starmap(build_project, [(p, args.sanitizer) for p in projects])
+
+    # 输出结果
+    failed = [p for success, p in results if not success]
+    print(f"\n📊 构建完成: 成功 {len(projects)-len(failed)}/{len(projects)}")
+    if failed:
+        print("❌ 失败项目: " + ", ".join(failed))
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file

From 3e8e7f4ca758b2f42ab8df7f2c371fadf4b6c06c Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Mon, 21 Jul 2025 23:53:27 +0000
Subject: [PATCH 008/134] create run_fuzz_target.py

---
 fuzz/oss-fuzz           |   2 +-
 fuzz/run_fuzz_target.py | 140 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 141 insertions(+), 1 deletion(-)
 create mode 100644 fuzz/run_fuzz_target.py

diff --git a/fuzz/oss-fuzz b/fuzz/oss-fuzz
index c3d3fb7..0b81ba5 160000
--- a/fuzz/oss-fuzz
+++ b/fuzz/oss-fuzz
@@ -1 +1 @@
-Subproject commit c3d3fb70f9ece4d31e623ce0f29bc0dfd34e0af6
+Subproject commit 0b81ba5d97ae3d1402744e00b1d9075fed7b7f1e
diff --git a/fuzz/run_fuzz_target.py b/fuzz/run_fuzz_target.py
new file mode 100644
index 0000000..ae7b036
--- /dev/null
+++ b/fuzz/run_fuzz_target.py
@@ -0,0 +1,140 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""
+run_fuzz_target.py
+
+并行运行 OSS-Fuzz 测试目标。
+使用 multiprocessing.Pool 分发任务到多个 CPU 核心。
+
+用法: python3 run_fuzz_target.py [项目列表文件] [--timeout 秒] [--workers N]
+示例: python3 run_fuzz_target.py valid_projects.txt --timeout 120 --workers 4
+"""
+
+import os
+import sys
+import subprocess
+import argparse
+from datetime import datetime
+from pathlib import Path
+from typing import List, Optional, Tuple
+from multiprocessing import Pool, cpu_count
+
+# --- 全局配置 ---
+HOME_DIR = Path.home()
+OSS_FUZZ_DIR = HOME_DIR / "FuzzAug" / "fuzz" / "oss-fuzz"
+LOG_DIR = OSS_FUZZ_DIR / "run_logs"
+
+def setup_logging(project_name: str) -> Path:
+    """创建带时间戳的运行日志"""
+    LOG_DIR.mkdir(parents=True, exist_ok=True)
+    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
+    return LOG_DIR / f"run_{project_name}_{timestamp}.log"
+
+def log_and_print(message: str, log_file: Path, to_stdout: bool = True):
+    """日志和控制台输出"""
+    if to_stdout:
+        print(f"[PID:{os.getpid()}] {message}")
+    with open(log_file, "a", encoding="utf-8") as f:
+        f.write(f"{datetime.now().isoformat()} {message}\n")
+
+def run_command(
+    cmd: str, 
+    log_msg: str, 
+    log_file: Path,
+    allowed_exit_codes: Optional[List[int]] = None
+) -> bool:
+    """执行命令并实时记录输出"""
+    allowed_exit_codes = allowed_exit_codes or []
+    log_and_print(f"▶️ {log_msg}...", log_file, to_stdout=False)
+    log_and_print(f"   $ {cmd}", log_file, to_stdout=False)
+
+    try:
+        process = subprocess.Popen(
+            cmd,
+            shell=True,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT,
+            text=True,
+            encoding="utf-8",
+            errors="replace"
+        )
+        with open(log_file, "a", encoding="utf-8") as f:
+            for line in iter(process.stdout.readline, ""):
+                f.write(line)
+        process.wait()
+        return process.returncode in [0, *allowed_exit_codes]
+    except Exception as e:
+        log_and_print(f"💥 执行异常: {e}", log_file)
+        return False
+
+def discover_targets(project_name: str) -> List[str]:
+    """发现可用的 Fuzz 目标"""
+    out_dir = OSS_FUZZ_DIR / "build" / "out" / project_name
+    targets = []
+    if out_dir.exists():
+        for f in out_dir.iterdir():
+            if f.is_file() and f.name.startswith("fuzz_") and os.access(f, os.X_OK):
+                targets.append(f.name)
+    return targets
+
+def run_project(project_name: str, timeout: int) -> Tuple[bool, str]:
+    """单个项目的测试工作流"""
+    log_file = setup_logging(project_name)
+    os.chdir(OSS_FUZZ_DIR)
+
+    log_and_print("="*60, log_file)
+    log_and_print(f"🚀 开始测试项目: {project_name}", log_file)
+    log_and_print(f"📝 日志路径: {log_file}", log_file)
+    log_and_print("="*60, log_file)
+
+    # 1. 发现测试目标
+    targets = discover_targets(project_name)
+    if not targets:
+        log_and_print("⚠️ 未发现测试目标", log_file)
+        return (False, project_name)
+    log_and_print(f"🔍 发现 {len(targets)} 个测试目标", log_file)
+
+    # 2. 运行所有目标
+    all_success = True
+    for i, target in enumerate(targets, 1):
+        cmd = f"python3 infra/helper.py run_fuzzer {project_name} {target} -- -max_total_time={timeout}"
+        success = run_command(
+            cmd,
+            f"运行目标 [{i}/{len(targets)}] {target} (超时={timeout}s)",
+            log_file,
+            allowed_exit_codes=[1, 124]  # 允许超时退出
+        )
+        all_success &= success
+
+    # 3. 生成报告（占位）
+    log_and_print("📊 覆盖率报告生成 (当前版本暂未实现)", log_file)
+    return (all_success, project_name)
+
+def main():
+    parser = argparse.ArgumentParser(description="OSS-Fuzz 并行测试工具")
+    parser.add_argument("project_list", help="项目列表文件路径")
+    parser.add_argument("--timeout", type=int, default=60, help="单目标测试超时时间（秒）")
+    parser.add_argument("--workers", type=int, default=cpu_count())
+    args = parser.parse_args()
+
+    # 读取项目列表
+    try:
+        with open(args.project_list) as f:
+            projects = [line.strip() for line in f if line.strip()]
+    except Exception as e:
+        print(f"❌ 读取项目列表失败: {e}")
+        sys.exit(1)
+
+    # 并行运行
+    with Pool(args.workers) as pool:
+        results = pool.starmap(run_project, [(p, args.timeout) for p in projects])
+
+    # 输出结果
+    failed = [p for success, p in results if not success]
+    print(f"\n📊 测试完成: 成功 {len(projects)-len(failed)}/{len(projects)}")
+    if failed:
+        print("❌ 失败项目: " + ", ".join(failed))
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file

From def645e95f0040ea862ce635651ada09780d37f4 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Tue, 22 Jul 2025 00:15:31 +0000
Subject: [PATCH 009/134] split the pool.py into build_oss_fuzz and
 run_fuzz_target

---
 fuzz/build_oss_fuzz.py   | 4 +++-
 fuzz/fuzz_runner_pool.py | 2 +-
 fuzz/run_fuzz_target.py  | 2 +-
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/fuzz/build_oss_fuzz.py b/fuzz/build_oss_fuzz.py
index 525c556..7c60d56 100644
--- a/fuzz/build_oss_fuzz.py
+++ b/fuzz/build_oss_fuzz.py
@@ -8,7 +8,9 @@
 使用 multiprocessing.Pool 将项目分发到多个 CPU 核心同时处理。
 
 用法: python3 build_oss_fuzz.py [项目列表文件] [--sanitizer 类型] [--workers N]
-示例: python3 build_oss_fuzz.py valid_projects.txt --sanitizer address --workers 4
+示例: python3 fuzz/build_oss_fuzz.py data/valid_projects.txt \
+    --sanitizer address \
+    --workers 8
 """
 
 import os
diff --git a/fuzz/fuzz_runner_pool.py b/fuzz/fuzz_runner_pool.py
index abd4b30..7c40d61 100644
--- a/fuzz/fuzz_runner_pool.py
+++ b/fuzz/fuzz_runner_pool.py
@@ -8,7 +8,7 @@
 分发到多个 CPU核心上同时处理。
 
 用法: python3 fuzz_runner_pool.py [项目列表文件] [--sanitizer 类型] [--workers N]
-示例: python3 fuzz_runner_pool.py /home/jiayiguo/FuzzAug/data/valid_projects.txt --workers 4
+示例: python3 fuzz/fuzz_runner_pool.py data/valid_projects.txt --workers 4
 """
 
 import os
diff --git a/fuzz/run_fuzz_target.py b/fuzz/run_fuzz_target.py
index ae7b036..9765dbb 100644
--- a/fuzz/run_fuzz_target.py
+++ b/fuzz/run_fuzz_target.py
@@ -8,7 +8,7 @@
 使用 multiprocessing.Pool 分发任务到多个 CPU 核心。
 
 用法: python3 run_fuzz_target.py [项目列表文件] [--timeout 秒] [--workers N]
-示例: python3 run_fuzz_target.py valid_projects.txt --timeout 120 --workers 4
+示例: python3 fuzz/run_fuzz_target.py data/valid_projects.txt --timeout 60 --workers 4
 """
 
 import os

From 1251bcd45a1d3ae84741d9217d439bc55b14756f Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Tue, 22 Jul 2025 00:15:52 +0000
Subject: [PATCH 010/134] delete the .sh files

---
 script_fuzz_py_batch_final.sh | 201 -----------------------------
 script_fuzz_py_final.sh       | 126 ------------------
 valid_projects.txt            | 234 ----------------------------------
 3 files changed, 561 deletions(-)
 delete mode 100644 script_fuzz_py_batch_final.sh
 delete mode 100644 script_fuzz_py_final.sh
 delete mode 100644 valid_projects.txt

diff --git a/script_fuzz_py_batch_final.sh b/script_fuzz_py_batch_final.sh
deleted file mode 100644
index 1d7d9af..0000000
--- a/script_fuzz_py_batch_final.sh
+++ /dev/null
@@ -1,201 +0,0 @@
-#!/bin/bash
-# script_fuzz_py_batch_final.sh
-# 批量执行OSS-Fuzz本地测试全流程：从文件读取项目列表，依次为每个项目自动发现目标 -> 构建镜像 -> 编译fuzzer -> 运行测试 -> 生成覆盖率报告(暂无))
-# 用法：./script_fuzz_py_batch_final.sh [项目列表文件] [sanitizer类型]
-# 示例: ./script_fuzz_py_batch_final.sh valid_projects.txt address
-
-# --- 全局配置 ---
-PROJECT_LIST_FILE="${1:-valid_projects.txt}" # 默认项目列表文件
-SANITIZER="${2:-address}"                   # 默认检测器类型
-OSS_FUZZ_DIR="$HOME/oss-fuzz-py/oss-fuzz"      # OSS-Fuzz目录
-LOG_DIR="$OSS_FUZZ_DIR/script_lz4_batch_logs"        # 所有项目的总日志目录
-FAILED_PROJECTS=()                          # 存储失败项目列表
-
-# --- 环境检查 ---
-check_environment() {
-  if [ ! -d "$OSS_FUZZ_DIR" ]; then
-    echo "❌ 错误: OSS-Fuzz 目录 '$OSS_FUZZ_DIR' 不存在！"
-    return 1
-  fi
-  if [ ! -f "$PROJECT_LIST_FILE" ]; then
-    echo "❌ 错误: 项目列表文件 '$PROJECT_LIST_FILE' 不存在！"
-    return 1
-  fi
-  mkdir -p "$LOG_DIR"
-  chmod 777 "$LOG_DIR" 2>/dev/null || true
-  cd "$OSS_FUZZ_DIR" || return 1
-  echo "✅ 环境检查通过。OSS-Fuzz 目录: $OSS_FUZZ_DIR"
-}
-
-# --- 带日志记录的命令执行 ---
-run_command() {
-  local cmd="$1"
-  local log_msg="$2"
-  local log_file="$3" # 日志文件作为参数传入
-  local allowed_exit="${4:-}"
-
-  echo "▶️ $log_msg..." | tee -a "$log_file"
-  set +e
-  { yes | eval "$cmd" ; } 2>&1 | tee -a "$log_file"
-  local exit_code=${PIPESTATUS[0]}
-  set -e
-
-  if [[ -n "$allowed_exit" && ",$allowed_exit," =~ ",$exit_code," ]]; then
-    echo "ℹ️ 命令以预期状态退出: $exit_code" | tee -a "$log_file"
-    return 0
-  elif [ $exit_code -ne 0 ]; then
-    echo "❌ 命令执行失败: $cmd (退出码: $exit_code)" | tee -a "$log_file"
-    return 1  # 返回错误而不是退出脚本
-  fi
-}
-
-# --- 自动发现 Fuzz 目标 ---
-discover_fuzz_targets() {
-    local project_name="$1"
-    local project_dir="$OSS_FUZZ_DIR/build/out/$project_name"
-    local project_src="$OSS_FUZZ_DIR/projects/$project_name"
-    local targets=()
-
-    if [ -d "$project_dir" ]; then
-        while IFS= read -r -d $'\0' file; do
-            filename=$(basename "$file")
-            if [[ -x "$file" && "$filename" =~ ^fuzz_ && ! "$file" =~ \..*$ ]]; then
-                targets+=("$filename")
-            fi
-        done < <(find "$project_dir" -maxdepth 1 -type f -print0)
-    fi
-
-    if [ ${#targets[@]} -eq 0 ] && [ -d "$project_src" ]; then
-        while IFS= read -r -d $'\0' file; do
-            if grep -q "atheris.Setup" "$file"; then
-                targets+=("$(basename "${file%.*}")")
-            fi
-        done < <(find "$project_src" -name 'fuzz_*.py' -print0)
-    fi
-
-    echo "${targets[@]}"
-}
-
-# --- 单个项目的完整处理流程 ---
-process_project() {
-  local project_name="$1"
-  local log_file="$LOG_DIR/oss_fuzz_${project_name}_$(date +%Y%m%d%H%M%S).log"
-  local project_failed=0
-
-  echo "============================================================" | tee -a "$log_file"
-  echo "🚀 开始处理项目: $project_name" | tee -a "$log_file"
-  echo "📝 日志文件: $log_file" | tee -a "$log_file"
-  echo "============================================================" | tee -a "$log_file"
-
-  #1. 构建Docker镜像
-  if ! run_command \
-    "python3 infra/helper.py build_image $project_name" \
-    "步骤1/5: 构建 $project_name 的Docker镜像" \
-    "$log_file"; then
-    echo "❌ 项目 $project_name 构建镜像失败，跳过后续步骤" | tee -a "$log_file"
-    project_failed=1
-  fi
-
-  # 2. 编译带检测器的fuzzer (仅在构建镜像成功后执行)
-  if [ $project_failed -eq 0 ]; then
-    if ! run_command \
-      "python3 infra/helper.py build_fuzzers --sanitizer $SANITIZER $project_name" \
-      "步骤2/5: 编译 $project_name 的fuzzer (sanitizer=$SANITIZER)" \
-      "$log_file"; then
-      echo "❌ 项目 $project_name 编译fuzzer失败，跳过后续步骤" | tee -a "$log_file"
-      project_failed=1
-    fi
-  fi
-
-  # 3. 自动发现目标 (仅在编译成功后执行)
-  if [ $project_failed -eq 0 ]; then
-    echo "🔍 正在为 $project_name 自动发现fuzz目标..." | tee -a "$log_file"
-    FUZZ_TARGETS=($(discover_fuzz_targets "$project_name"))
-
-    if [ ${#FUZZ_TARGETS[@]} -eq 0 ]; then
-      echo "⚠️  警告: 项目 $project_name 未找到任何fuzz目标！跳过运行步骤。" | tee -a "$log_file"
-    else
-      echo "✅ 发现目标: ${FUZZ_TARGETS[*]}" | tee -a "$log_file"
-      
-      # 4. 遍历运行所有目标 默认测试60秒
-      for target in "${FUZZ_TARGETS[@]}"; do
-        if ! run_command \
-          "python3 infra/helper.py run_fuzzer $project_name $target -- -max_total_time=60" \
-          "步骤3/5: 运行目标 [$target] (60秒超时)" \
-          "$log_file" \
-          "124,1"; then  # 允许超时(124)和发现崩溃(1)
-          echo "⚠️  警告: 目标 [$target] 运行失败，继续下一个目标" | tee -a "$log_file"
-        fi
-      done
-    fi
-  fi
-
-  # 5. 生成覆盖率报告 (已注释掉，与原脚本保持一致)
-  # [保留原有注释的覆盖率代码]
-
-  if [ $project_failed -eq 0 ]; then
-    echo "✅ 项目 $project_name 处理完成！" | tee -a "$log_file"
-   
-  else
-    echo "❌ 项目 $project_name 处理失败！" | tee -a "$log_file"
-    
-  fi
-  
-  echo "------------------------------------------------------------"
-  return $project_failed
-}
-
-# --- 主流程 ---
-main() {
-if ! check_environment; then
-    echo "❌ 环境检查失败，脚本终止"
-    exit 1
-  fi
-
-  # 新增：读取项目列表到数组（过滤空行和注释行）
-  local PROJECTS=()
-  while IFS= read -r line || [[ -n "$line" ]]; do
-    if [[ -z "$line" || "$line" =~ ^# ]]; then
-      continue
-    fi
-    PROJECTS+=("$line")
-  done < "$PROJECT_LIST_FILE"
-
-  # 项目总数从数组长度获取（原逻辑从文件行数获取）
-  local total_projects=${#PROJECTS[@]}
-  local current_project_num=0
-  local success_count=0
-  local fail_count=0
-
-  # 新增：遍历数组处理项目（替代原while读取文件的循环）
-  for project_name in "${PROJECTS[@]}"; do
-
-    current_project_num=$((current_project_num + 1))
-    echo ">>> [ $current_project_num / $total_projects ] 开始处理项目: $project_name <<<"
-    
-    if process_project "$project_name"; then
-      echo "✅ [$current_project_num/$total_projects] 项目 $project_name 成功完成"
-      ( success_count=$[ $success_count + 1 ])
-    else
-      echo "❌ [$current_project_num/$total_projects] 项目 $project_name 处理失败"
-      FAILED_PROJECTS+=("$project_name")
-      ( success_count=$[ $success_count + 1 ])
-    fi
-
-  done
-
-  echo "============================================================"
-  echo "🎉 批量处理完成！"
-  echo "📊 总计: $total_projects 个项目"
-  echo "✅ 成功: $success_count"
-  echo "❌ 失败: $fail_count"
-  
-  if [ ${#FAILED_PROJECTS[@]} -gt 0 ]; then
-    echo "📛 失败项目列表:"
-    printf '  • %s\n' "${FAILED_PROJECTS[@]}"
-    echo "💡 提示: 可以重新运行失败项目，检查日志获取详细信息"
-    echo "      日志目录: $LOG_DIR"
-  fi
-}
-
-main "$@"
\ No newline at end of file
diff --git a/script_fuzz_py_final.sh b/script_fuzz_py_final.sh
deleted file mode 100644
index c2bf00a..0000000
--- a/script_fuzz_py_final.sh
+++ /dev/null
@@ -1,126 +0,0 @@
-#!/bin/bash
-# script_fuzz_py_final.sh
-# 执行OSS-Fuzz本地测试全流程：自动发现目标 -> 构建镜像 -> 编译fuzzer -> 运行测试 -> 生成覆盖率报告
-# 用法：script_fuzz_py_final.sh <项目名> [sanitizer类型]
-
-set -e  # 遇到错误立即退出
-
-PROJECT_NAME="${1:-abseil-py}"      # 默认项目名
-SANITIZER="${2:-address}"           # 默认检测器类型
-OSS_FUZZ_DIR="$HOME/oss-fuzz-py/oss-fuzz"        # OSS-Fuzz目录
-LOG_DIR="$OSS_FUZZ_DIR/script_lz4_logs"
-LOG_FILE="$LOG_DIR/oss_fuzz_${PROJECT_NAME}_$(date +%Y%m%d%H%M%S).log"
-# 验证目录有效性
-check_environment() {
-  if [ ! -d "$OSS_FUZZ_DIR" ]; then
-    echo "❌ 错误: $OSS_FUZZ_DIR 目录不存在！"
-    exit 1
-  fi
-  mkdir -p "$LOG_DIR"  # 关键修复：创建日志目录
-  chmod 777 "$LOG_DIR" 2>/dev/null || true  # 宽松权限设置
-  cd "$OSS_FUZZ_DIR" || exit 1
-}
-
-# 带日志记录的命令执行（支持允许的退出码）
-run_command() {
-  local cmd="$1"
-  local log_msg="$2"
-  local allowed_exit="${3:-}"  # 可选：允许的退出码（逗号分隔）
-  
-  echo "▶️ $log_msg..." | tee -a "$LOG_FILE"
-  set +e  # 临时禁用错误退出
-  eval "$cmd" 2>&1 | tee -a "$LOG_FILE"
-  local exit_code=${PIPESTATUS[0]}
-  set -e  # 重新启用错误退出
-  
-  # 检查退出码是否被允许
-  if [[ -n "$allowed_exit" && ",$allowed_exit," =~ ",$exit_code," ]]; then
-    echo "ℹ️ 命令以预期状态退出: $exit_code" | tee -a "$LOG_FILE"
-    return 0
-  elif [ $exit_code -ne 0 ]; then
-    echo "❌ 命令执行失败: $cmd (退出码: $exit_code)" | tee -a "$LOG_FILE"
-    exit 1
-  fi
-}
-
-# 自动发现fuzz目标
-discover_fuzz_targets() {
-   local project_dir="$OSS_FUZZ_DIR/build/out/$PROJECT_NAME"
-    local project_src="$OSS_FUZZ_DIR/projects/$PROJECT_NAME"
-    local targets=()
-
-    # 编译目录扫描：仅匹配"fuzz_"开头的可执行文件
-    if [ -d "$project_dir" ]; then
-        while IFS= read -r -d $'\0' file; do
-            filename=$(basename "$file")
-            if [[ -x "$file" && "$filename" =~ ^fuzz_ && ! "$file" =~ \..*$ ]]; then
-                targets+=("$filename")
-            fi
-        done < <(find "$project_dir" -maxdepth 1 -type f -print0)
-    fi
-
-    # 源码目录扫描：仅匹配"fuzz_*.py"且含Atheris标识
-    if [ ${#targets[@]} -eq 0 ] && [ -d "$project_src" ]; then
-        while IFS= read -r -d $'\0' file; do
-            if grep -q "atheris.Setup" "$file"; then
-                targets+=("$(basename "${file%.*}")")
-            fi
-        done < <(find "$project_src" -name 'fuzz_*.py' -print0)
-    fi
-
-    echo "${targets[@]}"
-}
-
-# 主流程
-main() {
-  check_environment
-  echo "=============================="
-  echo "🚀 开始OSS-Fuzz测试 - 项目: $PROJECT_NAME"
-  echo "📝 日志文件: $LOG_FILE"
-  echo "=============================="
-
-  #1. 构建Docker镜像
-  run_command \
-    "python3 infra/helper.py build_image $PROJECT_NAME" \
-    "步骤1/5: 构建Docker镜像"
-
-  # 2. 编译带检测器的fuzzer
-  run_command \
-    "python3 infra/helper.py build_fuzzers --sanitizer $SANITIZER $PROJECT_NAME" \
-    "步骤2/5: 编译fuzzer (sanitizer=$SANITIZER)"
-
-  # 3. 自动发现目标
-  echo "🔍 自动发现fuzz目标..."
-  FUZZ_TARGETS=($(discover_fuzz_targets))
-  
-  if [ ${#FUZZ_TARGETS[@]} -eq 0 ]; then
-    echo "❌ 未找到任何fuzz目标！检查项目配置" | tee -a "$LOG_FILE"
-    exit 1
-  fi
-
-  echo "✅ 发现目标: ${FUZZ_TARGETS[*]}" | tee -a "$LOG_FILE"
-
-  # 4. 遍历运行所有目标
-  for target in "${FUZZ_TARGETS[@]}"; do
-    run_command \
-      "python3 infra/helper.py run_fuzzer $PROJECT_NAME $target -- -max_total_time=180" \
-      "步骤3/5: 运行目标 [$target] (120秒超时)" \
-      "124,1"  # 允许超时(124)和发现崩溃(1)
-done
-
-  # 5. 生成覆盖率报告
-  # run_command \
-  #   "python3 infra/helper.py build_fuzzers --sanitizer coverage $PROJECT_NAME" \
-  #   "步骤4/5: 编译覆盖率版本"
-  
-  # run_command \
-  #   "python3 infra/helper.py coverage --no-serve $PROJECT_NAME" \
-  #   "步骤5/5: 生成覆盖率报告"
-
-  echo "✅ 所有步骤完成！结果查看:"
-  echo "🔍 测试日志: $LOG_FILE"
-  echo "📊 覆盖率报告(暂无): $OSS_FUZZ_DIR/build/out/$PROJECT_NAME/report/coverage/index.html"
-  echo "💥 崩溃报告: $OSS_FUZZ_DIR/build/out/$PROJECT_NAME/crashes/"
-}
-
-main "$@"
\ No newline at end of file
diff --git a/valid_projects.txt b/valid_projects.txt
deleted file mode 100644
index 94fdcd5..0000000
--- a/valid_projects.txt
+++ /dev/null
@@ -1,234 +0,0 @@
-abseil-py
-adal
-aiohttp
-aniso8601
-ansible
-argcomplete
-arrow-py
-asn1crypto
-asteval
-astroid
-asttokens
-attrs
-autoflake
-autopep8
-azure-sdk-for-python
-babel
-black
-botocore
-bottleneck
-bz2file
-cachetools
-cffi
-chardet
-charset_normalizer
-click
-cloud-custodian
-configparser
-connexion
-coveragepy
-croniter
-cryptography
-cssselect
-dask
-decorator
-defusedxml
-digest
-dill
-distlib
-dnspython
-docutils
-ecdsa-python
-et-xmlfile
-face
-filelock
-filesystem_spec
-flask
-flask-jwt-extended
-flask-restx
-flask-wtf
-fonttools
-ftfy
-g-api-auth-httplib2
-g-api-auth-library-python
-g-api-pubsub
-g-api-py-api-common-protos
-g-api-py-oauthlib
-g-api-python-bigquery-storage
-g-api-python-client
-g-api-python-cloud-core
-g-api-python-firestore
-g-api-python-tasks
-g-api-resource-manager
-g-api-resumable-media-python
-g-api-secret-manager
-g-apis-py-api-core
-gast
-gc-iam
-gcloud-error-py
-g-cloud-logging-py
-gcp-python-cloud-storage
-genshi
-gitdb
-glom
-gprof2dot
-g-py-bigquery
-g-py-crc32c
-grpc-py
-gunicorn
-h11
-h5py
-hiredis-py
-html2text
-html5lib-python
-httpcore
-httpretty
-httpx
-idna
-ijson
-importlib_metadata
-iniconfig
-ipaddress
-ipykernel
-ipython
-isodate
-itsdangerous
-jedi
-jinja2
-jmespathpy
-joblib
-jsmin
-jupyter-nbconvert
-jupyter_server
-kafka
-keras
-kiwisolver
-lark-parser
-libcst
-looker-sdk
-lxml
-mako
-markupsafe
-matplotlib
-mccabe
-mdit-py-plugins
-mdurl
-more-itertools
-mrab-regex
-msal
-msgpack-python
-multidict
-mutagen
-nbclassic
-nbformat
-netaddr-py
-networkx
-ntlm2
-ntlm-auth
-numexpr
-numpy
-oauth2
-oauthlib
-olefile
-openapi-schema-validator
-opencensus-python
-openpyxl
-opt_einsum
-oracle-py-cx
-orjson
-oscrypto
-packaging
-pandas
-paramiko
-parse
-parsimonious
-pasta
-pathlib2
-pdoc
-pem
-pendulum
-pip
-ply
-protobuf-python
-proto-plus-python
-psqlparse
-psutil
-psycopg2
-pyasn1
-pyasn1-modules
-pycparser
-pycrypto
-pydantic
-pydateutil
-pygments
-pyjson5
-pyjwt
-pymysql
-pynacl
-pyodbc
-pyparsing
-pyrsistent
-py-serde
-pytables
-pytest-py
-python3-openid
-python-ecdsa
-python-email-validator
-python-fastjsonschema
-python-future
-python-graphviz
-python-hyperlink
-python-jose
-python-lz4
-python-markdown
-python-markdownify
-python-nameparser
-python-nvd3
-python-pathspec
-python-prompt-toolkit
-python-pypdf
-python-rison
-python-rsa
-python-tabulate
-pytz
-pyxdg
-pyyaml
-pyzmq
-redis-py
-requests
-retry
-rfc3967
-rich
-sacremoses
-scikit-learn
-scipy
-setuptools
-sigstore-python
-simplejson
-six
-smart_open
-soupsieve
-sqlalchemy_jsonfield
-sqlalchemy-utils
-sqlparse
-stack_data
-tensorflow-addons
-tinycss2
-toml
-tomlkit
-toolbelt
-toolz
-tqdm
-typing_extensions
-underscore
-uritemplate
-urlextract
-urllib3
-validators
-w3lib
-websocket-client
-wheel
-wtforms
-xlrd
-yarl
-zipp

From 21017f1c2cf08f1929775b3480bf01320ec0315b Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Tue, 22 Jul 2025 00:45:28 +0000
Subject: [PATCH 011/134] translate to english

---
 fuzz/build_oss_fuzz.py   |  56 +++++++-------
 fuzz/fuzz_runner_pool.py | 158 +++++++++++++++++++--------------------
 fuzz/run_fuzz_target.py  |  62 +++++++--------
 3 files changed, 138 insertions(+), 138 deletions(-)

diff --git a/fuzz/build_oss_fuzz.py b/fuzz/build_oss_fuzz.py
index 7c60d56..1c78a0d 100644
--- a/fuzz/build_oss_fuzz.py
+++ b/fuzz/build_oss_fuzz.py
@@ -4,11 +4,11 @@
 """
 build_oss_fuzz.py
 
-并行构建 OSS-Fuzz 项目（Docker 镜像和 Fuzzer 编译）。
-使用 multiprocessing.Pool 将项目分发到多个 CPU 核心同时处理。
+Parallel build of OSS-Fuzz projects (Docker images and Fuzzer compilation).
+Uses multiprocessing.Pool to distribute projects across multiple CPU cores for concurrent processing.
 
-用法: python3 build_oss_fuzz.py [项目列表文件] [--sanitizer 类型] [--workers N]
-示例: python3 fuzz/build_oss_fuzz.py data/valid_projects.txt \
+Usage: python3 build_oss_fuzz.py [project_list_file] [--sanitizer type] [--workers N]
+Example: python3 fuzz/build_oss_fuzz.py data/valid_projects.txt \
     --sanitizer address \
     --workers 8
 """
@@ -22,19 +22,19 @@
 from typing import List, Optional, Tuple
 from multiprocessing import Pool, cpu_count
 
-# --- 全局配置 ---
+# --- Global configuration ---
 HOME_DIR = Path.home()
 OSS_FUZZ_DIR = HOME_DIR / "FuzzAug" / "fuzz" / "oss-fuzz"
 LOG_DIR = OSS_FUZZ_DIR / "build_logs"
 
 def setup_logging(project_name: str) -> Path:
-    """为单个项目创建带时间戳的日志文件"""
+    """Create a timestamped log file for a single project"""
     LOG_DIR.mkdir(parents=True, exist_ok=True)
     timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
     return LOG_DIR / f"build_{project_name}_{timestamp}.log"
 
 def log_and_print(message: str, log_file: Path, to_stdout: bool = True):
-    """将消息写入日志并打印到控制台"""
+    """Write message to log and print to console"""
     if to_stdout:
         print(f"[PID:{os.getpid()}] {message}")
     with open(log_file, "a", encoding="utf-8") as f:
@@ -46,14 +46,14 @@ def run_command(
     log_file: Path,
     allowed_exit_codes: Optional[List[int]] = None
 ) -> bool:
-    """执行 shell 命令并实时记录输出"""
+    """Execute a shell command and stream output to log in real-time"""
     allowed_exit_codes = allowed_exit_codes or []
     log_and_print(f"▶️ {log_msg}...", log_file, to_stdout=False)
     log_and_print(f"   $ {cmd}", log_file, to_stdout=False)
 
     try:
         process = subprocess.Popen(
-            f"yes | {cmd}",  # 自动确认所有提示
+            f"yes | {cmd}",  # Auto-confirm all prompts
             shell=True,
             stdout=subprocess.PIPE,
             stderr=subprocess.STDOUT,
@@ -67,67 +67,67 @@ def run_command(
         process.wait()
         exit_code = process.returncode
         if exit_code in [0, *allowed_exit_codes]:
-            log_and_print(f"✅ 命令成功完成", log_file, to_stdout=False)
+            log_and_print(f"✅ Command completed successfully", log_file, to_stdout=False)
             return True
-        log_and_print(f"❌ 命令失败 (退出码: {exit_code})", log_file)
+        log_and_print(f"❌ Command failed (exit code: {exit_code})", log_file)
         return False
     except Exception as e:
-        log_and_print(f"💥 执行异常: {e}", log_file)
+        log_and_print(f"💥 Execution exception: {e}", log_file)
         return False
 
 def build_project(project_name: str, sanitizer: str) -> Tuple[bool, str]:
-    """单个项目的构建工作流"""
+    """Build workflow for a single project"""
     log_file = setup_logging(project_name)
     os.chdir(OSS_FUZZ_DIR)
     
     log_and_print("="*60, log_file)
-    log_and_print(f"🔨 开始构建项目: {project_name}", log_file)
-    log_and_print(f"📝 日志路径: {log_file}", log_file)
+    log_and_print(f"🔨 Starting build for project: {project_name}", log_file)
+    log_and_print(f"📝 Log path: {log_file}", log_file)
     log_and_print("="*60, log_file)
 
-    # 1. 构建 Docker 镜像
+    # 1. Build Docker image
     if not run_command(
         f"python3 infra/helper.py build_image {project_name}",
-        "步骤1/2: 构建 Docker 镜像",
+        "Step 1/2: Building Docker image",
         log_file
     ):
         return (False, project_name)
 
-    # 2. 编译 Fuzzer
+    # 2. Compile Fuzzers
     if not run_command(
         f"python3 infra/helper.py build_fuzzers --sanitizer {sanitizer} {project_name}",
-        f"步骤2/2: 编译 Fuzzer (sanitizer={sanitizer})",
+        f"Step 2/2: Compiling Fuzzers (sanitizer={sanitizer})",
         log_file
     ):
         return (False, project_name)
 
-    log_and_print(f"✅ 项目 {project_name} 构建完成", log_file)
+    log_and_print(f"✅ Project {project_name} build completed", log_file)
     return (True, project_name)
 
 def main():
-    parser = argparse.ArgumentParser(description="OSS-Fuzz 并行构建工具")
-    parser.add_argument("project_list", help="项目列表文件路径")
+    parser = argparse.ArgumentParser(description="OSS-Fuzz Parallel Build Tool")
+    parser.add_argument("project_list", help="Project list file path")
     parser.add_argument("--sanitizer", default="address", choices=["address", "memory", "undefined"])
     parser.add_argument("--workers", type=int, default=cpu_count())
     args = parser.parse_args()
 
-    # 读取项目列表
+    # Read project list
     try:
         with open(args.project_list, "r") as f:
             projects = [line.strip() for line in f if line.strip()]
     except Exception as e:
-        print(f"❌ 读取项目列表失败: {e}")
+        print(f"❌ Failed to read project list: {e}")
         sys.exit(1)
 
-    # 并行构建
+    # Parallel build
     with Pool(args.workers) as pool:
         results = pool.starmap(build_project, [(p, args.sanitizer) for p in projects])
 
-    # 输出结果
+    # Output results
     failed = [p for success, p in results if not success]
-    print(f"\n📊 构建完成: 成功 {len(projects)-len(failed)}/{len(projects)}")
+    print(f"\n📊 Build completed: Success {len(projects)-len(failed)}/{len(projects)}")
     if failed:
-        print("❌ 失败项目: " + ", ".join(failed))
+        print("❌ Failed projects: " + ", ".join(failed))
 
 if __name__ == "__main__":
     main()
\ No newline at end of file
diff --git a/fuzz/fuzz_runner_pool.py b/fuzz/fuzz_runner_pool.py
index 7c40d61..996b940 100644
--- a/fuzz/fuzz_runner_pool.py
+++ b/fuzz/fuzz_runner_pool.py
@@ -4,11 +4,11 @@
 """
 fuzz_runner_pool.py
 
-并行批量执行 OSS-Fuzz 本地测试全流程。使用 multiprocessing.Pool 将项目
-分发到多个 CPU核心上同时处理。
+Parallel batch execution of the entire OSS-Fuzz local testing process. 
+Uses multiprocessing.Pool to distribute projects across multiple CPU cores for concurrent processing.
 
-用法: python3 fuzz_runner_pool.py [项目列表文件] [--sanitizer 类型] [--workers N]
-示例: python3 fuzz/fuzz_runner_pool.py data/valid_projects.txt --workers 4
+Usage: python3 fuzz_runner_pool.py [project_list_file] [--sanitizer type] [--workers N]
+Example: python3 fuzz/fuzz_runner_pool.py data/valid_projects.txt --workers 4
 """
 
 import os
@@ -20,18 +20,18 @@
 from typing import List, Optional, Tuple
 from multiprocessing import Pool, cpu_count
 
-# --- 全局配置 (可通过命令行参数覆盖) ---
+# --- Global configuration (can be overridden by command line arguments) ---
 HOME_DIR = Path.home()
 OSS_FUZZ_DIR = HOME_DIR / "FuzzAug" / "fuzz"/"oss-fuzz"
 LOG_DIR = OSS_FUZZ_DIR / "script_pool_batch_logs"
 
 def setup_logging(project_name: str) -> Path:
-    """为单个项目创建带时间戳的日志文件."""
+    """Create a timestamped log file for a single project."""
     LOG_DIR.mkdir(parents=True, exist_ok=True)
     try:
         LOG_DIR.chmod(0o777)
     except PermissionError:
-        # 在并行环境中，这里可能会有多个进程同时尝试，打印一次警告即可
+        # In a parallel environment, multiple processes may try simultaneously, printing a warning once is sufficient
         pass
         
     timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
@@ -39,9 +39,9 @@ def setup_logging(project_name: str) -> Path:
     return log_file_path
 
 def log_and_print(message: str, log_file: Path, to_stdout: bool = True):
-    """将消息写入日志文件，并根据需要打印到控制台。"""
+    """Write message to log file and optionally print to console."""
     if to_stdout:
-        # 添加进程ID，以便区分并行输出
+        # Add process ID to distinguish parallel outputs
         print(f"[PID:{os.getpid()}] {message}")
     with open(log_file, "a", encoding="utf-8") as f:
         f.write(message + "\n")
@@ -51,9 +51,9 @@ def run_command(
     log_msg: str, 
     log_file: Path, 
     allowed_exit_codes: Optional[List[int]] = None,
-    auto_confirm: bool = True  # 新增自动确认参数
+    auto_confirm: bool = True  # New auto-confirm parameter
 ) -> bool:
-    """执行一个 shell 命令，并将输出实时流式传输到日志文件。"""
+    """Execute a shell command and stream output to log file in real-time."""
     if allowed_exit_codes is None:
         allowed_exit_codes = []
 
@@ -61,7 +61,7 @@ def run_command(
     log_and_print(f"   $ {cmd}", log_file, to_stdout=False)
 
     try:
-        # 添加自动确认机制
+        # Add auto-confirm mechanism
         if auto_confirm:
             cmd = f"yes | {cmd}"
 
@@ -72,26 +72,26 @@ def run_command(
         
         with open(log_file, "a", encoding="utf-8") as f:
             for line in iter(process.stdout.readline, ''):
-                f.write(line) # 只写入日志，避免控制台输出混乱
+                f.write(line) # Write to log only to avoid console clutter
 
         process.wait()
         exit_code = process.returncode
 
         if exit_code == 0:
-            log_and_print(f"✅ 命令成功完成。", log_file, to_stdout=False)
+            log_and_print(f"✅ Command completed successfully.", log_file, to_stdout=False)
             return True
         elif exit_code in allowed_exit_codes:
-            log_and_print(f"ℹ️  命令以预期状态退出: {exit_code}", log_file, to_stdout=False)
+            log_and_print(f"ℹ️  Command exited with expected status: {exit_code}", log_file, to_stdout=False)
             return True
         else:
-            log_and_print(f"❌ 命令执行失败 (退出码: {exit_code})", log_file)
+            log_and_print(f"❌ Command execution failed (exit code: {exit_code})", log_file)
             return False
     except Exception as e:
-        log_and_print(f"💥 执行命令时发生异常: {e}", log_file)
+        log_and_print(f"💥 Exception occurred while executing command: {e}", log_file)
         return False
 
 def discover_fuzz_targets(project_name: str) -> List[str]:
-    """自动发现 Fuzz 目标。"""
+    """Automatically discover Fuzz targets."""
     project_out_dir = OSS_FUZZ_DIR / "build" / "out" / project_name
     project_src_dir = OSS_FUZZ_DIR / "projects" / project_name
     targets = []
@@ -109,162 +109,162 @@ def discover_fuzz_targets(project_name: str) -> List[str]:
                     if "atheris.Setup" in f.read():
                         targets.append(py_file.stem)
             except Exception as e:
-                # 在worker进程中，只打印到自己的日志
-                print(f"⚠️  警告: 读取文件 {py_file} 失败: {e}")
+                # In worker processes, only print to own log
+                print(f"⚠️  Warning: Failed to read file {py_file}: {e}")
     return targets
 
 def run_project_workflow(project_name: str, sanitizer: str) -> Tuple[bool, str]:
     """
-    处理单个项目的完整工作流 (Worker Function)。
-    此函数由进程池中的每个工作进程独立执行。
+    Handle the complete workflow for a single project (Worker Function).
+    This function is executed independently by each worker process in the process pool.
     
     Returns:
-        一个元组 (is_success: bool, project_name: str)
+        A tuple (is_success: bool, project_name: str)
     """
     log_file = setup_logging(project_name)
     
-    # 在 worker 的开头打印，以便追踪
+    # Print at the beginning of worker for tracking
     log_and_print("=" * 60, log_file)
-    log_and_print(f"🚀 开始处理项目: {project_name}", log_file)
-    log_and_print(f"📝 日志文件: {log_file}", log_file)
+    log_and_print(f"🚀 Starting processing for project: {project_name}", log_file)
+    log_and_print(f"📝 Log file: {log_file}", log_file)
     log_and_print("=" * 60, log_file)
     
-    # 每个进程都需要设置自己的工作目录
+    # Each process needs to set its own working directory
     try:
         os.chdir(OSS_FUZZ_DIR)
     except FileNotFoundError:
-        log_and_print(f"❌ 严重错误: OSS-Fuzz 目录 '{OSS_FUZZ_DIR}' 不存在！", log_file)
+        log_and_print(f"❌ Critical error: OSS-Fuzz directory '{OSS_FUZZ_DIR}' does not exist!", log_file)
         return (False, project_name)
 
-    # 步骤 1: 构建Docker镜像（启用自动确认）
+    # Step 1: Build Docker image (with auto-confirm enabled)
     if not run_command(
         f"python3 infra/helper.py build_image {project_name}",
-        f"步骤1/5: 构建 {project_name} 的Docker镜像", log_file,
-        auto_confirm=True  # 自动确认所有提示
+        f"Step 1/5: Building Docker image for {project_name}", log_file,
+        auto_confirm=True  # Auto-confirm all prompts
     ):
-        log_and_print(f"❌ 项目 {project_name} 构建镜像失败", log_file)
+        log_and_print(f"❌ Project {project_name} failed to build image", log_file)
         return (False, project_name)
 
-    # 步骤 2: 编译带检测器的fuzzer（启用自动确认）
+    # Step 2: Compile fuzzers with sanitizer (with auto-confirm enabled)
     if not run_command(
         f"python3 infra/helper.py build_fuzzers --sanitizer {sanitizer} {project_name}",
-        f"步骤2/5: 编译 {project_name} 的fuzzer (sanitizer={sanitizer})", log_file,
-        auto_confirm=True  # 自动确认所有提示
+        f"Step 2/5: Compiling fuzzers for {project_name} (sanitizer={sanitizer})", log_file,
+        auto_confirm=True  # Auto-confirm all prompts
     ):
-        log_and_print(f"❌ 项目 {project_name} 编译fuzzer失败", log_file)
+        log_and_print(f"❌ Project {project_name} failed to compile fuzzers", log_file)
         return (False, project_name)
 
-    # 步骤 3: 自动发现目标
-    log_and_print(f"🔍 正在为 {project_name} 自动发现fuzz目标...", log_file)
+    # Step 3: Automatically discover targets
+    log_and_print(f"🔍 Automatically discovering fuzz targets for {project_name}...", log_file)
     fuzz_targets = discover_fuzz_targets(project_name)
 
     if not fuzz_targets:
-        log_and_print(f"⚠️  警告: {project_name} 未找到任何fuzz目标！跳过运行步骤。", log_file)
+        log_and_print(f"⚠️  Warning: No fuzz targets found for {project_name}! Skipping run step.", log_file)
         return (True, project_name)
     
-    log_and_print(f"✅ 发现目标: {', '.join(fuzz_targets)}", log_file)
+    log_and_print(f"✅ Targets discovered: {', '.join(fuzz_targets)}", log_file)
 
-    # 步骤 4: 遍历运行所有目标（启用自动确认）
+    # Step 4: Run all targets (with auto-confirm enabled)
     for i, target in enumerate(fuzz_targets, 1):
         run_command(
             f"python3 infra/helper.py run_fuzzer {project_name} {target} -- -max_total_time=60",
-            f"步骤4/{len(fuzz_targets)}: 运行目标 [{target}] (60秒)", log_file,
+            f"Step 4/{len(fuzz_targets)}: Running target [{target}] (60 seconds)", log_file,
             allowed_exit_codes=[1, 124],
-            auto_confirm=True  # 自动确认所有提示
+            auto_confirm=True  # Auto-confirm all prompts
         )
 
-    # 步骤 5: 生成覆盖率报告 (暂无)
-    log_and_print("步骤5/5: 生成覆盖率报告 (当前版本暂未实现)", log_file)
-    log_and_print(f"✅ 项目 {project_name} 处理完成！", log_file)
+    # Step 5: Generate coverage report (not implemented yet)
+    log_and_print("Step 5/5: Generating coverage report (not implemented in current version)", log_file)
+    log_and_print(f"✅ Project {project_name} processing completed!", log_file)
     return (True, project_name)
 
 def main():
     """
-    主流程函数：设置进程池并分发任务。
+    Main workflow function: Set up process pool and distribute tasks.
     """
     parser = argparse.ArgumentParser(
-        description="OSS-Fuzz 并行批量测试工具",
+        description="OSS-Fuzz Parallel Batch Testing Tool",
         formatter_class=argparse.RawTextHelpFormatter,
-        epilog="示例:\n  python3 fuzz_runner_parallel.py valid_projects.txt --workers 4\n  python3 fuzz_runner_parallel.py my_projects.txt --sanitizer undefined"
+        epilog="Examples:\n  python3 fuzz_runner_parallel.py valid_projects.txt --workers 4\n  python3 fuzz_runner_parallel.py my_projects.txt --sanitizer undefined"
     )
     parser.add_argument(
         "project_list_file", nargs="?", default="valid_projects.txt",
-        help="包含待测试项目列表的文本文件。(默认: valid_projects.txt)"
+        help="Text file containing list of projects to test. (Default: valid_projects.txt)"
     )
     parser.add_argument(
         "--sanitizer", default="address", choices=["address", "memory", "undefined", "coverage"],
-        help="要使用的 sanitizer 类型。(默认: address)"
+        help="Type of sanitizer to use. (Default: address)"
     )
     parser.add_argument(
         "--workers", type=int, default=cpu_count(),
-        help=f"并发执行的工作进程数。(默认: 系统CPU核心数, 即 {cpu_count()})"
+        help=f"Number of concurrent worker processes. (Default: system CPU count, currently {cpu_count()})"
     )
     args = parser.parse_args()
     
-    # --- 环境检查 ---
+    # --- Environment checks ---
     if not OSS_FUZZ_DIR.is_dir():
-        print(f"❌ 错误: OSS-Fuzz 目录 '{OSS_FUZZ_DIR}' 不存在！")
+        print(f"❌ Error: OSS-Fuzz directory '{OSS_FUZZ_DIR}' does not exist!")
         sys.exit(1)
     
     project_list_path = Path(args.project_list_file)
     if not project_list_path.is_file():
-        print(f"❌ 错误: 项目列表文件 '{project_list_path}' 不存在！")
+        print(f"❌ Error: Project list file '{project_list_path}' does not exist!")
         sys.exit(1)
         
-    print(f"✅ 环境检查通过。将使用 {args.workers} 个并行工作进程。")
+    print(f"✅ Environment checks passed. Will use {args.workers} parallel worker processes.")
 
-    # --- 读取和准备任务 ---
+    # --- Read and prepare tasks ---
     try:
         with open(project_list_path, "r", encoding="utf-8") as f:
             projects = [line.strip() for line in f if line.strip() and not line.strip().startswith("#")]
     except Exception as e:
-        print(f"❌ 读取项目列表文件时出错: {e}")
+        print(f"❌ Error reading project list file: {e}")
         sys.exit(1)
         
     if not projects:
-        print("⚠️  警告: 项目列表为空，无可执行任务。")
+        print("⚠️  Warning: Project list is empty, no tasks to execute.")
         sys.exit(0)
 
-    # 为 starmap 准备任务参数列表，每个项目都是一个 (project_name, sanitizer) 元组
+    # Prepare task parameters for starmap, each project is a (project_name, sanitizer) tuple
     tasks = [(project, args.sanitizer) for project in projects]
     total_projects = len(tasks)
     
-    print(f"\n🚀 即将并行处理 {total_projects} 个项目...")
+    print(f"\n🚀 About to process {total_projects} projects in parallel...")
 
-    # --- 执行并行处理 ---
-    # 使用 with 语句确保进程池被正确关闭
+    # --- Execute parallel processing ---
+    # Use with statement to ensure proper pool cleanup
     with Pool(processes=args.workers) as pool:
-        # starmap 会阻塞直到所有任务完成
-        # 它将 tasks 列表中的每个元组解包作为参数传递给 worker 函数
+        # starmap blocks until all tasks complete
+        # Unpacks each tuple in tasks as arguments to worker function
         results = pool.starmap(run_project_workflow, tasks)
 
-    # --- 收集并打印结果 ---
+    # --- Collect and print results ---
     failed_projects = []
     for success, project_name in results:
         if success:
-            print(f"✅ 项目 {project_name} 成功完成")
+            print(f"✅ Project {project_name} completed successfully")
         else:
-            print(f"❌ 项目 {project_name} 处理失败")
+            print(f"❌ Project {project_name} processing failed")
             failed_projects.append(project_name)
 
-    # --- 最终总结 ---
+    # --- Final summary ---
     fail_count = len(failed_projects)
     success_count = total_projects - fail_count
     
     print("\n" + "=" * 60)
-    print("🎉 批量处理完成！")
-    print(f"📊 总计: {total_projects} 个项目")
-    print(f"✅ 成功: {success_count}")
-    print(f"❌ 失败: {fail_count}")
+    print("🎉 Batch processing completed!")
+    print(f"📊 Total: {total_projects} projects")
+    print(f"✅ Success: {success_count}")
+    print(f"❌ Failures: {fail_count}")
 
     if failed_projects:
-        print("📛 失败项目列表:")
+        print("📛 Failed projects list:")
         for proj in sorted(failed_projects):
             print(f"  • {proj}")
-        print("\n💡 提示: 失败项目的详细信息请查看对应的日志文件。")
-        print(f"   日志目录: {LOG_DIR}")
+        print("\n💡 Tip: Detailed information for failed projects can be found in corresponding log files.")
+        print(f"   Log directory: {LOG_DIR}")
 
 if __name__ == "__main__":
-    # 在 Windows 或 macOS 的某些 Python 版本上，需要将 main 调用放在这个保护块中
-    # 以防止子进程重新导入和执行主模块代码，导致无限递归。
-    main()
\ No newline at end of file
+    # On some Python versions for Windows or macOS, main call needs to be in this guard
+    # To prevent child processes from re-importing and executing main module code, causing infinite recursion.
+    main()
diff --git a/fuzz/run_fuzz_target.py b/fuzz/run_fuzz_target.py
index 9765dbb..5b43a9e 100644
--- a/fuzz/run_fuzz_target.py
+++ b/fuzz/run_fuzz_target.py
@@ -4,11 +4,11 @@
 """
 run_fuzz_target.py
 
-并行运行 OSS-Fuzz 测试目标。
-使用 multiprocessing.Pool 分发任务到多个 CPU 核心。
+Run OSS-Fuzz test targets in parallel.
+Uses multiprocessing.Pool to distribute tasks to multiple CPU cores.
 
-用法: python3 run_fuzz_target.py [项目列表文件] [--timeout 秒] [--workers N]
-示例: python3 fuzz/run_fuzz_target.py data/valid_projects.txt --timeout 60 --workers 4
+Usage: python3 run_fuzz_target.py [project_list_file] [--timeout seconds] [--workers N]
+Example: python3 fuzz/run_fuzz_target.py data/valid_projects.txt --timeout 60 --workers 4
 """
 
 import os
@@ -20,19 +20,19 @@
 from typing import List, Optional, Tuple
 from multiprocessing import Pool, cpu_count
 
-# --- 全局配置 ---
+# --- Global configuration ---
 HOME_DIR = Path.home()
 OSS_FUZZ_DIR = HOME_DIR / "FuzzAug" / "fuzz" / "oss-fuzz"
 LOG_DIR = OSS_FUZZ_DIR / "run_logs"
 
 def setup_logging(project_name: str) -> Path:
-    """创建带时间戳的运行日志"""
+    """Create a timestamped run log"""
     LOG_DIR.mkdir(parents=True, exist_ok=True)
     timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
     return LOG_DIR / f"run_{project_name}_{timestamp}.log"
 
 def log_and_print(message: str, log_file: Path, to_stdout: bool = True):
-    """日志和控制台输出"""
+    """Log and console output"""
     if to_stdout:
         print(f"[PID:{os.getpid()}] {message}")
     with open(log_file, "a", encoding="utf-8") as f:
@@ -44,7 +44,7 @@ def run_command(
     log_file: Path,
     allowed_exit_codes: Optional[List[int]] = None
 ) -> bool:
-    """执行命令并实时记录输出"""
+    """Execute command and log output in real-time"""
     allowed_exit_codes = allowed_exit_codes or []
     log_and_print(f"▶️ {log_msg}...", log_file, to_stdout=False)
     log_and_print(f"   $ {cmd}", log_file, to_stdout=False)
@@ -65,11 +65,11 @@ def run_command(
         process.wait()
         return process.returncode in [0, *allowed_exit_codes]
     except Exception as e:
-        log_and_print(f"💥 执行异常: {e}", log_file)
+        log_and_print(f"💥 Execution exception: {e}", log_file)
         return False
 
 def discover_targets(project_name: str) -> List[str]:
-    """发现可用的 Fuzz 目标"""
+    """Discover available Fuzz targets"""
     out_dir = OSS_FUZZ_DIR / "build" / "out" / project_name
     targets = []
     if out_dir.exists():
@@ -79,62 +79,62 @@ def discover_targets(project_name: str) -> List[str]:
     return targets
 
 def run_project(project_name: str, timeout: int) -> Tuple[bool, str]:
-    """单个项目的测试工作流"""
+    """Testing workflow for a single project"""
     log_file = setup_logging(project_name)
     os.chdir(OSS_FUZZ_DIR)
 
     log_and_print("="*60, log_file)
-    log_and_print(f"🚀 开始测试项目: {project_name}", log_file)
-    log_and_print(f"📝 日志路径: {log_file}", log_file)
+    log_and_print(f"🚀 Starting testing for project: {project_name}", log_file)
+    log_and_print(f"📝 Log path: {log_file}", log_file)
     log_and_print("="*60, log_file)
 
-    # 1. 发现测试目标
+    # 1. Discover test targets
     targets = discover_targets(project_name)
     if not targets:
-        log_and_print("⚠️ 未发现测试目标", log_file)
+        log_and_print("⚠️ No test targets found", log_file)
         return (False, project_name)
-    log_and_print(f"🔍 发现 {len(targets)} 个测试目标", log_file)
+    log_and_print(f"🔍 Discovered {len(targets)} test targets", log_file)
 
-    # 2. 运行所有目标
+    # 2. Run all targets
     all_success = True
     for i, target in enumerate(targets, 1):
         cmd = f"python3 infra/helper.py run_fuzzer {project_name} {target} -- -max_total_time={timeout}"
         success = run_command(
             cmd,
-            f"运行目标 [{i}/{len(targets)}] {target} (超时={timeout}s)",
+            f"Running target [{i}/{len(targets)}] {target} (timeout={timeout}s)",
             log_file,
-            allowed_exit_codes=[1, 124]  # 允许超时退出
+            allowed_exit_codes=[1, 124]  # Allow timeout exit codes
         )
         all_success &= success
 
-    # 3. 生成报告（占位）
-    log_and_print("📊 覆盖率报告生成 (当前版本暂未实现)", log_file)
+    # 3. Generate report (placeholder)
+    log_and_print("📊 Coverage report generation (not implemented in current version)", log_file)
     return (all_success, project_name)
 
 def main():
-    parser = argparse.ArgumentParser(description="OSS-Fuzz 并行测试工具")
-    parser.add_argument("project_list", help="项目列表文件路径")
-    parser.add_argument("--timeout", type=int, default=60, help="单目标测试超时时间（秒）")
+    parser = argparse.ArgumentParser(description="OSS-Fuzz Parallel Testing Tool")
+    parser.add_argument("project_list", help="Project list file path")
+    parser.add_argument("--timeout", type=int, default=60, help="Timeout per target test (seconds)")
     parser.add_argument("--workers", type=int, default=cpu_count())
     args = parser.parse_args()
 
-    # 读取项目列表
+    # Read project list
     try:
         with open(args.project_list) as f:
             projects = [line.strip() for line in f if line.strip()]
     except Exception as e:
-        print(f"❌ 读取项目列表失败: {e}")
+        print(f"❌ Failed to read project list: {e}")
         sys.exit(1)
 
-    # 并行运行
+    # Parallel execution
     with Pool(args.workers) as pool:
         results = pool.starmap(run_project, [(p, args.timeout) for p in projects])
 
-    # 输出结果
+    # Output results
     failed = [p for success, p in results if not success]
-    print(f"\n📊 测试完成: 成功 {len(projects)-len(failed)}/{len(projects)}")
+    print(f"\n📊 Testing completed: Success {len(projects)-len(failed)}/{len(projects)}")
     if failed:
-        print("❌ 失败项目: " + ", ".join(failed))
+        print("❌ Failed projects: " + ", ".join(failed))
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()

From bb5f14aa0f1c8956f2ab19d9e2bacb0a0a59d2d6 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Tue, 22 Jul 2025 03:15:29 +0000
Subject: [PATCH 012/134] fuzz_runner_pool.py:74

---
 fuzz/fuzz_runner_pool.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/fuzz/fuzz_runner_pool.py b/fuzz/fuzz_runner_pool.py
index 996b940..3580930 100644
--- a/fuzz/fuzz_runner_pool.py
+++ b/fuzz/fuzz_runner_pool.py
@@ -71,8 +71,9 @@ def run_command(
         )
         
         with open(log_file, "a", encoding="utf-8") as f:
-            for line in iter(process.stdout.readline, ''):
-                f.write(line) # Write to log only to avoid console clutter
+            if process.stdout is not None:  # 显式检查
+               for line in iter(process.stdout.readline, ""):
+                    f.write(line) # Write to log only to avoid console clutter
 
         process.wait()
         exit_code = process.returncode

From 1b9b0101071faf8b66a06051adfdb16d68644e44 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Tue, 22 Jul 2025 03:19:18 +0000
Subject: [PATCH 013/134] edit stdout

---
 fuzz/fuzz_runner_pool.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fuzz/fuzz_runner_pool.py b/fuzz/fuzz_runner_pool.py
index 3580930..6a2e4dd 100644
--- a/fuzz/fuzz_runner_pool.py
+++ b/fuzz/fuzz_runner_pool.py
@@ -49,7 +49,7 @@ def log_and_print(message: str, log_file: Path, to_stdout: bool = True):
 def run_command(
     cmd: str, 
     log_msg: str, 
-    log_file: Path, 
+    log_file: TextIO , 
     allowed_exit_codes: Optional[List[int]] = None,
     auto_confirm: bool = True  # New auto-confirm parameter
 ) -> bool:

From 49e9dddf1d45fc0ddd240197b92aa6e910b98d67 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Tue, 22 Jul 2025 03:23:41 +0000
Subject: [PATCH 014/134] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E7=A9=BA=E5=80=BC?=
 =?UTF-8?q?=E6=A3=80=E6=9F=A5?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 fuzz/build_oss_fuzz.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/fuzz/build_oss_fuzz.py b/fuzz/build_oss_fuzz.py
index 1c78a0d..fdd894e 100644
--- a/fuzz/build_oss_fuzz.py
+++ b/fuzz/build_oss_fuzz.py
@@ -62,8 +62,11 @@ def run_command(
             errors="replace"
         )
         with open(log_file, "a", encoding="utf-8") as f:
-            for line in iter(process.stdout.readline, ""):
-                f.write(line)
+            if process.stdout is not None:
+                for line in iter(process.stdout.readline, ""):
+                    f.write(line)
+            else:
+                log_and_print("⚠️ Warning: process.stdout is None", log_file)
         process.wait()
         exit_code = process.returncode
         if exit_code in [0, *allowed_exit_codes]:

From 6e5221dc52b5234b59e2fe42665e92347071a815 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Tue, 22 Jul 2025 03:27:29 +0000
Subject: [PATCH 015/134] modify stdout, delete pool.py

---
 fuzz/fuzz_runner_pool.py | 271 ---------------------------------------
 fuzz/run_fuzz_target.py  |   7 +-
 2 files changed, 5 insertions(+), 273 deletions(-)
 delete mode 100644 fuzz/fuzz_runner_pool.py

diff --git a/fuzz/fuzz_runner_pool.py b/fuzz/fuzz_runner_pool.py
deleted file mode 100644
index 6a2e4dd..0000000
--- a/fuzz/fuzz_runner_pool.py
+++ /dev/null
@@ -1,271 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-"""
-fuzz_runner_pool.py
-
-Parallel batch execution of the entire OSS-Fuzz local testing process. 
-Uses multiprocessing.Pool to distribute projects across multiple CPU cores for concurrent processing.
-
-Usage: python3 fuzz_runner_pool.py [project_list_file] [--sanitizer type] [--workers N]
-Example: python3 fuzz/fuzz_runner_pool.py data/valid_projects.txt --workers 4
-"""
-
-import os
-import sys
-import subprocess
-import argparse
-from datetime import datetime
-from pathlib import Path
-from typing import List, Optional, Tuple
-from multiprocessing import Pool, cpu_count
-
-# --- Global configuration (can be overridden by command line arguments) ---
-HOME_DIR = Path.home()
-OSS_FUZZ_DIR = HOME_DIR / "FuzzAug" / "fuzz"/"oss-fuzz"
-LOG_DIR = OSS_FUZZ_DIR / "script_pool_batch_logs"
-
-def setup_logging(project_name: str) -> Path:
-    """Create a timestamped log file for a single project."""
-    LOG_DIR.mkdir(parents=True, exist_ok=True)
-    try:
-        LOG_DIR.chmod(0o777)
-    except PermissionError:
-        # In a parallel environment, multiple processes may try simultaneously, printing a warning once is sufficient
-        pass
-        
-    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
-    log_file_path = LOG_DIR / f"oss_fuzz_{project_name}_{timestamp}.log"
-    return log_file_path
-
-def log_and_print(message: str, log_file: Path, to_stdout: bool = True):
-    """Write message to log file and optionally print to console."""
-    if to_stdout:
-        # Add process ID to distinguish parallel outputs
-        print(f"[PID:{os.getpid()}] {message}")
-    with open(log_file, "a", encoding="utf-8") as f:
-        f.write(message + "\n")
-
-def run_command(
-    cmd: str, 
-    log_msg: str, 
-    log_file: TextIO , 
-    allowed_exit_codes: Optional[List[int]] = None,
-    auto_confirm: bool = True  # New auto-confirm parameter
-) -> bool:
-    """Execute a shell command and stream output to log file in real-time."""
-    if allowed_exit_codes is None:
-        allowed_exit_codes = []
-
-    log_and_print(f"▶️  {log_msg}...", log_file, to_stdout=False)
-    log_and_print(f"   $ {cmd}", log_file, to_stdout=False)
-
-    try:
-        # Add auto-confirm mechanism
-        if auto_confirm:
-            cmd = f"yes | {cmd}"
-
-        process = subprocess.Popen(
-            cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
-            text=True, encoding='utf-8', errors='replace', bufsize=1
-        )
-        
-        with open(log_file, "a", encoding="utf-8") as f:
-            if process.stdout is not None:  # 显式检查
-               for line in iter(process.stdout.readline, ""):
-                    f.write(line) # Write to log only to avoid console clutter
-
-        process.wait()
-        exit_code = process.returncode
-
-        if exit_code == 0:
-            log_and_print(f"✅ Command completed successfully.", log_file, to_stdout=False)
-            return True
-        elif exit_code in allowed_exit_codes:
-            log_and_print(f"ℹ️  Command exited with expected status: {exit_code}", log_file, to_stdout=False)
-            return True
-        else:
-            log_and_print(f"❌ Command execution failed (exit code: {exit_code})", log_file)
-            return False
-    except Exception as e:
-        log_and_print(f"💥 Exception occurred while executing command: {e}", log_file)
-        return False
-
-def discover_fuzz_targets(project_name: str) -> List[str]:
-    """Automatically discover Fuzz targets."""
-    project_out_dir = OSS_FUZZ_DIR / "build" / "out" / project_name
-    project_src_dir = OSS_FUZZ_DIR / "projects" / project_name
-    targets = []
-
-    if project_out_dir.is_dir():
-        for f in project_out_dir.iterdir():
-            if (f.is_file() and os.access(f, os.X_OK) and 
-                f.name.startswith("fuzz_") and '.' not in f.name):
-                targets.append(f.name)
-
-    if not targets and project_src_dir.is_dir():
-        for py_file in project_src_dir.glob("fuzz_*.py"):
-            try:
-                with open(py_file, "r", encoding="utf-8") as f:
-                    if "atheris.Setup" in f.read():
-                        targets.append(py_file.stem)
-            except Exception as e:
-                # In worker processes, only print to own log
-                print(f"⚠️  Warning: Failed to read file {py_file}: {e}")
-    return targets
-
-def run_project_workflow(project_name: str, sanitizer: str) -> Tuple[bool, str]:
-    """
-    Handle the complete workflow for a single project (Worker Function).
-    This function is executed independently by each worker process in the process pool.
-    
-    Returns:
-        A tuple (is_success: bool, project_name: str)
-    """
-    log_file = setup_logging(project_name)
-    
-    # Print at the beginning of worker for tracking
-    log_and_print("=" * 60, log_file)
-    log_and_print(f"🚀 Starting processing for project: {project_name}", log_file)
-    log_and_print(f"📝 Log file: {log_file}", log_file)
-    log_and_print("=" * 60, log_file)
-    
-    # Each process needs to set its own working directory
-    try:
-        os.chdir(OSS_FUZZ_DIR)
-    except FileNotFoundError:
-        log_and_print(f"❌ Critical error: OSS-Fuzz directory '{OSS_FUZZ_DIR}' does not exist!", log_file)
-        return (False, project_name)
-
-    # Step 1: Build Docker image (with auto-confirm enabled)
-    if not run_command(
-        f"python3 infra/helper.py build_image {project_name}",
-        f"Step 1/5: Building Docker image for {project_name}", log_file,
-        auto_confirm=True  # Auto-confirm all prompts
-    ):
-        log_and_print(f"❌ Project {project_name} failed to build image", log_file)
-        return (False, project_name)
-
-    # Step 2: Compile fuzzers with sanitizer (with auto-confirm enabled)
-    if not run_command(
-        f"python3 infra/helper.py build_fuzzers --sanitizer {sanitizer} {project_name}",
-        f"Step 2/5: Compiling fuzzers for {project_name} (sanitizer={sanitizer})", log_file,
-        auto_confirm=True  # Auto-confirm all prompts
-    ):
-        log_and_print(f"❌ Project {project_name} failed to compile fuzzers", log_file)
-        return (False, project_name)
-
-    # Step 3: Automatically discover targets
-    log_and_print(f"🔍 Automatically discovering fuzz targets for {project_name}...", log_file)
-    fuzz_targets = discover_fuzz_targets(project_name)
-
-    if not fuzz_targets:
-        log_and_print(f"⚠️  Warning: No fuzz targets found for {project_name}! Skipping run step.", log_file)
-        return (True, project_name)
-    
-    log_and_print(f"✅ Targets discovered: {', '.join(fuzz_targets)}", log_file)
-
-    # Step 4: Run all targets (with auto-confirm enabled)
-    for i, target in enumerate(fuzz_targets, 1):
-        run_command(
-            f"python3 infra/helper.py run_fuzzer {project_name} {target} -- -max_total_time=60",
-            f"Step 4/{len(fuzz_targets)}: Running target [{target}] (60 seconds)", log_file,
-            allowed_exit_codes=[1, 124],
-            auto_confirm=True  # Auto-confirm all prompts
-        )
-
-    # Step 5: Generate coverage report (not implemented yet)
-    log_and_print("Step 5/5: Generating coverage report (not implemented in current version)", log_file)
-    log_and_print(f"✅ Project {project_name} processing completed!", log_file)
-    return (True, project_name)
-
-def main():
-    """
-    Main workflow function: Set up process pool and distribute tasks.
-    """
-    parser = argparse.ArgumentParser(
-        description="OSS-Fuzz Parallel Batch Testing Tool",
-        formatter_class=argparse.RawTextHelpFormatter,
-        epilog="Examples:\n  python3 fuzz_runner_parallel.py valid_projects.txt --workers 4\n  python3 fuzz_runner_parallel.py my_projects.txt --sanitizer undefined"
-    )
-    parser.add_argument(
-        "project_list_file", nargs="?", default="valid_projects.txt",
-        help="Text file containing list of projects to test. (Default: valid_projects.txt)"
-    )
-    parser.add_argument(
-        "--sanitizer", default="address", choices=["address", "memory", "undefined", "coverage"],
-        help="Type of sanitizer to use. (Default: address)"
-    )
-    parser.add_argument(
-        "--workers", type=int, default=cpu_count(),
-        help=f"Number of concurrent worker processes. (Default: system CPU count, currently {cpu_count()})"
-    )
-    args = parser.parse_args()
-    
-    # --- Environment checks ---
-    if not OSS_FUZZ_DIR.is_dir():
-        print(f"❌ Error: OSS-Fuzz directory '{OSS_FUZZ_DIR}' does not exist!")
-        sys.exit(1)
-    
-    project_list_path = Path(args.project_list_file)
-    if not project_list_path.is_file():
-        print(f"❌ Error: Project list file '{project_list_path}' does not exist!")
-        sys.exit(1)
-        
-    print(f"✅ Environment checks passed. Will use {args.workers} parallel worker processes.")
-
-    # --- Read and prepare tasks ---
-    try:
-        with open(project_list_path, "r", encoding="utf-8") as f:
-            projects = [line.strip() for line in f if line.strip() and not line.strip().startswith("#")]
-    except Exception as e:
-        print(f"❌ Error reading project list file: {e}")
-        sys.exit(1)
-        
-    if not projects:
-        print("⚠️  Warning: Project list is empty, no tasks to execute.")
-        sys.exit(0)
-
-    # Prepare task parameters for starmap, each project is a (project_name, sanitizer) tuple
-    tasks = [(project, args.sanitizer) for project in projects]
-    total_projects = len(tasks)
-    
-    print(f"\n🚀 About to process {total_projects} projects in parallel...")
-
-    # --- Execute parallel processing ---
-    # Use with statement to ensure proper pool cleanup
-    with Pool(processes=args.workers) as pool:
-        # starmap blocks until all tasks complete
-        # Unpacks each tuple in tasks as arguments to worker function
-        results = pool.starmap(run_project_workflow, tasks)
-
-    # --- Collect and print results ---
-    failed_projects = []
-    for success, project_name in results:
-        if success:
-            print(f"✅ Project {project_name} completed successfully")
-        else:
-            print(f"❌ Project {project_name} processing failed")
-            failed_projects.append(project_name)
-
-    # --- Final summary ---
-    fail_count = len(failed_projects)
-    success_count = total_projects - fail_count
-    
-    print("\n" + "=" * 60)
-    print("🎉 Batch processing completed!")
-    print(f"📊 Total: {total_projects} projects")
-    print(f"✅ Success: {success_count}")
-    print(f"❌ Failures: {fail_count}")
-
-    if failed_projects:
-        print("📛 Failed projects list:")
-        for proj in sorted(failed_projects):
-            print(f"  • {proj}")
-        print("\n💡 Tip: Detailed information for failed projects can be found in corresponding log files.")
-        print(f"   Log directory: {LOG_DIR}")
-
-if __name__ == "__main__":
-    # On some Python versions for Windows or macOS, main call needs to be in this guard
-    # To prevent child processes from re-importing and executing main module code, causing infinite recursion.
-    main()
diff --git a/fuzz/run_fuzz_target.py b/fuzz/run_fuzz_target.py
index 5b43a9e..aa6bafb 100644
--- a/fuzz/run_fuzz_target.py
+++ b/fuzz/run_fuzz_target.py
@@ -60,8 +60,11 @@ def run_command(
             errors="replace"
         )
         with open(log_file, "a", encoding="utf-8") as f:
-            for line in iter(process.stdout.readline, ""):
-                f.write(line)
+           if process.stdout is not None:
+                for line in iter(process.stdout.readline, ""):
+                    f.write(line)
+            else:
+                log_and_print("⚠️ Warning: process.stdout is None", log_file)
         process.wait()
         return process.returncode in [0, *allowed_exit_codes]
     except Exception as e:

From 4a5befabedf3cc425b7bc78fbb0d07a51ba62c1e Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Tue, 22 Jul 2025 03:32:03 +0000
Subject: [PATCH 016/134] indentation level check

---
 fuzz/oss-fuzz           | 2 +-
 fuzz/run_fuzz_target.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/fuzz/oss-fuzz b/fuzz/oss-fuzz
index 0b81ba5..f73b405 160000
--- a/fuzz/oss-fuzz
+++ b/fuzz/oss-fuzz
@@ -1 +1 @@
-Subproject commit 0b81ba5d97ae3d1402744e00b1d9075fed7b7f1e
+Subproject commit f73b405d84e886bac90f8b15200230f08a2709c9
diff --git a/fuzz/run_fuzz_target.py b/fuzz/run_fuzz_target.py
index aa6bafb..4a9f15a 100644
--- a/fuzz/run_fuzz_target.py
+++ b/fuzz/run_fuzz_target.py
@@ -60,7 +60,7 @@ def run_command(
             errors="replace"
         )
         with open(log_file, "a", encoding="utf-8") as f:
-           if process.stdout is not None:
+            if process.stdout is not None:
                 for line in iter(process.stdout.readline, ""):
                     f.write(line)
             else:

From 7e6c1a2dd4d100ef8e3d65be015ce36e2f33e0a6 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Tue, 22 Jul 2025 17:59:07 +0000
Subject: [PATCH 017/134] Remove build log write files

---
 fuzz/build_oss_fuzz.py | 92 +++++++++++-------------------------------
 1 file changed, 24 insertions(+), 68 deletions(-)

diff --git a/fuzz/build_oss_fuzz.py b/fuzz/build_oss_fuzz.py
index fdd894e..3e44315 100644
--- a/fuzz/build_oss_fuzz.py
+++ b/fuzz/build_oss_fuzz.py
@@ -10,14 +10,13 @@
 Usage: python3 build_oss_fuzz.py [project_list_file] [--sanitizer type] [--workers N]
 Example: python3 fuzz/build_oss_fuzz.py data/valid_projects.txt \
     --sanitizer address \
-    --workers 8
+    --workers 4
 """
 
 import os
 import sys
 import subprocess
 import argparse
-from datetime import datetime
 from pathlib import Path
 from typing import List, Optional, Tuple
 from multiprocessing import Pool, cpu_count
@@ -25,35 +24,24 @@
 # --- Global configuration ---
 HOME_DIR = Path.home()
 OSS_FUZZ_DIR = HOME_DIR / "FuzzAug" / "fuzz" / "oss-fuzz"
-LOG_DIR = OSS_FUZZ_DIR / "build_logs"
 
-def setup_logging(project_name: str) -> Path:
-    """Create a timestamped log file for a single project"""
-    LOG_DIR.mkdir(parents=True, exist_ok=True)
-    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
-    return LOG_DIR / f"build_{project_name}_{timestamp}.log"
-
-def log_and_print(message: str, log_file: Path, to_stdout: bool = True):
-    """Write message to log and print to console"""
-    if to_stdout:
-        print(f"[PID:{os.getpid()}] {message}")
-    with open(log_file, "a", encoding="utf-8") as f:
-        f.write(f"{datetime.now().isoformat()} {message}\n")
+def log_and_print(message: str):
+    """Print to console with process info"""
+    print(f"[PID:{os.getpid()}] {message}")
 
 def run_command(
-    cmd: str, 
-    log_msg: str, 
-    log_file: Path,
+    cmd: str,
+    log_msg: str,
     allowed_exit_codes: Optional[List[int]] = None
 ) -> bool:
-    """Execute a shell command and stream output to log in real-time"""
+    """Execute a shell command and stream output to console in real-time"""
     allowed_exit_codes = allowed_exit_codes or []
-    log_and_print(f"▶️ {log_msg}...", log_file, to_stdout=False)
-    log_and_print(f"   $ {cmd}", log_file, to_stdout=False)
+    log_and_print(f"▶️ {log_msg}...")
+    log_and_print(f"   $ {cmd}")
 
     try:
         process = subprocess.Popen(
-            f"yes | {cmd}",  # Auto-confirm all prompts
+            f"yes | {cmd}",
             shell=True,
             stdout=subprocess.PIPE,
             stderr=subprocess.STDOUT,
@@ -61,76 +49,44 @@ def run_command(
             encoding="utf-8",
             errors="replace"
         )
-        with open(log_file, "a", encoding="utf-8") as f:
-            if process.stdout is not None:
-                for line in iter(process.stdout.readline, ""):
-                    f.write(line)
-            else:
-                log_and_print("⚠️ Warning: process.stdout is None", log_file)
+        if process.stdout is not None:
+            for line in iter(process.stdout.readline, ""):
+                print(line, end="")  # real-time output to console
         process.wait()
         exit_code = process.returncode
         if exit_code in [0, *allowed_exit_codes]:
-            log_and_print(f"✅ Command completed successfully", log_file, to_stdout=False)
+            log_and_print(f"✅ Command completed successfully")
             return True
-        log_and_print(f"❌ Command failed (exit code: {exit_code})", log_file)
+        log_and_print(f"❌ Command failed (exit code: {exit_code})")
         return False
     except Exception as e:
-        log_and_print(f"💥 Execution exception: {e}", log_file)
+        log_and_print(f"💥 Execution exception: {e}")
         return False
 
 def build_project(project_name: str, sanitizer: str) -> Tuple[bool, str]:
     """Build workflow for a single project"""
-    log_file = setup_logging(project_name)
     os.chdir(OSS_FUZZ_DIR)
-    
-    log_and_print("="*60, log_file)
-    log_and_print(f"🔨 Starting build for project: {project_name}", log_file)
-    log_and_print(f"📝 Log path: {log_file}", log_file)
-    log_and_print("="*60, log_file)
 
-    # 1. Build Docker image
+    log_and_print("="*60)
+    log_and_print(f"🔨 Starting build for project: {project_name}")
+    log_and_print("="*60)
+
     if not run_command(
         f"python3 infra/helper.py build_image {project_name}",
-        "Step 1/2: Building Docker image",
-        log_file
+        "Step 1/2: Building Docker image"
     ):
         return (False, project_name)
 
-    # 2. Compile Fuzzers
     if not run_command(
         f"python3 infra/helper.py build_fuzzers --sanitizer {sanitizer} {project_name}",
-        f"Step 2/2: Compiling Fuzzers (sanitizer={sanitizer})",
-        log_file
+        f"Step 2/2: Compiling Fuzzers (sanitizer={sanitizer})"
     ):
         return (False, project_name)
 
-    log_and_print(f"✅ Project {project_name} build completed", log_file)
+    log_and_print(f"✅ Project {project_name} build completed")
     return (True, project_name)
 
 def main():
     parser = argparse.ArgumentParser(description="OSS-Fuzz Parallel Build Tool")
     parser.add_argument("project_list", help="Project list file path")
-    parser.add_argument("--sanitizer", default="address", choices=["address", "memory", "undefined"])
-    parser.add_argument("--workers", type=int, default=cpu_count())
-    args = parser.parse_args()
-
-    # Read project list
-    try:
-        with open(args.project_list, "r") as f:
-            projects = [line.strip() for line in f if line.strip()]
-    except Exception as e:
-        print(f"❌ Failed to read project list: {e}")
-        sys.exit(1)
-
-    # Parallel build
-    with Pool(args.workers) as pool:
-        results = pool.starmap(build_project, [(p, args.sanitizer) for p in projects])
-
-    # Output results
-    failed = [p for success, p in results if not success]
-    print(f"\n📊 Build completed: Success {len(projects)-len(failed)}/{len(projects)}")
-    if failed:
-        print("❌ Failed projects: " + ", ".join(failed))
-
-if __name__ == "__main__":
-    main()
\ No newline at end of file
+    parser.add_argument("--sanitizer", default="addres_

From a7e447e5ae31aea0b432c5458ef87612651cd28b Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Tue, 22 Jul 2025 18:00:36 +0000
Subject: [PATCH 018/134] Remove build log write files

---
 fuzz/build_oss_fuzz.py | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/fuzz/build_oss_fuzz.py b/fuzz/build_oss_fuzz.py
index 3e44315..9553e35 100644
--- a/fuzz/build_oss_fuzz.py
+++ b/fuzz/build_oss_fuzz.py
@@ -10,7 +10,7 @@
 Usage: python3 build_oss_fuzz.py [project_list_file] [--sanitizer type] [--workers N]
 Example: python3 fuzz/build_oss_fuzz.py data/valid_projects.txt \
     --sanitizer address \
-    --workers 4
+    --workers 8
 """
 
 import os
@@ -89,4 +89,24 @@ def build_project(project_name: str, sanitizer: str) -> Tuple[bool, str]:
 def main():
     parser = argparse.ArgumentParser(description="OSS-Fuzz Parallel Build Tool")
     parser.add_argument("project_list", help="Project list file path")
-    parser.add_argument("--sanitizer", default="addres_
+    parser.add_argument("--sanitizer", default="address", choices=["address", "memory", "undefined"])
+    parser.add_argument("--workers", type=int, default=cpu_count())
+    args = parser.parse_args()
+
+    try:
+        with open(args.project_list, "r") as f:
+            projects = [line.strip() for line in f if line.strip()]
+    except Exception as e:
+        print(f"❌ Failed to read project list: {e}")
+        sys.exit(1)
+
+    with Pool(args.workers) as pool:
+        results = pool.starmap(build_project, [(p, args.sanitizer) for p in projects])
+
+    failed = [p for success, p in results if not success]
+    print(f"\n📊 Build completed: Success {len(projects)-len(failed)}/{len(projects)}")
+    if failed:
+        print("❌ Failed projects: " + ", ".join(failed))
+
+if __name__ == "__main__":
+    main()

From fb4da2a3e75dfa33c49b4f3fc2af4dbf6c53e662 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Tue, 22 Jul 2025 18:18:41 +0000
Subject: [PATCH 019/134] use logging module

---
 fuzz/build_oss_fuzz.py | 56 ++++++++++++++++++++++++++++--------------
 1 file changed, 38 insertions(+), 18 deletions(-)

diff --git a/fuzz/build_oss_fuzz.py b/fuzz/build_oss_fuzz.py
index 9553e35..89cbc25 100644
--- a/fuzz/build_oss_fuzz.py
+++ b/fuzz/build_oss_fuzz.py
@@ -12,7 +12,6 @@
     --sanitizer address \
     --workers 8
 """
-
 import os
 import sys
 import subprocess
@@ -20,24 +19,22 @@
 from pathlib import Path
 from typing import List, Optional, Tuple
 from multiprocessing import Pool, cpu_count
+import logging
 
 # --- Global configuration ---
 HOME_DIR = Path.home()
 OSS_FUZZ_DIR = HOME_DIR / "FuzzAug" / "fuzz" / "oss-fuzz"
-
-def log_and_print(message: str):
-    """Print to console with process info"""
-    print(f"[PID:{os.getpid()}] {message}")
-
 def run_command(
     cmd: str,
     log_msg: str,
     allowed_exit_codes: Optional[List[int]] = None
 ) -> bool:
-    """Execute a shell command and stream output to console in real-time"""
+
+    """Execute a shell command and stream output to console"""
     allowed_exit_codes = allowed_exit_codes or []
-    log_and_print(f"▶️ {log_msg}...")
-    log_and_print(f"   $ {cmd}")
+
+    logging.info(f"▶️ {log_msg}")
+    logging.debug(f"$ {cmd}")
 
     try:
         process = subprocess.Popen(
@@ -49,27 +46,28 @@ def run_command(
             encoding="utf-8",
             errors="replace"
         )
-        if process.stdout is not None:
+        if process.stdout:
             for line in iter(process.stdout.readline, ""):
-                print(line, end="")  # real-time output to console
+                sys.stdout.write(line)
+                sys.stdout.flush()
         process.wait()
         exit_code = process.returncode
         if exit_code in [0, *allowed_exit_codes]:
-            log_and_print(f"✅ Command completed successfully")
+            logging.info("✅ Command completed successfully")
             return True
-        log_and_print(f"❌ Command failed (exit code: {exit_code})")
+        logging.error(f"❌ Command failed (exit code: {exit_code})")
         return False
     except Exception as e:
-        log_and_print(f"💥 Execution exception: {e}")
+        logging.exception(f"💥 Execution exception: {e}")
         return False
 
 def build_project(project_name: str, sanitizer: str) -> Tuple[bool, str]:
     """Build workflow for a single project"""
     os.chdir(OSS_FUZZ_DIR)
 
-    log_and_print("="*60)
-    log_and_print(f"🔨 Starting build for project: {project_name}")
-    log_and_print("="*60)
+    logging.info("=" * 60)
+    logging.info(f"🔨 Starting build for project: {project_name}")
+    logging.info("=" * 60)
 
     if not run_command(
         f"python3 infra/helper.py build_image {project_name}",
@@ -83,7 +81,7 @@ def build_project(project_name: str, sanitizer: str) -> Tuple[bool, str]:
     ):
         return (False, project_name)
 
-    log_and_print(f"✅ Project {project_name} build completed")
+    logging.info(f"✅ Project {project_name} build completed")
     return (True, project_name)
 
 def main():
@@ -93,7 +91,29 @@ def main():
     parser.add_argument("--workers", type=int, default=cpu_count())
     args = parser.parse_args()
 
+    logging.basicConfig(
+        level=logging.INFO,
+        format='[%(levelname)s] [PID:%(process)d] %(message)s'
+    )
+
     try:
+        with open(args.project_list, "r") as f:
+            projects = [line.strip() for line in f if line.strip()]
+    except Exception as e:
+        logging.error(f"Failed to read project list: {e}")
+        sys.exit(1)
+
+    with Pool(args.workers) as pool:
+        results = pool.starmap(build_project, [(p, args.sanitizer) for p in projects])
+
+    failed = [p for success, p in results if not success]
+    logging.info(f"\n📊 Build completed: Success {len(projects) - len(failed)}/{len(projects)}")
+    if failed:
+        logging.warning("❌ Failed projects: " + ", ".join(failed))
+
+if __name__ == "__main__":
+    main()
+
         with open(args.project_list, "r") as f:
             projects = [line.strip() for line in f if line.strip()]
     except Exception as e:

From e582af60ebd584b73eafc3533f5792ea7148b388 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Tue, 22 Jul 2025 18:32:37 +0000
Subject: [PATCH 020/134] use precise logging

---
 fuzz/build_oss_fuzz.py | 48 ++++++++++++++++++++++++------------------
 1 file changed, 27 insertions(+), 21 deletions(-)

diff --git a/fuzz/build_oss_fuzz.py b/fuzz/build_oss_fuzz.py
index 89cbc25..e4d2b3b 100644
--- a/fuzz/build_oss_fuzz.py
+++ b/fuzz/build_oss_fuzz.py
@@ -29,7 +29,6 @@ def run_command(
     log_msg: str,
     allowed_exit_codes: Optional[List[int]] = None
 ) -> bool:
-
     """Execute a shell command and stream output to console"""
     allowed_exit_codes = allowed_exit_codes or []
 
@@ -46,6 +45,17 @@ def run_command(
             encoding="utf-8",
             errors="replace"
         )
+    except FileNotFoundError:
+        logging.error(f"Command not found: {cmd}")
+        return False
+    except OSError as e:
+        logging.error(f"OS error while executing command: {e}")
+        return False
+    except ValueError as e:
+        logging.error(f"Invalid arguments to Popen: {e}")
+        return False
+
+    try:
         if process.stdout:
             for line in iter(process.stdout.readline, ""):
                 sys.stdout.write(line)
@@ -57,10 +67,15 @@ def run_command(
             return True
         logging.error(f"❌ Command failed (exit code: {exit_code})")
         return False
+    except KeyboardInterrupt:
+        logging.warning("⛔️ Command interrupted by user")
+        process.terminate()
+        return False
     except Exception as e:
-        logging.exception(f"💥 Execution exception: {e}")
+        logging.exception(f"Unexpected error during process execution: {e}")
         return False
 
+
 def build_project(project_name: str, sanitizer: str) -> Tuple[bool, str]:
     """Build workflow for a single project"""
     os.chdir(OSS_FUZZ_DIR)
@@ -96,11 +111,18 @@ def main():
         format='[%(levelname)s] [PID:%(process)d] %(message)s'
     )
 
+    if not os.path.isfile(args.project_list):
+        logging.error(f"Project list file not found: {args.project_list}")
+        sys.exit(1)
+
     try:
-        with open(args.project_list, "r") as f:
+        with open(args.project_list, "r", encoding="utf-8") as f:
             projects = [line.strip() for line in f if line.strip()]
-    except Exception as e:
-        logging.error(f"Failed to read project list: {e}")
+    except OSError as e:
+        logging.error(f"OS error while reading project list: {e}")
+        sys.exit(1)
+    except UnicodeDecodeError as e:
+        logging.error(f"Encoding error while reading file: {e}")
         sys.exit(1)
 
     with Pool(args.workers) as pool:
@@ -114,19 +136,3 @@ def main():
 if __name__ == "__main__":
     main()
 
-        with open(args.project_list, "r") as f:
-            projects = [line.strip() for line in f if line.strip()]
-    except Exception as e:
-        print(f"❌ Failed to read project list: {e}")
-        sys.exit(1)
-
-    with Pool(args.workers) as pool:
-        results = pool.starmap(build_project, [(p, args.sanitizer) for p in projects])
-
-    failed = [p for success, p in results if not success]
-    print(f"\n📊 Build completed: Success {len(projects)-len(failed)}/{len(projects)}")
-    if failed:
-        print("❌ Failed projects: " + ", ".join(failed))
-
-if __name__ == "__main__":
-    main()

From 02f9269d720b75759619f3021520b26a97c28e54 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Tue, 22 Jul 2025 18:50:34 +0000
Subject: [PATCH 021/134] use logging

---
 fuzz/run_fuzz_target.py | 154 +++++++++++++++++++++++++++-------------
 1 file changed, 103 insertions(+), 51 deletions(-)

diff --git a/fuzz/run_fuzz_target.py b/fuzz/run_fuzz_target.py
index 4a9f15a..b13afb6 100644
--- a/fuzz/run_fuzz_target.py
+++ b/fuzz/run_fuzz_target.py
@@ -4,8 +4,8 @@
 """
 run_fuzz_target.py
 
-Run OSS-Fuzz test targets in parallel.
-Uses multiprocessing.Pool to distribute tasks to multiple CPU cores.
+Run OSS-Fuzz test targets in parallel with enhanced logging.
+Uses multiprocessing.Pool and logging module for robust task management.
 
 Usage: python3 run_fuzz_target.py [project_list_file] [--timeout seconds] [--workers N]
 Example: python3 fuzz/run_fuzz_target.py data/valid_projects.txt --timeout 60 --workers 4
@@ -15,39 +15,64 @@
 import sys
 import subprocess
 import argparse
+import logging
 from datetime import datetime
 from pathlib import Path
 from typing import List, Optional, Tuple
-from multiprocessing import Pool, cpu_count
+from multiprocessing import Pool, cpu_count, current_process
 
 # --- Global configuration ---
 HOME_DIR = Path.home()
 OSS_FUZZ_DIR = HOME_DIR / "FuzzAug" / "fuzz" / "oss-fuzz"
 LOG_DIR = OSS_FUZZ_DIR / "run_logs"
 
-def setup_logging(project_name: str) -> Path:
-    """Create a timestamped run log"""
+def setup_logging(project_name: str) -> logging.Logger:
+    """Configure hierarchical logger with file and console handlers"""
     LOG_DIR.mkdir(parents=True, exist_ok=True)
     timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
-    return LOG_DIR / f"run_{project_name}_{timestamp}.log"
-
-def log_and_print(message: str, log_file: Path, to_stdout: bool = True):
-    """Log and console output"""
-    if to_stdout:
-        print(f"[PID:{os.getpid()}] {message}")
-    with open(log_file, "a", encoding="utf-8") as f:
-        f.write(f"{datetime.now().isoformat()} {message}\n")
+    log_file = LOG_DIR / f"run_{project_name}_{timestamp}.log"
+    
+    # Create process-specific logger
+    logger = logging.getLogger(f"{project_name}.{current_process().name}")
+    logger.setLevel(logging.DEBUG)
+    
+    # File handler (all levels)
+    file_handler = logging.FileHandler(log_file, encoding="utf-8")
+    file_handler.setLevel(logging.DEBUG)
+    file_formatter = logging.Formatter(
+        "%(asctime)s [PID:%(process)d] %(levelname)s: %(message)s",
+        datefmt="%Y-%m-%d %H:%M:%S"
+    )
+    file_handler.setFormatter(file_formatter)
+    
+    # Console handler (INFO+ only)
+    console_handler = logging.StreamHandler()
+    console_handler.setLevel(logging.INFO)
+    console_formatter = logging.Formatter(
+        "[%(levelname)s] %(message)s"
+    )
+    console_handler.setFormatter(console_formatter)
+    
+    logger.addHandler(file_handler)
+    logger.addHandler(console_handler)
+    
+    # Capture uncaught exceptions
+    sys.excepthook = lambda exc_type, exc_value, exc_traceback: (
+        logger.critical("Unhandled exception", exc_info=(exc_type, exc_value, exc_traceback))
+    )
+    
+    return logger
 
 def run_command(
     cmd: str, 
     log_msg: str, 
-    log_file: Path,
+    logger: logging.Logger,
     allowed_exit_codes: Optional[List[int]] = None
 ) -> bool:
-    """Execute command and log output in real-time"""
+    """Execute command with real-time logging"""
     allowed_exit_codes = allowed_exit_codes or []
-    log_and_print(f"▶️ {log_msg}...", log_file, to_stdout=False)
-    log_and_print(f"   $ {cmd}", log_file, to_stdout=False)
+    logger.info(f"▶️ {log_msg}...")
+    logger.debug(f"   $ {cmd}")
 
     try:
         process = subprocess.Popen(
@@ -59,45 +84,56 @@ def run_command(
             encoding="utf-8",
             errors="replace"
         )
-        with open(log_file, "a", encoding="utf-8") as f:
-            if process.stdout is not None:
-                for line in iter(process.stdout.readline, ""):
-                    f.write(line)
-            else:
-                log_and_print("⚠️ Warning: process.stdout is None", log_file)
+        
+        # Stream output to logger
+        if process.stdout:
+            for line in iter(process.stdout.readline, ""):
+                logger.debug(line.strip())
+        else:
+            logger.warning("Process stdout is None")
+            
         process.wait()
-        return process.returncode in [0, *allowed_exit_codes]
+        
+        if process.returncode not in [0, *allowed_exit_codes]:
+            logger.error(f"Command failed with exit code: {process.returncode}")
+            return False
+        return True
     except Exception as e:
-        log_and_print(f"💥 Execution exception: {e}", log_file)
+        logger.exception(f"💥 Execution exception: {e}")
         return False
 
-def discover_targets(project_name: str) -> List[str]:
-    """Discover available Fuzz targets"""
+def discover_targets(project_name: str, logger: logging.Logger) -> List[str]:
+    """Discover available Fuzz targets with error handling"""
     out_dir = OSS_FUZZ_DIR / "build" / "out" / project_name
     targets = []
-    if out_dir.exists():
-        for f in out_dir.iterdir():
-            if f.is_file() and f.name.startswith("fuzz_") and os.access(f, os.X_OK):
-                targets.append(f.name)
+    try:
+        if out_dir.exists():
+            for f in out_dir.iterdir():
+                if f.is_file() and f.name.startswith("fuzz_") and os.access(f, os.X_OK):
+                    targets.append(f.name)
+        else:
+            logger.warning(f"Build directory not found: {out_dir}")
+    except Exception as e:
+        logger.exception(f"Target discovery failed: {e}")
     return targets
 
 def run_project(project_name: str, timeout: int) -> Tuple[bool, str]:
-    """Testing workflow for a single project"""
-    log_file = setup_logging(project_name)
+    """Testing workflow for a single project with logging"""
+    logger = setup_logging(project_name)
     os.chdir(OSS_FUZZ_DIR)
-
-    log_and_print("="*60, log_file)
-    log_and_print(f"🚀 Starting testing for project: {project_name}", log_file)
-    log_and_print(f"📝 Log path: {log_file}", log_file)
-    log_and_print("="*60, log_file)
-
+    
+    logger.info("=" * 60)
+    logger.info(f"🚀 Starting testing for project: {project_name}")
+    logger.debug(f"📝 Log path: {[h.baseFilename for h in logger.handlers if isinstance(h, logging.FileHandler)]}")
+    logger.info("=" * 60)
+    
     # 1. Discover test targets
-    targets = discover_targets(project_name)
+    targets = discover_targets(project_name, logger)
     if not targets:
-        log_and_print("⚠️ No test targets found", log_file)
+        logger.error("⚠️ No test targets found")
         return (False, project_name)
-    log_and_print(f"🔍 Discovered {len(targets)} test targets", log_file)
-
+    logger.info(f"🔍 Discovered {len(targets)} test targets: {', '.join(targets)}")
+    
     # 2. Run all targets
     all_success = True
     for i, target in enumerate(targets, 1):
@@ -105,28 +141,44 @@ def run_project(project_name: str, timeout: int) -> Tuple[bool, str]:
         success = run_command(
             cmd,
             f"Running target [{i}/{len(targets)}] {target} (timeout={timeout}s)",
-            log_file,
+            logger,
             allowed_exit_codes=[1, 124]  # Allow timeout exit codes
         )
         all_success &= success
-
-    # 3. Generate report (placeholder)
-    log_and_print("📊 Coverage report generation (not implemented in current version)", log_file)
+        if not success:
+            logger.error(f"❌ Target failed: {target}")
+    
+    # 3. Final status
+    if all_success:
+        logger.info(f"✅ All targets completed successfully for {project_name}")
+    else:
+        logger.error(f"❌ One or more targets failed for {project_name}")
+    
     return (all_success, project_name)
 
 def main():
+    # Root logger config for main process
+    logging.basicConfig(
+        level=logging.INFO,
+        format="[%(levelname)s] %(message)s"
+    )
+    logger = logging.getLogger("Main")
+    
     parser = argparse.ArgumentParser(description="OSS-Fuzz Parallel Testing Tool")
     parser.add_argument("project_list", help="Project list file path")
     parser.add_argument("--timeout", type=int, default=60, help="Timeout per target test (seconds)")
     parser.add_argument("--workers", type=int, default=cpu_count())
     args = parser.parse_args()
+    
+    logger.info(f"🚀 Starting parallel fuzzing (workers={args.workers}, timeout={args.timeout}s)")
 
     # Read project list
     try:
         with open(args.project_list) as f:
             projects = [line.strip() for line in f if line.strip()]
+        logger.info(f"📋 Loaded {len(projects)} projects from {args.project_list}")
     except Exception as e:
-        print(f"❌ Failed to read project list: {e}")
+        logger.exception(f"❌ Failed to read project list: {e}")
         sys.exit(1)
 
     # Parallel execution
@@ -135,9 +187,9 @@ def main():
 
     # Output results
     failed = [p for success, p in results if not success]
-    print(f"\n📊 Testing completed: Success {len(projects)-len(failed)}/{len(projects)}")
+    logger.info(f"\n📊 Testing completed: Success {len(projects)-len(failed)}/{len(projects)}")
     if failed:
-        print("❌ Failed projects: " + ", ".join(failed))
+        logger.error("❌ Failed projects: " + ", ".join(failed))
 
 if __name__ == "__main__":
-    main()
+    main()
\ No newline at end of file

From 15a7095da4563ad4b7f3072a62eafdb619d90428 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Tue, 22 Jul 2025 18:57:47 +0000
Subject: [PATCH 022/134] use precise exception log info

---
 fuzz/run_fuzz_target.py | 268 ++++++++++++++++++++++++++++------------
 1 file changed, 186 insertions(+), 82 deletions(-)

diff --git a/fuzz/run_fuzz_target.py b/fuzz/run_fuzz_target.py
index b13afb6..7b6b893 100644
--- a/fuzz/run_fuzz_target.py
+++ b/fuzz/run_fuzz_target.py
@@ -4,7 +4,7 @@
 """
 run_fuzz_target.py
 
-Run OSS-Fuzz test targets in parallel with enhanced logging.
+Run OSS-Fuzz test targets in parallel with enhanced logging and precise exception handling.
 Uses multiprocessing.Pool and logging module for robust task management.
 
 Usage: python3 run_fuzz_target.py [project_list_file] [--timeout seconds] [--workers N]
@@ -16,6 +16,8 @@
 import subprocess
 import argparse
 import logging
+import time
+import shutil
 from datetime import datetime
 from pathlib import Path
 from typing import List, Optional, Tuple
@@ -28,53 +30,60 @@
 
 def setup_logging(project_name: str) -> logging.Logger:
     """Configure hierarchical logger with file and console handlers"""
-    LOG_DIR.mkdir(parents=True, exist_ok=True)
-    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
-    log_file = LOG_DIR / f"run_{project_name}_{timestamp}.log"
-    
-    # Create process-specific logger
-    logger = logging.getLogger(f"{project_name}.{current_process().name}")
-    logger.setLevel(logging.DEBUG)
-    
-    # File handler (all levels)
-    file_handler = logging.FileHandler(log_file, encoding="utf-8")
-    file_handler.setLevel(logging.DEBUG)
-    file_formatter = logging.Formatter(
-        "%(asctime)s [PID:%(process)d] %(levelname)s: %(message)s",
-        datefmt="%Y-%m-%d %H:%M:%S"
-    )
-    file_handler.setFormatter(file_formatter)
-    
-    # Console handler (INFO+ only)
-    console_handler = logging.StreamHandler()
-    console_handler.setLevel(logging.INFO)
-    console_formatter = logging.Formatter(
-        "[%(levelname)s] %(message)s"
-    )
-    console_handler.setFormatter(console_formatter)
-    
-    logger.addHandler(file_handler)
-    logger.addHandler(console_handler)
-    
-    # Capture uncaught exceptions
-    sys.excepthook = lambda exc_type, exc_value, exc_traceback: (
-        logger.critical("Unhandled exception", exc_info=(exc_type, exc_value, exc_traceback))
-    )
-    
-    return logger
+    try:
+        LOG_DIR.mkdir(parents=True, exist_ok=True)
+        timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
+        log_file = LOG_DIR / f"run_{project_name}_{timestamp}.log"
+        
+        # Create process-specific logger
+        logger = logging.getLogger(f"{project_name}.{current_process().name}")
+        logger.setLevel(logging.DEBUG)
+        
+        # File handler (all levels)
+        file_handler = logging.FileHandler(log_file, encoding="utf-8")
+        file_handler.setLevel(logging.DEBUG)
+        file_formatter = logging.Formatter(
+            "%(asctime)s [PID:%(process)d] %(levelname)s: %(message)s",
+            datefmt="%Y-%m-%d %H:%M:%S"
+        )
+        file_handler.setFormatter(file_formatter)
+        
+        # Console handler (INFO+ only)
+        console_handler = logging.StreamHandler()
+        console_handler.setLevel(logging.INFO)
+        console_formatter = logging.Formatter(
+            "[%(levelname)s] %(message)s"
+        )
+        console_handler.setFormatter(console_formatter)
+        
+        logger.addHandler(file_handler)
+        logger.addHandler(console_handler)
+        
+        # Capture uncaught exceptions
+        sys.excepthook = lambda exc_type, exc_value, exc_traceback: (
+            logger.critical("Unhandled exception", exc_info=(exc_type, exc_value, exc_traceback))
+        )
+        
+        return logger
+    except (PermissionError, OSError) as e:
+        print(f"❌ Critical logging setup error: {e}")
+        sys.exit(1)
 
 def run_command(
     cmd: str, 
     log_msg: str, 
     logger: logging.Logger,
-    allowed_exit_codes: Optional[List[int]] = None
+    allowed_exit_codes: Optional[List[int]] = None,
+    timeout: int = 3600  # 1 hour default timeout
 ) -> bool:
-    """Execute command with real-time logging"""
+    """Execute command with real-time logging and precise error handling"""
     allowed_exit_codes = allowed_exit_codes or []
     logger.info(f"▶️ {log_msg}...")
     logger.debug(f"   $ {cmd}")
 
+    process = None
     try:
+        # Start process with explicit timeout handling
         process = subprocess.Popen(
             cmd,
             shell=True,
@@ -85,42 +94,89 @@ def run_command(
             errors="replace"
         )
         
-        # Stream output to logger
-        if process.stdout:
-            for line in iter(process.stdout.readline, ""):
-                logger.debug(line.strip())
-        else:
-            logger.warning("Process stdout is None")
-            
-        process.wait()
+        # Stream output to logger with timeout control
+        start_time = time.time()
+        while process.poll() is None:
+            if time.time() - start_time > timeout:
+                logger.error(f"⌛ Command timed out after {timeout} seconds")
+                process.terminate()
+                try:
+                    process.wait(timeout=5)
+                except subprocess.TimeoutExpired:
+                    process.kill()
+                return False
+                
+            # Read available output
+            if process.stdout:
+                line = process.stdout.readline()
+                if line:
+                    logger.debug(line.strip())
+            else:
+                logger.warning("Process stdout is None")
+                time.sleep(0.1)
         
-        if process.returncode not in [0, *allowed_exit_codes]:
-            logger.error(f"Command failed with exit code: {process.returncode}")
+        # Check exit code
+        exit_code = process.returncode
+        if exit_code not in [0, *allowed_exit_codes]:
+            logger.error(f"❌ Command failed with exit code: {exit_code}")
             return False
         return True
-    except Exception as e:
-        logger.exception(f"💥 Execution exception: {e}")
+        
+    except FileNotFoundError as e:
+        logger.error(f"🔍 Command not found: {cmd.split()[0]}")
         return False
+    except PermissionError as e:
+        logger.error(f"🔒 Permission denied for command: {cmd}")
+        return False
+    except subprocess.SubprocessError as e:
+        logger.exception(f"💥 Subprocess error: {e}")
+        return False
+    except OSError as e:
+        logger.exception(f"💥 OS error during command execution: {e}")
+        return False
+    finally:
+        # Ensure process is cleaned up
+        if process and process.poll() is None:
+            try:
+                process.terminate()
+                process.wait(timeout=5)
+            except:
+                pass
 
 def discover_targets(project_name: str, logger: logging.Logger) -> List[str]:
-    """Discover available Fuzz targets with error handling"""
+    """Discover available Fuzz targets with precise error handling"""
     out_dir = OSS_FUZZ_DIR / "build" / "out" / project_name
     targets = []
+    
     try:
-        if out_dir.exists():
-            for f in out_dir.iterdir():
+        if not out_dir.exists():
+            logger.warning(f"⚠️ Build directory not found: {out_dir}")
+            return targets
+            
+        for f in out_dir.iterdir():
+            try:
                 if f.is_file() and f.name.startswith("fuzz_") and os.access(f, os.X_OK):
                     targets.append(f.name)
-        else:
-            logger.warning(f"Build directory not found: {out_dir}")
-    except Exception as e:
-        logger.exception(f"Target discovery failed: {e}")
+            except OSError as e:
+                logger.warning(f"⚠️ Skipping file {f.name} due to access error: {e}")
+                
+    except FileNotFoundError:
+        logger.error(f"❌ Directory not found: {out_dir}")
+    except PermissionError:
+        logger.error(f"🔒 Permission denied accessing: {out_dir}")
+    except OSError as e:
+        logger.exception(f"💥 OS error during target discovery: {e}")
+    
     return targets
 
 def run_project(project_name: str, timeout: int) -> Tuple[bool, str]:
-    """Testing workflow for a single project with logging"""
-    logger = setup_logging(project_name)
-    os.chdir(OSS_FUZZ_DIR)
+    """Testing workflow for a single project with precise error handling"""
+    try:
+        logger = setup_logging(project_name)
+        os.chdir(OSS_FUZZ_DIR)
+    except (OSError, PermissionError) as e:
+        print(f"❌ Critical error initializing project {project_name}: {e}")
+        return (False, project_name)
     
     logger.info("=" * 60)
     logger.info(f"🚀 Starting testing for project: {project_name}")
@@ -128,25 +184,34 @@ def run_project(project_name: str, timeout: int) -> Tuple[bool, str]:
     logger.info("=" * 60)
     
     # 1. Discover test targets
-    targets = discover_targets(project_name, logger)
-    if not targets:
-        logger.error("⚠️ No test targets found")
+    try:
+        targets = discover_targets(project_name, logger)
+        if not targets:
+            logger.error("⚠️ No test targets found")
+            return (False, project_name)
+        logger.info(f"🔍 Discovered {len(targets)} test targets: {', '.join(targets)}")
+    except Exception as e:
+        logger.exception(f"💥 Target discovery failed unexpectedly: {e}")
         return (False, project_name)
-    logger.info(f"🔍 Discovered {len(targets)} test targets: {', '.join(targets)}")
     
     # 2. Run all targets
     all_success = True
     for i, target in enumerate(targets, 1):
-        cmd = f"python3 infra/helper.py run_fuzzer {project_name} {target} -- -max_total_time={timeout}"
-        success = run_command(
-            cmd,
-            f"Running target [{i}/{len(targets)}] {target} (timeout={timeout}s)",
-            logger,
-            allowed_exit_codes=[1, 124]  # Allow timeout exit codes
-        )
-        all_success &= success
-        if not success:
-            logger.error(f"❌ Target failed: {target}")
+        try:
+            cmd = f"python3 infra/helper.py run_fuzzer {project_name} {target} -- -max_total_time={timeout}"
+            success = run_command(
+                cmd,
+                f"Running target [{i}/{len(targets)}] {target} (timeout={timeout}s)",
+                logger,
+                allowed_exit_codes=[1, 124],  # Allow timeout exit codes
+                timeout=timeout + 300  # Add buffer for setup/teardown
+            )
+            all_success &= success
+            if not success:
+                logger.error(f"❌ Target failed: {target}")
+        except Exception as e:
+            logger.exception(f"💥 Unexpected error running target {target}: {e}")
+            all_success = False
     
     # 3. Final status
     if all_success:
@@ -172,24 +237,63 @@ def main():
     
     logger.info(f"🚀 Starting parallel fuzzing (workers={args.workers}, timeout={args.timeout}s)")
 
-    # Read project list
+    # Read project list with precise error handling
     try:
-        with open(args.project_list) as f:
+        project_path = Path(args.project_list)
+        if not project_path.exists():
+            raise FileNotFoundError(f"Project list file not found: {project_path}")
+            
+        if not project_path.is_file():
+            raise ValueError(f"Path is not a file: {project_path}")
+            
+        with open(project_path, "r") as f:
             projects = [line.strip() for line in f if line.strip()]
-        logger.info(f"📋 Loaded {len(projects)} projects from {args.project_list}")
-    except Exception as e:
-        logger.exception(f"❌ Failed to read project list: {e}")
+            
+        logger.info(f"📋 Loaded {len(projects)} projects from {project_path}")
+    except FileNotFoundError as e:
+        logger.error(f"❌ {e}")
+        sys.exit(1)
+    except PermissionError as e:
+        logger.error(f"🔒 Permission denied: {e}")
+        sys.exit(1)
+    except (OSError, ValueError) as e:
+        logger.exception(f"💥 Error reading project list: {e}")
         sys.exit(1)
 
-    # Parallel execution
+    # Parallel execution with error isolation
     with Pool(args.workers) as pool:
-        results = pool.starmap(run_project, [(p, args.timeout) for p in projects])
+        results = []
+        for p in projects:
+            try:
+                results.append(pool.apply_async(run_project, (p, args.timeout)))
+            except Exception as e:
+                logger.error(f"💥 Failed to schedule project {p}: {e}")
+                results.append((False, p))
+        
+        # Collect results with timeout
+        final_results = []
+        for res in results:
+            try:
+                final_results.append(res.get(timeout=args.timeout * 2))
+            except TimeoutError:
+                logger.error("⌛ Project execution timed out")
+                final_results.append((False, "unknown"))
+            except Exception as e:
+                logger.error(f"💥 Error collecting result: {e}")
+                final_results.append((False, "unknown"))
 
     # Output results
-    failed = [p for success, p in results if not success]
+    failed = [p for success, p in final_results if not success]
     logger.info(f"\n📊 Testing completed: Success {len(projects)-len(failed)}/{len(projects)}")
     if failed:
         logger.error("❌ Failed projects: " + ", ".join(failed))
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    try:
+        main()
+    except KeyboardInterrupt:
+        print("\n🛑 Operation cancelled by user")
+        sys.exit(1)
+    except Exception as e:
+        print(f"💥 Critical error in main: {e}")
+        sys.exit(1)
\ No newline at end of file

From 9a471aa4a2f5be51675249be15f81ed7a236c714 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Tue, 22 Jul 2025 20:04:17 +0000
Subject: [PATCH 023/134] Fix type annotation problems in run_fuzz_target.py

---
 fuzz/run_fuzz_target.py | 29 ++++++++++++++++++++---------
 1 file changed, 20 insertions(+), 9 deletions(-)

diff --git a/fuzz/run_fuzz_target.py b/fuzz/run_fuzz_target.py
index 7b6b893..7324d3c 100644
--- a/fuzz/run_fuzz_target.py
+++ b/fuzz/run_fuzz_target.py
@@ -21,7 +21,7 @@
 from datetime import datetime
 from pathlib import Path
 from typing import List, Optional, Tuple
-from multiprocessing import Pool, cpu_count, current_process
+from multiprocessing import Pool, cpu_count, current_process, ApplyResult
 
 # --- Global configuration ---
 HOME_DIR = Path.home()
@@ -146,7 +146,7 @@ def run_command(
 def discover_targets(project_name: str, logger: logging.Logger) -> List[str]:
     """Discover available Fuzz targets with precise error handling"""
     out_dir = OSS_FUZZ_DIR / "build" / "out" / project_name
-    targets = []
+    targets: List[str] = []  # 添加类型注解
     
     try:
         if not out_dir.exists():
@@ -221,6 +221,16 @@ def run_project(project_name: str, timeout: int) -> Tuple[bool, str]:
     
     return (all_success, project_name)
 
+def _create_fake_async_result(result: bool, project_name: str) -> ApplyResult:
+    """创建兼容的异步结果包装器"""
+    from multiprocessing.pool import ApplyResult
+    from functools import partial
+    
+    def _wrapper():
+        return (result, project_name)
+    
+    return ApplyResult(None, _wrapper, ())
+
 def main():
     # Root logger config for main process
     logging.basicConfig(
@@ -260,19 +270,20 @@ def main():
         logger.exception(f"💥 Error reading project list: {e}")
         sys.exit(1)
 
-    # Parallel execution with error isolation
+    # 修改后的并行执行部分
     with Pool(args.workers) as pool:
-        results = []
+        async_results = []
         for p in projects:
             try:
-                results.append(pool.apply_async(run_project, (p, args.timeout)))
+                async_results.append(pool.apply_async(run_project, (p, args.timeout)))
             except Exception as e:
                 logger.error(f"💥 Failed to schedule project {p}: {e}")
-                results.append((False, p))
+                # 使用包装器保持类型一致
+                async_results.append(_create_fake_async_result(False, p))
         
-        # Collect results with timeout
-        final_results = []
-        for res in results:
+        # 收集结果
+        final_results: List[Tuple[bool, str]] = []
+        for res in async_results:
             try:
                 final_results.append(res.get(timeout=args.timeout * 2))
             except TimeoutError:

From 026614bafc20d06d0508de3f31a6258f36416555 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Tue, 22 Jul 2025 20:10:20 +0000
Subject: [PATCH 024/134] Remove ApplyResult import; use a local fake result class

---
 fuzz/run_fuzz_target.py | 45 +++++++++++++----------------------------
 1 file changed, 14 insertions(+), 31 deletions(-)

diff --git a/fuzz/run_fuzz_target.py b/fuzz/run_fuzz_target.py
index 7324d3c..402089d 100644
--- a/fuzz/run_fuzz_target.py
+++ b/fuzz/run_fuzz_target.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python3 
 # -*- coding: utf-8 -*-
 
 """
@@ -21,7 +21,7 @@
 from datetime import datetime
 from pathlib import Path
 from typing import List, Optional, Tuple
-from multiprocessing import Pool, cpu_count, current_process, ApplyResult
+from multiprocessing import Pool, cpu_count, current_process
 
 # --- Global configuration ---
 HOME_DIR = Path.home()
@@ -83,7 +83,6 @@ def run_command(
 
     process = None
     try:
-        # Start process with explicit timeout handling
         process = subprocess.Popen(
             cmd,
             shell=True,
@@ -94,7 +93,6 @@ def run_command(
             errors="replace"
         )
         
-        # Stream output to logger with timeout control
         start_time = time.time()
         while process.poll() is None:
             if time.time() - start_time > timeout:
@@ -106,7 +104,6 @@ def run_command(
                     process.kill()
                 return False
                 
-            # Read available output
             if process.stdout:
                 line = process.stdout.readline()
                 if line:
@@ -115,17 +112,16 @@ def run_command(
                 logger.warning("Process stdout is None")
                 time.sleep(0.1)
         
-        # Check exit code
         exit_code = process.returncode
         if exit_code not in [0, *allowed_exit_codes]:
             logger.error(f"❌ Command failed with exit code: {exit_code}")
             return False
         return True
         
-    except FileNotFoundError as e:
+    except FileNotFoundError:
         logger.error(f"🔍 Command not found: {cmd.split()[0]}")
         return False
-    except PermissionError as e:
+    except PermissionError:
         logger.error(f"🔒 Permission denied for command: {cmd}")
         return False
     except subprocess.SubprocessError as e:
@@ -135,7 +131,6 @@ def run_command(
         logger.exception(f"💥 OS error during command execution: {e}")
         return False
     finally:
-        # Ensure process is cleaned up
         if process and process.poll() is None:
             try:
                 process.terminate()
@@ -146,7 +141,7 @@ def run_command(
 def discover_targets(project_name: str, logger: logging.Logger) -> List[str]:
     """Discover available Fuzz targets with precise error handling"""
     out_dir = OSS_FUZZ_DIR / "build" / "out" / project_name
-    targets: List[str] = []  # 添加类型注解
+    targets: List[str] = []
     
     try:
         if not out_dir.exists():
@@ -183,7 +178,6 @@ def run_project(project_name: str, timeout: int) -> Tuple[bool, str]:
     logger.debug(f"📝 Log path: {[h.baseFilename for h in logger.handlers if isinstance(h, logging.FileHandler)]}")
     logger.info("=" * 60)
     
-    # 1. Discover test targets
     try:
         targets = discover_targets(project_name, logger)
         if not targets:
@@ -194,7 +188,6 @@ def run_project(project_name: str, timeout: int) -> Tuple[bool, str]:
         logger.exception(f"💥 Target discovery failed unexpectedly: {e}")
         return (False, project_name)
     
-    # 2. Run all targets
     all_success = True
     for i, target in enumerate(targets, 1):
         try:
@@ -203,8 +196,8 @@ def run_project(project_name: str, timeout: int) -> Tuple[bool, str]:
                 cmd,
                 f"Running target [{i}/{len(targets)}] {target} (timeout={timeout}s)",
                 logger,
-                allowed_exit_codes=[1, 124],  # Allow timeout exit codes
-                timeout=timeout + 300  # Add buffer for setup/teardown
+                allowed_exit_codes=[1, 124],
+                timeout=timeout + 300
             )
             all_success &= success
             if not success:
@@ -213,7 +206,6 @@ def run_project(project_name: str, timeout: int) -> Tuple[bool, str]:
             logger.exception(f"💥 Unexpected error running target {target}: {e}")
             all_success = False
     
-    # 3. Final status
     if all_success:
         logger.info(f"✅ All targets completed successfully for {project_name}")
     else:
@@ -221,18 +213,14 @@ def run_project(project_name: str, timeout: int) -> Tuple[bool, str]:
     
     return (all_success, project_name)
 
-def _create_fake_async_result(result: bool, project_name: str) -> ApplyResult:
-    """创建兼容的异步结果包装器"""
-    from multiprocessing.pool import ApplyResult
-    from functools import partial
-    
-    def _wrapper():
-        return (result, project_name)
-    
-    return ApplyResult(None, _wrapper, ())
+def _create_fake_async_result(result: bool, project_name: str):
+    """模拟 Pool.apply_async 返回值，便于错误恢复"""
+    class FakeApplyResult:
+        def get(self, timeout=None):
+            return (result, project_name)
+    return FakeApplyResult()
 
 def main():
-    # Root logger config for main process
     logging.basicConfig(
         level=logging.INFO,
         format="[%(levelname)s] %(message)s"
@@ -247,7 +235,6 @@ def main():
     
     logger.info(f"🚀 Starting parallel fuzzing (workers={args.workers}, timeout={args.timeout}s)")
 
-    # Read project list with precise error handling
     try:
         project_path = Path(args.project_list)
         if not project_path.exists():
@@ -270,7 +257,6 @@ def main():
         logger.exception(f"💥 Error reading project list: {e}")
         sys.exit(1)
 
-    # 修改后的并行执行部分
     with Pool(args.workers) as pool:
         async_results = []
         for p in projects:
@@ -278,10 +264,8 @@ def main():
                 async_results.append(pool.apply_async(run_project, (p, args.timeout)))
             except Exception as e:
                 logger.error(f"💥 Failed to schedule project {p}: {e}")
-                # 使用包装器保持类型一致
                 async_results.append(_create_fake_async_result(False, p))
         
-        # 收集结果
         final_results: List[Tuple[bool, str]] = []
         for res in async_results:
             try:
@@ -293,7 +277,6 @@ def main():
                 logger.error(f"💥 Error collecting result: {e}")
                 final_results.append((False, "unknown"))
 
-    # Output results
     failed = [p for success, p in final_results if not success]
     logger.info(f"\n📊 Testing completed: Success {len(projects)-len(failed)}/{len(projects)}")
     if failed:
@@ -307,4 +290,4 @@ def main():
         sys.exit(1)
     except Exception as e:
         print(f"💥 Critical error in main: {e}")
-        sys.exit(1)
\ No newline at end of file
+        sys.exit(1)

From 74f44dc97660aa881991721b2494e056a2cc410a Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Tue, 22 Jul 2025 20:32:46 +0000
Subject: [PATCH 025/134] Replace fake-result helper with module-level FakeResult class

---
 fuzz/run_fuzz_target.py | 51 ++++++++++++++++++++++++++---------------
 1 file changed, 33 insertions(+), 18 deletions(-)

diff --git a/fuzz/run_fuzz_target.py b/fuzz/run_fuzz_target.py
index 402089d..326971c 100644
--- a/fuzz/run_fuzz_target.py
+++ b/fuzz/run_fuzz_target.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3 
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
 """
@@ -17,7 +17,6 @@
 import argparse
 import logging
 import time
-import shutil
 from datetime import datetime
 from pathlib import Path
 from typing import List, Optional, Tuple
@@ -28,6 +27,14 @@
 OSS_FUZZ_DIR = HOME_DIR / "FuzzAug" / "fuzz" / "oss-fuzz"
 LOG_DIR = OSS_FUZZ_DIR / "run_logs"
 
+class FakeResult:
+    """模拟ApplyResult的对象，用于在任务调度失败时返回结果"""
+    def __init__(self, result: bool, project_name: str):
+        self.result = (result, project_name)
+    
+    def get(self, timeout=None) -> Tuple[bool, str]:
+        return self.result
+
 def setup_logging(project_name: str) -> logging.Logger:
     """Configure hierarchical logger with file and console handlers"""
     try:
@@ -83,6 +90,7 @@ def run_command(
 
     process = None
     try:
+        # Start process with explicit timeout handling
         process = subprocess.Popen(
             cmd,
             shell=True,
@@ -93,9 +101,11 @@ def run_command(
             errors="replace"
         )
         
+        # Stream output to logger with timeout control
         start_time = time.time()
         while process.poll() is None:
-            if time.time() - start_time > timeout:
+            elapsed = time.time() - start_time
+            if elapsed > timeout:
                 logger.error(f"⌛ Command timed out after {timeout} seconds")
                 process.terminate()
                 try:
@@ -104,24 +114,25 @@ def run_command(
                     process.kill()
                 return False
                 
+            # Read available output
             if process.stdout:
                 line = process.stdout.readline()
                 if line:
                     logger.debug(line.strip())
             else:
-                logger.warning("Process stdout is None")
-                time.sleep(0.1)
+                time.sleep(0.1)  # 减少日志噪音
         
+        # Check exit code
         exit_code = process.returncode
         if exit_code not in [0, *allowed_exit_codes]:
             logger.error(f"❌ Command failed with exit code: {exit_code}")
             return False
         return True
         
-    except FileNotFoundError:
+    except FileNotFoundError as e:
         logger.error(f"🔍 Command not found: {cmd.split()[0]}")
         return False
-    except PermissionError:
+    except PermissionError as e:
         logger.error(f"🔒 Permission denied for command: {cmd}")
         return False
     except subprocess.SubprocessError as e:
@@ -131,6 +142,7 @@ def run_command(
         logger.exception(f"💥 OS error during command execution: {e}")
         return False
     finally:
+        # Ensure process is cleaned up
         if process and process.poll() is None:
             try:
                 process.terminate()
@@ -178,6 +190,7 @@ def run_project(project_name: str, timeout: int) -> Tuple[bool, str]:
     logger.debug(f"📝 Log path: {[h.baseFilename for h in logger.handlers if isinstance(h, logging.FileHandler)]}")
     logger.info("=" * 60)
     
+    # 1. Discover test targets
     try:
         targets = discover_targets(project_name, logger)
         if not targets:
@@ -188,6 +201,7 @@ def run_project(project_name: str, timeout: int) -> Tuple[bool, str]:
         logger.exception(f"💥 Target discovery failed unexpectedly: {e}")
         return (False, project_name)
     
+    # 2. Run all targets
     all_success = True
     for i, target in enumerate(targets, 1):
         try:
@@ -196,8 +210,8 @@ def run_project(project_name: str, timeout: int) -> Tuple[bool, str]:
                 cmd,
                 f"Running target [{i}/{len(targets)}] {target} (timeout={timeout}s)",
                 logger,
-                allowed_exit_codes=[1, 124],
-                timeout=timeout + 300
+                allowed_exit_codes=[1, 124],  # Allow timeout exit codes
+                timeout=timeout + 300  # Add buffer for setup/teardown
             )
             all_success &= success
             if not success:
@@ -206,6 +220,7 @@ def run_project(project_name: str, timeout: int) -> Tuple[bool, str]:
             logger.exception(f"💥 Unexpected error running target {target}: {e}")
             all_success = False
     
+    # 3. Final status
     if all_success:
         logger.info(f"✅ All targets completed successfully for {project_name}")
     else:
@@ -213,14 +228,8 @@ def run_project(project_name: str, timeout: int) -> Tuple[bool, str]:
     
     return (all_success, project_name)
 
-def _create_fake_async_result(result: bool, project_name: str):
-    """模拟 Pool.apply_async 返回值，便于错误恢复"""
-    class FakeApplyResult:
-        def get(self, timeout=None):
-            return (result, project_name)
-    return FakeApplyResult()
-
 def main():
+    # Root logger config for main process
     logging.basicConfig(
         level=logging.INFO,
         format="[%(levelname)s] %(message)s"
@@ -235,6 +244,7 @@ def main():
     
     logger.info(f"🚀 Starting parallel fuzzing (workers={args.workers}, timeout={args.timeout}s)")
 
+    # Read project list with precise error handling
     try:
         project_path = Path(args.project_list)
         if not project_path.exists():
@@ -257,6 +267,7 @@ def main():
         logger.exception(f"💥 Error reading project list: {e}")
         sys.exit(1)
 
+    # Parallel execution with error isolation
     with Pool(args.workers) as pool:
         async_results = []
         for p in projects:
@@ -264,11 +275,14 @@ def main():
                 async_results.append(pool.apply_async(run_project, (p, args.timeout)))
             except Exception as e:
                 logger.error(f"💥 Failed to schedule project {p}: {e}")
-                async_results.append(_create_fake_async_result(False, p))
+                # 使用自定义的FakeResult替代ApplyResult
+                async_results.append(FakeResult(False, p))
         
+        # Collect results with timeout
         final_results: List[Tuple[bool, str]] = []
         for res in async_results:
             try:
+                # 使用双倍超时时间确保结果收集
                 final_results.append(res.get(timeout=args.timeout * 2))
             except TimeoutError:
                 logger.error("⌛ Project execution timed out")
@@ -277,6 +291,7 @@ def main():
                 logger.error(f"💥 Error collecting result: {e}")
                 final_results.append((False, "unknown"))
 
+    # Output results
     failed = [p for success, p in final_results if not success]
     logger.info(f"\n📊 Testing completed: Success {len(projects)-len(failed)}/{len(projects)}")
     if failed:
@@ -290,4 +305,4 @@ def main():
         sys.exit(1)
     except Exception as e:
         print(f"💥 Critical error in main: {e}")
-        sys.exit(1)
+        sys.exit(1)
\ No newline at end of file

From 413c2dd78ba6b63c1ae5a34c9dd7fca36b65f2f2 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Tue, 22 Jul 2025 20:45:17 +0000
Subject: [PATCH 026/134] Revert FakeResult class; restore _create_fake_async_result helper

---
 fuzz/run_fuzz_target.py | 49 ++++++++++++++---------------------------
 1 file changed, 17 insertions(+), 32 deletions(-)

diff --git a/fuzz/run_fuzz_target.py b/fuzz/run_fuzz_target.py
index 326971c..2432289 100644
--- a/fuzz/run_fuzz_target.py
+++ b/fuzz/run_fuzz_target.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python3 
 # -*- coding: utf-8 -*-
 
 """
@@ -17,6 +17,7 @@
 import argparse
 import logging
 import time
+import shutil
 from datetime import datetime
 from pathlib import Path
 from typing import List, Optional, Tuple
@@ -27,14 +28,6 @@
 OSS_FUZZ_DIR = HOME_DIR / "FuzzAug" / "fuzz" / "oss-fuzz"
 LOG_DIR = OSS_FUZZ_DIR / "run_logs"
 
-class FakeResult:
-    """模拟ApplyResult的对象，用于在任务调度失败时返回结果"""
-    def __init__(self, result: bool, project_name: str):
-        self.result = (result, project_name)
-    
-    def get(self, timeout=None) -> Tuple[bool, str]:
-        return self.result
-
 def setup_logging(project_name: str) -> logging.Logger:
     """Configure hierarchical logger with file and console handlers"""
     try:
@@ -90,7 +83,6 @@ def run_command(
 
     process = None
     try:
-        # Start process with explicit timeout handling
         process = subprocess.Popen(
             cmd,
             shell=True,
@@ -101,11 +93,9 @@ def run_command(
             errors="replace"
         )
         
-        # Stream output to logger with timeout control
         start_time = time.time()
         while process.poll() is None:
-            elapsed = time.time() - start_time
-            if elapsed > timeout:
+            if time.time() - start_time > timeout:
                 logger.error(f"⌛ Command timed out after {timeout} seconds")
                 process.terminate()
                 try:
@@ -114,25 +104,24 @@ def run_command(
                     process.kill()
                 return False
                 
-            # Read available output
             if process.stdout:
                 line = process.stdout.readline()
                 if line:
                     logger.debug(line.strip())
             else:
-                time.sleep(0.1)  # 减少日志噪音
+                logger.warning("Process stdout is None")
+                time.sleep(0.1)
         
-        # Check exit code
         exit_code = process.returncode
         if exit_code not in [0, *allowed_exit_codes]:
             logger.error(f"❌ Command failed with exit code: {exit_code}")
             return False
         return True
         
-    except FileNotFoundError as e:
+    except FileNotFoundError:
         logger.error(f"🔍 Command not found: {cmd.split()[0]}")
         return False
-    except PermissionError as e:
+    except PermissionError:
         logger.error(f"🔒 Permission denied for command: {cmd}")
         return False
     except subprocess.SubprocessError as e:
@@ -142,7 +131,6 @@ def run_command(
         logger.exception(f"💥 OS error during command execution: {e}")
         return False
     finally:
-        # Ensure process is cleaned up
         if process and process.poll() is None:
             try:
                 process.terminate()
@@ -190,7 +178,6 @@ def run_project(project_name: str, timeout: int) -> Tuple[bool, str]:
     logger.debug(f"📝 Log path: {[h.baseFilename for h in logger.handlers if isinstance(h, logging.FileHandler)]}")
     logger.info("=" * 60)
     
-    # 1. Discover test targets
     try:
         targets = discover_targets(project_name, logger)
         if not targets:
@@ -201,7 +188,6 @@ def run_project(project_name: str, timeout: int) -> Tuple[bool, str]:
         logger.exception(f"💥 Target discovery failed unexpectedly: {e}")
         return (False, project_name)
     
-    # 2. Run all targets
     all_success = True
     for i, target in enumerate(targets, 1):
         try:
@@ -210,8 +196,8 @@ def run_project(project_name: str, timeout: int) -> Tuple[bool, str]:
                 cmd,
                 f"Running target [{i}/{len(targets)}] {target} (timeout={timeout}s)",
                 logger,
-                allowed_exit_codes=[1, 124],  # Allow timeout exit codes
-                timeout=timeout + 300  # Add buffer for setup/teardown
+                allowed_exit_codes=[1, 124],
+                timeout=timeout + 300
             )
             all_success &= success
             if not success:
@@ -220,7 +206,6 @@ def run_project(project_name: str, timeout: int) -> Tuple[bool, str]:
             logger.exception(f"💥 Unexpected error running target {target}: {e}")
             all_success = False
     
-    # 3. Final status
     if all_success:
         logger.info(f"✅ All targets completed successfully for {project_name}")
     else:
@@ -228,8 +213,14 @@ def run_project(project_name: str, timeout: int) -> Tuple[bool, str]:
     
     return (all_success, project_name)
 
+def _create_fake_async_result(result: bool, project_name: str):
+    """模拟 Pool.apply_async 返回值，便于错误恢复"""
+    class FakeApplyResult:
+        def get(self, timeout=None):
+            return (result, project_name)
+    return FakeApplyResult()
+
 def main():
-    # Root logger config for main process
     logging.basicConfig(
         level=logging.INFO,
         format="[%(levelname)s] %(message)s"
@@ -244,7 +235,6 @@ def main():
     
     logger.info(f"🚀 Starting parallel fuzzing (workers={args.workers}, timeout={args.timeout}s)")
 
-    # Read project list with precise error handling
     try:
         project_path = Path(args.project_list)
         if not project_path.exists():
@@ -267,7 +257,6 @@ def main():
         logger.exception(f"💥 Error reading project list: {e}")
         sys.exit(1)
 
-    # Parallel execution with error isolation
     with Pool(args.workers) as pool:
         async_results = []
         for p in projects:
@@ -275,14 +264,11 @@ def main():
                 async_results.append(pool.apply_async(run_project, (p, args.timeout)))
             except Exception as e:
                 logger.error(f"💥 Failed to schedule project {p}: {e}")
-                # 使用自定义的FakeResult替代ApplyResult
-                async_results.append(FakeResult(False, p))
+                async_results.append(_create_fake_async_result(False, p))
         
-        # Collect results with timeout
         final_results: List[Tuple[bool, str]] = []
         for res in async_results:
             try:
-                # 使用双倍超时时间确保结果收集
                 final_results.append(res.get(timeout=args.timeout * 2))
             except TimeoutError:
                 logger.error("⌛ Project execution timed out")
@@ -291,7 +277,6 @@ def main():
                 logger.error(f"💥 Error collecting result: {e}")
                 final_results.append((False, "unknown"))
 
-    # Output results
     failed = [p for success, p in final_results if not success]
     logger.info(f"\n📊 Testing completed: Success {len(projects)-len(failed)}/{len(projects)}")
     if failed:

From c8b755dba39b5fa640e9d3c82c4b46e06fdbede1 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Tue, 22 Jul 2025 21:53:48 +0000
Subject: [PATCH 027/134] Restrict fuzz target discovery to extensionless files

---
 fuzz/run_fuzz_target.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/fuzz/run_fuzz_target.py b/fuzz/run_fuzz_target.py
index 2432289..20f3e67 100644
--- a/fuzz/run_fuzz_target.py
+++ b/fuzz/run_fuzz_target.py
@@ -139,7 +139,7 @@ def run_command(
                 pass
 
 def discover_targets(project_name: str, logger: logging.Logger) -> List[str]:
-    """Discover available Fuzz targets with precise error handling"""
+    """Discover fuzz targets (fuzz_ prefix, no extension, executable)"""
     out_dir = OSS_FUZZ_DIR / "build" / "out" / project_name
     targets: List[str] = []
     
@@ -150,7 +150,11 @@ def discover_targets(project_name: str, logger: logging.Logger) -> List[str]:
             
         for f in out_dir.iterdir():
             try:
-                if f.is_file() and f.name.startswith("fuzz_") and os.access(f, os.X_OK):
+                # 核心修改：检查无后缀的可执行文件
+                if (f.is_file() and 
+                    f.name.startswith("fuzz_") and 
+                    '.' not in f.name and  # 确保无文件后缀
+                    os.access(f, os.X_OK)):  # 确保可执行权限
                     targets.append(f.name)
             except OSError as e:
                 logger.warning(f"⚠️ Skipping file {f.name} due to access error: {e}")

From 4b92185c6cb84d4226ed862f6e188eb4afd7ec2b Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Tue, 22 Jul 2025 22:44:06 +0000
Subject: [PATCH 028/134] Take the OSS-Fuzz directory from --oss-fuzz-dir

---
 fuzz/build_oss_fuzz.py | 178 +++++++++++++++++++++++------------------
 1 file changed, 100 insertions(+), 78 deletions(-)

diff --git a/fuzz/build_oss_fuzz.py b/fuzz/build_oss_fuzz.py
index e4d2b3b..ba70a62 100644
--- a/fuzz/build_oss_fuzz.py
+++ b/fuzz/build_oss_fuzz.py
@@ -5,105 +5,117 @@
 build_oss_fuzz.py
 
 Parallel build of OSS-Fuzz projects (Docker images and Fuzzer compilation).
-Uses multiprocessing.Pool to distribute projects across multiple CPU cores for concurrent processing.
+Uses multiprocessing.Pool to distribute projects across multiple CPU cores.
 
-Usage: python3 build_oss_fuzz.py [project_list_file] [--sanitizer type] [--workers N]
+Usage: python3 build_oss_fuzz.py [project_list_file] --oss-fuzz-dir /path/to/oss-fuzz \
+    [--sanitizer type] [--workers N]
 Example: python3 fuzz/build_oss_fuzz.py data/valid_projects.txt \
+    --oss-fuzz-dir ./fuzz/oss-fuzz \
     --sanitizer address \
     --workers 8
 """
+
 import os
 import sys
 import subprocess
 import argparse
+import logging
 from pathlib import Path
 from typing import List, Optional, Tuple
 from multiprocessing import Pool, cpu_count
-import logging
 
-# --- Global configuration ---
-HOME_DIR = Path.home()
-OSS_FUZZ_DIR = HOME_DIR / "FuzzAug" / "fuzz" / "oss-fuzz"
+class CommandExecutionError(Exception):
+    """Custom command execution exception"""
+    def __init__(self, message: str, exit_code: Optional[int] = None):
+        super().__init__(message)
+        self.exit_code = exit_code
+
 def run_command(
     cmd: str,
-    log_msg: str,
+    oss_fuzz_dir: Path,
     allowed_exit_codes: Optional[List[int]] = None
-) -> bool:
-    """Execute a shell command and stream output to console"""
-    allowed_exit_codes = allowed_exit_codes or []
-
-    logging.info(f"▶️ {log_msg}")
-    logging.debug(f"$ {cmd}")
-
+) -> int:
+    """Execute a command and return the exit code, throws CommandExecutionError on failure"""
+    allowed_exit_codes = allowed_exit_codes or [0]
+    logging.info(f"▶️ Executing command: {cmd}")
+    
     try:
         process = subprocess.Popen(
             f"yes | {cmd}",
             shell=True,
+            cwd=str(oss_fuzz_dir),
             stdout=subprocess.PIPE,
             stderr=subprocess.STDOUT,
             text=True,
             encoding="utf-8",
             errors="replace"
         )
-    except FileNotFoundError:
-        logging.error(f"Command not found: {cmd}")
-        return False
-    except OSError as e:
-        logging.error(f"OS error while executing command: {e}")
-        return False
-    except ValueError as e:
-        logging.error(f"Invalid arguments to Popen: {e}")
-        return False
-
-    try:
-        if process.stdout:
-            for line in iter(process.stdout.readline, ""):
-                sys.stdout.write(line)
-                sys.stdout.flush()
+        
+        # Real-time streaming output processing
+        for line in iter(process.stdout.readline, ''):
+            sys.stdout.write(line)
+            sys.stdout.flush()
+        
         process.wait()
         exit_code = process.returncode
-        if exit_code in [0, *allowed_exit_codes]:
-            logging.info("✅ Command completed successfully")
-            return True
-        logging.error(f"❌ Command failed (exit code: {exit_code})")
-        return False
-    except KeyboardInterrupt:
-        logging.warning("⛔️ Command interrupted by user")
-        process.terminate()
-        return False
-    except Exception as e:
-        logging.exception(f"Unexpected error during process execution: {e}")
-        return False
-
-
-def build_project(project_name: str, sanitizer: str) -> Tuple[bool, str]:
-    """Build workflow for a single project"""
-    os.chdir(OSS_FUZZ_DIR)
-
-    logging.info("=" * 60)
-    logging.info(f"🔨 Starting build for project: {project_name}")
-    logging.info("=" * 60)
+        
+        if exit_code in allowed_exit_codes:
+            return exit_code
+        raise CommandExecutionError(
+            f"Command failed (exit code: {exit_code})", 
+            exit_code=exit_code
+        )
+    
+    except FileNotFoundError as e:
+        raise CommandExecutionError(f"Command not found: {cmd.split()[0]}") from e
+    except OSError as e:
+        raise CommandExecutionError(f"System error: {e}") from e
+    except subprocess.SubprocessError as e:
+        raise CommandExecutionError(f"Subprocess error: {e}") from e
 
-    if not run_command(
-        f"python3 infra/helper.py build_image {project_name}",
-        "Step 1/2: Building Docker image"
-    ):
+def build_project(project_name: str, sanitizer: str, oss_fuzz_dir: Path) -> Tuple[bool, str]:
+    """Project build workflow"""
+    try:
+        logging.info("=" * 60)
+        logging.info(f"🔨 Starting build for project: {project_name}")
+        logging.info(f"📁 OSS-Fuzz directory: {oss_fuzz_dir}")
+        logging.info("=" * 60)
+        
+        # Validate paths
+        helper_script = oss_fuzz_dir / "infra" / "helper.py"
+        if not helper_script.exists():
+            raise FileNotFoundError(f"Critical script missing: {helper_script}")
+        
+        # Execute build commands
+        run_command(
+            f"python3 infra/helper.py build_image {project_name}",
+            oss_fuzz_dir
+        )
+        run_command(
+            f"python3 infra/helper.py build_fuzzers --sanitizer {sanitizer} {project_name}",
+            oss_fuzz_dir
+        )
+        
+        logging.info(f"✅ Project {project_name} built successfully")
+        return (True, project_name)
+    
+    except CommandExecutionError as e:
+        logging.error(f"❌ Project {project_name} build failed: {str(e)}")
         return (False, project_name)
-
-    if not run_command(
-        f"python3 infra/helper.py build_fuzzers --sanitizer {sanitizer} {project_name}",
-        f"Step 2/2: Compiling Fuzzers (sanitizer={sanitizer})"
-    ):
+    except Exception as e:
+        logging.exception(f"🔥 Unhandled exception: {e}")
         return (False, project_name)
 
-    logging.info(f"✅ Project {project_name} build completed")
-    return (True, project_name)
-
 def main():
-    parser = argparse.ArgumentParser(description="OSS-Fuzz Parallel Build Tool")
+    parser = argparse.ArgumentParser(description="OSS-Fuzz parallel build tool")
     parser.add_argument("project_list", help="Project list file path")
-    parser.add_argument("--sanitizer", default="address", choices=["address", "memory", "undefined"])
-    parser.add_argument("--workers", type=int, default=cpu_count())
+    parser.add_argument("--oss-fuzz-dir", required=True, type=str, 
+                        help="OSS-Fuzz directory path")
+    parser.add_argument("--sanitizer", default="address", 
+                        choices=["address", "memory", "undefined"],
+                        help="Fuzzer sanitizer type")
+    parser.add_argument("--workers", type=int, default=cpu_count(),
+                        help="Number of parallel worker processes")
     args = parser.parse_args()
 
     logging.basicConfig(
@@ -111,28 +123,38 @@ def main():
         format='[%(levelname)s] [PID:%(process)d] %(message)s'
     )
 
-    if not os.path.isfile(args.project_list):
-        logging.error(f"Project list file not found: {args.project_list}")
-        sys.exit(1)
+    # Process paths
+    oss_fuzz_dir = Path(args.oss_fuzz_dir).resolve()
+    logging.info(f"📁 Using OSS-Fuzz directory: {oss_fuzz_dir}")
 
+    # Read project list
     try:
         with open(args.project_list, "r", encoding="utf-8") as f:
             projects = [line.strip() for line in f if line.strip()]
-    except OSError as e:
-        logging.error(f"OS error while reading project list: {e}")
-        sys.exit(1)
-    except UnicodeDecodeError as e:
-        logging.error(f"Encoding error while reading file: {e}")
+        logging.info(f"📋 Loaded {len(projects)} projects")
+    except Exception as e:
+        logging.error(f"❌ Failed to read project list: {e}")
         sys.exit(1)
 
+    # Parallel build
     with Pool(args.workers) as pool:
-        results = pool.starmap(build_project, [(p, args.sanitizer) for p in projects])
+        results = pool.starmap(
+            build_project, 
+            [(p, args.sanitizer, oss_fuzz_dir) for p in projects]
+        )
 
+    # Output results
     failed = [p for success, p in results if not success]
-    logging.info(f"\n📊 Build completed: Success {len(projects) - len(failed)}/{len(projects)}")
+    logging.info(f"\n📊 Build completed: Successful {len(projects)-len(failed)}/{len(projects)}")
     if failed:
-        logging.warning("❌ Failed projects: " + ", ".join(failed))
+        logging.error("❌ Failed projects: " + ", ".join(failed))
 
 if __name__ == "__main__":
-    main()
-
+    try:
+        main()
+    except KeyboardInterrupt:
+        print("\n🛑 Operation interrupted by user")
+        sys.exit(1)
+    except Exception as e:
+        print(f"💥 Critical error: {e}")
+        sys.exit(1)
\ No newline at end of file

From a8e58e89d2d85832268c0bd0184b048d600ef8fd Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Tue, 22 Jul 2025 22:54:07 +0000
Subject: [PATCH 029/134] Redirect command output to the null device,
 discarding all output

---
 fuzz/build_oss_fuzz.py | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/fuzz/build_oss_fuzz.py b/fuzz/build_oss_fuzz.py
index ba70a62..14b79e7 100644
--- a/fuzz/build_oss_fuzz.py
+++ b/fuzz/build_oss_fuzz.py
@@ -40,22 +40,16 @@ def run_command(
     logging.info(f"▶️ Executing command: {cmd}")
     
     try:
+        # Remove all stdout/stderr capture logic and execute the command directly
         process = subprocess.Popen(
             f"yes | {cmd}",
             shell=True,
             cwd=str(oss_fuzz_dir),
-            stdout=subprocess.PIPE,
-            stderr=subprocess.STDOUT,
-            text=True,
-            encoding="utf-8",
-            errors="replace"
+            # Redirect the output to an empty device without retaining any output
+            stdout=subprocess.DEVNULL,
+            stderr=subprocess.DEVNULL
         )
         
-        # Real-time streaming output processing
-        for line in iter(process.stdout.readline, ''):
-            sys.stdout.write(line)
-            sys.stdout.flush()
-        
         process.wait()
         exit_code = process.returncode
         
@@ -86,7 +80,7 @@ def build_project(project_name: str, sanitizer: str, oss_fuzz_dir: Path) -> Tupl
         if not helper_script.exists():
             raise FileNotFoundError(f"Critical script missing: {helper_script}")
         
-        # Execute build commands
+        # Execute build commands (The output has been disabled)
         run_command(
             f"python3 infra/helper.py build_image {project_name}",
             oss_fuzz_dir

From ae7e7c126da402dcf899c8cdf8053639e0d30b61 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Tue, 22 Jul 2025 23:10:12 +0000
Subject: [PATCH 030/134] Add build_images.py, piping yes into build commands

---
 fuzz/build_images.py | 158 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 158 insertions(+)
 create mode 100644 fuzz/build_images.py

diff --git a/fuzz/build_images.py b/fuzz/build_images.py
new file mode 100644
index 0000000..17c7bfc
--- /dev/null
+++ b/fuzz/build_images.py
@@ -0,0 +1,158 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""
+build_images.py
+
+Parallel build of OSS-Fuzz Docker images.
+Uses multiprocessing.Pool to distribute projects across multiple CPU cores.
+
+Usage: python3 build_images.py [project_list_file] --oss-fuzz-dir /path/to/oss-fuzz [--workers N]
+Example: python3 fuzz/build_images.py data/valid_projects.txt \
+    --oss-fuzz-dir ./fuzz/oss-fuzz \
+    --workers 4
+"""
+
+import os
+import sys
+import subprocess
+import argparse
+import logging
+import json
+from pathlib import Path
+from typing import List, Optional, Tuple
+from multiprocessing import Pool, cpu_count
+
+class CommandExecutionError(Exception):
+    """Custom command execution exception"""
+    def __init__(self, message: str, exit_code: Optional[int] = None):
+        super().__init__(message)
+        self.exit_code = exit_code
+
+def run_command(
+    cmd: str,
+    oss_fuzz_dir: Path,
+    allowed_exit_codes: Optional[List[int]] = None
+) -> int:
+    """Execute a command and return the exit code, throws CommandExecutionError on failure"""
+    allowed_exit_codes = allowed_exit_codes or [0]
+    logging.info(f"▶️ Executing command: {cmd}")
+    
+    try:
+        process = subprocess.Popen(
+            f"yes | {cmd}",
+            shell=True,
+            cwd=str(oss_fuzz_dir),
+            stdout=subprocess.DEVNULL,
+            stderr=subprocess.DEVNULL
+        )
+        
+        process.wait()
+        exit_code = process.returncode
+        
+        if exit_code in allowed_exit_codes:
+            return exit_code
+        raise CommandExecutionError(
+            f"Command failed (exit code: {exit_code})", 
+            exit_code=exit_code
+        )
+    
+    except FileNotFoundError as e:
+        raise CommandExecutionError(f"Command not found: {cmd.split()[0]}") from e
+    except OSError as e:
+        raise CommandExecutionError(f"System error: {e}") from e
+    except subprocess.SubprocessError as e:
+        raise CommandExecutionError(f"Subprocess error: {e}") from e
+
+def build_image(project_name: str, oss_fuzz_dir: Path) -> Tuple[bool, str]:
+    """Docker image build workflow"""
+    try:
+        logging.info("=" * 60)
+        logging.info(f"🔨 Starting Docker build for project: {project_name}")
+        logging.info(f"📁 OSS-Fuzz directory: {oss_fuzz_dir}")
+        logging.info("=" * 60)
+        
+        # Validate paths
+        helper_script = oss_fuzz_dir / "infra" / "helper.py"
+        if not helper_script.exists():
+            raise FileNotFoundError(f"Critical script missing: {helper_script}")
+        
+        # Execute image build command
+        run_command(
+            f"python3 infra/helper.py build_image {project_name}",
+            oss_fuzz_dir
+        )
+        
+        logging.info(f"✅ Docker image for {project_name} built successfully")
+        return (True, project_name)
+    
+    except CommandExecutionError as e:
+        logging.error(f"❌ Docker build for {project_name} failed: {str(e)}")
+        return (False, project_name)
+    except Exception as e:
+        logging.exception(f"🔥 Unhandled exception: {e}")
+        return (False, project_name)
+
+def main():
+    parser = argparse.ArgumentParser(description="OSS-Fuzz Docker Image Builder")
+    parser.add_argument("project_list", help="Project list file path")
+    parser.add_argument("--oss-fuzz-dir", required=True, type=str, 
+                        help="OSS-Fuzz directory path")
+    parser.add_argument("--workers", type=int, default=cpu_count(),
+                        help="Number of parallel worker processes")
+    parser.add_argument("--output", default="image_build_results.json",
+                        help="Output file for build results")
+    args = parser.parse_args()
+
+    logging.basicConfig(
+        level=logging.INFO,
+        format='[%(levelname)s] [PID:%(process)d] %(message)s'
+    )
+
+    # Process paths
+    oss_fuzz_dir = Path(args.oss_fuzz_dir).resolve()
+    logging.info(f"📁 Using OSS-Fuzz directory: {oss_fuzz_dir}")
+
+    # Read project list
+    try:
+        with open(args.project_list, "r", encoding="utf-8") as f:
+            projects = [line.strip() for line in f if line.strip()]
+        logging.info(f"📋 Loaded {len(projects)} projects")
+    except Exception as e:
+        logging.error(f"❌ Failed to read project list: {e}")
+        sys.exit(1)
+
+    # Parallel image builds
+    with Pool(args.workers) as pool:
+        results = pool.starmap(
+            build_image, 
+            [(p, oss_fuzz_dir) for p in projects]
+        )
+
+    # Output results
+    build_results = {project: success for success, project in results}
+    failed = [p for p in projects if not build_results[p]]
+    
+    logging.info(f"\n📊 Docker image builds completed: "
+                 f"Successful {len(projects)-len(failed)}/{len(projects)}")
+    
+    if failed:
+        logging.error("❌ Failed projects: " + ", ".join(failed))
+    
+    # Save build results to JSON file
+    try:
+        with open(args.output, "w") as f:
+            json.dump(build_results, f)
+        logging.info(f"💾 Build results saved to: {args.output}")
+    except Exception as e:
+        logging.error(f"❌ Failed to save build results: {e}")
+
+if __name__ == "__main__":
+    try:
+        main()
+    except KeyboardInterrupt:
+        print("\n🛑 Operation interrupted by user")
+        sys.exit(1)
+    except Exception as e:
+        print(f"💥 Critical error: {e}")
+        sys.exit(1)
\ No newline at end of file

From 2dc8f97914ac65e326ee5ad7a54f8907185d6412 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Wed, 23 Jul 2025 00:19:51 +0000
Subject: [PATCH 031/134] Split the build script: add build_fuzzers.py

---
 fuzz/build_fuzzers.py | 160 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 160 insertions(+)
 create mode 100644 fuzz/build_fuzzers.py

diff --git a/fuzz/build_fuzzers.py b/fuzz/build_fuzzers.py
new file mode 100644
index 0000000..e8d2806
--- /dev/null
+++ b/fuzz/build_fuzzers.py
@@ -0,0 +1,160 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""
+build_fuzzers.py
+
+OSS-Fuzz模糊测试器并行构建工具
+
+用法: python3 build_fuzzers.py [项目列表文件] --oss-fuzz-dir /path/to/oss-fuzz \
+    [--sanitizer type] [--workers N]
+示例: python3 fuzz/build_fuzzers.py data/valid_projects.txt \
+    --oss-fuzz-dir ./fuzz/oss-fuzz \
+    --sanitizer address \
+    --workers 8
+"""
+
+import os
+import sys
+import subprocess
+import argparse
+import logging
+from pathlib import Path
+from typing import List, Optional, Tuple
+from multiprocessing import Pool, cpu_count
+
+class BuildError(Exception):
+    """基础构建异常"""
+    def __init__(self, message: str, project: str = "", exit_code: int = None):
+        super().__init__(message)
+        self.project = project
+        self.exit_code = exit_code
+
+class CommandError(BuildError):
+    """命令执行异常"""
+    pass
+
+class PathError(BuildError):
+    """路径相关异常"""
+    pass
+
+def run_command(
+    cmd: str,
+    oss_fuzz_dir: Path,
+    allowed_exit_codes: Optional[List[int]] = None
+) -> int:
+    """执行命令并返回退出码"""
+    allowed_exit_codes = allowed_exit_codes or [0]
+    logging.info(f"▶️ 执行命令: {cmd}")
+    
+    try:
+        process = subprocess.Popen(
+            cmd,
+            shell=True,
+            cwd=str(oss_fuzz_dir),
+            stdout=subprocess.DEVNULL,
+            stderr=subprocess.DEVNULL
+        )
+        
+        process.wait()
+        exit_code = process.returncode
+        
+        if exit_code in allowed_exit_codes:
+            return exit_code
+        raise CommandError(f"命令失败", exit_code=exit_code)
+    
+    except FileNotFoundError as e:
+        raise CommandError(f"命令不存在: {cmd.split()[0]}") from e
+    except OSError as e:
+        raise CommandError(f"系统错误: {e}") from e
+    except subprocess.SubprocessError as e:
+        raise CommandError(f"子进程错误: {e}") from e
+
+def build_fuzzers(project_name: str, sanitizer: str, oss_fuzz_dir: Path) -> Tuple[bool, str]:
+    """模糊测试器构建工作流"""
+    try:
+        logging.info(f"🔧 开始构建模糊测试器: {project_name}")
+        
+        # 验证路径
+        helper_script = oss_fuzz_dir / "infra" / "helper.py"
+        if not helper_script.exists():
+            raise PathError(f"关键脚本缺失: {helper_script}")
+        
+        # 执行模糊测试器构建命令
+        run_command(
+            f"python3 infra/helper.py build_fuzzers --sanitizer {sanitizer} {project_name}",
+            oss_fuzz_dir
+        )
+        
+        logging.info(f"✅ 模糊测试器构建成功: {project_name}")
+        return (True, project_name)
+    
+    except BuildError as e:
+        logging.error(f"❌ 模糊测试器构建失败: {project_name} - {str(e)}")
+        return (False, project_name)
+    except Exception as e:
+        logging.exception(f"🔥 未处理异常: {project_name}")
+        return (False, project_name)
+
+def main():
+    parser = argparse.ArgumentParser(description="OSS-Fuzz模糊测试器构建工具")
+    parser.add_argument("project_list", help="项目列表文件路径")
+    parser.add_argument("--oss-fuzz-dir", required=True, type=str, 
+                        help="OSS-Fuzz目录路径")
+    parser.add_argument("--sanitizer", default="address", 
+                        choices=["address", "memory", "undefined"],
+                        help="模糊测试器检测器类型")
+    parser.add_argument("--workers", type=int, default=cpu_count(),
+                        help="并行工作进程数")
+    args = parser.parse_args()
+
+    logging.basicConfig(
+        level=logging.INFO,
+        format='[%(levelname)s] %(message)s'
+    )
+
+    # 处理路径
+    oss_fuzz_dir = Path(args.oss_fuzz_dir).resolve()
+    logging.info(f"📁 OSS-Fuzz目录: {oss_fuzz_dir}")
+
+    # 读取项目列表
+    try:
+        with open(args.project_list, "r", encoding="utf-8") as f:
+            projects = [line.strip() for line in f if line.strip()]
+        logging.info(f"📋 加载项目数: {len(projects)}")
+    except Exception as e:
+        logging.error(f"❌ 读取项目列表失败: {e}")
+        sys.exit(1)
+
+    # 并行模糊测试器构建
+    with Pool(args.workers) as pool:
+        results = pool.starmap(
+            build_fuzzers, 
+            [(p, args.sanitizer, oss_fuzz_dir) for p in projects]
+        )
+
+    # 输出结果
+    fuzzer_results = {project: success for success, project in results}
+    failed = [p for p in projects if not fuzzer_results[p]]
+    
+    success_count = len(projects) - len(failed)
+    logging.info(f"\n📊 构建完成: {success_count}/{len(projects)}")
+    
+    if failed:
+        logging.error("❌ 失败项目: " + ", ".join(failed))
+    
+    # 生成整体状态报告
+    logging.info("\n📊 整体构建状态:")
+    for project in projects:
+        status = "✅" if fuzzer_results[project] else "❌"
+        logging.info(f"  {project}: {status}")
+
+if __name__ == "__main__":
+    try:
+        main()
+    except KeyboardInterrupt:
+        print("\n🛑 操作被中断")
+        sys.exit(1)
+    except Exception as e:
+        print(f"💥 严重错误: {e}")
+        sys.exit(1)

From ebb68c902ffc60ec108b4c6acb8aca4f8557733c Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Wed, 23 Jul 2025 01:00:25 +0000
Subject: [PATCH 032/134] Split the build script: require image build results

---
 fuzz/build_fuzzers.py    | 148 ++++++++++++++++++++++++---------------
 image_build_results.json |   1 +
 2 files changed, 93 insertions(+), 56 deletions(-)
 create mode 100644 image_build_results.json

diff --git a/fuzz/build_fuzzers.py b/fuzz/build_fuzzers.py
index e8d2806..c02b436 100644
--- a/fuzz/build_fuzzers.py
+++ b/fuzz/build_fuzzers.py
@@ -4,12 +4,15 @@
 """
 build_fuzzers.py
 
-OSS-Fuzz模糊测试器并行构建工具
+Parallel build of OSS-Fuzz fuzzers.
+Requires Docker images to be built first (using build_images.py).
 
-用法: python3 build_fuzzers.py [项目列表文件] --oss-fuzz-dir /path/to/oss-fuzz \
+Usage: python3 build_fuzzers.py [project_list_file] --oss-fuzz-dir /path/to/oss-fuzz \
+    --image-results image_build_results.json \
     [--sanitizer type] [--workers N]
-示例: python3 fuzz/build_fuzzers.py data/valid_projects.txt \
+Example: python3 build_fuzzers.py data/valid_projects.txt \
     --oss-fuzz-dir ./fuzz/oss-fuzz \
+    --image-results image_build_results.json \
     --sanitizer address \
     --workers 8
 """
@@ -19,33 +22,25 @@
 import subprocess
 import argparse
 import logging
+import json
 from pathlib import Path
 from typing import List, Optional, Tuple
 from multiprocessing import Pool, cpu_count
 
-class BuildError(Exception):
-    """基础构建异常"""
-    def __init__(self, message: str, project: str = "", exit_code: int = None):
+class CommandExecutionError(Exception):
+    """Custom command execution exception"""
+    def __init__(self, message: str, exit_code: Optional[int] = None):
         super().__init__(message)
-        self.project = project
         self.exit_code = exit_code
 
-class CommandError(BuildError):
-    """命令执行异常"""
-    pass
-
-class PathError(BuildError):
-    """路径相关异常"""
-    pass
-
 def run_command(
     cmd: str,
     oss_fuzz_dir: Path,
     allowed_exit_codes: Optional[List[int]] = None
 ) -> int:
-    """执行命令并返回退出码"""
+    """Execute a command and return the exit code, throws CommandExecutionError on failure"""
     allowed_exit_codes = allowed_exit_codes or [0]
-    logging.info(f"▶️ 执行命令: {cmd}")
+    logging.info(f"▶️ Executing command: {cmd}")
     
     try:
         process = subprocess.Popen(
@@ -61,100 +56,141 @@ def run_command(
         
         if exit_code in allowed_exit_codes:
             return exit_code
-        raise CommandError(f"命令失败", exit_code=exit_code)
+        raise CommandExecutionError(
+            f"Command failed (exit code: {exit_code})", 
+            exit_code=exit_code
+        )
     
     except FileNotFoundError as e:
-        raise CommandError(f"命令不存在: {cmd.split()[0]}") from e
+        raise CommandExecutionError(f"Command not found: {cmd.split()[0]}") from e
     except OSError as e:
-        raise CommandError(f"系统错误: {e}") from e
+        raise CommandExecutionError(f"System error: {e}") from e
     except subprocess.SubprocessError as e:
-        raise CommandError(f"子进程错误: {e}") from e
+        raise CommandExecutionError(f"Subprocess error: {e}") from e
 
 def build_fuzzers(project_name: str, sanitizer: str, oss_fuzz_dir: Path) -> Tuple[bool, str]:
-    """模糊测试器构建工作流"""
+    """Fuzzer build workflow"""
     try:
-        logging.info(f"🔧 开始构建模糊测试器: {project_name}")
+        logging.info("=" * 60)
+        logging.info(f"🔧 Starting fuzzer build for project: {project_name}")
+        logging.info(f"📁 OSS-Fuzz directory: {oss_fuzz_dir}")
+        logging.info("=" * 60)
         
-        # 验证路径
+        # Validate paths
         helper_script = oss_fuzz_dir / "infra" / "helper.py"
         if not helper_script.exists():
-            raise PathError(f"关键脚本缺失: {helper_script}")
+            raise FileNotFoundError(f"Critical script missing: {helper_script}")
         
-        # 执行模糊测试器构建命令
+        # Execute fuzzer build command
         run_command(
             f"python3 infra/helper.py build_fuzzers --sanitizer {sanitizer} {project_name}",
             oss_fuzz_dir
         )
         
-        logging.info(f"✅ 模糊测试器构建成功: {project_name}")
+        logging.info(f"✅ Fuzzers for {project_name} built successfully")
         return (True, project_name)
     
-    except BuildError as e:
-        logging.error(f"❌ 模糊测试器构建失败: {project_name} - {str(e)}")
+    except CommandExecutionError as e:
+        logging.error(f"❌ Fuzzer build for {project_name} failed: {str(e)}")
         return (False, project_name)
     except Exception as e:
-        logging.exception(f"🔥 未处理异常: {project_name}")
+        logging.exception(f"🔥 Unhandled exception: {e}")
         return (False, project_name)
 
 def main():
-    parser = argparse.ArgumentParser(description="OSS-Fuzz模糊测试器构建工具")
-    parser.add_argument("project_list", help="项目列表文件路径")
+    parser = argparse.ArgumentParser(description="OSS-Fuzz Fuzzer Builder")
+    parser.add_argument("project_list", help="Project list file path")
     parser.add_argument("--oss-fuzz-dir", required=True, type=str, 
-                        help="OSS-Fuzz目录路径")
+                        help="OSS-Fuzz directory path")
     parser.add_argument("--sanitizer", default="address", 
                         choices=["address", "memory", "undefined"],
-                        help="模糊测试器检测器类型")
+                        help="Fuzzer sanitizer type")
     parser.add_argument("--workers", type=int, default=cpu_count(),
-                        help="并行工作进程数")
+                        help="Number of parallel worker processes")
+    parser.add_argument("--image-results", required=True,
+                        help="JSON file with image build results from build_images.py")
     args = parser.parse_args()
 
     logging.basicConfig(
         level=logging.INFO,
-        format='[%(levelname)s] %(message)s'
+        format='[%(levelname)s] [PID:%(process)d] %(message)s'
     )
 
-    # 处理路径
+    # Process paths
     oss_fuzz_dir = Path(args.oss_fuzz_dir).resolve()
-    logging.info(f"📁 OSS-Fuzz目录: {oss_fuzz_dir}")
+    logging.info(f"📁 Using OSS-Fuzz directory: {oss_fuzz_dir}")
 
-    # 读取项目列表
+    # Read project list
     try:
         with open(args.project_list, "r", encoding="utf-8") as f:
-            projects = [line.strip() for line in f if line.strip()]
-        logging.info(f"📋 加载项目数: {len(projects)}")
+            all_projects = [line.strip() for line in f if line.strip()]
+        logging.info(f"📋 Loaded {len(all_projects)} projects from list")
+    except Exception as e:
+        logging.error(f"❌ Failed to read project list: {e}")
+        sys.exit(1)
+
+    # Load image build results
+    try:
+        with open(args.image_results, "r") as f:
+            image_results = json.load(f)
+        logging.info(f"📋 Loaded image build results from: {args.image_results}")
     except Exception as e:
-        logging.error(f"❌ 读取项目列表失败: {e}")
+        logging.error(f"❌ Failed to load image build results: {e}")
+        sys.exit(1)
+
+    # Filter projects with successful image builds
+    projects_to_build = [p for p in all_projects if p in image_results and image_results[p]]
+    
+    if not projects_to_build:
+        logging.error("❌ No projects with successful image builds found")
         sys.exit(1)
+        
+    skipped = len(all_projects) - len(projects_to_build)
+    logging.info(f"🔍 Found {len(projects_to_build)} projects with successful image builds "
+                 f"({skipped} skipped due to image build failures)")
 
-    # 并行模糊测试器构建
+    # Parallel fuzzer builds
     with Pool(args.workers) as pool:
         results = pool.starmap(
             build_fuzzers, 
-            [(p, args.sanitizer, oss_fuzz_dir) for p in projects]
+            [(p, args.sanitizer, oss_fuzz_dir) for p in projects_to_build]
         )
 
-    # 输出结果
+    # Output results
     fuzzer_results = {project: success for success, project in results}
-    failed = [p for p in projects if not fuzzer_results[p]]
+    failed = [p for p in projects_to_build if not fuzzer_results[p]]
     
-    success_count = len(projects) - len(failed)
-    logging.info(f"\n📊 构建完成: {success_count}/{len(projects)}")
+    logging.info(f"\n📊 Fuzzer builds completed: "
+                 f"Successful {len(projects_to_build)-len(failed)}/{len(projects_to_build)}")
     
     if failed:
-        logging.error("❌ 失败项目: " + ", ".join(failed))
+        logging.error("❌ Failed fuzzer builds: " + ", ".join(failed))
     
-    # 生成整体状态报告
-    logging.info("\n📊 整体构建状态:")
-    for project in projects:
-        status = "✅" if fuzzer_results[project] else "❌"
+    # Generate overall status report
+    overall_results = {}
+    for project in all_projects:
+        status = "❌"
+        if project in image_results and image_results[project]:
+            if project in fuzzer_results and fuzzer_results[project]:
+                status = "✅"
+            elif project in fuzzer_results:
+                status = "❌ (fuzzer failed)"
+            else:
+                status = "❌ (image ok but not built)"
+        else:
+            status = "❌ (image failed)"
+        overall_results[project] = status
+
+    logging.info("\n📊 Overall build status:")
+    for project, status in overall_results.items():
         logging.info(f"  {project}: {status}")
 
 if __name__ == "__main__":
     try:
         main()
     except KeyboardInterrupt:
-        print("\n🛑 操作被中断")
+        print("\n🛑 Operation interrupted by user")
         sys.exit(1)
     except Exception as e:
-        print(f"💥 严重错误: {e}")
-        sys.exit(1)
+        print(f"💥 Critical error: {e}")
+        sys.exit(1)
\ No newline at end of file
diff --git a/image_build_results.json b/image_build_results.json
new file mode 100644
index 0000000..a5c3591
--- /dev/null
+++ b/image_build_results.json
@@ -0,0 +1 @@
+{"abseil-py": true, "adal": true, "aiohttp": true, "aniso8601": true, "ansible": true, "argcomplete": true, "arrow-py": true, "asn1crypto": true}
\ No newline at end of file

From 5b12877a6ad2639402fe4bcd8fc1715010ac7994 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Wed, 23 Jul 2025 20:55:57 +0000
Subject: [PATCH 033/134] build scripts test successfully

---
 fuzz/{build_oss_fuzz.py => build_oss_fuzz_whole.py} | 6 +++---
 image_build_results.json                            | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)
 rename fuzz/{build_oss_fuzz.py => build_oss_fuzz_whole.py} (96%)

diff --git a/fuzz/build_oss_fuzz.py b/fuzz/build_oss_fuzz_whole.py
similarity index 96%
rename from fuzz/build_oss_fuzz.py
rename to fuzz/build_oss_fuzz_whole.py
index 14b79e7..59d3bea 100644
--- a/fuzz/build_oss_fuzz.py
+++ b/fuzz/build_oss_fuzz_whole.py
@@ -2,14 +2,14 @@
 # -*- coding: utf-8 -*-
 
 """
-build_oss_fuzz.py
+build_oss_fuzz_whole.py
 
 Parallel build of OSS-Fuzz projects (Docker images and Fuzzer compilation).
 Uses multiprocessing.Pool to distribute projects across multiple CPU cores.
 
-Usage: python3 build_oss_fuzz.py [project_list_file] --oss-fuzz-dir /path/to/oss-fuzz \
+Usage: python3 build_oss_fuzz_whole.py [project_list_file] --oss-fuzz-dir /path/to/oss-fuzz \
     [--sanitizer type] [--workers N]
-Example: python3 fuzz/build_oss_fuzz.py data/valid_projects.txt \
+Example: python3 fuzz/build_oss_fuzz_whole.py data/valid_projects.txt \
     --oss-fuzz-dir ./fuzz/oss-fuzz \
     --sanitizer address \
     --workers 8
diff --git a/image_build_results.json b/image_build_results.json
index a5c3591..93d383e 100644
--- a/image_build_results.json
+++ b/image_build_results.json
@@ -1 +1 @@
-{"abseil-py": true, "adal": true, "aiohttp": true, "aniso8601": true, "ansible": true, "argcomplete": true, "arrow-py": true, "asn1crypto": true}
\ No newline at end of file
+{"abseil-py": true, "adal": true, "aiohttp": true, "aniso8601": true, "ansible": true, "argcomplete": true, "arrow-py": true, "asn1crypto": true, "asteval": true, "astroid": true, "asttokens": true, "attrs": true, "autoflake": true, "autopep8": true, "azure-sdk-for-python": true, "babel": true, "black": true, "botocore": true, "bottleneck": true, "bz2file": true, "cachetools": true, "cffi": true, "chardet": true, "charset_normalizer": true, "click": true, "cloud-custodian": true, "configparser": true, "connexion": true, "coveragepy": true, "croniter": true, "cryptography": true, "cssselect": true, "dask": true, "decorator": true, "defusedxml": true, "digest": true, "dill": true, "distlib": true, "dnspython": true, "docutils": true, "ecdsa-python": true, "et-xmlfile": true, "face": true, "filelock": true, "filesystem_spec": true, "flask": true, "flask-jwt-extended": true, "flask-restx": true, "flask-wtf": true, "fonttools": true, "ftfy": true, "g-api-auth-httplib2": true, "g-api-auth-library-python": true, "g-api-pubsub": true, "g-api-py-api-common-protos": true, "g-api-py-oauthlib": true, "g-api-python-bigquery-storage": true, "g-api-python-client": true, "g-api-python-cloud-core": true, "g-api-python-firestore": true, "g-api-python-tasks": true, "g-api-resource-manager": true, "g-api-resumable-media-python": true, "g-api-secret-manager": true, "g-apis-py-api-core": true, "gast": true, "gc-iam": true, "gcloud-error-py": true, "g-cloud-logging-py": true, "gcp-python-cloud-storage": true, "genshi": true, "gitdb": true, "glom": true, "gprof2dot": true, "g-py-bigquery": true, "g-py-crc32c": true, "grpc-py": true, "gunicorn": true, "h11": true, "h5py": true, "hiredis-py": true, "html2text": true, "html5lib-python": true, "httpcore": true, "httpretty": false, "httpx": true, "idna": true, "ijson": true, "importlib_metadata": true, "iniconfig": true, "ipaddress": true, "ipykernel": true, "ipython": true, "isodate": true, "itsdangerous": true, "jedi": true, "jinja2": true, 
"jmespathpy": true, "joblib": true, "jsmin": true, "jupyter-nbconvert": true, "jupyter_server": true, "kafka": true, "keras": false, "kiwisolver": true, "lark-parser": true, "libcst": true, "looker-sdk": true, "lxml": true, "mako": true, "markupsafe": true, "matplotlib": true, "mccabe": true, "mdit-py-plugins": true, "mdurl": true, "more-itertools": false, "mrab-regex": true, "msal": true, "msgpack-python": true, "multidict": true, "mutagen": true, "nbclassic": true, "nbformat": true, "netaddr-py": true, "networkx": true, "ntlm2": true, "ntlm-auth": true, "numexpr": true, "numpy": true, "oauth2": true, "oauthlib": true, "olefile": true, "openapi-schema-validator": true, "opencensus-python": true, "openpyxl": true, "opt_einsum": true, "oracle-py-cx": true, "orjson": true, "oscrypto": true, "packaging": true, "pandas": true, "paramiko": true, "parse": true, "parsimonious": true, "pasta": true, "pathlib2": true, "pdoc": true, "pem": true, "pendulum": true, "pip": true, "ply": true, "protobuf-python": true, "proto-plus-python": true, "psqlparse": true, "psutil": true, "psycopg2": true, "pyasn1": true, "pyasn1-modules": true, "pycparser": true, "pycrypto": true, "pydantic": true, "pydateutil": true, "pygments": true, "pyjson5": true, "pyjwt": true, "pymysql": true, "pynacl": true, "pyodbc": false, "pyparsing": false, "pyrsistent": true, "py-serde": true, "pytables": true, "pytest-py": true, "python3-openid": true, "python-ecdsa": true, "python-email-validator": true, "python-fastjsonschema": true, "python-future": true, "python-graphviz": true, "python-hyperlink": true, "python-jose": true, "python-lz4": true, "python-markdown": true, "python-markdownify": true, "python-nameparser": true, "python-nvd3": true, "python-pathspec": true, "python-prompt-toolkit": true, "python-pypdf": true, "python-rison": true, "python-rsa": true, "python-tabulate": true, "pytz": true, "pyxdg": true, "pyyaml": true, "pyzmq": true, "redis-py": true, "requests": true, "retry": true, 
"rfc3967": true, "rich": true, "sacremoses": true, "scikit-learn": true, "scipy": true, "setuptools": true, "sigstore-python": true, "simplejson": true, "six": true, "smart_open": true, "soupsieve": true, "sqlalchemy_jsonfield": true, "sqlalchemy-utils": true, "sqlparse": true, "stack_data": true, "tensorflow-addons": false, "tinycss2": true, "toml": true, "tomlkit": true, "toolbelt": true, "toolz": true, "tqdm": true, "typing_extensions": true, "underscore": true, "uritemplate": true, "urlextract": true, "urllib3": true, "validators": true, "w3lib": true, "websocket-client": true, "wheel": true, "wtforms": true, "xlrd": true, "yarl": true, "zipp": true}
\ No newline at end of file

From 40588d456c8e515463298a70982d3a95a74a1aad Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Wed, 23 Jul 2025 22:20:19 +0000
Subject: [PATCH 034/134] build.py

---
 fuzz/build.py         | 329 ++++++++++++++++++++++++++++++++++++++++++
 fuzz/build_fuzzers.py | 128 +++++++++++-----
 2 files changed, 418 insertions(+), 39 deletions(-)
 create mode 100644 fuzz/build.py

diff --git a/fuzz/build.py b/fuzz/build.py
new file mode 100644
index 0000000..e87f33b
--- /dev/null
+++ b/fuzz/build.py
@@ -0,0 +1,329 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""
+OSS-Fuzz Build System
+
+Combines Docker image building and fuzzer building capabilities.
+Supports three modes: 'image', 'fuzzer', or 'both'.
+
+Usage:
+  Build images:
+    python3 build.py --mode image <project_list> --oss-fuzz-dir /path/to/oss-fuzz
+
+  Build fuzzers:
+    python3 build.py --mode fuzzer <project_list> --oss-fuzz-dir /path/to/oss-fuzz --image-results results.json
+
+  Build both:
+    python3 build.py --mode both <project_list> --oss-fuzz-dir /path/to/oss-fuzz --sanitizer address
+
+Example:
+    python3 ./fuzz/build.py --mode both data/valid_projects.txt --oss-fuzz-dir ./fuzz/oss-fuzz --sanitizer address --workers 8
+
+"""
+
+import os
+import sys
+import subprocess
+import argparse
+import logging
+import json
+from pathlib import Path
+from typing import List, Optional, Tuple, Dict
+from multiprocessing import Pool, cpu_count
+
+# ========================================================================================
+# Custom Exceptions
+# ========================================================================================
+class BuildError(Exception):
+    """Base exception for build failures; records the affected project and the command exit code"""
+    def __init__(self, message: str, project: str = "", exit_code: Optional[int] = None):
+        super().__init__(message)
+        self.project = project
+        self.exit_code = exit_code
+
+class CommandError(BuildError):
+    """Raised by run_command when a command exits with a disallowed code or cannot be started"""
+    pass
+
+class PathError(BuildError):
+    """Raised when a required path is missing (e.g. infra/helper.py in the OSS-Fuzz directory)"""
+    pass
+
+class ConfigError(BuildError):
+    """Raised for invalid configuration, e.g. an empty project list file"""
+    pass
+
+# ========================================================================================
+# Helper Functions
+# ========================================================================================
+def run_command(
+    cmd: str,
+    oss_fuzz_dir: Path,
+    project: str = "",
+    allowed_exit_codes: Optional[List[int]] = None,
+    skip_yes: bool = False
+) -> int:
+    """Run a shell command inside the OSS-Fuzz checkout and return its exit code.
+
+    Args:
+        cmd: Shell command line, executed with shell=True.
+        oss_fuzz_dir: Working directory for the command.
+        project: Project name, used only in log and error messages.
+        allowed_exit_codes: Exit codes treated as success (defaults to [0]).
+        skip_yes: If False, pipe `yes` into the command and discard its output;
+            if True, run it as-is and capture its output for error messages.
+
+    Raises:
+        CommandError: Disallowed exit code, or the process could not be started.
+    """
+    allowed_exit_codes = allowed_exit_codes or [0]
+    cmd_str = cmd if skip_yes else f"yes | {cmd}"
+    logging.debug(f"Executing command [{project}]: {cmd_str}")
+    stream = subprocess.PIPE if skip_yes else subprocess.DEVNULL
+    try:
+        # errors="replace": undecodable bytes in captured build output must not
+        # raise UnicodeDecodeError and make a finished command look like a failure.
+        process = subprocess.Popen(
+            cmd_str, shell=True, cwd=str(oss_fuzz_dir),
+            stdout=stream, stderr=stream, text=True, errors="replace"
+        )
+        stdout, stderr = process.communicate()  # (None, None) when not piped
+        exit_code = process.returncode
+        if exit_code in allowed_exit_codes:
+            return exit_code
+        error_msg = f"Command failed (exit code: {exit_code})"
+        if project:
+            error_msg += f" for project: {project}"
+        if stderr and stderr.strip():
+            error_msg += f"\nError output:\n{stderr.strip()}"
+        if stdout and stdout.strip():
+            error_msg += f"\nOutput:\n{stdout.strip()}"
+        raise CommandError(error_msg, project=project, exit_code=exit_code)
+    except FileNotFoundError as e:
+        raise CommandError(f"Command not found: {cmd.split()[0]}", project=project) from e
+    except OSError as e:
+        raise CommandError(f"System error: {e}", project=project) from e
+    except subprocess.SubprocessError as e:
+        raise CommandError(f"Subprocess error: {e}", project=project) from e
+
+# ========================================================================================
+# Build Functions
+# ========================================================================================
+def build_image(project_name: str, oss_fuzz_dir: Path) -> Tuple[bool, str]:
+    """Build the project's Docker image via infra/helper.py.
+
+    Returns (success, project_name); failures are logged rather than raised.
+    """
+    try:
+        logging.info(f"Building Docker image: {project_name}")
+        # Validate paths
+        helper_script = oss_fuzz_dir / "infra" / "helper.py"
+        if not helper_script.exists():
+            raise PathError(f"Missing helper script: {helper_script}", project=project_name)
+        # Execute image build command
+        run_command(
+            f"python3 infra/helper.py build_image {project_name}",
+            oss_fuzz_dir,
+            project=project_name
+        )
+        logging.info(f"✅ Docker image built: {project_name}")
+        return (True, project_name)
+        # BuildError also covers the PathError raised above (not only CommandError).
+    except BuildError as e:
+        logging.error(f"❌ Docker build failed: {project_name} - {str(e)}")
+        return (False, project_name)
+    except Exception as e:
+        logging.error(f"💥 Unhandled exception: {project_name} - {repr(e)}")
+        return (False, project_name)
+
+def build_fuzzers(project_name: str, sanitizer: str, oss_fuzz_dir: Path) -> Tuple[bool, str]:
+    """Build the project's fuzzers with the given sanitizer via infra/helper.py.
+
+    Returns (success, project_name); failures are logged rather than raised.
+    """
+    try:
+        logging.info(f"Building fuzzers: {project_name} ({sanitizer} sanitizer)")
+        # Validate paths
+        helper_script = oss_fuzz_dir / "infra" / "helper.py"
+        if not helper_script.exists():
+            raise PathError(f"Missing helper script: {helper_script}", project=project_name)
+        # Execute fuzzer build command (skip_yes=True: no `yes |` prefix, output captured)
+        run_command(
+            f"python3 infra/helper.py build_fuzzers --sanitizer {sanitizer} {project_name}",
+            oss_fuzz_dir,
+            project=project_name,
+            skip_yes=True
+        )
+        
+        logging.info(f"✅ Fuzzers built: {project_name}")
+        return (True, project_name)
+    except BuildError as e:
+        logging.error(f"❌ Fuzzer build failed: {project_name} - {str(e)}")
+        return (False, project_name)
+    except Exception as e:
+        logging.error(f"💥 Unhandled exception: {project_name} - {repr(e)}")
+        return (False, project_name)
+
+# ========================================================================================
+# Main Execution
+# ========================================================================================
+def load_projects(file_path: Path) -> List[str]:
+    """Load project names from a text file, one per line (blank lines ignored).
+
+    Raises FileNotFoundError if the file is missing, ConfigError if it is empty.
+    """
+    if not file_path.exists():
+        raise FileNotFoundError(f"Project list not found: {file_path}")
+    with open(file_path, "r", encoding="utf-8") as f:
+        projects = [line.strip() for line in f if line.strip()]
+    if not projects:
+        raise ConfigError("Project list is empty")
+    logging.info(f"Loaded {len(projects)} projects from {file_path}")
+    return projects
+
+def execute_builds(
+    func,
+    args_list: List[tuple],
+    worker_count: int,
+    success_msg: str,
+    failure_msg: str
+) -> Tuple[Dict[str, bool], List[str]]:
+    """Run func over args_list in a pool of worker_count processes and log a summary.
+
+    Returns ({project: success}, [failed projects]); func must return (success, project).
+    """
+    results = {}
+    with Pool(worker_count) as pool:
+        for success, project in pool.starmap(func, args_list):
+            results[project] = success
+    failed = [p for p, success in results.items() if not success]
+    success_count = len(results) - len(failed)
+    if failed:
+        logging.error(f"\n❌ {failure_msg}: {len(failed)}/{len(results)} projects")
+    logging.info(f"\n📊 {success_msg}: {success_count}/{len(results)} projects")
+    return results, failed
+
+def main():
+    """Parse CLI arguments, build Docker images and/or fuzzers according to --mode, and log a summary."""
+    parser = argparse.ArgumentParser(description="OSS-Fuzz Build System")
+    parser.add_argument("project_list", help="Project list file path")
+    parser.add_argument("--oss-fuzz-dir", required=True, type=str, 
+                        help="OSS-Fuzz directory path")
+    parser.add_argument("--mode", choices=['image', 'fuzzer', 'both'], default='both',
+                        help="Build mode: 'image', 'fuzzer', or 'both'")
+    parser.add_argument("--workers", type=int, default=cpu_count(),
+                        help="Number of parallel worker processes")
+    parser.add_argument("--sanitizer", default="address", 
+                        choices=["address", "memory", "undefined"],
+                        help="Fuzzer sanitizer type")
+    parser.add_argument("--image-results", default="image_build_results.json",
+                        help="Image build results file (JSON)")
+    parser.add_argument("--log-level", default="INFO", 
+                        choices=["DEBUG", "INFO", "WARNING", "ERROR"],
+                        help="Logging detail level")
+    args = parser.parse_args()
+
+    # Configure logging (the PID is included because builds run in worker processes)
+    logging.basicConfig(
+        level=getattr(logging, args.log_level),
+        format='[%(levelname)s] [PID:%(process)d] %(message)s'
+    )
+    
+    oss_fuzz_dir = Path(args.oss_fuzz_dir).resolve()
+    project_file = Path(args.project_list).resolve()
+    output_file = Path(args.image_results)
+    
+    # Sanity check: the OSS-Fuzz directory must exist
+    if not oss_fuzz_dir.exists():
+        logging.critical(f"OSS-Fuzz directory not found: {oss_fuzz_dir}")
+        sys.exit(1)
+    
+    # Load projects
+    try:
+        projects = load_projects(project_file)
+    except Exception as e:
+        logging.critical(f"❌ Failed to load projects: {e}")
+        sys.exit(1)
+
+    # Image building workflow
+    image_results = {}
+    if args.mode in ['image', 'both']:
+        logging.info("\n" + "="*60)
+        logging.info(f"Starting Docker image builds for {len(projects)} projects")
+        logging.info("="*60 + "\n")
+        
+        image_args = [(p, oss_fuzz_dir) for p in projects]
+        image_results, image_failures = execute_builds(
+            build_image,
+            image_args,
+            args.workers,
+            "✅ Docker image builds succeeded",
+            "🚫 Docker image builds failed"
+        )
+        
+        # Save image build results so a later --mode fuzzer run can load them
+        try:
+            with output_file.open("w") as f:
+                json.dump(image_results, f)
+            logging.info(f"💾 Image build results saved to: {output_file}")
+        except Exception as e:
+            logging.error(f"❌ Failed to save image results: {e}")
+    
+    # Fuzzer building workflow
+    fuzzer_results = {}
+    if args.mode in ['fuzzer', 'both']:
+        logging.info("\n" + "="*60)
+        logging.info(f"Starting fuzzer builds for {len(projects)} projects ({args.sanitizer} sanitizer)")
+        logging.info("="*60 + "\n")
+        
+        # Fuzzer-only mode: load image results from the --image-results JSON file
+        if args.mode == 'fuzzer':
+            try:
+                with output_file.open("r") as f:
+                    image_results = json.load(f)
+                logging.info(f"📋 Loaded image build results from: {output_file}")
+            except Exception as e:
+                logging.critical(f"❌ Failed to load image results: {e}")
+                sys.exit(1)
+        
+        # Filter projects with successful image builds
+        fuzz_projects = [p for p in projects if image_results.get(p, False)]
+        if not fuzz_projects:
+            logging.critical("❌ No projects with successful image builds")
+            sys.exit(1)
+        
+        fuzzer_args = [(p, args.sanitizer, oss_fuzz_dir) for p in fuzz_projects]
+        fuzzer_results, fuzzer_failures = execute_builds(
+            build_fuzzers,
+            fuzzer_args,
+            args.workers,
+            "✅ Fuzzer builds succeeded",
+            "🚫 Fuzzer builds failed"
+        )
+    
+    # Final summary
+    logging.info("\n" + "="*60)
+    logging.info("Build Summary")
+    logging.info("="*60)
+    
+    if args.mode in ['image', 'both']:
+        image_success = sum(1 for r in image_results.values() if r)
+        logging.info(f"📦 Docker Images: {image_success}/{len(projects)} succeeded")
+    
+    if args.mode in ['fuzzer', 'both']:
+        if args.mode == 'both':
+            fuzz_projects = list(fuzzer_results.keys())
+        fuzzer_success = sum(1 for r in fuzzer_results.values() if r)
+        logging.info(f"🔧 Fuzzers: {fuzzer_success}/{len(fuzz_projects)} succeeded")
+    logging.info("="*60)
+
+if __name__ == "__main__":
+    try:
+        main()
+    except KeyboardInterrupt:  # Ctrl-C: report and exit with status 1
+        print("\n🛑 Operation interrupted by user")
+        sys.exit(1)
+    except Exception as e:  # last-resort handler for anything main() let through
+        logging.critical(f"💥 Critical error: {str(e)}")
+        sys.exit(1)
\ No newline at end of file
diff --git a/fuzz/build_fuzzers.py b/fuzz/build_fuzzers.py
index c02b436..1dfd825 100644
--- a/fuzz/build_fuzzers.py
+++ b/fuzz/build_fuzzers.py
@@ -10,7 +10,7 @@
 Usage: python3 build_fuzzers.py [project_list_file] --oss-fuzz-dir /path/to/oss-fuzz \
     --image-results image_build_results.json \
     [--sanitizer type] [--workers N]
-Example: python3 build_fuzzers.py data/valid_projects.txt \
+Example: python3 fuzz/build_fuzzers.py data/valid_projects.txt \
     --oss-fuzz-dir ./fuzz/oss-fuzz \
     --image-results image_build_results.json \
     --sanitizer address \
@@ -27,18 +27,32 @@
 from typing import List, Optional, Tuple
 from multiprocessing import Pool, cpu_count
 
-class CommandExecutionError(Exception):
-    """Custom command execution exception"""
-    def __init__(self, message: str, exit_code: Optional[int] = None):
+class BuildError(Exception):
+    """Base exception for build failures"""
+    def __init__(self, message: str, project: str = "", exit_code: int = None):
         super().__init__(message)
+        self.project = project
         self.exit_code = exit_code
 
+class CommandError(BuildError):
+    """Exception for command execution failures"""
+    pass
+
+class PathError(BuildError):
+    """Exception for missing paths or files"""
+    pass
+
+class ConfigError(BuildError):
+    """Exception for configuration errors"""
+    pass
+
 def run_command(
     cmd: str,
     oss_fuzz_dir: Path,
+    project: str = "",
     allowed_exit_codes: Optional[List[int]] = None
 ) -> int:
-    """Execute a command and return the exit code, throws CommandExecutionError on failure"""
+    """Execute a command and return the exit code"""
     allowed_exit_codes = allowed_exit_codes or [0]
     logging.info(f"▶️ Executing command: {cmd}")
     
@@ -47,54 +61,67 @@ def run_command(
             cmd,
             shell=True,
             cwd=str(oss_fuzz_dir),
-            stdout=subprocess.DEVNULL,
-            stderr=subprocess.DEVNULL
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            text=True
         )
         
-        process.wait()
+        stdout, stderr = process.communicate()
         exit_code = process.returncode
         
         if exit_code in allowed_exit_codes:
             return exit_code
-        raise CommandExecutionError(
-            f"Command failed (exit code: {exit_code})", 
-            exit_code=exit_code
-        )
+            
+        # Build a detailed error message
+        error_msg = f"Command failed (exit code: {exit_code})"
+        if project:
+            error_msg += f" for project: {project}"
+            
+        if stderr.strip():
+            error_msg += f"\nError output:\n{stderr.strip()}"
+            
+        if stdout.strip():
+            error_msg += f"\nOutput:\n{stdout.strip()}"
+            
+        raise CommandError(error_msg, project=project, exit_code=exit_code)
     
     except FileNotFoundError as e:
-        raise CommandExecutionError(f"Command not found: {cmd.split()[0]}") from e
+        raise CommandError(f"Command not found: {cmd.split()[0]}", project=project) from e
     except OSError as e:
-        raise CommandExecutionError(f"System error: {e}") from e
+        raise CommandError(f"System error: {e}", project=project) from e
     except subprocess.SubprocessError as e:
-        raise CommandExecutionError(f"Subprocess error: {e}") from e
+        raise CommandError(f"Subprocess error: {e}", project=project) from e
 
 def build_fuzzers(project_name: str, sanitizer: str, oss_fuzz_dir: Path) -> Tuple[bool, str]:
     """Fuzzer build workflow"""
     try:
         logging.info("=" * 60)
-        logging.info(f"🔧 Starting fuzzer build for project: {project_name}")
+        logging.info(f"🔧 Building fuzzers for: {project_name}")
         logging.info(f"📁 OSS-Fuzz directory: {oss_fuzz_dir}")
         logging.info("=" * 60)
         
         # Validate paths
         helper_script = oss_fuzz_dir / "infra" / "helper.py"
         if not helper_script.exists():
-            raise FileNotFoundError(f"Critical script missing: {helper_script}")
+            raise PathError(f"Missing helper script: {helper_script}", project=project_name)
         
         # Execute fuzzer build command
         run_command(
             f"python3 infra/helper.py build_fuzzers --sanitizer {sanitizer} {project_name}",
-            oss_fuzz_dir
+            oss_fuzz_dir,
+            project=project_name
         )
         
-        logging.info(f"✅ Fuzzers for {project_name} built successfully")
+        logging.info(f"✅ Fuzzers built: {project_name}")
         return (True, project_name)
     
-    except CommandExecutionError as e:
-        logging.error(f"❌ Fuzzer build for {project_name} failed: {str(e)}")
+    except BuildError as e:
+        logging.error(f"❌ Build failed: {project_name}")
+        logging.error(f"   Reason: {str(e)}")
         return (False, project_name)
     except Exception as e:
-        logging.exception(f"🔥 Unhandled exception: {e}")
+        logging.error(f"🔥 Unhandled exception: {project_name}")
+        logging.exception(f"   Exception details: {e}")
         return (False, project_name)
 
 def main():
@@ -113,7 +140,7 @@ def main():
 
     logging.basicConfig(
         level=logging.INFO,
-        format='[%(levelname)s] [PID:%(process)d] %(message)s'
+        format='[%(levelname)s] %(message)s'
     )
 
     # Process paths
@@ -122,32 +149,53 @@ def main():
 
     # Read project list
     try:
-        with open(args.project_list, "r", encoding="utf-8") as f:
+        project_file = Path(args.project_list)
+        if not project_file.exists():
+            raise FileNotFoundError(f"Project list file not found: {project_file}")
+            
+        with open(project_file, "r", encoding="utf-8") as f:
             all_projects = [line.strip() for line in f if line.strip()]
-        logging.info(f"📋 Loaded {len(all_projects)} projects from list")
+            
+        if not all_projects:
+            raise ConfigError("Project list is empty")
+            
+        logging.info(f"📋 Loaded {len(all_projects)} projects")
     except Exception as e:
         logging.error(f"❌ Failed to read project list: {e}")
         sys.exit(1)
 
     # Load image build results
     try:
-        with open(args.image_results, "r") as f:
+        image_results_file = Path(args.image_results)
+        if not image_results_file.exists():
+            raise FileNotFoundError(f"Image results file not found: {image_results_file}")
+            
+        with open(image_results_file, "r") as f:
             image_results = json.load(f)
-        logging.info(f"📋 Loaded image build results from: {args.image_results}")
+            
+        if not isinstance(image_results, dict):
+            raise ConfigError("Image results should be a JSON object")
+            
+        logging.info(f"📋 Loaded image build results: {args.image_results}")
+    except json.JSONDecodeError as e:
+        logging.error(f"❌ Failed to parse image build results: {e}")
+        sys.exit(1)
     except Exception as e:
         logging.error(f"❌ Failed to load image build results: {e}")
         sys.exit(1)
 
     # Filter projects with successful image builds
     projects_to_build = [p for p in all_projects if p in image_results and image_results[p]]
+    image_failures = [p for p in all_projects if p not in image_results or not image_results[p]]
     
     if not projects_to_build:
-        logging.error("❌ No projects with successful image builds found")
+        logging.error("❌ No projects with successful image builds")
+        if image_failures:
+            logging.error(f"   Projects with image build failures: {', '.join(image_failures[:10])}{'...' if len(image_failures) > 10 else ''}")
         sys.exit(1)
         
     skipped = len(all_projects) - len(projects_to_build)
-    logging.info(f"🔍 Found {len(projects_to_build)} projects with successful image builds "
-                 f"({skipped} skipped due to image build failures)")
+    logging.info(f"🔍 Building {len(projects_to_build)} projects (skipped {skipped} due to image failures)")
 
     # Parallel fuzzer builds
     with Pool(args.workers) as pool:
@@ -160,12 +208,14 @@ def main():
     fuzzer_results = {project: success for success, project in results}
     failed = [p for p in projects_to_build if not fuzzer_results[p]]
     
-    logging.info(f"\n📊 Fuzzer builds completed: "
-                 f"Successful {len(projects_to_build)-len(failed)}/{len(projects_to_build)}")
+    success_count = len(projects_to_build) - len(failed)
+    logging.info(f"\n📊 Build completed: {success_count}/{len(projects_to_build)}")
     
     if failed:
-        logging.error("❌ Failed fuzzer builds: " + ", ".join(failed))
-    
+        logging.error(f"❌ Failed builds ({len(failed)} projects):")
+        for project in failed:
+            logging.error(f"   - {project}")
+
     # Generate overall status report
     overall_results = {}
     for project in all_projects:
@@ -174,14 +224,14 @@ def main():
             if project in fuzzer_results and fuzzer_results[project]:
                 status = "✅"
             elif project in fuzzer_results:
-                status = "❌ (fuzzer failed)"
+                status = "❌ (fuzzer)"
             else:
-                status = "❌ (image ok but not built)"
+                status = "❌ (not built)"
         else:
-            status = "❌ (image failed)"
+            status = "❌ (image)"
         overall_results[project] = status
 
-    logging.info("\n📊 Overall build status:")
+    logging.info("\n📊 Overall status:")
     for project, status in overall_results.items():
         logging.info(f"  {project}: {status}")
 
@@ -189,7 +239,7 @@ def main():
     try:
         main()
     except KeyboardInterrupt:
-        print("\n🛑 Operation interrupted by user")
+        print("\n🛑 Operation interrupted")
         sys.exit(1)
     except Exception as e:
         print(f"💥 Critical error: {e}")

From 5b52393098fd2c3e149724c9f148f56622d58fc1 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Wed, 23 Jul 2025 22:20:51 +0000
Subject: [PATCH 035/134] collect targets first and then run

---
 fuzz/run_fuzz_all_targets.py | 280 +++++++++++++++++++++++++++++++++++
 fuzz/run_fuzz_target.py      | 125 ++++++----------
 2 files changed, 328 insertions(+), 77 deletions(-)
 create mode 100644 fuzz/run_fuzz_all_targets.py

diff --git a/fuzz/run_fuzz_all_targets.py b/fuzz/run_fuzz_all_targets.py
new file mode 100644
index 0000000..d3ea3d8
--- /dev/null
+++ b/fuzz/run_fuzz_all_targets.py
@@ -0,0 +1,280 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""
+run_fuzz_all_targets.py
+
+该脚本采用两阶段方法进行模糊测试：
+1. 发现阶段：首先遍历所有指定的项目，收集每一个项目中所有可执行的模糊测试目标 (fuzz target)。
+2. 执行阶段：然后创建一个包含所有 (项目, target) 对的任务池，并使用多进程并行执行所有任务。
+
+这种方法可以最大化 CPU 利用率，并提供更清晰的整体进度。
+
+用法: python3 run_fuzz_all_targets.py [project_list_file] [--timeout seconds] [--workers N]
+示例: python3 fuzz/run_fuzz_all_targets.py data/valid_projects.txt --timeout 60 --workers 4
+"""
+
+import os
+import sys
+import subprocess
+import argparse
+import logging
+import time
+from datetime import datetime
+from pathlib import Path
+from typing import Optional, List, Tuple
+from multiprocessing import Pool, cpu_count
+
+# --- 全局配置 ---
+HOME_DIR = Path.home()
+OSS_FUZZ_DIR = HOME_DIR / "FuzzAug" / "fuzz" / "oss-fuzz"
+LOG_DIR = OSS_FUZZ_DIR / "run_logs2"
+
+
+def run_command(
+    cmd: str,
+    log_msg: str,
+    logger: logging.Logger,
+    allowed_exit_codes: Optional[List[int]] = None,
+    timeout: int = 3600  # 默认1小时超时
+) -> bool:
+    """使用实时日志记录和精确的错误处理来执行命令"""
+    allowed_exit_codes = allowed_exit_codes or []
+    logger.info(f"▶️ {log_msg}...")
+    logger.debug(f"   $ {cmd}")
+
+    process = None
+    try:
+        process = subprocess.Popen(
+            cmd,
+            shell=True,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT,
+            text=True,
+            encoding="utf-8",
+            errors="replace"
+        )
+
+        start_time = time.time()
+        while process.poll() is None:
+            # 检查命令是否超时
+            if time.time() - start_time > timeout:
+                logger.error(f"⌛ 命令在 {timeout} 秒后超时")
+                process.terminate()
+                try:
+                    process.wait(timeout=5)
+                except subprocess.TimeoutExpired:
+                    process.kill()
+                return False
+
+            if process.stdout:
+                line = process.stdout.readline()
+                if line:
+                    logger.debug(line.strip())
+            else:
+                # 在某些情况下 stdout 可能暂时为 None
+                time.sleep(0.1)
+
+        exit_code = process.returncode
+        if exit_code not in [0, *allowed_exit_codes]:
+            logger.error(f"❌ 命令执行失败，退出码: {exit_code}")
+            return False
+        return True
+
+    except FileNotFoundError:
+        logger.error(f"🔍 命令未找到: {cmd.split()[0]}")
+        return False
+    except PermissionError:
+        logger.error(f"🔒 执行命令权限不足: {cmd}")
+        return False
+    except subprocess.SubprocessError as e:
+        logger.exception(f"💥 子进程错误: {e}")
+        return False
+    except OSError as e:
+        logger.exception(f"💥 执行命令时发生操作系统错误: {e}")
+        return False
+    finally:
+        if process and process.poll() is None:
+            try:
+                process.terminate()
+                process.wait(timeout=5)
+            except Exception:
+                pass
+
+
+def discover_targets(project_name: str, logger: logging.Logger) -> List[str]:
+    """发现项目的 Fuzz Targets (以 'fuzz_' 开头，无扩展名，且可执行)"""
+    out_dir = OSS_FUZZ_DIR / "build" / "out" / project_name
+    targets: List[str] = []
+
+    if not out_dir.is_dir():
+        logger.warning(f"项目 {project_name} 的构建输出目录不存在: {out_dir}")
+        return targets
+
+    try:
+        for f in out_dir.iterdir():
+            try:
+                if (f.is_file() and
+                        f.name.startswith("fuzz_") and
+                        '.' not in f.name and
+                        os.access(f, os.X_OK)):
+                    targets.append(f.name)
+            except OSError as e:
+                logger.warning(f"⚠️ 检查文件 {f.name} 时出错，已跳过: {e}")
+
+    except PermissionError:
+        logger.error(f"🔒 访问目录权限不足: {out_dir}")
+    except OSError as e:
+        logger.exception(f"💥 发现 Target 时发生操作系统错误: {e}")
+
+    return targets
+
+
+def run_single_target(project_name: str, target_name: str, timeout: int) -> Tuple[bool, str, str]:
+    """为单个 (项目, target) 对执行模糊测试工作流"""
+    task_id = f"{project_name}_{target_name}"
+    logger = logging.getLogger(task_id)
+
+    try:
+        # 为每个任务配置独立的日志记录器
+        logger.setLevel(logging.DEBUG)
+        LOG_DIR.mkdir(parents=True, exist_ok=True)
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        log_file = LOG_DIR / f"run_{task_id}_{timestamp}.log"
+        file_handler = logging.FileHandler(log_file, encoding="utf-8")
+        formatter = logging.Formatter(
+            "%(asctime)s [PID:%(process)d] %(levelname)s: %(message)s",
+            datefmt="%Y-%m-%d %H:%M:%S"
+        )
+        file_handler.setFormatter(formatter)
+        logger.addHandler(file_handler)
+
+        os.chdir(OSS_FUZZ_DIR)
+
+    except (OSError, PermissionError) as e:
+        # 如果日志设置失败，直接打印到控制台
+        print(f"❌ 任务 {task_id} 初始化时发生严重错误: {e}")
+        return False, project_name, target_name
+
+    logger.info(f"🚀 开始测试 -> 项目: {project_name}, Target: {target_name}")
+    try:
+        cmd = f"python3 infra/helper.py run_fuzzer {project_name} {target_name} -- -max_total_time={timeout}"
+        success = run_command(
+            cmd,
+            f"运行 Target '{target_name}' (超时={timeout}s)",
+            logger,
+            allowed_exit_codes=[1, 124],  # 1=发现崩溃, 124=超时，都视为模糊测试的正常退出
+            timeout=timeout + 300  # 增加300秒的额外缓冲时间给 helper.py
+        )
+
+        if success:
+            logger.info(f"✅ Target '{target_name}' 运行完成。")
+        else:
+            logger.error(f"❌ Target '{target_name}' 运行失败。")
+
+        return success, project_name, target_name
+
+    except Exception as e:
+        logger.exception(f"💥 运行 target '{target_name}' 时发生意外错误: {e}")
+        return False, project_name, target_name
+    finally:
+        # 清理日志处理器以释放文件句柄
+        for handler in logger.handlers[:]:
+            handler.close()
+            logger.removeHandler(handler)
+
+
+def main():
+    # 主进程日志配置
+    logging.basicConfig(
+        level=logging.INFO,
+        format="[%(levelname)s] %(message)s",
+        stream=sys.stdout
+    )
+    logger = logging.getLogger("Main")
+
+    parser = argparse.ArgumentParser(description="OSS-Fuzz 并行模糊测试工具")
+    parser.add_argument("project_list", help="包含项目名称列表的文件路径")
+    parser.add_argument("--timeout", type=int, default=60, help="每个 Fuzz Target 的运行超时时间 (秒)")
+    parser.add_argument("--workers", type=int, default=cpu_count(), help="并行执行的工作进程数")
+    args = parser.parse_args()
+
+    # --- 1. 读取项目列表 ---
+    try:
+        project_path = Path(args.project_list)
+        with open(project_path, "r", encoding="utf-8") as f:
+            projects = [line.strip() for line in f if line.strip()]
+        logger.info(f"📋 从 {project_path.name} 加载了 {len(projects)} 个项目。")
+    except FileNotFoundError:
+        logger.error(f"❌ 项目列表文件未找到: {args.project_list}")
+        sys.exit(1)
+    except (OSError, PermissionError) as e:
+        logger.exception(f"💥 读取项目列表时出错: {e}")
+        sys.exit(1)
+
+    # --- 2. 发现阶段: 收集所有项目的 Fuzz Targets ---
+    logger.info("\n" + "=" * 20 + " 阶段 1: 发现所有 Fuzz Targets " + "=" * 20)
+    all_fuzz_tasks = []
+    try:
+        original_cwd = Path.cwd()
+        os.chdir(OSS_FUZZ_DIR)
+        for project_name in projects:
+            targets = discover_targets(project_name, logger)
+            if targets:
+                logger.info(f"🔍 在项目 '{project_name}' 中发现 {len(targets)} 个 targets: {', '.join(targets)}")
+                for target in targets:
+                    all_fuzz_tasks.append((project_name, target))
+            else:
+                logger.warning(f"⚠️ 在项目 '{project_name}' 中未找到任何 Fuzz Targets。")
+        os.chdir(original_cwd)
+    except FileNotFoundError:
+        logger.error(f"❌ OSS-Fuzz 目录不存在: {OSS_FUZZ_DIR}")
+        sys.exit(1)
+    except Exception as e:
+        logger.exception(f"💥 在发现阶段发生未知错误: {e}")
+        sys.exit(1)
+
+
+    if not all_fuzz_tasks:
+        logger.info("🤷 未发现任何可执行的 Fuzz Targets。程序退出。")
+        sys.exit(0)
+
+    # --- 3. 执行阶段: 并行运行所有 Fuzzing 任务 ---
+    logger.info(f"\n✅ 发现阶段完成。共找到 {len(all_fuzz_tasks)} 个模糊测试任务。")
+    logger.info("=" * 20 + " 阶段 2: 并行执行 Fuzzing " + "=" * 23)
+    logger.info(f"🚀 使用 {args.workers} 个工作进程开始并行测试 (每个 Target 超时: {args.timeout}s)...")
+
+    tasks_with_args = [(p, t, args.timeout) for p, t in all_fuzz_tasks]
+    results = []
+    
+    with Pool(args.workers) as pool:
+        try:
+            results = pool.starmap(run_single_target, tasks_with_args)
+        except Exception as e:
+            logger.error(f"💥 并行执行过程中发生严重错误: {e}")
+            pool.terminate()
+            pool.join()
+
+    # --- 4. 汇总阶段 ---
+    logger.info("\n" + "=" * 20 + " 阶段 3: 结果汇总 " + "=" * 28)
+    failed_tasks = [(p, t) for success, p, t in results if not success]
+    total_tasks = len(all_fuzz_tasks)
+    failed_count = len(failed_tasks)
+    success_count = total_tasks - failed_count
+
+    logger.info(f"📊 Fuzzing 完成: 成功 {success_count}/{total_tasks}, 失败 {failed_count}/{total_tasks}")
+    if failed_tasks:
+        logger.error("❌ 以下 Fuzz Targets 运行失败:")
+        for project, target in failed_tasks:
+            logger.error(f"  - 项目: {project}, Target: {target}")
+
+
+if __name__ == "__main__":
+    try:
+        main()
+    except KeyboardInterrupt:
+        print("\n🛑 操作被用户中断。")
+        sys.exit(1)
+    except Exception as e:
+        print(f"\n💥 主程序发生致命错误: {e}")
+        sys.exit(1)
\ No newline at end of file
diff --git a/fuzz/run_fuzz_target.py b/fuzz/run_fuzz_target.py
index 20f3e67..ee47b63 100644
--- a/fuzz/run_fuzz_target.py
+++ b/fuzz/run_fuzz_target.py
@@ -17,63 +17,21 @@
 import argparse
 import logging
 import time
-import shutil
 from datetime import datetime
 from pathlib import Path
-from typing import List, Optional, Tuple
-from multiprocessing import Pool, cpu_count, current_process
+from typing import Optional
+from multiprocessing import Pool, cpu_count
 
 # --- Global configuration ---
 HOME_DIR = Path.home()
 OSS_FUZZ_DIR = HOME_DIR / "FuzzAug" / "fuzz" / "oss-fuzz"
 LOG_DIR = OSS_FUZZ_DIR / "run_logs"
 
-def setup_logging(project_name: str) -> logging.Logger:
-    """Configure hierarchical logger with file and console handlers"""
-    try:
-        LOG_DIR.mkdir(parents=True, exist_ok=True)
-        timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
-        log_file = LOG_DIR / f"run_{project_name}_{timestamp}.log"
-        
-        # Create process-specific logger
-        logger = logging.getLogger(f"{project_name}.{current_process().name}")
-        logger.setLevel(logging.DEBUG)
-        
-        # File handler (all levels)
-        file_handler = logging.FileHandler(log_file, encoding="utf-8")
-        file_handler.setLevel(logging.DEBUG)
-        file_formatter = logging.Formatter(
-            "%(asctime)s [PID:%(process)d] %(levelname)s: %(message)s",
-            datefmt="%Y-%m-%d %H:%M:%S"
-        )
-        file_handler.setFormatter(file_formatter)
-        
-        # Console handler (INFO+ only)
-        console_handler = logging.StreamHandler()
-        console_handler.setLevel(logging.INFO)
-        console_formatter = logging.Formatter(
-            "[%(levelname)s] %(message)s"
-        )
-        console_handler.setFormatter(console_formatter)
-        
-        logger.addHandler(file_handler)
-        logger.addHandler(console_handler)
-        
-        # Capture uncaught exceptions
-        sys.excepthook = lambda exc_type, exc_value, exc_traceback: (
-            logger.critical("Unhandled exception", exc_info=(exc_type, exc_value, exc_traceback))
-        )
-        
-        return logger
-    except (PermissionError, OSError) as e:
-        print(f"❌ Critical logging setup error: {e}")
-        sys.exit(1)
-
 def run_command(
     cmd: str, 
     log_msg: str, 
     logger: logging.Logger,
-    allowed_exit_codes: Optional[List[int]] = None,
+    allowed_exit_codes: Optional[list[int]] = None,
     timeout: int = 3600  # 1 hour default timeout
 ) -> bool:
     """Execute command with real-time logging and precise error handling"""
@@ -138,10 +96,11 @@ def run_command(
             except:
                 pass
 
-def discover_targets(project_name: str, logger: logging.Logger) -> List[str]:
+
+def discover_targets(project_name: str, logger: logging.Logger) -> list[str]:
     """Discover fuzz targets (fuzz_ prefix, no extension, executable)"""
     out_dir = OSS_FUZZ_DIR / "build" / "out" / project_name
-    targets: List[str] = []
+    targets: list[str] = []
     
     try:
         if not out_dir.exists():
@@ -150,11 +109,10 @@ def discover_targets(project_name: str, logger: logging.Logger) -> List[str]:
             
         for f in out_dir.iterdir():
             try:
-                # 核心修改：检查无后缀的可执行文件
                 if (f.is_file() and 
                     f.name.startswith("fuzz_") and 
-                    '.' not in f.name and  # 确保无文件后缀
-                    os.access(f, os.X_OK)):  # 确保可执行权限
+                    '.' not in f.name and 
+                    os.access(f, os.X_OK)):
                     targets.append(f.name)
             except OSError as e:
                 logger.warning(f"⚠️ Skipping file {f.name} due to access error: {e}")
@@ -168,10 +126,33 @@ def discover_targets(project_name: str, logger: logging.Logger) -> List[str]:
     
     return targets
 
-def run_project(project_name: str, timeout: int) -> Tuple[bool, str]:
+
+def run_project(project_name: str, timeout: int) -> tuple[bool, str]:
     """Testing workflow for a single project with precise error handling"""
     try:
-        logger = setup_logging(project_name)
+        
+        logger = logging.getLogger(project_name)
+        logger.setLevel(logging.DEBUG)
+        
+        # 创建日志文件
+        LOG_DIR.mkdir(parents=True, exist_ok=True)
+        timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
+        log_file = LOG_DIR / f"run_{project_name}_{timestamp}.log"
+        file_handler = logging.FileHandler(log_file, encoding="utf-8")
+        
+        # 配置日志格式
+        formatter = logging.Formatter(
+            "%(asctime)s [PID:%(process)d] %(levelname)s: %(message)s",
+            datefmt="%Y-%m-%d %H:%M:%S"
+        )
+        file_handler.setFormatter(formatter)
+        logger.addHandler(file_handler)
+        
+        # 添加控制台输出
+        console_handler = logging.StreamHandler()
+        console_handler.setFormatter(logging.Formatter("[%(levelname)s] %(message)s"))
+        logger.addHandler(console_handler)
+        
         os.chdir(OSS_FUZZ_DIR)
     except (OSError, PermissionError) as e:
         print(f"❌ Critical error initializing project {project_name}: {e}")
@@ -179,7 +160,6 @@ def run_project(project_name: str, timeout: int) -> Tuple[bool, str]:
     
     logger.info("=" * 60)
     logger.info(f"🚀 Starting testing for project: {project_name}")
-    logger.debug(f"📝 Log path: {[h.baseFilename for h in logger.handlers if isinstance(h, logging.FileHandler)]}")
     logger.info("=" * 60)
     
     try:
@@ -215,16 +195,16 @@ def run_project(project_name: str, timeout: int) -> Tuple[bool, str]:
     else:
         logger.error(f"❌ One or more targets failed for {project_name}")
     
+    # 清理日志处理器
+    for handler in logger.handlers[:]:
+        handler.close()
+        logger.removeHandler(handler)
+    
     return (all_success, project_name)
 
-def _create_fake_async_result(result: bool, project_name: str):
-    """模拟 Pool.apply_async 返回值，便于错误恢复"""
-    class FakeApplyResult:
-        def get(self, timeout=None):
-            return (result, project_name)
-    return FakeApplyResult()
 
 def main():
+    # 主进程日志配置
     logging.basicConfig(
         level=logging.INFO,
         format="[%(levelname)s] %(message)s"
@@ -262,30 +242,21 @@ def main():
         sys.exit(1)
 
     with Pool(args.workers) as pool:
-        async_results = []
-        for p in projects:
-            try:
-                async_results.append(pool.apply_async(run_project, (p, args.timeout)))
-            except Exception as e:
-                logger.error(f"💥 Failed to schedule project {p}: {e}")
-                async_results.append(_create_fake_async_result(False, p))
-        
-        final_results: List[Tuple[bool, str]] = []
-        for res in async_results:
-            try:
-                final_results.append(res.get(timeout=args.timeout * 2))
-            except TimeoutError:
-                logger.error("⌛ Project execution timed out")
-                final_results.append((False, "unknown"))
-            except Exception as e:
-                logger.error(f"💥 Error collecting result: {e}")
-                final_results.append((False, "unknown"))
+        try:
+            # 使用starmap同步执行所有任务
+            final_results = pool.starmap(run_project, [(p, args.timeout) for p in projects])
+        except Exception as e:
+            logger.error(f"💥 Parallel execution failed: {e}")
+            # 出错时返回所有项目失败状态
+            final_results = [(False, p) for p in projects]
 
+    # 汇总结果
     failed = [p for success, p in final_results if not success]
     logger.info(f"\n📊 Testing completed: Success {len(projects)-len(failed)}/{len(projects)}")
     if failed:
         logger.error("❌ Failed projects: " + ", ".join(failed))
 
+
 if __name__ == "__main__":
     try:
         main()

From 8285e3f93360cb74a95d02c249be1f1112ce3631 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Wed, 23 Jul 2025 22:29:25 +0000
Subject: [PATCH 036/134] list, tuple, optional

---
 fuzz/build.py | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/fuzz/build.py b/fuzz/build.py
index e87f33b..ebaf087 100644
--- a/fuzz/build.py
+++ b/fuzz/build.py
@@ -18,8 +18,10 @@
     python3 build.py --mode both [project_list] --oss-fuzz-dir /path/to/oss-fuzz --sanitizer address
 
 Example:
-    python3 ./fuzz/build.py --mode both data/valid_projects.txt --oss-fuzz-dir ./fuzz/oss-fuzz --sanitizer address --workers 8
-
+    python3 ./fuzz/build.py --mode both data/valid_projects.txt \
+        --oss-fuzz-dir ./fuzz/oss-fuzz \
+        --sanitizer address \
+        --workers 8
 """
 
 import os
@@ -29,8 +31,7 @@
 import logging
 import json
 from pathlib import Path
-from typing import List, Optional, Tuple, Dict
-from multiprocessing import Pool, cpu_count
+from returns.maybe import Maybe
 
 # ========================================================================================
 # Custom Exceptions
@@ -61,11 +62,11 @@ def run_command(
     cmd: str,
     oss_fuzz_dir: Path,
     project: str = "",
-    allowed_exit_codes: Optional[List[int]] = None,
+    allowed_exit_codes: Maybe[list[int]] = Maybe.empty,
     skip_yes: bool = False
 ) -> int:
     """Execute a command and return the exit code"""
-    allowed_exit_codes = allowed_exit_codes or [0]
+    allowed_exit_codes = allowed_exit_codes.or_else([0])
     cmd_str = f"yes | {cmd}" if not skip_yes else cmd
     logging.debug(f"Executing command [{project}]: {cmd_str}")
     
@@ -110,7 +111,7 @@ def run_command(
 # ========================================================================================
 # Build Functions
 # ========================================================================================
-def build_image(project_name: str, oss_fuzz_dir: Path) -> Tuple[bool, str]:
+def build_image(project_name: str, oss_fuzz_dir: Path) -> tuple[bool, str]:
     """Docker image build workflow"""
     try:
         logging.info(f"Building Docker image: {project_name}")
@@ -137,7 +138,7 @@ def build_image(project_name: str, oss_fuzz_dir: Path) -> Tuple[bool, str]:
         logging.error(f"💥 Unhandled exception: {project_name} - {repr(e)}")
         return (False, project_name)
 
-def build_fuzzers(project_name: str, sanitizer: str, oss_fuzz_dir: Path) -> Tuple[bool, str]:
+def build_fuzzers(project_name: str, sanitizer: str, oss_fuzz_dir: Path) -> tuple[bool, str]:
     """Fuzzer build workflow"""
     try:
         logging.info(f"Building fuzzers: {project_name} ({sanitizer} sanitizer)")
@@ -168,7 +169,7 @@ def build_fuzzers(project_name: str, sanitizer: str, oss_fuzz_dir: Path) -> Tupl
 # ========================================================================================
 # Main Execution
 # ========================================================================================
-def load_projects(file_path: Path) -> List[str]:
+def load_projects(file_path: Path) -> list[str]:
     """Load project list from file"""
     if not file_path.exists():
         raise FileNotFoundError(f"Project list not found: {file_path}")
@@ -184,11 +185,11 @@ def load_projects(file_path: Path) -> List[str]:
 
 def execute_builds(
     func,
-    args_list: List[tuple],
+    args_list: list[tuple],
     worker_count: int,
     success_msg: str,
     failure_msg: str
-) -> Tuple[Dict[str, bool], List[str]]:
+) -> tuple[dict[str, bool], list[str]]:
     """Execute build tasks in parallel and return results"""
     results = {}
     with Pool(worker_count) as pool:
@@ -211,7 +212,7 @@ def main():
                         help="OSS-Fuzz directory path")
     parser.add_argument("--mode", choices=['image', 'fuzzer', 'both'], default='both',
                         help="Build mode: 'image', 'fuzzer', or 'both'")
-    parser.add_argument("--workers", type=int, default=cpu_count(),
+    parser.add_argument("--workers", type=int, default=os.cpu_count(),
                         help="Number of parallel worker processes")
     parser.add_argument("--sanitizer", default="address", 
                         choices=["address", "memory", "undefined"],

From 7e9add9fee1692b1f478df8882f4d4a7bff18957 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Wed, 23 Jul 2025 22:34:11 +0000
Subject: [PATCH 037/134] list,tuple,optional

---
 fuzz/run_fuzz_all_targets.py | 74 +++++++++++++++---------------------
 1 file changed, 31 insertions(+), 43 deletions(-)

diff --git a/fuzz/run_fuzz_all_targets.py b/fuzz/run_fuzz_all_targets.py
index d3ea3d8..27385f0 100644
--- a/fuzz/run_fuzz_all_targets.py
+++ b/fuzz/run_fuzz_all_targets.py
@@ -5,10 +5,10 @@
 run_fuzz_all_targets.py
 
 该脚本采用两阶段方法进行模糊测试：
-1. 发现阶段：首先遍历所有指定的项目，收集每一个项目中所有可执行的模糊测试目标 (fuzz target)。
-2. 执行阶段：然后创建一个包含所有 (项目, target) 对的任务池，并使用多进程并行执行所有任务。
+1. 发现阶段：遍历所有指定项目，收集每个项目中所有可执行的模糊测试目标(fuzz target)
+2. 执行阶段：创建包含所有(项目, target)对的任务池，使用多进程并行执行所有任务
 
-这种方法可以最大化 CPU 利用率，并提供更清晰的整体进度。
+这种方法最大化CPU利用率并提供清晰的整体进度[2](@ref)。
 
 用法: python3 run_fuzz_all_targets.py [project_list_file] [--timeout seconds] [--workers N]
 示例: python3 fuzz/run_fuzz_all_targets.py data/valid_projects.txt --timeout 60 --workers 4
@@ -22,8 +22,8 @@
 import time
 from datetime import datetime
 from pathlib import Path
-from typing import Optional, List, Tuple
 from multiprocessing import Pool, cpu_count
+from returns.maybe import Maybe, Nothing, Some
 
 # --- 全局配置 ---
 HOME_DIR = Path.home()
@@ -35,11 +35,11 @@ def run_command(
     cmd: str,
     log_msg: str,
     logger: logging.Logger,
-    allowed_exit_codes: Optional[List[int]] = None,
+    allowed_exit_codes: Maybe[list[int]] = Nothing,
     timeout: int = 3600  # 默认1小时超时
 ) -> bool:
     """使用实时日志记录和精确的错误处理来执行命令"""
-    allowed_exit_codes = allowed_exit_codes or []
+    allowed_codes = allowed_exit_codes.value_or([])
     logger.info(f"▶️ {log_msg}...")
     logger.debug(f"   $ {cmd}")
 
@@ -57,7 +57,6 @@ def run_command(
 
         start_time = time.time()
         while process.poll() is None:
-            # 检查命令是否超时
             if time.time() - start_time > timeout:
                 logger.error(f"⌛ 命令在 {timeout} 秒后超时")
                 process.terminate()
@@ -72,11 +71,10 @@ def run_command(
                 if line:
                     logger.debug(line.strip())
             else:
-                # 在某些情况下 stdout 可能暂时为 None
                 time.sleep(0.1)
 
         exit_code = process.returncode
-        if exit_code not in [0, *allowed_exit_codes]:
+        if exit_code not in [0, *allowed_codes]:
             logger.error(f"❌ 命令执行失败，退出码: {exit_code}")
             return False
         return True
@@ -102,10 +100,10 @@ def run_command(
                 pass
 
 
-def discover_targets(project_name: str, logger: logging.Logger) -> List[str]:
-    """发现项目的 Fuzz Targets (以 'fuzz_' 开头，无扩展名，且可执行)"""
+def discover_targets(project_name: str, logger: logging.Logger) -> list[str]:
+    """发现项目的Fuzz Targets(以'fuzz_'开头，无扩展名，且可执行)"""
     out_dir = OSS_FUZZ_DIR / "build" / "out" / project_name
-    targets: List[str] = []
+    targets: list[str] = []
 
     if not out_dir.is_dir():
         logger.warning(f"项目 {project_name} 的构建输出目录不存在: {out_dir}")
@@ -125,18 +123,17 @@ def discover_targets(project_name: str, logger: logging.Logger) -> List[str]:
     except PermissionError:
         logger.error(f"🔒 访问目录权限不足: {out_dir}")
     except OSError as e:
-        logger.exception(f"💥 发现 Target 时发生操作系统错误: {e}")
+        logger.exception(f"💥 发现Target时发生操作系统错误: {e}")
 
     return targets
 
 
-def run_single_target(project_name: str, target_name: str, timeout: int) -> Tuple[bool, str, str]:
-    """为单个 (项目, target) 对执行模糊测试工作流"""
+def run_single_target(project_name: str, target_name: str, timeout: int) -> tuple[bool, str, str]:
+    """为单个(项目, target)对执行模糊测试工作流"""
     task_id = f"{project_name}_{target_name}"
     logger = logging.getLogger(task_id)
 
     try:
-        # 为每个任务配置独立的日志记录器
         logger.setLevel(logging.DEBUG)
         LOG_DIR.mkdir(parents=True, exist_ok=True)
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
@@ -148,11 +145,9 @@ def run_single_target(project_name: str, target_name: str, timeout: int) -> Tupl
         )
         file_handler.setFormatter(formatter)
         logger.addHandler(file_handler)
-
         os.chdir(OSS_FUZZ_DIR)
 
     except (OSError, PermissionError) as e:
-        # 如果日志设置失败，直接打印到控制台
         print(f"❌ 任务 {task_id} 初始化时发生严重错误: {e}")
         return False, project_name, target_name
 
@@ -161,10 +156,10 @@ def run_single_target(project_name: str, target_name: str, timeout: int) -> Tupl
         cmd = f"python3 infra/helper.py run_fuzzer {project_name} {target_name} -- -max_total_time={timeout}"
         success = run_command(
             cmd,
-            f"运行 Target '{target_name}' (超时={timeout}s)",
+            f"运行Target '{target_name}' (超时={timeout}s)",
             logger,
-            allowed_exit_codes=[1, 124],  # 1=发现崩溃, 124=超时，都视为模糊测试的正常退出
-            timeout=timeout + 300  # 增加300秒的额外缓冲时间给 helper.py
+            allowed_exit_codes=Some([1, 124]),  # 1=发现崩溃, 124=超时
+            timeout=timeout + 300
         )
 
         if success:
@@ -175,17 +170,15 @@ def run_single_target(project_name: str, target_name: str, timeout: int) -> Tupl
         return success, project_name, target_name
 
     except Exception as e:
-        logger.exception(f"💥 运行 target '{target_name}' 时发生意外错误: {e}")
+        logger.exception(f"💥 运行target '{target_name}' 时发生意外错误: {e}")
         return False, project_name, target_name
     finally:
-        # 清理日志处理器以释放文件句柄
         for handler in logger.handlers[:]:
             handler.close()
             logger.removeHandler(handler)
 
 
 def main():
-    # 主进程日志配置
     logging.basicConfig(
         level=logging.INFO,
         format="[%(levelname)s] %(message)s",
@@ -193,13 +186,12 @@ def main():
     )
     logger = logging.getLogger("Main")
 
-    parser = argparse.ArgumentParser(description="OSS-Fuzz 并行模糊测试工具")
+    parser = argparse.ArgumentParser(description="OSS-Fuzz并行模糊测试工具")
     parser.add_argument("project_list", help="包含项目名称列表的文件路径")
-    parser.add_argument("--timeout", type=int, default=60, help="每个 Fuzz Target 的运行超时时间 (秒)")
+    parser.add_argument("--timeout", type=int, default=60, help="每个Fuzz Target的运行超时时间(秒)")
     parser.add_argument("--workers", type=int, default=cpu_count(), help="并行执行的工作进程数")
     args = parser.parse_args()
 
-    # --- 1. 读取项目列表 ---
     try:
         project_path = Path(args.project_list)
         with open(project_path, "r", encoding="utf-8") as f:
@@ -212,40 +204,37 @@ def main():
         logger.exception(f"💥 读取项目列表时出错: {e}")
         sys.exit(1)
 
-    # --- 2. 发现阶段: 收集所有项目的 Fuzz Targets ---
-    logger.info("\n" + "=" * 20 + " 阶段 1: 发现所有 Fuzz Targets " + "=" * 20)
-    all_fuzz_tasks = []
+    logger.info("\n" + "=" * 20 + " 阶段1: 发现所有Fuzz Targets " + "=" * 20)
+    all_fuzz_tasks: list[tuple[str, str]] = []
     try:
         original_cwd = Path.cwd()
         os.chdir(OSS_FUZZ_DIR)
         for project_name in projects:
             targets = discover_targets(project_name, logger)
             if targets:
-                logger.info(f"🔍 在项目 '{project_name}' 中发现 {len(targets)} 个 targets: {', '.join(targets)}")
+                logger.info(f"🔍 在项目 '{project_name}' 中发现 {len(targets)} 个targets: {', '.join(targets)}")
                 for target in targets:
                     all_fuzz_tasks.append((project_name, target))
             else:
-                logger.warning(f"⚠️ 在项目 '{project_name}' 中未找到任何 Fuzz Targets。")
+                logger.warning(f"⚠️ 在项目 '{project_name}' 中未找到任何Fuzz Targets。")
         os.chdir(original_cwd)
     except FileNotFoundError:
-        logger.error(f"❌ OSS-Fuzz 目录不存在: {OSS_FUZZ_DIR}")
+        logger.error(f"❌ OSS-Fuzz目录不存在: {OSS_FUZZ_DIR}")
         sys.exit(1)
     except Exception as e:
         logger.exception(f"💥 在发现阶段发生未知错误: {e}")
         sys.exit(1)
 
-
     if not all_fuzz_tasks:
-        logger.info("🤷 未发现任何可执行的 Fuzz Targets。程序退出。")
+        logger.info("🤷 未发现任何可执行的Fuzz Targets。程序退出。")
         sys.exit(0)
 
-    # --- 3. 执行阶段: 并行运行所有 Fuzzing 任务 ---
     logger.info(f"\n✅ 发现阶段完成。共找到 {len(all_fuzz_tasks)} 个模糊测试任务。")
-    logger.info("=" * 20 + " 阶段 2: 并行执行 Fuzzing " + "=" * 23)
-    logger.info(f"🚀 使用 {args.workers} 个工作进程开始并行测试 (每个 Target 超时: {args.timeout}s)...")
+    logger.info("=" * 20 + " 阶段2: 并行执行Fuzzing " + "=" * 23)
+    logger.info(f"🚀 使用 {args.workers} 个工作进程开始并行测试(每个Target超时: {args.timeout}s)...")
 
     tasks_with_args = [(p, t, args.timeout) for p, t in all_fuzz_tasks]
-    results = []
+    results: list[tuple[bool, str, str]] = []
     
     with Pool(args.workers) as pool:
         try:
@@ -255,16 +244,15 @@ def main():
             pool.terminate()
             pool.join()
 
-    # --- 4. 汇总阶段 ---
-    logger.info("\n" + "=" * 20 + " 阶段 3: 结果汇总 " + "=" * 28)
+    logger.info("\n" + "=" * 20 + " 阶段3: 结果汇总 " + "=" * 28)
     failed_tasks = [(p, t) for success, p, t in results if not success]
     total_tasks = len(all_fuzz_tasks)
     failed_count = len(failed_tasks)
     success_count = total_tasks - failed_count
 
-    logger.info(f"📊 Fuzzing 完成: 成功 {success_count}/{total_tasks}, 失败 {failed_count}/{total_tasks}")
+    logger.info(f"📊 Fuzzing完成: 成功 {success_count}/{total_tasks}, 失败 {failed_count}/{total_tasks}")
     if failed_tasks:
-        logger.error("❌ 以下 Fuzz Targets 运行失败:")
+        logger.error("❌ 以下Fuzz Targets运行失败:")
         for project, target in failed_tasks:
             logger.error(f"  - 项目: {project}, Target: {target}")
 

From 6afc91982d311a2b99c16403f9bc099906ffae92 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Thu, 24 Jul 2025 00:26:19 +0000
Subject: [PATCH 038/134] translate run_fuzz_all_targets.py docs and log messages to English

---
 fuzz/run_fuzz_all_targets.py | 124 +++++++++++++++++++----------------
 1 file changed, 67 insertions(+), 57 deletions(-)

diff --git a/fuzz/run_fuzz_all_targets.py b/fuzz/run_fuzz_all_targets.py
index 27385f0..74bed97 100644
--- a/fuzz/run_fuzz_all_targets.py
+++ b/fuzz/run_fuzz_all_targets.py
@@ -4,14 +4,14 @@
 """
 run_fuzz_all_targets.py
 
-该脚本采用两阶段方法进行模糊测试：
-1. 发现阶段：遍历所有指定项目，收集每个项目中所有可执行的模糊测试目标(fuzz target)
-2. 执行阶段：创建包含所有(项目, target)对的任务池，使用多进程并行执行所有任务
+This script employs a two-phase approach for fuzz testing:
+1. Discovery phase: Traverse all specified projects to collect all executable fuzz targets in each project
+2. Execution phase: Create a task pool containing all (project, target) pairs and execute them in parallel using multiprocessing
 
-这种方法最大化CPU利用率并提供清晰的整体进度[2](@ref)。
+This approach maximizes CPU utilization and provides clear overall progress[2](@ref).
 
-用法: python3 run_fuzz_all_targets.py [project_list_file] [--timeout seconds] [--workers N]
-示例: python3 fuzz/run_fuzz_all_targets.py data/valid_projects.txt --timeout 60 --workers 4
+Usage: python3 run_fuzz_all_targets.py [project_list_file] [--timeout seconds] [--workers N]
+Example: python3 fuzz/run_fuzz_all_targets.py data/valid_projects.txt --timeout 60 --workers 4
 """
 
 import os
@@ -25,7 +25,7 @@
 from multiprocessing import Pool, cpu_count
 from returns.maybe import Maybe, Nothing, Some
 
-# --- 全局配置 ---
+# --- Global configuration ---
 HOME_DIR = Path.home()
 OSS_FUZZ_DIR = HOME_DIR / "FuzzAug" / "fuzz" / "oss-fuzz"
 LOG_DIR = OSS_FUZZ_DIR / "run_logs2"
@@ -36,9 +36,9 @@ def run_command(
     log_msg: str,
     logger: logging.Logger,
     allowed_exit_codes: Maybe[list[int]] = Nothing,
-    timeout: int = 3600  # 默认1小时超时
+    timeout: int = 3600  # Default 1-hour timeout
 ) -> bool:
-    """使用实时日志记录和精确的错误处理来执行命令"""
+    """Execute commands with real-time logging and precise error handling"""
     allowed_codes = allowed_exit_codes.value_or([])
     logger.info(f"▶️ {log_msg}...")
     logger.debug(f"   $ {cmd}")
@@ -58,7 +58,7 @@ def run_command(
         start_time = time.time()
         while process.poll() is None:
             if time.time() - start_time > timeout:
-                logger.error(f"⌛ 命令在 {timeout} 秒后超时")
+                logger.error(f"⌛ Command timed out after {timeout} seconds")
                 process.terminate()
                 try:
                     process.wait(timeout=5)
@@ -75,21 +75,21 @@ def run_command(
 
         exit_code = process.returncode
         if exit_code not in [0, *allowed_codes]:
-            logger.error(f"❌ 命令执行失败，退出码: {exit_code}")
+            logger.error(f"❌ Command execution failed, exit code: {exit_code}")
             return False
         return True
 
     except FileNotFoundError:
-        logger.error(f"🔍 命令未找到: {cmd.split()[0]}")
+        logger.error(f"🔍 Command not found: {cmd.split()[0]}")
         return False
     except PermissionError:
-        logger.error(f"🔒 执行命令权限不足: {cmd}")
+        logger.error(f"🔒 Insufficient permissions to execute command: {cmd}")
         return False
     except subprocess.SubprocessError as e:
-        logger.exception(f"💥 子进程错误: {e}")
+        logger.exception(f"💥 Subprocess error: {e}")
         return False
     except OSError as e:
-        logger.exception(f"💥 执行命令时发生操作系统错误: {e}")
+        logger.exception(f"💥 Operating system error during command execution: {e}")
         return False
     finally:
         if process and process.poll() is None:
@@ -101,12 +101,12 @@ def run_command(
 
 
 def discover_targets(project_name: str, logger: logging.Logger) -> list[str]:
-    """发现项目的Fuzz Targets(以'fuzz_'开头，无扩展名，且可执行)"""
+    """Discover fuzz targets for a project (starting with 'fuzz_', no extension, and executable)"""
     out_dir = OSS_FUZZ_DIR / "build" / "out" / project_name
     targets: list[str] = []
 
     if not out_dir.is_dir():
-        logger.warning(f"项目 {project_name} 的构建输出目录不存在: {out_dir}")
+        logger.warning(f"Build output directory for project {project_name} does not exist: {out_dir}")
         return targets
 
     try:
@@ -118,18 +118,18 @@ def discover_targets(project_name: str, logger: logging.Logger) -> list[str]:
                         os.access(f, os.X_OK)):
                     targets.append(f.name)
             except OSError as e:
-                logger.warning(f"⚠️ 检查文件 {f.name} 时出错，已跳过: {e}")
+                logger.warning(f"⚠️ Error while checking file {f.name}, skipped: {e}")
 
     except PermissionError:
-        logger.error(f"🔒 访问目录权限不足: {out_dir}")
+        logger.error(f"🔒 Insufficient permissions to access directory: {out_dir}")
     except OSError as e:
-        logger.exception(f"💥 发现Target时发生操作系统错误: {e}")
+        logger.exception(f"💥 Operating system error occurred while discovering targets: {e}")
 
     return targets
 
 
 def run_single_target(project_name: str, target_name: str, timeout: int) -> tuple[bool, str, str]:
-    """为单个(项目, target)对执行模糊测试工作流"""
+    """Execute fuzz testing workflow for a single (project, target) pair"""
     task_id = f"{project_name}_{target_name}"
     logger = logging.getLogger(task_id)
 
@@ -148,29 +148,29 @@ def run_single_target(project_name: str, target_name: str, timeout: int) -> tupl
         os.chdir(OSS_FUZZ_DIR)
 
     except (OSError, PermissionError) as e:
-        print(f"❌ 任务 {task_id} 初始化时发生严重错误: {e}")
+        print(f"❌ Critical error occurred during initialization of task {task_id}: {e}")
         return False, project_name, target_name
 
-    logger.info(f"🚀 开始测试 -> 项目: {project_name}, Target: {target_name}")
+    logger.info(f"🚀 Starting test -> Project: {project_name}, Target: {target_name}")
     try:
         cmd = f"python3 infra/helper.py run_fuzzer {project_name} {target_name} -- -max_total_time={timeout}"
         success = run_command(
             cmd,
-            f"运行Target '{target_name}' (超时={timeout}s)",
+            f"Running Target '{target_name}' (timeout={timeout}s)",
             logger,
-            allowed_exit_codes=Some([1, 124]),  # 1=发现崩溃, 124=超时
+            allowed_exit_codes=Some([1, 124]),  # 1=Crashes found, 124=Timeout
             timeout=timeout + 300
         )
 
         if success:
-            logger.info(f"✅ Target '{target_name}' 运行完成。")
+            logger.info(f"✅ Target '{target_name}' completed successfully.")
         else:
-            logger.error(f"❌ Target '{target_name}' 运行失败。")
+            logger.error(f"❌ Target '{target_name}' failed.")
 
         return success, project_name, target_name
 
     except Exception as e:
-        logger.exception(f"💥 运行target '{target_name}' 时发生意外错误: {e}")
+        logger.exception(f"💥 Unexpected error occurred while running target '{target_name}': {e}")
         return False, project_name, target_name
     finally:
         for handler in logger.handlers[:]:
@@ -179,6 +179,7 @@ def run_single_target(project_name: str, target_name: str, timeout: int) -> tupl
 
 
 def main():
+    # Configure main process logging
     logging.basicConfig(
         level=logging.INFO,
         format="[%(levelname)s] %(message)s",
@@ -186,83 +187,92 @@ def main():
     )
     logger = logging.getLogger("Main")
 
-    parser = argparse.ArgumentParser(description="OSS-Fuzz并行模糊测试工具")
-    parser.add_argument("project_list", help="包含项目名称列表的文件路径")
-    parser.add_argument("--timeout", type=int, default=60, help="每个Fuzz Target的运行超时时间(秒)")
-    parser.add_argument("--workers", type=int, default=cpu_count(), help="并行执行的工作进程数")
+    # Set up command line argument parsing
+    parser = argparse.ArgumentParser(description="OSS-Fuzz Parallel Fuzz Testing Tool")
+    parser.add_argument("project_list", help="File path containing list of project names")
+    parser.add_argument("--timeout", type=int, default=60, help="Timeout duration per Fuzz Target (seconds)")
+    parser.add_argument("--workers", type=int, default=cpu_count(), help="Number of parallel worker processes")
     args = parser.parse_args()
 
+    # 1. Read project list file
     try:
         project_path = Path(args.project_list)
         with open(project_path, "r", encoding="utf-8") as f:
             projects = [line.strip() for line in f if line.strip()]
-        logger.info(f"📋 从 {project_path.name} 加载了 {len(projects)} 个项目。")
+        logger.info(f"📋 Loaded {len(projects)} projects from {project_path.name}.")
     except FileNotFoundError:
-        logger.error(f"❌ 项目列表文件未找到: {args.project_list}")
+        logger.error(f"❌ Project list file not found: {args.project_list}")
         sys.exit(1)
     except (OSError, PermissionError) as e:
-        logger.exception(f"💥 读取项目列表时出错: {e}")
+        logger.exception(f"💥 Error occurred while reading project list: {e}")
         sys.exit(1)
 
-    logger.info("\n" + "=" * 20 + " 阶段1: 发现所有Fuzz Targets " + "=" * 20)
-    all_fuzz_tasks: list[tuple[str, str]] = []
+    # 2. Discovery phase: Collect all fuzz targets
+    logger.info("\n" + "=" * 20 + " Phase 1: Discover all Fuzz Targets " + "=" * 20)
+    all_fuzz_tasks: list[tuple[str, str]] = []  # Store (project, target) tuples
     try:
-        original_cwd = Path.cwd()
-        os.chdir(OSS_FUZZ_DIR)
+        original_cwd = Path.cwd()  # Save current working directory
+        os.chdir(OSS_FUZZ_DIR)     # Switch to OSS-Fuzz directory
         for project_name in projects:
             targets = discover_targets(project_name, logger)
             if targets:
-                logger.info(f"🔍 在项目 '{project_name}' 中发现 {len(targets)} 个targets: {', '.join(targets)}")
+                logger.info(f"🔍 Discovered {len(targets)} targets in project '{project_name}': {', '.join(targets)}")
                 for target in targets:
                     all_fuzz_tasks.append((project_name, target))
             else:
-                logger.warning(f"⚠️ 在项目 '{project_name}' 中未找到任何Fuzz Targets。")
-        os.chdir(original_cwd)
+                logger.warning(f"⚠️ No Fuzz Targets found in project '{project_name}'.")
+        os.chdir(original_cwd)  # Restore original working directory
     except FileNotFoundError:
-        logger.error(f"❌ OSS-Fuzz目录不存在: {OSS_FUZZ_DIR}")
+        logger.error(f"❌ OSS-Fuzz directory does not exist: {OSS_FUZZ_DIR}")
         sys.exit(1)
     except Exception as e:
-        logger.exception(f"💥 在发现阶段发生未知错误: {e}")
+        logger.exception(f"💥 Unknown error occurred during discovery phase: {e}")
         sys.exit(1)
 
+    # Check if any valid targets were found
     if not all_fuzz_tasks:
-        logger.info("🤷 未发现任何可执行的Fuzz Targets。程序退出。")
+        logger.info("🤷 No executable Fuzz Targets found. Program exits.")
         sys.exit(0)
 
-    logger.info(f"\n✅ 发现阶段完成。共找到 {len(all_fuzz_tasks)} 个模糊测试任务。")
-    logger.info("=" * 20 + " 阶段2: 并行执行Fuzzing " + "=" * 23)
-    logger.info(f"🚀 使用 {args.workers} 个工作进程开始并行测试(每个Target超时: {args.timeout}s)...")
+    # 3. Execution phase: Parallel fuzz testing
+    logger.info(f"\n✅ Discovery phase completed. Found a total of {len(all_fuzz_tasks)} fuzz tasks.")
+    logger.info("=" * 20 + " Phase 2: Parallel Fuzzing Execution " + "=" * 23)
+    logger.info(f"🚀 Starting parallel testing with {args.workers} worker processes (timeout per target: {args.timeout}s)...")
 
+    # Prepare task parameters (project, target, timeout)
     tasks_with_args = [(p, t, args.timeout) for p, t in all_fuzz_tasks]
-    results: list[tuple[bool, str, str]] = []
+    results: list[tuple[bool, str, str]] = []  # Store results (success, project, target)
     
+    # Execute in parallel using process pool
     with Pool(args.workers) as pool:
         try:
             results = pool.starmap(run_single_target, tasks_with_args)
         except Exception as e:
-            logger.error(f"💥 并行执行过程中发生严重错误: {e}")
+            logger.error(f"💥 Critical error occurred during parallel execution: {e}")
             pool.terminate()
             pool.join()
 
-    logger.info("\n" + "=" * 20 + " 阶段3: 结果汇总 " + "=" * 28)
-    failed_tasks = [(p, t) for success, p, t in results if not success]
+    # 4. Result summary and reporting
+    logger.info("\n" + "=" * 20 + " Phase 3: Result Summary " + "=" * 28)
+    failed_tasks = [(p, t) for success, p, t in results if not success]  # List of failed tasks
     total_tasks = len(all_fuzz_tasks)
     failed_count = len(failed_tasks)
     success_count = total_tasks - failed_count
 
-    logger.info(f"📊 Fuzzing完成: 成功 {success_count}/{total_tasks}, 失败 {failed_count}/{total_tasks}")
+    # Output statistical summary
+    logger.info(f"📊 Fuzzing completed: Success {success_count}/{total_tasks}, Failed {failed_count}/{total_tasks}")
     if failed_tasks:
-        logger.error("❌ 以下Fuzz Targets运行失败:")
+        logger.error("❌ The following Fuzz Targets failed:")
         for project, target in failed_tasks:
-            logger.error(f"  - 项目: {project}, Target: {target}")
+            logger.error(f"  - Project: {project}, Target: {target}")  # List detailed failures
 
 
 if __name__ == "__main__":
     try:
         main()
     except KeyboardInterrupt:
-        print("\n🛑 操作被用户中断。")
+        print("\n🛑 Operation interrupted by user.")
         sys.exit(1)
     except Exception as e:
-        print(f"\n💥 主程序发生致命错误: {e}")
+        print(f"\n💥 Fatal error in main program: {e}")
         sys.exit(1)
\ No newline at end of file

From ea76d9dcc85c0c4e763bb4d168cef37301ceb065 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Thu, 24 Jul 2025 00:30:32 +0000
Subject: [PATCH 039/134] Rename build.py to build_fuzz.py; remove standalone build_* and run_fuzz_target.py scripts

---
 fuzz/{build.py => build_fuzz.py} |   8 +-
 fuzz/build_fuzzers.py            | 246 ----------------------------
 fuzz/build_images.py             | 158 ------------------
 fuzz/build_oss_fuzz_whole.py     | 154 ------------------
 fuzz/run_fuzz_target.py          | 268 -------------------------------
 5 files changed, 4 insertions(+), 830 deletions(-)
 rename fuzz/{build.py => build_fuzz.py} (96%)
 delete mode 100644 fuzz/build_fuzzers.py
 delete mode 100644 fuzz/build_images.py
 delete mode 100644 fuzz/build_oss_fuzz_whole.py
 delete mode 100644 fuzz/run_fuzz_target.py

diff --git a/fuzz/build.py b/fuzz/build_fuzz.py
similarity index 96%
rename from fuzz/build.py
rename to fuzz/build_fuzz.py
index ebaf087..87c5a44 100644
--- a/fuzz/build.py
+++ b/fuzz/build_fuzz.py
@@ -9,16 +9,16 @@
 
 Usage:
   Build images: 
-    python3 build.py --mode image [project_list] --oss-fuzz-dir /path/to/oss-fuzz
+    python3 build_fuzz.py --mode image [project_list] --oss-fuzz-dir /path/to/oss-fuzz
   
   Build fuzzers: 
-    python3 build.py --mode fuzzer [project_list] --oss-fuzz-dir /path/to/oss-fuzz --image-results results.json
+    python3 build_fuzz.py --mode fuzzer [project_list] --oss-fuzz-dir /path/to/oss-fuzz --image-results results.json
   
   Build both: 
-    python3 build.py --mode both [project_list] --oss-fuzz-dir /path/to/oss-fuzz --sanitizer address
+    python3 build_fuzz.py --mode both [project_list] --oss-fuzz-dir /path/to/oss-fuzz --sanitizer address
 
 Example:
-    python3 ./fuzz/build.py --mode both data/valid_projects.txt \
+    python3 ./fuzz/build_fuzz.py --mode both data/valid_projects.txt \
         --oss-fuzz-dir ./fuzz/oss-fuzz \
         --sanitizer address \
         --workers 8
diff --git a/fuzz/build_fuzzers.py b/fuzz/build_fuzzers.py
deleted file mode 100644
index 1dfd825..0000000
--- a/fuzz/build_fuzzers.py
+++ /dev/null
@@ -1,246 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-"""
-build_fuzzers.py
-
-Parallel build of OSS-Fuzz fuzzers.
-Requires Docker images to be built first (using build_images.py).
-
-Usage: python3 build_fuzzers.py [project_list_file] --oss-fuzz-dir /path/to/oss-fuzz \
-    --image-results image_build_results.json \
-    [--sanitizer type] [--workers N]
-Example: python3 fuzz/build_fuzzers.py data/valid_projects.txt \
-    --oss-fuzz-dir ./fuzz/oss-fuzz \
-    --image-results image_build_results.json \
-    --sanitizer address \
-    --workers 8
-"""
-
-import os
-import sys
-import subprocess
-import argparse
-import logging
-import json
-from pathlib import Path
-from typing import List, Optional, Tuple
-from multiprocessing import Pool, cpu_count
-
-class BuildError(Exception):
-    """Base exception for build failures"""
-    def __init__(self, message: str, project: str = "", exit_code: int = None):
-        super().__init__(message)
-        self.project = project
-        self.exit_code = exit_code
-
-class CommandError(BuildError):
-    """Exception for command execution failures"""
-    pass
-
-class PathError(BuildError):
-    """Exception for missing paths or files"""
-    pass
-
-class ConfigError(BuildError):
-    """Exception for configuration errors"""
-    pass
-
-def run_command(
-    cmd: str,
-    oss_fuzz_dir: Path,
-    project: str = "",
-    allowed_exit_codes: Optional[List[int]] = None
-) -> int:
-    """Execute a command and return the exit code"""
-    allowed_exit_codes = allowed_exit_codes or [0]
-    logging.info(f"▶️ Executing command: {cmd}")
-    
-    try:
-        process = subprocess.Popen(
-            cmd,
-            shell=True,
-            cwd=str(oss_fuzz_dir),
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
-            text=True
-        )
-        
-        stdout, stderr = process.communicate()
-        exit_code = process.returncode
-        
-        if exit_code in allowed_exit_codes:
-            return exit_code
-            
-        # 构建详细的错误信息
-        error_msg = f"Command failed (exit code: {exit_code})"
-        if project:
-            error_msg += f" for project: {project}"
-            
-        if stderr.strip():
-            error_msg += f"\nError output:\n{stderr.strip()}"
-            
-        if stdout.strip():
-            error_msg += f"\nOutput:\n{stdout.strip()}"
-            
-        raise CommandError(error_msg, project=project, exit_code=exit_code)
-    
-    except FileNotFoundError as e:
-        raise CommandError(f"Command not found: {cmd.split()[0]}", project=project) from e
-    except OSError as e:
-        raise CommandError(f"System error: {e}", project=project) from e
-    except subprocess.SubprocessError as e:
-        raise CommandError(f"Subprocess error: {e}", project=project) from e
-
-def build_fuzzers(project_name: str, sanitizer: str, oss_fuzz_dir: Path) -> Tuple[bool, str]:
-    """Fuzzer build workflow"""
-    try:
-        logging.info("=" * 60)
-        logging.info(f"🔧 Building fuzzers for: {project_name}")
-        logging.info(f"📁 OSS-Fuzz directory: {oss_fuzz_dir}")
-        logging.info("=" * 60)
-        
-        # Validate paths
-        helper_script = oss_fuzz_dir / "infra" / "helper.py"
-        if not helper_script.exists():
-            raise PathError(f"Missing helper script: {helper_script}", project=project_name)
-        
-        # Execute fuzzer build command
-        run_command(
-            f"python3 infra/helper.py build_fuzzers --sanitizer {sanitizer} {project_name}",
-            oss_fuzz_dir,
-            project=project_name
-        )
-        
-        logging.info(f"✅ Fuzzers built: {project_name}")
-        return (True, project_name)
-    
-    except BuildError as e:
-        logging.error(f"❌ Build failed: {project_name}")
-        logging.error(f"   Reason: {str(e)}")
-        return (False, project_name)
-    except Exception as e:
-        logging.error(f"🔥 Unhandled exception: {project_name}")
-        logging.exception(f"   Exception details: {e}")
-        return (False, project_name)
-
-def main():
-    parser = argparse.ArgumentParser(description="OSS-Fuzz Fuzzer Builder")
-    parser.add_argument("project_list", help="Project list file path")
-    parser.add_argument("--oss-fuzz-dir", required=True, type=str, 
-                        help="OSS-Fuzz directory path")
-    parser.add_argument("--sanitizer", default="address", 
-                        choices=["address", "memory", "undefined"],
-                        help="Fuzzer sanitizer type")
-    parser.add_argument("--workers", type=int, default=cpu_count(),
-                        help="Number of parallel worker processes")
-    parser.add_argument("--image-results", required=True,
-                        help="JSON file with image build results from build_images.py")
-    args = parser.parse_args()
-
-    logging.basicConfig(
-        level=logging.INFO,
-        format='[%(levelname)s] %(message)s'
-    )
-
-    # Process paths
-    oss_fuzz_dir = Path(args.oss_fuzz_dir).resolve()
-    logging.info(f"📁 Using OSS-Fuzz directory: {oss_fuzz_dir}")
-
-    # Read project list
-    try:
-        project_file = Path(args.project_list)
-        if not project_file.exists():
-            raise FileNotFoundError(f"Project list file not found: {project_file}")
-            
-        with open(project_file, "r", encoding="utf-8") as f:
-            all_projects = [line.strip() for line in f if line.strip()]
-            
-        if not all_projects:
-            raise ConfigError("Project list is empty")
-            
-        logging.info(f"📋 Loaded {len(all_projects)} projects")
-    except Exception as e:
-        logging.error(f"❌ Failed to read project list: {e}")
-        sys.exit(1)
-
-    # Load image build results
-    try:
-        image_results_file = Path(args.image_results)
-        if not image_results_file.exists():
-            raise FileNotFoundError(f"Image results file not found: {image_results_file}")
-            
-        with open(image_results_file, "r") as f:
-            image_results = json.load(f)
-            
-        if not isinstance(image_results, dict):
-            raise ConfigError("Image results should be a JSON object")
-            
-        logging.info(f"📋 Loaded image build results: {args.image_results}")
-    except json.JSONDecodeError as e:
-        logging.error(f"❌ Failed to parse image build results: {e}")
-        sys.exit(1)
-    except Exception as e:
-        logging.error(f"❌ Failed to load image build results: {e}")
-        sys.exit(1)
-
-    # Filter projects with successful image builds
-    projects_to_build = [p for p in all_projects if p in image_results and image_results[p]]
-    image_failures = [p for p in all_projects if p not in image_results or not image_results[p]]
-    
-    if not projects_to_build:
-        logging.error("❌ No projects with successful image builds")
-        if image_failures:
-            logging.error(f"   Projects with image build failures: {', '.join(image_failures[:10])}{'...' if len(image_failures) > 10 else ''}")
-        sys.exit(1)
-        
-    skipped = len(all_projects) - len(projects_to_build)
-    logging.info(f"🔍 Building {len(projects_to_build)} projects (skipped {skipped} due to image failures)")
-
-    # Parallel fuzzer builds
-    with Pool(args.workers) as pool:
-        results = pool.starmap(
-            build_fuzzers, 
-            [(p, args.sanitizer, oss_fuzz_dir) for p in projects_to_build]
-        )
-
-    # Output results
-    fuzzer_results = {project: success for success, project in results}
-    failed = [p for p in projects_to_build if not fuzzer_results[p]]
-    
-    success_count = len(projects_to_build) - len(failed)
-    logging.info(f"\n📊 Build completed: {success_count}/{len(projects_to_build)}")
-    
-    if failed:
-        logging.error(f"❌ Failed builds ({len(failed)} projects):")
-        for project in failed:
-            logging.error(f"   - {project}")
-
-    # Generate overall status report
-    overall_results = {}
-    for project in all_projects:
-        status = "❌"
-        if project in image_results and image_results[project]:
-            if project in fuzzer_results and fuzzer_results[project]:
-                status = "✅"
-            elif project in fuzzer_results:
-                status = "❌ (fuzzer)"
-            else:
-                status = "❌ (not built)"
-        else:
-            status = "❌ (image)"
-        overall_results[project] = status
-
-    logging.info("\n📊 Overall status:")
-    for project, status in overall_results.items():
-        logging.info(f"  {project}: {status}")
-
-if __name__ == "__main__":
-    try:
-        main()
-    except KeyboardInterrupt:
-        print("\n🛑 Operation interrupted")
-        sys.exit(1)
-    except Exception as e:
-        print(f"💥 Critical error: {e}")
-        sys.exit(1)
\ No newline at end of file
diff --git a/fuzz/build_images.py b/fuzz/build_images.py
deleted file mode 100644
index 17c7bfc..0000000
--- a/fuzz/build_images.py
+++ /dev/null
@@ -1,158 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-"""
-build_images.py
-
-Parallel build of OSS-Fuzz Docker images.
-Uses multiprocessing.Pool to distribute projects across multiple CPU cores.
-
-Usage: python3 build_images.py [project_list_file] --oss-fuzz-dir /path/to/oss-fuzz [--workers N]
-Example: python3 fuzz/build_images.py data/valid_projects.txt \
-    --oss-fuzz-dir ./fuzz/oss-fuzz \
-    --workers 4
-"""
-
-import os
-import sys
-import subprocess
-import argparse
-import logging
-import json
-from pathlib import Path
-from typing import List, Optional, Tuple
-from multiprocessing import Pool, cpu_count
-
-class CommandExecutionError(Exception):
-    """Custom command execution exception"""
-    def __init__(self, message: str, exit_code: Optional[int] = None):
-        super().__init__(message)
-        self.exit_code = exit_code
-
-def run_command(
-    cmd: str,
-    oss_fuzz_dir: Path,
-    allowed_exit_codes: Optional[List[int]] = None
-) -> int:
-    """Execute a command and return the exit code, throws CommandExecutionError on failure"""
-    allowed_exit_codes = allowed_exit_codes or [0]
-    logging.info(f"▶️ Executing command: {cmd}")
-    
-    try:
-        process = subprocess.Popen(
-            f"yes | {cmd}",
-            shell=True,
-            cwd=str(oss_fuzz_dir),
-            stdout=subprocess.DEVNULL,
-            stderr=subprocess.DEVNULL
-        )
-        
-        process.wait()
-        exit_code = process.returncode
-        
-        if exit_code in allowed_exit_codes:
-            return exit_code
-        raise CommandExecutionError(
-            f"Command failed (exit code: {exit_code})", 
-            exit_code=exit_code
-        )
-    
-    except FileNotFoundError as e:
-        raise CommandExecutionError(f"Command not found: {cmd.split()[0]}") from e
-    except OSError as e:
-        raise CommandExecutionError(f"System error: {e}") from e
-    except subprocess.SubprocessError as e:
-        raise CommandExecutionError(f"Subprocess error: {e}") from e
-
-def build_image(project_name: str, oss_fuzz_dir: Path) -> Tuple[bool, str]:
-    """Docker image build workflow"""
-    try:
-        logging.info("=" * 60)
-        logging.info(f"🔨 Starting Docker build for project: {project_name}")
-        logging.info(f"📁 OSS-Fuzz directory: {oss_fuzz_dir}")
-        logging.info("=" * 60)
-        
-        # Validate paths
-        helper_script = oss_fuzz_dir / "infra" / "helper.py"
-        if not helper_script.exists():
-            raise FileNotFoundError(f"Critical script missing: {helper_script}")
-        
-        # Execute image build command
-        run_command(
-            f"python3 infra/helper.py build_image {project_name}",
-            oss_fuzz_dir
-        )
-        
-        logging.info(f"✅ Docker image for {project_name} built successfully")
-        return (True, project_name)
-    
-    except CommandExecutionError as e:
-        logging.error(f"❌ Docker build for {project_name} failed: {str(e)}")
-        return (False, project_name)
-    except Exception as e:
-        logging.exception(f"🔥 Unhandled exception: {e}")
-        return (False, project_name)
-
-def main():
-    parser = argparse.ArgumentParser(description="OSS-Fuzz Docker Image Builder")
-    parser.add_argument("project_list", help="Project list file path")
-    parser.add_argument("--oss-fuzz-dir", required=True, type=str, 
-                        help="OSS-Fuzz directory path")
-    parser.add_argument("--workers", type=int, default=cpu_count(),
-                        help="Number of parallel worker processes")
-    parser.add_argument("--output", default="image_build_results.json",
-                        help="Output file for build results")
-    args = parser.parse_args()
-
-    logging.basicConfig(
-        level=logging.INFO,
-        format='[%(levelname)s] [PID:%(process)d] %(message)s'
-    )
-
-    # Process paths
-    oss_fuzz_dir = Path(args.oss_fuzz_dir).resolve()
-    logging.info(f"📁 Using OSS-Fuzz directory: {oss_fuzz_dir}")
-
-    # Read project list
-    try:
-        with open(args.project_list, "r", encoding="utf-8") as f:
-            projects = [line.strip() for line in f if line.strip()]
-        logging.info(f"📋 Loaded {len(projects)} projects")
-    except Exception as e:
-        logging.error(f"❌ Failed to read project list: {e}")
-        sys.exit(1)
-
-    # Parallel image builds
-    with Pool(args.workers) as pool:
-        results = pool.starmap(
-            build_image, 
-            [(p, oss_fuzz_dir) for p in projects]
-        )
-
-    # Output results
-    build_results = {project: success for success, project in results}
-    failed = [p for p in projects if not build_results[p]]
-    
-    logging.info(f"\n📊 Docker image builds completed: "
-                 f"Successful {len(projects)-len(failed)}/{len(projects)}")
-    
-    if failed:
-        logging.error("❌ Failed projects: " + ", ".join(failed))
-    
-    # Save build results to JSON file
-    try:
-        with open(args.output, "w") as f:
-            json.dump(build_results, f)
-        logging.info(f"💾 Build results saved to: {args.output}")
-    except Exception as e:
-        logging.error(f"❌ Failed to save build results: {e}")
-
-if __name__ == "__main__":
-    try:
-        main()
-    except KeyboardInterrupt:
-        print("\n🛑 Operation interrupted by user")
-        sys.exit(1)
-    except Exception as e:
-        print(f"💥 Critical error: {e}")
-        sys.exit(1)
\ No newline at end of file
diff --git a/fuzz/build_oss_fuzz_whole.py b/fuzz/build_oss_fuzz_whole.py
deleted file mode 100644
index 59d3bea..0000000
--- a/fuzz/build_oss_fuzz_whole.py
+++ /dev/null
@@ -1,154 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-"""
-build_oss_fuzz_whole.py
-
-Parallel build of OSS-Fuzz projects (Docker images and Fuzzer compilation).
-Uses multiprocessing.Pool to distribute projects across multiple CPU cores.
-
-Usage: python3 build_oss_fuzz_whole.py [project_list_file] --oss-fuzz-dir /path/to/oss-fuzz \
-    [--sanitizer type] [--workers N]
-Example: python3 fuzz/build_oss_fuzz_whole.py data/valid_projects.txt \
-    --oss-fuzz-dir ./fuzz/oss-fuzz \
-    --sanitizer address \
-    --workers 8
-"""
-
-import os
-import sys
-import subprocess
-import argparse
-import logging
-from pathlib import Path
-from typing import List, Optional, Tuple
-from multiprocessing import Pool, cpu_count
-
-class CommandExecutionError(Exception):
-    """Custom command execution exception"""
-    def __init__(self, message: str, exit_code: Optional[int] = None):
-        super().__init__(message)
-        self.exit_code = exit_code
-
-def run_command(
-    cmd: str,
-    oss_fuzz_dir: Path,
-    allowed_exit_codes: Optional[List[int]] = None
-) -> int:
-    """Execute a command and return the exit code, throws CommandExecutionError on failure"""
-    allowed_exit_codes = allowed_exit_codes or [0]
-    logging.info(f"▶️ Executing command: {cmd}")
-    
-    try:
-        # Remove all stdout/stderr capture logic and execute the command directly
-        process = subprocess.Popen(
-            f"yes | {cmd}",
-            shell=True,
-            cwd=str(oss_fuzz_dir),
-            # Redirect the output to an empty device without retaining any output
-            stdout=subprocess.DEVNULL,
-            stderr=subprocess.DEVNULL
-        )
-        
-        process.wait()
-        exit_code = process.returncode
-        
-        if exit_code in allowed_exit_codes:
-            return exit_code
-        raise CommandExecutionError(
-            f"Command failed (exit code: {exit_code})", 
-            exit_code=exit_code
-        )
-    
-    except FileNotFoundError as e:
-        raise CommandExecutionError(f"Command not found: {cmd.split()[0]}") from e
-    except OSError as e:
-        raise CommandExecutionError(f"System error: {e}") from e
-    except subprocess.SubprocessError as e:
-        raise CommandExecutionError(f"Subprocess error: {e}") from e
-
-def build_project(project_name: str, sanitizer: str, oss_fuzz_dir: Path) -> Tuple[bool, str]:
-    """Project build workflow"""
-    try:
-        logging.info("=" * 60)
-        logging.info(f"🔨 Starting build for project: {project_name}")
-        logging.info(f"📁 OSS-Fuzz directory: {oss_fuzz_dir}")
-        logging.info("=" * 60)
-        
-        # Validate paths
-        helper_script = oss_fuzz_dir / "infra" / "helper.py"
-        if not helper_script.exists():
-            raise FileNotFoundError(f"Critical script missing: {helper_script}")
-        
-        # Execute build commands (The output has been disabled)
-        run_command(
-            f"python3 infra/helper.py build_image {project_name}",
-            oss_fuzz_dir
-        )
-        run_command(
-            f"python3 infra/helper.py build_fuzzers --sanitizer {sanitizer} {project_name}",
-            oss_fuzz_dir
-        )
-        
-        logging.info(f"✅ Project {project_name} built successfully")
-        return (True, project_name)
-    
-    except CommandExecutionError as e:
-        logging.error(f"❌ Project {project_name} build failed: {str(e)}")
-        return (False, project_name)
-    except Exception as e:
-        logging.exception(f"🔥 Unhandled exception: {e}")
-        return (False, project_name)
-
-def main():
-    parser = argparse.ArgumentParser(description="OSS-Fuzz parallel build tool")
-    parser.add_argument("project_list", help="Project list file path")
-    parser.add_argument("--oss-fuzz-dir", required=True, type=str, 
-                        help="OSS-Fuzz directory path")
-    parser.add_argument("--sanitizer", default="address", 
-                        choices=["address", "memory", "undefined"],
-                        help="Fuzzer sanitizer type")
-    parser.add_argument("--workers", type=int, default=cpu_count(),
-                        help="Number of parallel worker processes")
-    args = parser.parse_args()
-
-    logging.basicConfig(
-        level=logging.INFO,
-        format='[%(levelname)s] [PID:%(process)d] %(message)s'
-    )
-
-    # Process paths
-    oss_fuzz_dir = Path(args.oss_fuzz_dir).resolve()
-    logging.info(f"📁 Using OSS-Fuzz directory: {oss_fuzz_dir}")
-
-    # Read project list
-    try:
-        with open(args.project_list, "r", encoding="utf-8") as f:
-            projects = [line.strip() for line in f if line.strip()]
-        logging.info(f"📋 Loaded {len(projects)} projects")
-    except Exception as e:
-        logging.error(f"❌ Failed to read project list: {e}")
-        sys.exit(1)
-
-    # Parallel build
-    with Pool(args.workers) as pool:
-        results = pool.starmap(
-            build_project, 
-            [(p, args.sanitizer, oss_fuzz_dir) for p in projects]
-        )
-
-    # Output results
-    failed = [p for success, p in results if not success]
-    logging.info(f"\n📊 Build completed: Successful {len(projects)-len(failed)}/{len(projects)}")
-    if failed:
-        logging.error("❌ Failed projects: " + ", ".join(failed))
-
-if __name__ == "__main__":
-    try:
-        main()
-    except KeyboardInterrupt:
-        print("\n🛑 Operation interrupted by user")
-        sys.exit(1)
-    except Exception as e:
-        print(f"💥 Critical error: {e}")
-        sys.exit(1)
\ No newline at end of file
diff --git a/fuzz/run_fuzz_target.py b/fuzz/run_fuzz_target.py
deleted file mode 100644
index ee47b63..0000000
--- a/fuzz/run_fuzz_target.py
+++ /dev/null
@@ -1,268 +0,0 @@
-#!/usr/bin/env python3 
-# -*- coding: utf-8 -*-
-
-"""
-run_fuzz_target.py
-
-Run OSS-Fuzz test targets in parallel with enhanced logging and precise exception handling.
-Uses multiprocessing.Pool and logging module for robust task management.
-
-Usage: python3 run_fuzz_target.py [project_list_file] [--timeout seconds] [--workers N]
-Example: python3 fuzz/run_fuzz_target.py data/valid_projects.txt --timeout 60 --workers 4
-"""
-
-import os
-import sys
-import subprocess
-import argparse
-import logging
-import time
-from datetime import datetime
-from pathlib import Path
-from typing import Optional
-from multiprocessing import Pool, cpu_count
-
-# --- Global configuration ---
-HOME_DIR = Path.home()
-OSS_FUZZ_DIR = HOME_DIR / "FuzzAug" / "fuzz" / "oss-fuzz"
-LOG_DIR = OSS_FUZZ_DIR / "run_logs"
-
-def run_command(
-    cmd: str, 
-    log_msg: str, 
-    logger: logging.Logger,
-    allowed_exit_codes: Optional[list[int]] = None,
-    timeout: int = 3600  # 1 hour default timeout
-) -> bool:
-    """Execute command with real-time logging and precise error handling"""
-    allowed_exit_codes = allowed_exit_codes or []
-    logger.info(f"▶️ {log_msg}...")
-    logger.debug(f"   $ {cmd}")
-
-    process = None
-    try:
-        process = subprocess.Popen(
-            cmd,
-            shell=True,
-            stdout=subprocess.PIPE,
-            stderr=subprocess.STDOUT,
-            text=True,
-            encoding="utf-8",
-            errors="replace"
-        )
-        
-        start_time = time.time()
-        while process.poll() is None:
-            if time.time() - start_time > timeout:
-                logger.error(f"⌛ Command timed out after {timeout} seconds")
-                process.terminate()
-                try:
-                    process.wait(timeout=5)
-                except subprocess.TimeoutExpired:
-                    process.kill()
-                return False
-                
-            if process.stdout:
-                line = process.stdout.readline()
-                if line:
-                    logger.debug(line.strip())
-            else:
-                logger.warning("Process stdout is None")
-                time.sleep(0.1)
-        
-        exit_code = process.returncode
-        if exit_code not in [0, *allowed_exit_codes]:
-            logger.error(f"❌ Command failed with exit code: {exit_code}")
-            return False
-        return True
-        
-    except FileNotFoundError:
-        logger.error(f"🔍 Command not found: {cmd.split()[0]}")
-        return False
-    except PermissionError:
-        logger.error(f"🔒 Permission denied for command: {cmd}")
-        return False
-    except subprocess.SubprocessError as e:
-        logger.exception(f"💥 Subprocess error: {e}")
-        return False
-    except OSError as e:
-        logger.exception(f"💥 OS error during command execution: {e}")
-        return False
-    finally:
-        if process and process.poll() is None:
-            try:
-                process.terminate()
-                process.wait(timeout=5)
-            except:
-                pass
-
-
-def discover_targets(project_name: str, logger: logging.Logger) -> list[str]:
-    """Discover fuzz targets (fuzz_ prefix, no extension, executable)"""
-    out_dir = OSS_FUZZ_DIR / "build" / "out" / project_name
-    targets: list[str] = []
-    
-    try:
-        if not out_dir.exists():
-            logger.warning(f"⚠️ Build directory not found: {out_dir}")
-            return targets
-            
-        for f in out_dir.iterdir():
-            try:
-                if (f.is_file() and 
-                    f.name.startswith("fuzz_") and 
-                    '.' not in f.name and 
-                    os.access(f, os.X_OK)):
-                    targets.append(f.name)
-            except OSError as e:
-                logger.warning(f"⚠️ Skipping file {f.name} due to access error: {e}")
-                
-    except FileNotFoundError:
-        logger.error(f"❌ Directory not found: {out_dir}")
-    except PermissionError:
-        logger.error(f"🔒 Permission denied accessing: {out_dir}")
-    except OSError as e:
-        logger.exception(f"💥 OS error during target discovery: {e}")
-    
-    return targets
-
-
-def run_project(project_name: str, timeout: int) -> tuple[bool, str]:
-    """Testing workflow for a single project with precise error handling"""
-    try:
-        
-        logger = logging.getLogger(project_name)
-        logger.setLevel(logging.DEBUG)
-        
-        # 创建日志文件
-        LOG_DIR.mkdir(parents=True, exist_ok=True)
-        timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
-        log_file = LOG_DIR / f"run_{project_name}_{timestamp}.log"
-        file_handler = logging.FileHandler(log_file, encoding="utf-8")
-        
-        # 配置日志格式
-        formatter = logging.Formatter(
-            "%(asctime)s [PID:%(process)d] %(levelname)s: %(message)s",
-            datefmt="%Y-%m-%d %H:%M:%S"
-        )
-        file_handler.setFormatter(formatter)
-        logger.addHandler(file_handler)
-        
-        # 添加控制台输出
-        console_handler = logging.StreamHandler()
-        console_handler.setFormatter(logging.Formatter("[%(levelname)s] %(message)s"))
-        logger.addHandler(console_handler)
-        
-        os.chdir(OSS_FUZZ_DIR)
-    except (OSError, PermissionError) as e:
-        print(f"❌ Critical error initializing project {project_name}: {e}")
-        return (False, project_name)
-    
-    logger.info("=" * 60)
-    logger.info(f"🚀 Starting testing for project: {project_name}")
-    logger.info("=" * 60)
-    
-    try:
-        targets = discover_targets(project_name, logger)
-        if not targets:
-            logger.error("⚠️ No test targets found")
-            return (False, project_name)
-        logger.info(f"🔍 Discovered {len(targets)} test targets: {', '.join(targets)}")
-    except Exception as e:
-        logger.exception(f"💥 Target discovery failed unexpectedly: {e}")
-        return (False, project_name)
-    
-    all_success = True
-    for i, target in enumerate(targets, 1):
-        try:
-            cmd = f"python3 infra/helper.py run_fuzzer {project_name} {target} -- -max_total_time={timeout}"
-            success = run_command(
-                cmd,
-                f"Running target [{i}/{len(targets)}] {target} (timeout={timeout}s)",
-                logger,
-                allowed_exit_codes=[1, 124],
-                timeout=timeout + 300
-            )
-            all_success &= success
-            if not success:
-                logger.error(f"❌ Target failed: {target}")
-        except Exception as e:
-            logger.exception(f"💥 Unexpected error running target {target}: {e}")
-            all_success = False
-    
-    if all_success:
-        logger.info(f"✅ All targets completed successfully for {project_name}")
-    else:
-        logger.error(f"❌ One or more targets failed for {project_name}")
-    
-    # 清理日志处理器
-    for handler in logger.handlers[:]:
-        handler.close()
-        logger.removeHandler(handler)
-    
-    return (all_success, project_name)
-
-
-def main():
-    # 主进程日志配置
-    logging.basicConfig(
-        level=logging.INFO,
-        format="[%(levelname)s] %(message)s"
-    )
-    logger = logging.getLogger("Main")
-    
-    parser = argparse.ArgumentParser(description="OSS-Fuzz Parallel Testing Tool")
-    parser.add_argument("project_list", help="Project list file path")
-    parser.add_argument("--timeout", type=int, default=60, help="Timeout per target test (seconds)")
-    parser.add_argument("--workers", type=int, default=cpu_count())
-    args = parser.parse_args()
-    
-    logger.info(f"🚀 Starting parallel fuzzing (workers={args.workers}, timeout={args.timeout}s)")
-
-    try:
-        project_path = Path(args.project_list)
-        if not project_path.exists():
-            raise FileNotFoundError(f"Project list file not found: {project_path}")
-            
-        if not project_path.is_file():
-            raise ValueError(f"Path is not a file: {project_path}")
-            
-        with open(project_path, "r") as f:
-            projects = [line.strip() for line in f if line.strip()]
-            
-        logger.info(f"📋 Loaded {len(projects)} projects from {project_path}")
-    except FileNotFoundError as e:
-        logger.error(f"❌ {e}")
-        sys.exit(1)
-    except PermissionError as e:
-        logger.error(f"🔒 Permission denied: {e}")
-        sys.exit(1)
-    except (OSError, ValueError) as e:
-        logger.exception(f"💥 Error reading project list: {e}")
-        sys.exit(1)
-
-    with Pool(args.workers) as pool:
-        try:
-            # 使用starmap同步执行所有任务
-            final_results = pool.starmap(run_project, [(p, args.timeout) for p in projects])
-        except Exception as e:
-            logger.error(f"💥 Parallel execution failed: {e}")
-            # 出错时返回所有项目失败状态
-            final_results = [(False, p) for p in projects]
-
-    # 汇总结果
-    failed = [p for success, p in final_results if not success]
-    logger.info(f"\n📊 Testing completed: Success {len(projects)-len(failed)}/{len(projects)}")
-    if failed:
-        logger.error("❌ Failed projects: " + ", ".join(failed))
-
-
-if __name__ == "__main__":
-    try:
-        main()
-    except KeyboardInterrupt:
-        print("\n🛑 Operation cancelled by user")
-        sys.exit(1)
-    except Exception as e:
-        print(f"💥 Critical error in main: {e}")
-        sys.exit(1)
\ No newline at end of file

From b0f7b86fb789efbe9e2c047b07a3dae53e91f3f9 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Thu, 24 Jul 2025 00:54:25 +0000
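Subject: [PATCH 040/134] build_fuzz: use Maybe.value_or, add typing annotations, strip trailing whitespace

- run_command: read the allowed exit codes with Maybe.value_or, pass the
  prebuilt command string to Popen, and always call communicate().
- Annotate with typing.List/Tuple/Dict.
- Exit with status 0 when no project has a successful image build.
- Write the image results JSON with indent=4.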

---
 fuzz/build_fuzz.py | 136 ++++++++++++++++++++++-----------------------
 1 file changed, 68 insertions(+), 68 deletions(-)

diff --git a/fuzz/build_fuzz.py b/fuzz/build_fuzz.py
index 87c5a44..51a1a79 100644
--- a/fuzz/build_fuzz.py
+++ b/fuzz/build_fuzz.py
@@ -8,13 +8,13 @@
 Supports three modes: 'image', 'fuzzer', or 'both'.
 
 Usage:
-  Build images: 
+  Build images:
     python3 build_fuzz.py --mode image [project_list] --oss-fuzz-dir /path/to/oss-fuzz
-  
-  Build fuzzers: 
+
+  Build fuzzers:
     python3 build_fuzz.py --mode fuzzer [project_list] --oss-fuzz-dir /path/to/oss-fuzz --image-results results.json
-  
-  Build both: 
+
+  Build both:
     python3 build_fuzz.py --mode both [project_list] --oss-fuzz-dir /path/to/oss-fuzz --sanitizer address
 
 Example:
@@ -32,13 +32,15 @@
 import json
 from pathlib import Path
 from returns.maybe import Maybe
+from multiprocessing import Pool
+from typing import Dict, List, Tuple
 
 # ========================================================================================
 # Custom Exceptions
 # ========================================================================================
 class BuildError(Exception):
     """Base exception for build failures"""
-    def __init__(self, message: str, project: str = "", exit_code: int = None):
+    def __init__(self, message: str, project: str = "", exit_code: int | None = None):
         super().__init__(message)
         self.project = project
         self.exit_code = exit_code
@@ -62,45 +64,42 @@ def run_command(
     cmd: str,
     oss_fuzz_dir: Path,
     project: str = "",
-    allowed_exit_codes: Maybe[list[int]] = Maybe.empty,
+    allowed_exit_codes: Maybe[List[int]] = Maybe.empty,
     skip_yes: bool = False
 ) -> int:
     """Execute a command and return the exit code"""
-    allowed_exit_codes = allowed_exit_codes.or_else([0])
+    allowed_codes = allowed_exit_codes.value_or([0])
     cmd_str = f"yes | {cmd}" if not skip_yes else cmd
     logging.debug(f"Executing command [{project}]: {cmd_str}")
-    
+
     try:
         process = subprocess.Popen(
-            cmd_str if skip_yes else f"yes | {cmd}",
+            cmd_str,
             shell=True,
             cwd=str(oss_fuzz_dir),
             stdout=subprocess.PIPE if skip_yes else subprocess.DEVNULL,
             stderr=subprocess.PIPE if skip_yes else subprocess.DEVNULL,
-            text=True if skip_yes else False
+            text=True
         )
-        
-        if skip_yes:
-            stdout, stderr = process.communicate()
-        else:
-            process.wait()
+
+        stdout, stderr = process.communicate()
         exit_code = process.returncode
-        
-        if exit_code in allowed_exit_codes:
+
+        if exit_code in allowed_codes:
             return exit_code
-            
+
         error_msg = f"Command failed (exit code: {exit_code})"
         if project:
             error_msg += f" for project: {project}"
-            
-        if skip_yes and stderr.strip():
+
+        if stderr and stderr.strip():
             error_msg += f"\nError output:\n{stderr.strip()}"
-            
-        if skip_yes and stdout.strip():
+
+        if stdout and stdout.strip():
             error_msg += f"\nOutput:\n{stdout.strip()}"
-            
+
         raise CommandError(error_msg, project=project, exit_code=exit_code)
-    
+
     except FileNotFoundError as e:
         raise CommandError(f"Command not found: {cmd.split()[0]}", project=project) from e
     except OSError as e:
@@ -111,26 +110,26 @@ def run_command(
 # ========================================================================================
 # Build Functions
 # ========================================================================================
-def build_image(project_name: str, oss_fuzz_dir: Path) -> tuple[bool, str]:
+def build_image(project_name: str, oss_fuzz_dir: Path) -> Tuple[bool, str]:
     """Docker image build workflow"""
     try:
         logging.info(f"Building Docker image: {project_name}")
-        
+
         # Validate paths
         helper_script = oss_fuzz_dir / "infra" / "helper.py"
         if not helper_script.exists():
             raise PathError(f"Missing helper script: {helper_script}", project=project_name)
-        
+
         # Execute image build command
         run_command(
             f"python3 infra/helper.py build_image {project_name}",
             oss_fuzz_dir,
             project=project_name
         )
-        
+
         logging.info(f"✅ Docker image built: {project_name}")
         return (True, project_name)
-    
+
     except CommandError as e:
         logging.error(f"❌ Docker build failed: {project_name} - {str(e)}")
         return (False, project_name)
@@ -138,16 +137,16 @@ def build_image(project_name: str, oss_fuzz_dir: Path) -> tuple[bool, str]:
         logging.error(f"💥 Unhandled exception: {project_name} - {repr(e)}")
         return (False, project_name)
 
-def build_fuzzers(project_name: str, sanitizer: str, oss_fuzz_dir: Path) -> tuple[bool, str]:
+def build_fuzzers(project_name: str, sanitizer: str, oss_fuzz_dir: Path) -> Tuple[bool, str]:
     """Fuzzer build workflow"""
     try:
         logging.info(f"Building fuzzers: {project_name} ({sanitizer} sanitizer)")
-        
+
         # Validate paths
         helper_script = oss_fuzz_dir / "infra" / "helper.py"
         if not helper_script.exists():
             raise PathError(f"Missing helper script: {helper_script}", project=project_name)
-        
+
         # Execute fuzzer build command
         run_command(
             f"python3 infra/helper.py build_fuzzers --sanitizer {sanitizer} {project_name}",
@@ -155,10 +154,10 @@ def build_fuzzers(project_name: str, sanitizer: str, oss_fuzz_dir: Path) -> tupl
             project=project_name,
             skip_yes=True
         )
-        
+
         logging.info(f"✅ Fuzzers built: {project_name}")
         return (True, project_name)
-    
+
     except BuildError as e:
         logging.error(f"❌ Fuzzer build failed: {project_name} - {str(e)}")
         return (False, project_name)
@@ -169,57 +168,57 @@ def build_fuzzers(project_name: str, sanitizer: str, oss_fuzz_dir: Path) -> tupl
 # ========================================================================================
 # Main Execution
 # ========================================================================================
-def load_projects(file_path: Path) -> list[str]:
+def load_projects(file_path: Path) -> List[str]:
     """Load project list from file"""
     if not file_path.exists():
         raise FileNotFoundError(f"Project list not found: {file_path}")
-    
+
     with open(file_path, "r", encoding="utf-8") as f:
         projects = [line.strip() for line in f if line.strip()]
-    
+
     if not projects:
         raise ConfigError("Project list is empty")
-    
-    logging.info(f"Loaded {len(projects)} projects from {file_path}")
+
+    logging.info(f"Loaded {len(projects)} projects from {file_path.name}")
     return projects
 
 def execute_builds(
     func,
-    args_list: list[tuple],
+    args_list: List[Tuple],
     worker_count: int,
     success_msg: str,
     failure_msg: str
-) -> tuple[dict[str, bool], list[str]]:
+) -> Tuple[Dict[str, bool], List[str]]:
     """Execute build tasks in parallel and return results"""
-    results = {}
+    results: Dict[str, bool] = {}
     with Pool(worker_count) as pool:
         for success, project in pool.starmap(func, args_list):
             results[project] = success
 
     failed = [p for p, success in results.items() if not success]
     success_count = len(results) - len(failed)
-    
+
     if failed:
         logging.error(f"\n❌ {failure_msg}: {len(failed)}/{len(results)} projects")
     logging.info(f"\n📊 {success_msg}: {success_count}/{len(results)} projects")
-    
+
     return results, failed
 
 def main():
     parser = argparse.ArgumentParser(description="OSS-Fuzz Build System")
     parser.add_argument("project_list", help="Project list file path")
-    parser.add_argument("--oss-fuzz-dir", required=True, type=str, 
+    parser.add_argument("--oss-fuzz-dir", required=True, type=str,
                         help="OSS-Fuzz directory path")
     parser.add_argument("--mode", choices=['image', 'fuzzer', 'both'], default='both',
                         help="Build mode: 'image', 'fuzzer', or 'both'")
     parser.add_argument("--workers", type=int, default=os.cpu_count(),
                         help="Number of parallel worker processes")
-    parser.add_argument("--sanitizer", default="address", 
+    parser.add_argument("--sanitizer", default="address",
                         choices=["address", "memory", "undefined"],
                         help="Fuzzer sanitizer type")
     parser.add_argument("--image-results", default="image_build_results.json",
                         help="Image build results file (JSON)")
-    parser.add_argument("--log-level", default="INFO", 
+    parser.add_argument("--log-level", default="INFO",
                         choices=["DEBUG", "INFO", "WARNING", "ERROR"],
                         help="Logging detail level")
     args = parser.parse_args()
@@ -229,16 +228,16 @@ def main():
         level=getattr(logging, args.log_level),
         format='[%(levelname)s] [PID:%(process)d] %(message)s'
     )
-    
+
     oss_fuzz_dir = Path(args.oss_fuzz_dir).resolve()
     project_file = Path(args.project_list).resolve()
     output_file = Path(args.image_results)
-    
+
     # Sanity checks
     if not oss_fuzz_dir.exists():
         logging.critical(f"OSS-Fuzz directory not found: {oss_fuzz_dir}")
         sys.exit(1)
-    
+
     # Load projects
     try:
         projects = load_projects(project_file)
@@ -247,12 +246,12 @@ def main():
         sys.exit(1)
 
     # Image building workflow
-    image_results = {}
+    image_results: Dict[str, bool] = {}
     if args.mode in ['image', 'both']:
         logging.info("\n" + "="*60)
         logging.info(f"Starting Docker image builds for {len(projects)} projects")
         logging.info("="*60 + "\n")
-        
+
         image_args = [(p, oss_fuzz_dir) for p in projects]
         image_results, image_failures = execute_builds(
             build_image,
@@ -261,22 +260,23 @@ def main():
             "✅ Docker image builds succeeded",
             "🚫 Docker image builds failed"
         )
-        
+
         # Save image build results
         try:
             with output_file.open("w") as f:
-                json.dump(image_results, f)
+                json.dump(image_results, f, indent=4)
             logging.info(f"💾 Image build results saved to: {output_file}")
         except Exception as e:
             logging.error(f"❌ Failed to save image results: {e}")
-    
+
     # Fuzzer building workflow
-    fuzzer_results = {}
+    fuzzer_results: Dict[str, bool] = {}
+    fuzz_projects: List[str] = []
     if args.mode in ['fuzzer', 'both']:
         logging.info("\n" + "="*60)
-        logging.info(f"Starting fuzzer builds for {len(projects)} projects ({args.sanitizer} sanitizer)")
+        logging.info(f"Starting fuzzer builds ({args.sanitizer} sanitizer)")
         logging.info("="*60 + "\n")
-        
+
         # Load image results for fuzzer mode
         if args.mode == 'fuzzer':
             try:
@@ -286,13 +286,15 @@ def main():
             except Exception as e:
                 logging.critical(f"❌ Failed to load image results: {e}")
                 sys.exit(1)
-        
+
         # Filter projects with successful image builds
         fuzz_projects = [p for p in projects if image_results.get(p, False)]
         if not fuzz_projects:
-            logging.critical("❌ No projects with successful image builds")
-            sys.exit(1)
+            logging.critical("❌ No projects with successful image builds to fuzz.")
+            sys.exit(0)
         
+        logging.info(f"Attempting to build fuzzers for {len(fuzz_projects)} projects with successful image builds.")
+
         fuzzer_args = [(p, args.sanitizer, oss_fuzz_dir) for p in fuzz_projects]
         fuzzer_results, fuzzer_failures = execute_builds(
             build_fuzzers,
@@ -301,19 +303,17 @@ def main():
             "✅ Fuzzer builds succeeded",
             "🚫 Fuzzer builds failed"
         )
-    
+
     # Final summary
     logging.info("\n" + "="*60)
     logging.info("Build Summary")
     logging.info("="*60)
-    
+
     if args.mode in ['image', 'both']:
         image_success = sum(1 for r in image_results.values() if r)
         logging.info(f"📦 Docker Images: {image_success}/{len(projects)} succeeded")
-    
-    if args.mode in ['fuzzer', 'both']:
-        if args.mode == 'both':
-            fuzz_projects = list(fuzzer_results.keys())
+
+    if args.mode in ['fuzzer', 'both'] and fuzz_projects:
         fuzzer_success = sum(1 for r in fuzzer_results.values() if r)
         logging.info(f"🔧 Fuzzers: {fuzzer_success}/{len(fuzz_projects)} succeeded")
 

From 19fa83e2a68074b96ecc8f31646b356edd5db2aa Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Thu, 24 Jul 2025 07:08:44 +0000
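Subject: [PATCH 041/134] Add fuzz/run_fuzz_all_targets_input.py as a baseline

Two-phase runner: discover the fuzz targets of every listed project, then
run each (project, target) pair in a multiprocessing pool.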

---
 fuzz/run_fuzz_all_targets_input.py | 278 +++++++++++++++++++++++++++++
 1 file changed, 278 insertions(+)
 create mode 100644 fuzz/run_fuzz_all_targets_input.py

diff --git a/fuzz/run_fuzz_all_targets_input.py b/fuzz/run_fuzz_all_targets_input.py
new file mode 100644
index 0000000..74bed97
--- /dev/null
+++ b/fuzz/run_fuzz_all_targets_input.py
@@ -0,0 +1,278 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""
+run_fuzz_all_targets.py
+
+This script employs a two-phase approach for fuzz testing:
+1. Discovery phase: Traverse all specified projects to collect all executable fuzz targets in each project
+2. Execution phase: Create a task pool containing all (project, target) pairs and execute them in parallel using multiprocessing
+
+This approach maximizes CPU utilization and provides clear overall progress[2](@ref).
+
+Usage: python3 run_fuzz_all_targets.py [project_list_file] [--timeout seconds] [--workers N]
+Example: python3 fuzz/run_fuzz_all_targets.py data/valid_projects.txt --timeout 60 --workers 4
+"""
+
+import os
+import sys
+import subprocess
+import argparse
+import logging
+import time
+from datetime import datetime
+from pathlib import Path
+from multiprocessing import Pool, cpu_count
+from returns.maybe import Maybe, Nothing, Some
+
+# --- Global configuration ---
+HOME_DIR = Path.home()
+OSS_FUZZ_DIR = HOME_DIR / "FuzzAug" / "fuzz" / "oss-fuzz"
+LOG_DIR = OSS_FUZZ_DIR / "run_logs2"
+
+
+def run_command(
+    cmd: str,
+    log_msg: str,
+    logger: logging.Logger,
+    allowed_exit_codes: Maybe[list[int]] = Nothing,
+    timeout: int = 3600  # Default 1-hour timeout
+) -> bool:
+    """Execute commands with real-time logging and precise error handling"""
+    allowed_codes = allowed_exit_codes.value_or([])
+    logger.info(f"▶️ {log_msg}...")
+    logger.debug(f"   $ {cmd}")
+
+    process = None
+    try:
+        process = subprocess.Popen(
+            cmd,
+            shell=True,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT,
+            text=True,
+            encoding="utf-8",
+            errors="replace"
+        )
+
+        start_time = time.time()
+        while process.poll() is None:
+            if time.time() - start_time > timeout:
+                logger.error(f"⌛ Command timed out after {timeout} seconds")
+                process.terminate()
+                try:
+                    process.wait(timeout=5)
+                except subprocess.TimeoutExpired:
+                    process.kill()
+                return False
+
+            if process.stdout:
+                line = process.stdout.readline()
+                if line:
+                    logger.debug(line.strip())
+            else:
+                time.sleep(0.1)
+
+        exit_code = process.returncode
+        if exit_code not in [0, *allowed_codes]:
+            logger.error(f"❌ Command execution failed, exit code: {exit_code}")
+            return False
+        return True
+
+    except FileNotFoundError:
+        logger.error(f"🔍 Command not found: {cmd.split()[0]}")
+        return False
+    except PermissionError:
+        logger.error(f"🔒 Insufficient permissions to execute command: {cmd}")
+        return False
+    except subprocess.SubprocessError as e:
+        logger.exception(f"💥 Subprocess error: {e}")
+        return False
+    except OSError as e:
+        logger.exception(f"💥 Operating system error during command execution: {e}")
+        return False
+    finally:
+        if process and process.poll() is None:
+            try:
+                process.terminate()
+                process.wait(timeout=5)
+            except Exception:
+                pass
+
+
+def discover_targets(project_name: str, logger: logging.Logger) -> list[str]:
+    """Discover fuzz targets for a project (starting with 'fuzz_', no extension, and executable)"""
+    out_dir = OSS_FUZZ_DIR / "build" / "out" / project_name
+    targets: list[str] = []
+
+    if not out_dir.is_dir():
+        logger.warning(f"Build output directory for project {project_name} does not exist: {out_dir}")
+        return targets
+
+    try:
+        for f in out_dir.iterdir():
+            try:
+                if (f.is_file() and
+                        f.name.startswith("fuzz_") and
+                        '.' not in f.name and
+                        os.access(f, os.X_OK)):
+                    targets.append(f.name)
+            except OSError as e:
+                logger.warning(f"⚠️ Error while checking file {f.name}, skipped: {e}")
+
+    except PermissionError:
+        logger.error(f"🔒 Insufficient permissions to access directory: {out_dir}")
+    except OSError as e:
+        logger.exception(f"💥 Operating system error occurred while discovering targets: {e}")
+
+    return targets
+
+
+def run_single_target(project_name: str, target_name: str, timeout: int) -> tuple[bool, str, str]:
+    """Execute fuzz testing workflow for a single (project, target) pair"""
+    task_id = f"{project_name}_{target_name}"
+    logger = logging.getLogger(task_id)
+
+    try:
+        logger.setLevel(logging.DEBUG)
+        LOG_DIR.mkdir(parents=True, exist_ok=True)
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        log_file = LOG_DIR / f"run_{task_id}_{timestamp}.log"
+        file_handler = logging.FileHandler(log_file, encoding="utf-8")
+        formatter = logging.Formatter(
+            "%(asctime)s [PID:%(process)d] %(levelname)s: %(message)s",
+            datefmt="%Y-%m-%d %H:%M:%S"
+        )
+        file_handler.setFormatter(formatter)
+        logger.addHandler(file_handler)
+        os.chdir(OSS_FUZZ_DIR)
+
+    except (OSError, PermissionError) as e:
+        print(f"❌ Critical error occurred during initialization of task {task_id}: {e}")
+        return False, project_name, target_name
+
+    logger.info(f"🚀 Starting test -> Project: {project_name}, Target: {target_name}")
+    try:
+        cmd = f"python3 infra/helper.py run_fuzzer {project_name} {target_name} -- -max_total_time={timeout}"
+        success = run_command(
+            cmd,
+            f"Running Target '{target_name}' (timeout={timeout}s)",
+            logger,
+            allowed_exit_codes=Some([1, 124]),  # 1=Crashes found, 124=Timeout
+            timeout=timeout + 300
+        )
+
+        if success:
+            logger.info(f"✅ Target '{target_name}' completed successfully.")
+        else:
+            logger.error(f"❌ Target '{target_name}' failed.")
+
+        return success, project_name, target_name
+
+    except Exception as e:
+        logger.exception(f"💥 Unexpected error occurred while running target '{target_name}': {e}")
+        return False, project_name, target_name
+    finally:
+        for handler in logger.handlers[:]:
+            handler.close()
+            logger.removeHandler(handler)
+
+
+def main():
+    # Configure main process logging
+    logging.basicConfig(
+        level=logging.INFO,
+        format="[%(levelname)s] %(message)s",
+        stream=sys.stdout
+    )
+    logger = logging.getLogger("Main")
+
+    # Set up command line argument parsing
+    parser = argparse.ArgumentParser(description="OSS-Fuzz Parallel Fuzz Testing Tool")
+    parser.add_argument("project_list", help="File path containing list of project names")
+    parser.add_argument("--timeout", type=int, default=60, help="Timeout duration per Fuzz Target (seconds)")
+    parser.add_argument("--workers", type=int, default=cpu_count(), help="Number of parallel worker processes")
+    args = parser.parse_args()
+
+    # 1. Read project list file
+    try:
+        project_path = Path(args.project_list)
+        with open(project_path, "r", encoding="utf-8") as f:
+            projects = [line.strip() for line in f if line.strip()]
+        logger.info(f"📋 Loaded {len(projects)} projects from {project_path.name}.")
+    except FileNotFoundError:
+        logger.error(f"❌ Project list file not found: {args.project_list}")
+        sys.exit(1)
+    except (OSError, PermissionError) as e:
+        logger.exception(f"💥 Error occurred while reading project list: {e}")
+        sys.exit(1)
+
+    # 2. Discovery phase: Collect all fuzz targets
+    logger.info("\n" + "=" * 20 + " Phase 1: Discover all Fuzz Targets " + "=" * 20)
+    all_fuzz_tasks: list[tuple[str, str]] = []  # Store (project, target) tuples
+    try:
+        original_cwd = Path.cwd()  # Save current working directory
+        os.chdir(OSS_FUZZ_DIR)     # Switch to OSS-Fuzz directory
+        for project_name in projects:
+            targets = discover_targets(project_name, logger)
+            if targets:
+                logger.info(f"🔍 Discovered {len(targets)} targets in project '{project_name}': {', '.join(targets)}")
+                for target in targets:
+                    all_fuzz_tasks.append((project_name, target))
+            else:
+                logger.warning(f"⚠️ No Fuzz Targets found in project '{project_name}'.")
+        os.chdir(original_cwd)  # Restore original working directory
+    except FileNotFoundError:
+        logger.error(f"❌ OSS-Fuzz directory does not exist: {OSS_FUZZ_DIR}")
+        sys.exit(1)
+    except Exception as e:
+        logger.exception(f"💥 Unknown error occurred during discovery phase: {e}")
+        sys.exit(1)
+
+    # Check if any valid targets were found
+    if not all_fuzz_tasks:
+        logger.info("🤷 No executable Fuzz Targets found. Program exits.")
+        sys.exit(0)
+
+    # 3. Execution phase: Parallel fuzz testing
+    logger.info(f"\n✅ Discovery phase completed. Found a total of {len(all_fuzz_tasks)} fuzz tasks.")
+    logger.info("=" * 20 + " Phase 2: Parallel Fuzzing Execution " + "=" * 23)
+    logger.info(f"🚀 Starting parallel testing with {args.workers} worker processes (timeout per target: {args.timeout}s)...")
+
+    # Prepare task parameters (project, target, timeout)
+    tasks_with_args = [(p, t, args.timeout) for p, t in all_fuzz_tasks]
+    results: list[tuple[bool, str, str]] = []  # Store results (success, project, target)
+    
+    # Execute in parallel using process pool
+    with Pool(args.workers) as pool:
+        try:
+            results = pool.starmap(run_single_target, tasks_with_args)
+        except Exception as e:
+            logger.error(f"💥 Critical error occurred during parallel execution: {e}")
+            pool.terminate()
+            pool.join()
+
+    # 4. Result summary and reporting
+    logger.info("\n" + "=" * 20 + " Phase 3: Result Summary " + "=" * 28)
+    failed_tasks = [(p, t) for success, p, t in results if not success]  # List of failed tasks
+    total_tasks = len(all_fuzz_tasks)
+    failed_count = len(failed_tasks)
+    success_count = total_tasks - failed_count
+
+    # Output statistical summary
+    logger.info(f"📊 Fuzzing completed: Success {success_count}/{total_tasks}, Failed {failed_count}/{total_tasks}")
+    if failed_tasks:
+        logger.error("❌ The following Fuzz Targets failed:")
+        for project, target in failed_tasks:
+            logger.error(f"  - Project: {project}, Target: {target}")  # List detailed failures
+
+
+if __name__ == "__main__":
+    try:
+        main()
+    except KeyboardInterrupt:
+        print("\n🛑 Operation interrupted by user.")
+        sys.exit(1)
+    except Exception as e:
+        print(f"\n💥 Fatal error in main program: {e}")
+        sys.exit(1)
\ No newline at end of file

From 47cf6e9f282d622e6ddfe821b3a4b71419685308 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Thu, 24 Jul 2025 07:11:26 +0000
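Subject: [PATCH 042/134] run_fuzz_all_targets_input: record fuzzing inputs

Add AST-based function instrumentation and a fuzz_inputs directory for
captured inputs. Also updates image_build_results.json.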

---
 fuzz/run_fuzz_all_targets_input.py | 198 ++++++++++++++++++++++++++---
 image_build_results.json           |   2 +-
 2 files changed, 182 insertions(+), 18 deletions(-)

diff --git a/fuzz/run_fuzz_all_targets_input.py b/fuzz/run_fuzz_all_targets_input.py
index 74bed97..631f3e6 100644
--- a/fuzz/run_fuzz_all_targets_input.py
+++ b/fuzz/run_fuzz_all_targets_input.py
@@ -2,16 +2,23 @@
 # -*- coding: utf-8 -*-
 
 """
-run_fuzz_all_targets.py
+run_fuzz_all_targets_input.py
+
+Enhanced with input instrumentation to capture fuzzing inputs.
 
 This script employs a two-phase approach for fuzz testing:
 1. Discovery phase: Traverse all specified projects to collect all executable fuzz targets in each project
 2. Execution phase: Create a task pool containing all (project, target) pairs and execute them in parallel using multiprocessing
+3. Input capture: Instrument fuzz targets to record all inputs during fuzzing
 
-This approach maximizes CPU utilization and provides clear overall progress[2](@ref).
+Key Enhancements:
+- Added input instrumentation to capture fuzzing inputs
+- Created dedicated input storage directory structure
+- Added AST-based function instrumentation
+- Added input recording and analysis capabilities
 
-Usage: python3 run_fuzz_all_targets.py [project_list_file] [--timeout seconds] [--workers N]
-Example: python3 fuzz/run_fuzz_all_targets.py data/valid_projects.txt --timeout 60 --workers 4
+Usage: python3 run_fuzz_all_targets_input [project_list_file] [--timeout seconds] [--workers N] [--record-inputs]
+Example: python3 fuzz/run_fuzz_all_targets_input.py data/valid_projects.txt --timeout 60 --workers 4 --record-inputs
 """
 
 import os
@@ -20,25 +27,144 @@
 import argparse
 import logging
 import time
+import ast
+import astor
+import shutil
 from datetime import datetime
 from pathlib import Path
 from multiprocessing import Pool, cpu_count
 from returns.maybe import Maybe, Nothing, Some
+from typing import Optional, List, Tuple
 
 # --- Global configuration ---
 HOME_DIR = Path.home()
 OSS_FUZZ_DIR = HOME_DIR / "FuzzAug" / "fuzz" / "oss-fuzz"
 LOG_DIR = OSS_FUZZ_DIR / "run_logs2"
+INPUT_DIR = OSS_FUZZ_DIR / "fuzz_inputs"  # Directory to store captured inputs
+
+class FunctionInstrumenter(ast.NodeTransformer):
+    """AST transformer to instrument function entries for input recording"""
+    def visit_FunctionDef(self, node):
+        """Instrument function definition to add input recording"""
+        # Add print statement at the beginning of the function
+        input_record_stmt = ast.Expr(
+            value=ast.Call(
+                func=ast.Name(id='print', ctx=ast.Load()),
+                args=[ast.Constant(value=f"INPUT_CAPTURE: {node.name} received input: {{data}}")],
+                keywords=[]
+            )
+        )
+        
+        # Insert the print statement at the top of the function body
+        if node.body:
+            node.body.insert(0, input_record_stmt)
+        
+        return node
+
+def instrument_code(source_code: str, target_function: str) -> str:
+    """
+    Instrument source code to record inputs for specific function
+    
+    Args:
+        source_code: Original source code
+        target_function: Name of the function to instrument
+        
+    Returns:
+        Instrumented source code
+    """
+    try:
+        # Parse the source code into an AST
+        tree = ast.parse(source_code)
+        
+        # Create instrumenter and apply transformations
+        instrumenter = FunctionInstrumenter()
+        modified_tree = instrumenter.visit(tree)
+        
+        # Add missing location information for generated nodes
+        ast.fix_missing_locations(modified_tree)
+        
+        # Generate the modified source code
+        return astor.to_source(modified_tree)
+    except Exception as e:
+        logging.error(f"🔧 Code instrumentation failed: {str(e)}")
+        return source_code  # Return original if instrumentation fails
 
+def prepare_target_for_input_capture(project_name: str, target_name: str) -> Path:
+    """
+    Prepare a fuzz target for input capture by instrumenting its code
+    
+    Args:
+        project_name: Name of the project
+        target_name: Name of the target to instrument
+        
+    Returns:
+        Path to the instrumented target executable
+    """
+    try:
+        # Create project-specific input directory
+        project_input_dir = INPUT_DIR / project_name
+        project_input_dir.mkdir(parents=True, exist_ok=True)
+        
+        # Create target-specific input directory
+        target_input_dir = project_input_dir / target_name
+        target_input_dir.mkdir(exist_ok=True)
+        
+        logging.info(f"📁 Created input directory: {target_input_dir}")
+        
+        # Original target path
+        original_target = OSS_FUZZ_DIR / "build" / "out" / project_name / target_name
+        
+        # Backup original target
+        backup_target = original_target.with_name(f"{target_name}_original")
+        if not backup_target.exists():
+            shutil.copy2(original_target, backup_target)
+        
+        # Read target source code (simplified for demonstration)
+        # In a real implementation, we'd need to locate the actual source files
+        # This is a placeholder to demonstrate the instrumentation concept
+        source_file = OSS_FUZZ_DIR / "projects" / project_name / "fuzzers" / f"{target_name}.c"
+        
+        if source_file.exists():
+            with open(source_file, "r") as f:
+                source_code = f.read()
+            
+            # Instrument the code
+            instrumented_code = instrument_code(source_code, "LLVMFuzzerTestOneInput")
+            
+            # Write instrumented code to a new file
+            instrumented_file = source_file.with_name(f"{target_name}_instrumented.c")
+            with open(instrumented_file, "w") as f:
+                f.write(instrumented_code)
+            
+            logging.info(f"🔧 Instrumented {target_name} for input capture")
+            
+            # Rebuild the target with instrumented code
+            # This step is simplified - in reality would use OSS-Fuzz build system
+            rebuild_cmd = f"python3 infra/helper.py build_fuzzers --sanitizer address {project_name}"
+            run_command(
+                rebuild_cmd,
+                f"Rebuilding {target_name} with instrumentation",
+                logging.getLogger("Main"),
+                timeout=1200
+            )
+            
+            return original_target
+        else:
+            logging.warning(f"⚠️ Source file not found for instrumentation: {source_file}")
+            return original_target
+    except Exception as e:
+        logging.error(f"❌ Failed to instrument {target_name}: {str(e)}")
+        return OSS_FUZZ_DIR / "build" / "out" / project_name / target_name
 
 def run_command(
     cmd: str,
     log_msg: str,
     logger: logging.Logger,
     allowed_exit_codes: Maybe[list[int]] = Nothing,
-    timeout: int = 3600  # Default 1-hour timeout
+    timeout: int = 3600,  # Default 1-hour timeout
+    env: Optional[dict] = None  # Added env parameter for input capture
 ) -> bool:
-    """Execute commands with real-time logging and precise error handling"""
+    """Execute commands with real-time logging, precise error handling, and input capture"""
     allowed_codes = allowed_exit_codes.value_or([])
     logger.info(f"▶️ {log_msg}...")
     logger.debug(f"   $ {cmd}")
@@ -52,7 +178,8 @@ def run_command(
             stderr=subprocess.STDOUT,
             text=True,
             encoding="utf-8",
-            errors="replace"
+            errors="replace",
+            env=env  # Pass environment variables
         )
 
         start_time = time.time()
@@ -69,7 +196,11 @@ def run_command(
             if process.stdout:
                 line = process.stdout.readline()
                 if line:
-                    logger.debug(line.strip())
+                    # Capture input data when detected
+                    if "INPUT_CAPTURE:" in line:
+                        logger.debug(f"📥 {line.strip()}")
+                    else:
+                        logger.debug(line.strip())
             else:
                 time.sleep(0.1)
 
@@ -99,7 +230,6 @@ def run_command(
             except Exception:
                 pass
 
-
 def discover_targets(project_name: str, logger: logging.Logger) -> list[str]:
     """Discover fuzz targets for a project (starting with 'fuzz_', no extension, and executable)"""
     out_dir = OSS_FUZZ_DIR / "build" / "out" / project_name
@@ -127,9 +257,8 @@ def discover_targets(project_name: str, logger: logging.Logger) -> list[str]:
 
     return targets
 
-
-def run_single_target(project_name: str, target_name: str, timeout: int) -> tuple[bool, str, str]:
-    """Execute fuzz testing workflow for a single (project, target) pair"""
+def run_single_target(project_name: str, target_name: str, timeout: int, record_inputs: bool = False) -> tuple[bool, str, str]:
+    """Execute fuzz testing workflow for a single (project, target) pair with input capture"""
     task_id = f"{project_name}_{target_name}"
     logger = logging.getLogger(task_id)
 
@@ -152,6 +281,23 @@ def run_single_target(project_name: str, target_name: str, timeout: int) -> tupl
         return False, project_name, target_name
 
     logger.info(f"🚀 Starting test -> Project: {project_name}, Target: {target_name}")
+    
+    # Prepare environment for input capture if requested
+    env = None
+    if record_inputs:
+        # Prepare target for input capture
+        target_path = prepare_target_for_input_capture(project_name, target_name)
+        logger.info(f"🔧 Instrumented {target_name} for input capture")
+        
+        # Create input directory for this run
+        input_dir = INPUT_DIR / project_name / target_name / timestamp
+        input_dir.mkdir(parents=True, exist_ok=True)
+        
+        # Set environment variable for input storage
+        env = os.environ.copy()
+        env["FUZZ_INPUT_DIR"] = str(input_dir)
+        logger.info(f"📁 Inputs will be stored in: {input_dir}")
+
     try:
         cmd = f"python3 infra/helper.py run_fuzzer {project_name} {target_name} -- -max_total_time={timeout}"
         success = run_command(
@@ -159,7 +305,8 @@ def run_single_target(project_name: str, target_name: str, timeout: int) -> tupl
             f"Running Target '{target_name}' (timeout={timeout}s)",
             logger,
             allowed_exit_codes=Some([1, 124]),  # 1=Crashes found, 124=Timeout
-            timeout=timeout + 300
+            timeout=timeout + 300,
+            env=env  # Pass environment for input capture
         )
 
         if success:
@@ -177,6 +324,12 @@ def run_single_target(project_name: str, target_name: str, timeout: int) -> tupl
             handler.close()
             logger.removeHandler(handler)
 
+        # Log input capture results
+        if record_inputs and env:
+            input_dir = Path(env["FUZZ_INPUT_DIR"])
+            if input_dir.exists():
+                input_count = len(list(input_dir.glob("*.bin")))
+                logger.info(f"📥 Captured {input_count} inputs for {target_name}")
 
 def main():
     # Configure main process logging
@@ -188,10 +341,11 @@ def main():
     logger = logging.getLogger("Main")
 
     # Set up command line argument parsing
-    parser = argparse.ArgumentParser(description="OSS-Fuzz Parallel Fuzz Testing Tool")
+    parser = argparse.ArgumentParser(description="OSS-Fuzz Parallel Fuzz Testing Tool with Input Capture")
     parser.add_argument("project_list", help="File path containing list of project names")
     parser.add_argument("--timeout", type=int, default=60, help="Timeout duration per Fuzz Target (seconds)")
     parser.add_argument("--workers", type=int, default=cpu_count(), help="Number of parallel worker processes")
+    parser.add_argument("--record-inputs", action="store_true", help="Enable input capture during fuzzing")
     args = parser.parse_args()
 
     # 1. Read project list file
@@ -234,13 +388,17 @@ def main():
         logger.info("🤷 No executable Fuzz Targets found. Program exits.")
         sys.exit(0)
 
-    # 3. Execution phase: Parallel fuzz testing
+    # 3. Execution phase: Parallel fuzz testing with input capture
     logger.info(f"\n✅ Discovery phase completed. Found a total of {len(all_fuzz_tasks)} fuzz tasks.")
+    if args.record_inputs:
+        logger.info("🔔 Input capture is ENABLED. All fuzzing inputs will be recorded.")
+        logger.info(f"📁 Inputs will be stored in: {INPUT_DIR}")
+    
     logger.info("=" * 20 + " Phase 2: Parallel Fuzzing Execution " + "=" * 23)
     logger.info(f"🚀 Starting parallel testing with {args.workers} worker processes (timeout per target: {args.timeout}s)...")
 
-    # Prepare task parameters (project, target, timeout)
-    tasks_with_args = [(p, t, args.timeout) for p, t in all_fuzz_tasks]
+    # Prepare task parameters (project, target, timeout, record_inputs)
+    tasks_with_args = [(p, t, args.timeout, args.record_inputs) for p, t in all_fuzz_tasks]
     results: list[tuple[bool, str, str]] = []  # Store results (success, project, target)
     
     # Execute in parallel using process pool
@@ -261,6 +419,12 @@ def main():
 
     # Output statistical summary
     logger.info(f"📊 Fuzzing completed: Success {success_count}/{total_tasks}, Failed {failed_count}/{total_tasks}")
+    
+    if args.record_inputs:
+        total_inputs = sum(len(list((INPUT_DIR / p / t).glob("*/*.bin"))) for p, t in all_fuzz_tasks)
+        logger.info(f"📥 Total inputs captured: {total_inputs}")
+        logger.info(f"💾 Inputs stored at: {INPUT_DIR}")
+    
     if failed_tasks:
         logger.error("❌ The following Fuzz Targets failed:")
         for project, target in failed_tasks:
diff --git a/image_build_results.json b/image_build_results.json
index 93d383e..4af9787 100644
--- a/image_build_results.json
+++ b/image_build_results.json
@@ -1 +1 @@
-{"abseil-py": true, "adal": true, "aiohttp": true, "aniso8601": true, "ansible": true, "argcomplete": true, "arrow-py": true, "asn1crypto": true, "asteval": true, "astroid": true, "asttokens": true, "attrs": true, "autoflake": true, "autopep8": true, "azure-sdk-for-python": true, "babel": true, "black": true, "botocore": true, "bottleneck": true, "bz2file": true, "cachetools": true, "cffi": true, "chardet": true, "charset_normalizer": true, "click": true, "cloud-custodian": true, "configparser": true, "connexion": true, "coveragepy": true, "croniter": true, "cryptography": true, "cssselect": true, "dask": true, "decorator": true, "defusedxml": true, "digest": true, "dill": true, "distlib": true, "dnspython": true, "docutils": true, "ecdsa-python": true, "et-xmlfile": true, "face": true, "filelock": true, "filesystem_spec": true, "flask": true, "flask-jwt-extended": true, "flask-restx": true, "flask-wtf": true, "fonttools": true, "ftfy": true, "g-api-auth-httplib2": true, "g-api-auth-library-python": true, "g-api-pubsub": true, "g-api-py-api-common-protos": true, "g-api-py-oauthlib": true, "g-api-python-bigquery-storage": true, "g-api-python-client": true, "g-api-python-cloud-core": true, "g-api-python-firestore": true, "g-api-python-tasks": true, "g-api-resource-manager": true, "g-api-resumable-media-python": true, "g-api-secret-manager": true, "g-apis-py-api-core": true, "gast": true, "gc-iam": true, "gcloud-error-py": true, "g-cloud-logging-py": true, "gcp-python-cloud-storage": true, "genshi": true, "gitdb": true, "glom": true, "gprof2dot": true, "g-py-bigquery": true, "g-py-crc32c": true, "grpc-py": true, "gunicorn": true, "h11": true, "h5py": true, "hiredis-py": true, "html2text": true, "html5lib-python": true, "httpcore": true, "httpretty": false, "httpx": true, "idna": true, "ijson": true, "importlib_metadata": true, "iniconfig": true, "ipaddress": true, "ipykernel": true, "ipython": true, "isodate": true, "itsdangerous": true, "jedi": true, "jinja2": true, 
"jmespathpy": true, "joblib": true, "jsmin": true, "jupyter-nbconvert": true, "jupyter_server": true, "kafka": true, "keras": false, "kiwisolver": true, "lark-parser": true, "libcst": true, "looker-sdk": true, "lxml": true, "mako": true, "markupsafe": true, "matplotlib": true, "mccabe": true, "mdit-py-plugins": true, "mdurl": true, "more-itertools": false, "mrab-regex": true, "msal": true, "msgpack-python": true, "multidict": true, "mutagen": true, "nbclassic": true, "nbformat": true, "netaddr-py": true, "networkx": true, "ntlm2": true, "ntlm-auth": true, "numexpr": true, "numpy": true, "oauth2": true, "oauthlib": true, "olefile": true, "openapi-schema-validator": true, "opencensus-python": true, "openpyxl": true, "opt_einsum": true, "oracle-py-cx": true, "orjson": true, "oscrypto": true, "packaging": true, "pandas": true, "paramiko": true, "parse": true, "parsimonious": true, "pasta": true, "pathlib2": true, "pdoc": true, "pem": true, "pendulum": true, "pip": true, "ply": true, "protobuf-python": true, "proto-plus-python": true, "psqlparse": true, "psutil": true, "psycopg2": true, "pyasn1": true, "pyasn1-modules": true, "pycparser": true, "pycrypto": true, "pydantic": true, "pydateutil": true, "pygments": true, "pyjson5": true, "pyjwt": true, "pymysql": true, "pynacl": true, "pyodbc": false, "pyparsing": false, "pyrsistent": true, "py-serde": true, "pytables": true, "pytest-py": true, "python3-openid": true, "python-ecdsa": true, "python-email-validator": true, "python-fastjsonschema": true, "python-future": true, "python-graphviz": true, "python-hyperlink": true, "python-jose": true, "python-lz4": true, "python-markdown": true, "python-markdownify": true, "python-nameparser": true, "python-nvd3": true, "python-pathspec": true, "python-prompt-toolkit": true, "python-pypdf": true, "python-rison": true, "python-rsa": true, "python-tabulate": true, "pytz": true, "pyxdg": true, "pyyaml": true, "pyzmq": true, "redis-py": true, "requests": true, "retry": true, 
"rfc3967": true, "rich": true, "sacremoses": true, "scikit-learn": true, "scipy": true, "setuptools": true, "sigstore-python": true, "simplejson": true, "six": true, "smart_open": true, "soupsieve": true, "sqlalchemy_jsonfield": true, "sqlalchemy-utils": true, "sqlparse": true, "stack_data": true, "tensorflow-addons": false, "tinycss2": true, "toml": true, "tomlkit": true, "toolbelt": true, "toolz": true, "tqdm": true, "typing_extensions": true, "underscore": true, "uritemplate": true, "urlextract": true, "urllib3": true, "validators": true, "w3lib": true, "websocket-client": true, "wheel": true, "wtforms": true, "xlrd": true, "yarl": true, "zipp": true}
\ No newline at end of file
+{"abseil-py": true, "adal": true, "aiohttp": true, "aniso8601": true, "ansible": true, "argcomplete": true, "arrow-py": true, "asn1crypto": true, "asteval": true, "astroid": true, "asttokens": true, "attrs": true, "autoflake": true, "autopep8": true, "azure-sdk-for-python": true, "babel": true, "black": true, "botocore": true, "bottleneck": true, "bz2file": true, "cachetools": true, "cffi": true, "chardet": true, "charset_normalizer": true, "click": true, "cloud-custodian": true, "configparser": true, "connexion": true, "coveragepy": true, "croniter": true, "cryptography": true, "cssselect": true, "dask": true, "decorator": true, "defusedxml": true, "digest": true, "dill": true, "distlib": true, "dnspython": true, "docutils": true, "ecdsa-python": true, "et-xmlfile": true, "face": true, "filelock": true, "filesystem_spec": true, "flask": true, "flask-jwt-extended": true, "flask-restx": true, "flask-wtf": true, "fonttools": true, "ftfy": true, "g-api-auth-httplib2": true, "g-api-auth-library-python": true, "g-api-pubsub": true, "g-api-py-api-common-protos": true, "g-api-py-oauthlib": true, "g-api-python-bigquery-storage": true, "g-api-python-client": true, "g-api-python-cloud-core": true, "g-api-python-firestore": true, "g-api-python-tasks": true, "g-api-resource-manager": true, "g-api-resumable-media-python": true, "g-api-secret-manager": true, "g-apis-py-api-core": true, "gast": true, "gc-iam": true, "gcloud-error-py": true, "g-cloud-logging-py": true, "gcp-python-cloud-storage": true, "genshi": true, "gitdb": true, "glom": true, "gprof2dot": true, "g-py-bigquery": true, "g-py-crc32c": true, "grpc-py": true, "gunicorn": true, "h11": true, "h5py": true, "hiredis-py": true, "html2text": true, "html5lib-python": true, "httpcore": true, "httpretty": true, "httpx": true, "idna": true, "ijson": true, "importlib_metadata": true, "iniconfig": true, "ipaddress": true, "ipykernel": true, "ipython": true, "isodate": true, "itsdangerous": true, "jedi": true, "jinja2": true, 
"jmespathpy": true, "joblib": true, "jsmin": true, "jupyter-nbconvert": true, "jupyter_server": true, "kafka": true, "keras": false, "kiwisolver": true, "lark-parser": true, "libcst": true, "looker-sdk": true, "lxml": true, "mako": true, "markupsafe": true, "matplotlib": true, "mccabe": true, "mdit-py-plugins": true, "mdurl": true, "more-itertools": true, "mrab-regex": true, "msal": true, "msgpack-python": true, "multidict": true, "mutagen": true, "nbclassic": true, "nbformat": true, "netaddr-py": true, "networkx": true, "ntlm2": true, "ntlm-auth": true, "numexpr": true, "numpy": true, "oauth2": true, "oauthlib": true, "olefile": true, "openapi-schema-validator": true, "opencensus-python": true, "openpyxl": true, "opt_einsum": true, "oracle-py-cx": true, "orjson": true, "oscrypto": true, "packaging": true, "pandas": true, "paramiko": true, "parse": true, "parsimonious": true, "pasta": true, "pathlib2": true, "pdoc": true, "pem": true, "pendulum": true, "pip": true, "ply": true, "protobuf-python": true, "proto-plus-python": true, "psqlparse": true, "psutil": true, "psycopg2": true, "pyasn1": true, "pyasn1-modules": true, "pycparser": true, "pycrypto": true, "pydantic": true, "pydateutil": true, "pygments": true, "pyjson5": true, "pyjwt": true, "pymysql": true, "pynacl": true, "pyodbc": true, "pyparsing": true, "pyrsistent": true, "py-serde": true, "pytables": true, "pytest-py": true, "python3-openid": true, "python-ecdsa": true, "python-email-validator": true, "python-fastjsonschema": true, "python-future": true, "python-graphviz": true, "python-hyperlink": true, "python-jose": true, "python-lz4": true, "python-markdown": true, "python-markdownify": true, "python-nameparser": true, "python-nvd3": true, "python-pathspec": true, "python-prompt-toolkit": true, "python-pypdf": true, "python-rison": true, "python-rsa": true, "python-tabulate": true, "pytz": true, "pyxdg": true, "pyyaml": true, "pyzmq": true, "redis-py": true, "requests": true, "retry": true, "rfc3967": 
true, "rich": true, "sacremoses": true, "scikit-learn": true, "scipy": true, "setuptools": true, "sigstore-python": true, "simplejson": true, "six": true, "smart_open": true, "soupsieve": true, "sqlalchemy_jsonfield": true, "sqlalchemy-utils": true, "sqlparse": true, "stack_data": true, "tensorflow-addons": false, "tinycss2": true, "toml": true, "tomlkit": true, "toolbelt": true, "toolz": true, "tqdm": true, "typing_extensions": true, "underscore": true, "uritemplate": true, "urlextract": true, "urllib3": true, "validators": true, "w3lib": true, "websocket-client": true, "wheel": true, "wtforms": true, "xlrd": true, "yarl": true, "zipp": true}
\ No newline at end of file

From 76c63ac68bf424ddacc564f65110f769962afb99 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Thu, 24 Jul 2025 10:15:39 +0000
Subject: [PATCH 043/134] Fatal error in main program: cannot unpack
 non-iterable NoneType object

---
 fuzz/run_fuzz_all_targets_input.py | 234 ++++++++++++++++-------------
 1 file changed, 132 insertions(+), 102 deletions(-)

diff --git a/fuzz/run_fuzz_all_targets_input.py b/fuzz/run_fuzz_all_targets_input.py
index 631f3e6..5c16d8d 100644
--- a/fuzz/run_fuzz_all_targets_input.py
+++ b/fuzz/run_fuzz_all_targets_input.py
@@ -34,33 +34,39 @@
 from pathlib import Path
 from multiprocessing import Pool, cpu_count
 from returns.maybe import Maybe, Nothing, Some
-from typing import Optional, List, Tuple
 
 # --- Global configuration ---
 HOME_DIR = Path.home()
 OSS_FUZZ_DIR = HOME_DIR / "FuzzAug" / "fuzz" / "oss-fuzz"
-LOG_DIR = OSS_FUZZ_DIR / "run_logs2"
+LOG_DIR = OSS_FUZZ_DIR / "run_logs3"
 INPUT_DIR = OSS_FUZZ_DIR / "fuzz_inputs"  # Directory to store captured inputs
 
 class FunctionInstrumenter(ast.NodeTransformer):
     """AST transformer to instrument function entries for input recording"""
+    def __init__(self, target_functions: list[str]):
+        self.target_functions = target_functions
+        super().__init__()
+    
     def visit_FunctionDef(self, node):
         """Instrument function definition to add input recording"""
-        # Add print statement at the beginning of the function
-        input_record_stmt = ast.Expr(
-            value=ast.Call(
-                func=ast.Name(id='print', ctx=ast.Load()),
-                args=[ast.Constant(value=f"INPUT_CAPTURE: {node.name} received input: {{data}}")],
-                keywords=[]
+        # 只对目标函数进行插桩
+        if node.name in self.target_functions:
+            # Add print statement at the beginning of the function
+            input_record_stmt = ast.Expr(
+                value=ast.Call(
+                    func=ast.Name(id='print', ctx=ast.Load()),
+                    args=[ast.Constant(value=f"INPUT_CAPTURE: {node.name} received input: {{data}}")],
+                    keywords=[]
+                )
             )
-        )
-        
-        # Insert the print statement at the top of the function body
-        if node.body:
-            node.body.insert(0, input_record_stmt)
+            
+            # Insert the print statement at the top of the function body
+            if node.body:
+                node.body.insert(0, input_record_stmt)
         
         return node
 
+
 def instrument_code(source_code: str, target_function: str) -> str:
     """
     Instrument source code to record inputs for specific function
@@ -91,14 +97,14 @@ def instrument_code(source_code: str, target_function: str) -> str:
 
 def prepare_target_for_input_capture(project_name: str, target_name: str) -> Path:
     """
-    Prepare a fuzz target for input capture by instrumenting its code
+    Prepare a Python fuzz target for input capture by instrumenting its code
     
     Args:
         project_name: Name of the project
         target_name: Name of the target to instrument
         
     Returns:
-        Path to the instrumented target executable
+        Path to the instrumented Python script
     """
     try:
         # Create project-specific input directory
@@ -111,50 +117,49 @@ def prepare_target_for_input_capture(project_name: str, target_name: str) -> Pat
         
         logging.info(f"📁 Created input directory: {target_input_dir}")
         
-        # Original target path
-        original_target = OSS_FUZZ_DIR / "build" / "out" / project_name / target_name
+        # Locate Python source file (support multiple extensions)
+        possible_extensions = [".py", ".pyw"]
+        source_file = None
         
-        # Backup original target
-        backup_target = original_target.with_name(f"{target_name}_original")
-        if not backup_target.exists():
-            shutil.copy2(original_target, backup_target)
+        # Try possible file extensions
+        for ext in possible_extensions:
+            candidate = OSS_FUZZ_DIR / "projects" / project_name / f"{target_name}{ext}"
+            if candidate.exists():
+                source_file = candidate
+                break
         
-        # Read target source code (simplified for demonstration)
-        # In a real implementation, we'd need to locate the actual source files
-        # This is a placeholder to demonstrate the instrumentation concept
-        source_file = OSS_FUZZ_DIR / "projects" / project_name / "fuzzers" / f"{target_name}.c"
+        if not source_file:
+            logging.warning(f"⚠️ Python source file not found for: {target_name}")
+            return None
+        
+        # Backup original source file
+        backup_file = source_file.with_name(f"{target_name}_original{source_file.suffix}")
+        if not backup_file.exists():
+            shutil.copy2(source_file, backup_file)
+            logging.info(f"💾 Backed up original file: {backup_file}")
+        
+        # Read source code
+        with open(source_file, "r") as f:
+            source_code = f.read()
+        
+        # Instrument the code - use Python-specific entry function
+        possible_entry_functions = ["TestInput", "TestOneInput"]
+        instrumented_code = instrument_code(source_code, possible_entry_functions)
+
+        
+        # Write instrumented code to a new file with same extension
+        instrumented_file = source_file.with_name(f"{target_name}_instrumented{source_file.suffix}")
+        with open(instrumented_file, "w") as f:
+            f.write(instrumented_code)
+        
+        logging.info(f"🔧 Instrumented {target_name} for input capture")
+        
+        # Python doesn't need rebuilding - return instrumented script path
+        return instrumented_file
         
-        if source_file.exists():
-            with open(source_file, "r") as f:
-                source_code = f.read()
-            
-            # Instrument the code
-            instrumented_code = instrument_code(source_code, "LLVMFuzzerTestOneInput")
-            
-            # Write instrumented code to a new file
-            instrumented_file = source_file.with_name(f"{target_name}_instrumented.c")
-            with open(instrumented_file, "w") as f:
-                f.write(instrumented_code)
-            
-            logging.info(f"🔧 Instrumented {target_name} for input capture")
-            
-            # Rebuild the target with instrumented code
-            # This step is simplified - in reality would use OSS-Fuzz build system
-            rebuild_cmd = f"python3 infra/helper.py build_fuzzers --sanitizer address {project_name}"
-            run_command(
-                rebuild_cmd,
-                f"Rebuilding {target_name} with instrumentation",
-                logging.getLogger("Main"),
-                timeout=1200
-            )
-            
-            return original_target
-        else:
-            logging.warning(f"⚠️ Source file not found for instrumentation: {source_file}")
-            return original_target
     except Exception as e:
         logging.error(f"❌ Failed to instrument {target_name}: {str(e)}")
-        return OSS_FUZZ_DIR / "build" / "out" / project_name / target_name
+        return None
 
 def run_command(
     cmd: str,
@@ -162,7 +167,7 @@ def run_command(
     logger: logging.Logger,
     allowed_exit_codes: Maybe[list[int]] = Nothing,
     timeout: int = 3600,  # Default 1-hour timeout
-    env: Optional[dict] = None  # Added env parameter for input capture
+    env: Maybe[dict] = Nothing  # Use Maybe instead of Optional
 ) -> bool:
     """Execute commands with real-time logging, precise error handling, and input capture"""
     allowed_codes = allowed_exit_codes.value_or([])
@@ -171,6 +176,9 @@ def run_command(
 
     process = None
     try:
+        # Convert Maybe[dict] to actual environment or None
+        env_dict = env.value_or(None)
+        
         process = subprocess.Popen(
             cmd,
             shell=True,
@@ -179,7 +187,7 @@ def run_command(
             text=True,
             encoding="utf-8",
             errors="replace",
-            env=env  # Pass environment variables
+            env=env_dict
         )
 
         start_time = time.time()
@@ -233,7 +241,7 @@ def run_command(
 def discover_targets(project_name: str, logger: logging.Logger) -> list[str]:
     """Discover fuzz targets for a project (starting with 'fuzz_', no extension, and executable)"""
     out_dir = OSS_FUZZ_DIR / "build" / "out" / project_name
-    targets: list[str] = []
+    targets = []  # Use built-in list type
 
     if not out_dir.is_dir():
         logger.warning(f"Build output directory for project {project_name} does not exist: {out_dir}")
@@ -280,56 +288,77 @@ def run_single_target(project_name: str, target_name: str, timeout: int, record_
         print(f"❌ Critical error occurred during initialization of task {task_id}: {e}")
         return False, project_name, target_name
 
-    logger.info(f"🚀 Starting test -> Project: {project_name}, Target: {target_name}")
+        logger.info(f"🚀 Starting test -> Project: {project_name}, Target: {target_name}")
     
-    # Prepare environment for input capture if requested
-    env = None
-    if record_inputs:
-        # Prepare target for input capture
-        target_path = prepare_target_for_input_capture(project_name, target_name)
-        logger.info(f"🔧 Instrumented {target_name} for input capture")
-        
-        # Create input directory for this run
-        input_dir = INPUT_DIR / project_name / target_name / timestamp
-        input_dir.mkdir(parents=True, exist_ok=True)
+     # Prepare environment for input capture if requested
+        env = Nothing  # Initialize as Maybe container
+        instrumented_file = None
         
-        # Set environment variable for input storage
-        env = os.environ.copy()
-        env["FUZZ_INPUT_DIR"] = str(input_dir)
-        logger.info(f"📁 Inputs will be stored in: {input_dir}")
-
-    try:
-        cmd = f"python3 infra/helper.py run_fuzzer {project_name} {target_name} -- -max_total_time={timeout}"
-        success = run_command(
-            cmd,
-            f"Running Target '{target_name}' (timeout={timeout}s)",
-            logger,
-            allowed_exit_codes=Some([1, 124]),  # 1=Crashes found, 124=Timeout
-            timeout=timeout + 300,
-            env=env  # Pass environment for input capture
-        )
-
-        if success:
-            logger.info(f"✅ Target '{target_name}' completed successfully.")
+        if record_inputs:
+        # 准备输入捕获
+            instrumented_file = prepare_target_for_input_capture(project_name, target_name)
+            if not instrumented_file:
+                logger.error(f"❌ Failed to instrument {target_name}")
+                return False, project_name, target_name
+                
+            logger.info(f"🔧 Instrumented {target_name} for input capture")
+            
+            # 创建输入目录
+            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+            input_dir = INPUT_DIR / project_name / target_name / timestamp
+            input_dir.mkdir(parents=True, exist_ok=True)
+            
+            # 设置环境变量
+            env_dict = os.environ.copy()
+            env_dict["FUZZ_INPUT_DIR"] = str(input_dir)
+            env = Some(env_dict)
+            logger.info(f"📁 Inputs will be stored in: {input_dir}")
+            
+            # 关键修改：使用插桩后的脚本运行
+            cmd = f"python3 {instrumented_file} -- -max_total_time={timeout}"
         else:
-            logger.error(f"❌ Target '{target_name}' failed.")
+            # 使用原始目标
+            cmd = f"python3 infra/helper.py run_fuzzer {project_name} {target_name} -- -max_total_time={timeout}"
+        try:
+            # Use instrumented file if available, otherwise use original
+            target_to_run = instrumented_file.name if instrumented_file else target_name
+            
+            cmd = f"python3 infra/helper.py run_fuzzer {project_name} {target_to_run} -- -max_total_time={timeout}"
+            success = run_command(
+                cmd,
+                f"Running Target '{target_name}' (timeout={timeout}s)",
+                logger,
+                allowed_exit_codes=Some([1, 124]),  # 1=Crashes found, 124=Timeout
+                timeout=timeout + 300,
+                env=env  # Pass Maybe container
+            )
 
-        return success, project_name, target_name
+            if success:
+                logger.info(f"✅ Target '{target_name}' completed successfully.")
+            else:
+                logger.error(f"❌ Target '{target_name}' failed.")
 
-    except Exception as e:
-        logger.exception(f"💥 Unexpected error occurred while running target '{target_name}': {e}")
-        return False, project_name, target_name
-    finally:
-        for handler in logger.handlers[:]:
-            handler.close()
-            logger.removeHandler(handler)
+            return success, project_name, target_name
 
-        # Log input capture results
-        if record_inputs and env:
-            input_dir = Path(env["FUZZ_INPUT_DIR"])
-            if input_dir.exists():
-                input_count = len(list(input_dir.glob("*.bin")))
-                logger.info(f"📥 Captured {input_count} inputs for {target_name}")
+        except Exception as e:
+            logger.exception(f"💥 Unexpected error occurred while running target '{target_name}': {e}")
+            return False, project_name, target_name
+        finally:
+            for handler in logger.handlers[:]:
+                handler.close()
+                logger.removeHandler(handler)
+
+            # Log input capture results
+            if record_inputs and env.is_just:
+                input_dir = Path(env.unwrap()["FUZZ_INPUT_DIR"])
+                if input_dir.exists():
+                    # Count all input files, not just .bin
+                    input_count = len(list(input_dir.glob("*")))
+                    logger.info(f"📥 Captured {input_count} inputs for {target_name}")
+    
+    except (OSError, PermissionError) as e:
+        print(f"❌ Critical error occurred during initialization of task {task_id}: {e}")
+        return False, project_name, target_name
 
 def main():
     # Configure main process logging
@@ -363,7 +392,8 @@ def main():
 
     # 2. Discovery phase: Collect all fuzz targets
     logger.info("\n" + "=" * 20 + " Phase 1: Discover all Fuzz Targets " + "=" * 20)
-    all_fuzz_tasks: list[tuple[str, str]] = []  # Store (project, target) tuples
+    all_fuzz_tasks = []  # Use built-in list type
+    
     try:
         original_cwd = Path.cwd()  # Save current working directory
         os.chdir(OSS_FUZZ_DIR)     # Switch to OSS-Fuzz directory

From 1c608dc0719b0795ca696646b4e59b5e08464f91 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Thu, 24 Jul 2025 22:14:04 +0000
Subject: [PATCH 044/134] name 'target_functions' is not defined;
 fuzz_util_instrumented.py does not seem to exist

---
 fuzz/run_fuzz_all_targets_input.py | 99 ++++++++++++++----------------
 1 file changed, 45 insertions(+), 54 deletions(-)

diff --git a/fuzz/run_fuzz_all_targets_input.py b/fuzz/run_fuzz_all_targets_input.py
index 5c16d8d..4d38ffb 100644
--- a/fuzz/run_fuzz_all_targets_input.py
+++ b/fuzz/run_fuzz_all_targets_input.py
@@ -67,7 +67,7 @@ def visit_FunctionDef(self, node):
         return node
 
 
-def instrument_code(source_code: str, target_function: str) -> str:
+def instrument_code(source_code: str, target_function: list[str]) -> str:
     """
     Instrument source code to record inputs for specific function
     
@@ -83,7 +83,7 @@ def instrument_code(source_code: str, target_function: str) -> str:
         tree = ast.parse(source_code)
         
         # Create instrumenter and apply transformations
-        instrumenter = FunctionInstrumenter()
+        instrumenter = FunctionInstrumenter(target_functions)
         modified_tree = instrumenter.visit(tree)
         
         # Add missing location information for generated nodes
@@ -284,81 +284,72 @@ def run_single_target(project_name: str, target_name: str, timeout: int, record_
         logger.addHandler(file_handler)
         os.chdir(OSS_FUZZ_DIR)
 
-    except (OSError, PermissionError) as e:
-        print(f"❌ Critical error occurred during initialization of task {task_id}: {e}")
-        return False, project_name, target_name
-
         logger.info(f"🚀 Starting test -> Project: {project_name}, Target: {target_name}")
-    
-     # Prepare environment for input capture if requested
-        env = Nothing  # Initialize as Maybe container
+
+        # Prepare environment for input capture if requested
+        env = Nothing
         instrumented_file = None
-        
+
         if record_inputs:
-        # 准备输入捕获
             instrumented_file = prepare_target_for_input_capture(project_name, target_name)
             if not instrumented_file:
                 logger.error(f"❌ Failed to instrument {target_name}")
                 return False, project_name, target_name
-                
+
             logger.info(f"🔧 Instrumented {target_name} for input capture")
-            
-            # 创建输入目录
+
             timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
             input_dir = INPUT_DIR / project_name / target_name / timestamp
             input_dir.mkdir(parents=True, exist_ok=True)
-            
-            # 设置环境变量
+
             env_dict = os.environ.copy()
             env_dict["FUZZ_INPUT_DIR"] = str(input_dir)
             env = Some(env_dict)
+
             logger.info(f"📁 Inputs will be stored in: {input_dir}")
-            
-            # 关键修改：使用插桩后的脚本运行
-            cmd = f"python3 {instrumented_file} -- -max_total_time={timeout}"
+
+        # Prepare command
+        target_to_run = instrumented_file.name if instrumented_file else target_name
+        cmd = f"python3 infra/helper.py run_fuzzer {project_name} {target_to_run} -- -max_total_time={timeout}"
+
+        # Execute command
+        success = run_command(
+            cmd,
+            f"Running Target '{target_name}' (timeout={timeout}s)",
+            logger,
+            allowed_exit_codes=Some([1, 124]),
+            timeout=timeout + 300,
+            env=env
+        )
+
+        if success:
+            logger.info(f"✅ Target '{target_name}' completed successfully.")
         else:
-            # 使用原始目标
-            cmd = f"python3 infra/helper.py run_fuzzer {project_name} {target_name} -- -max_total_time={timeout}"
-        try:
-            # Use instrumented file if available, otherwise use original
-            target_to_run = instrumented_file.name if instrumented_file else target_name
-            
-            cmd = f"python3 infra/helper.py run_fuzzer {project_name} {target_to_run} -- -max_total_time={timeout}"
-            success = run_command(
-                cmd,
-                f"Running Target '{target_name}' (timeout={timeout}s)",
-                logger,
-                allowed_exit_codes=Some([1, 124]),  # 1=Crashes found, 124=Timeout
-                timeout=timeout + 300,
-                env=env  # Pass Maybe container
-            )
+            logger.error(f"❌ Target '{target_name}' failed.")
 
-            if success:
-                logger.info(f"✅ Target '{target_name}' completed successfully.")
-            else:
-                logger.error(f"❌ Target '{target_name}' failed.")
+        return success, project_name, target_name
 
-            return success, project_name, target_name
+    except Exception as e:
+        logger.exception(f"💥 Unexpected error occurred while running target '{target_name}': {e}")
+        return False, project_name, target_name
 
-        except Exception as e:
-            logger.exception(f"💥 Unexpected error occurred while running target '{target_name}': {e}")
-            return False, project_name, target_name
-        finally:
-            for handler in logger.handlers[:]:
-                handler.close()
-                logger.removeHandler(handler)
-
-            # Log input capture results
-            if record_inputs and env.is_just:
+    finally:
+        for handler in logger.handlers[:]:
+            handler.close()
+            logger.removeHandler(handler)
+
+        if record_inputs and env.is_just:
+            try:
                 input_dir = Path(env.unwrap()["FUZZ_INPUT_DIR"])
                 if input_dir.exists():
-                    # Count all input files, not just .bin
                     input_count = len(list(input_dir.glob("*")))
                     logger.info(f"📥 Captured {input_count} inputs for {target_name}")
-    
-    except (OSError, PermissionError) as e:
-        print(f"❌ Critical error occurred during initialization of task {task_id}: {e}")
-        return False, project_name, target_name
+            except Exception as e:
+                logger.warning(f"⚠️ Failed to summarize captured inputs: {e}")
+
+    # Final fallback (defensive)
+    return False, project_name, target_name
+
 
 def main():
     # Configure main process logging

From 34ef83304571b9b12326f539870d33dfae67c7e4 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Thu, 24 Jul 2025 22:20:09 +0000
Subject: [PATCH 045/134] =?UTF-8?q?=E5=87=86=E5=A4=87=E5=A4=A7=E6=94=B9?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 fuzz/run_fuzz_ds.py | 425 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 425 insertions(+)
 create mode 100644 fuzz/run_fuzz_ds.py

diff --git a/fuzz/run_fuzz_ds.py b/fuzz/run_fuzz_ds.py
new file mode 100644
index 0000000..b0c4c59
--- /dev/null
+++ b/fuzz/run_fuzz_ds.py
@@ -0,0 +1,425 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""
+run_fuzz_all_targets_input.py
+
+Enhanced with input instrumentation to capture fuzzing inputs.
+
+This script employs a two-phase approach for fuzz testing:
+1. Discovery phase: Traverse all specified projects to collect all executable fuzz targets in each project
+2. Execution phase: Create a task pool containing all (project, target) pairs and execute them in parallel using multiprocessing
+3. Input capture: Instrument fuzz targets to record all inputs during fuzzing
+
+Key Enhancements:
+- Added input instrumentation to capture fuzzing inputs
+- Created dedicated input storage directory structure
+- Added AST-based function instrumentation
+- Added input recording and analysis capabilities
+
+Usage: python3 run_fuzz_all_targets_input [project_list_file] [--timeout seconds] [--workers N] [--record-inputs]
+Example: python3 fuzz/run_fuzz_all_targets_input.py data/valid_projects.txt --timeout 60 --workers 4 --record-inputs
+"""
+
+import os
+import sys
+import subprocess
+import argparse
+import logging
+import time
+import ast
+import astor
+import shutil
+from datetime import datetime
+from pathlib import Path
+from multiprocessing import Pool, cpu_count
+from returns.maybe import Maybe, Nothing, Some
+
+# --- Global configuration ---
+HOME_DIR = Path.home()
+OSS_FUZZ_DIR = HOME_DIR / "FuzzAug" / "fuzz" / "oss-fuzz"
+LOG_DIR = OSS_FUZZ_DIR / "run_logs3"
+INPUT_DIR = OSS_FUZZ_DIR / "fuzz_inputs"  # Directory to store captured inputs
+
+class FunctionInstrumenter(ast.NodeTransformer):
+    def __init__(self, target_functions: list[str]):
+        self.target_functions = target_functions
+
+    def visit_FunctionDef(self, node):
+        if node.name in self.target_functions:
+            print_stmt = ast.parse(f'print("INPUT_CAPTURE: {node.name} called")').body[0]
+            node.body.insert(0, print_stmt)
+        return self.generic_visit(node)
+
+def instrument_code(source_code: str, target_functions: list[str]) -> str:
+    tree = ast.parse(source_code)
+    instrumenter = FunctionInstrumenter(target_functions)
+    instrumented_tree = instrumenter.visit(tree)
+    ast.fix_missing_locations(instrumented_tree)
+    return ast.unparse(instrumented_tree)
+def prepare_target_for_input_capture(project_name: str, target_name: str) -> Path:
+    """
+    Prepare a Python fuzz target for input capture by instrumenting its code
+    
+    Args:
+        project_name: Name of the project
+        target_name: Name of the target to instrument
+        
+    Returns:
+        Path to the instrumented Python script
+    """
+    try:
+        # Create project-specific input directory
+        project_input_dir = INPUT_DIR / project_name
+        project_input_dir.mkdir(parents=True, exist_ok=True)
+        
+        # Create target-specific input directory
+        target_input_dir = project_input_dir / target_name
+        target_input_dir.mkdir(exist_ok=True)
+        
+        logging.info(f"📁 Created input directory: {target_input_dir}")
+        
+        # Locate Python source file (support multiple extensions)
+        possible_extensions = [".py", ".pyw"]
+        source_file = None
+        
+        # Try possible file extensions
+        for ext in possible_extensions:
+            candidate = OSS_FUZZ_DIR / "projects" / project_name / f"{target_name}{ext}"
+            if candidate.exists():
+                source_file = candidate
+                break
+        
+        if not source_file:
+            logging.warning(f"⚠️ Python source file not found for: {target_name}")
+            return None
+        
+        # Backup original source file
+        backup_file = source_file.with_name(f"{target_name}_original{source_file.suffix}")
+        if not backup_file.exists():
+            shutil.copy2(source_file, backup_file)
+            logging.info(f"💾 Backed up original file: {backup_file}")
+        
+        # Read source code
+        with open(source_file, "r") as f:
+            source_code = f.read()
+        
+        # Instrument the code - use Python-specific entry function
+        possible_entry_functions = ["TestInput", "TestOneInput"]
+        instrumented_code = instrument_code(source_code, possible_entry_functions)
+
+        
+        # Write instrumented code to a new file with same extension
+        instrumented_file = source_file.with_name(f"{target_name}_instrumented{source_file.suffix}")
+        with open(instrumented_file, "w") as f:
+            f.write(instrumented_code)
+        
+        logging.info(f"🔧 Instrumented {target_name} for input capture")
+        
+        # Python doesn't need rebuilding - return instrumented script path
+        return instrumented_file
+        
+    except Exception as e:
+        logging.error(f"❌ Failed to instrument {target_name}: {str(e)}")
+        return None
+
+def run_command(
+    cmd: str,
+    log_msg: str,
+    logger: logging.Logger,
+    allowed_exit_codes: Maybe[list[int]] = Nothing,
+    timeout: int = 3600,  # Default 1-hour timeout
+    env: Maybe[dict] = Nothing  # Use Maybe instead of Optional
+) -> bool:
+    """Execute commands with real-time logging, precise error handling, and input capture"""
+    allowed_codes = allowed_exit_codes.value_or([])
+    logger.info(f"▶️ {log_msg}...")
+    logger.debug(f"   $ {cmd}")
+
+    process = None
+    try:
+        # Convert Maybe[dict] to actual environment or None
+        env_dict = env.value_or(None)
+        
+        process = subprocess.Popen(
+            cmd,
+            shell=True,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT,
+            text=True,
+            encoding="utf-8",
+            errors="replace",
+            env=env_dict
+        )
+
+        start_time = time.time()
+        while process.poll() is None:
+            if time.time() - start_time > timeout:
+                logger.error(f"⌛ Command timed out after {timeout} seconds")
+                process.terminate()
+                try:
+                    process.wait(timeout=5)
+                except subprocess.TimeoutExpired:
+                    process.kill()
+                return False
+
+            if process.stdout:
+                line = process.stdout.readline()
+                if line:
+                    # Capture input data when detected
+                    if "INPUT_CAPTURE:" in line:
+                        logger.debug(f"📥 {line.strip()}")
+                    else:
+                        logger.debug(line.strip())
+            else:
+                time.sleep(0.1)
+
+        exit_code = process.returncode
+        if exit_code not in [0, *allowed_codes]:
+            logger.error(f"❌ Command execution failed, exit code: {exit_code}")
+            return False
+        return True
+
+    except FileNotFoundError:
+        logger.error(f"🔍 Command not found: {cmd.split()[0]}")
+        return False
+    except PermissionError:
+        logger.error(f"🔒 Insufficient permissions to execute command: {cmd}")
+        return False
+    except subprocess.SubprocessError as e:
+        logger.exception(f"💥 Subprocess error: {e}")
+        return False
+    except OSError as e:
+        logger.exception(f"💥 Operating system error during command execution: {e}")
+        return False
+    finally:
+        if process and process.poll() is None:
+            try:
+                process.terminate()
+                process.wait(timeout=5)
+            except Exception:
+                pass
+
+def discover_targets(project_name: str, logger: logging.Logger) -> list[str]:
+    """Discover fuzz targets for a project (starting with 'fuzz_', no extension, and executable)"""
+    out_dir = OSS_FUZZ_DIR / "build" / "out" / project_name
+    targets = []  # Use built-in list type
+
+    if not out_dir.is_dir():
+        logger.warning(f"Build output directory for project {project_name} does not exist: {out_dir}")
+        return targets
+
+    try:
+        for f in out_dir.iterdir():
+            try:
+                if (f.is_file() and
+                        f.name.startswith("fuzz_") and
+                        '.' not in f.name and
+                        os.access(f, os.X_OK)):
+                    targets.append(f.name)
+            except OSError as e:
+                logger.warning(f"⚠️ Error while checking file {f.name}, skipped: {e}")
+
+    except PermissionError:
+        logger.error(f"🔒 Insufficient permissions to access directory: {out_dir}")
+    except OSError as e:
+        logger.exception(f"💥 Operating system error occurred while discovering targets: {e}")
+
+    return targets
+
+def run_single_target(project_name: str, target_name: str, timeout: int, record_inputs: bool = False) -> tuple[bool, str, str]:
+    """Execute fuzz testing workflow for a single (project, target) pair with input capture"""
+    task_id = f"{project_name}_{target_name}"
+    logger = logging.getLogger(task_id)
+
+    try:
+        logger.setLevel(logging.DEBUG)
+        LOG_DIR.mkdir(parents=True, exist_ok=True)
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        log_file = LOG_DIR / f"run_{task_id}_{timestamp}.log"
+        file_handler = logging.FileHandler(log_file, encoding="utf-8")
+        formatter = logging.Formatter(
+            "%(asctime)s [PID:%(process)d] %(levelname)s: %(message)s",
+            datefmt="%Y-%m-%d %H:%M:%S"
+        )
+        file_handler.setFormatter(formatter)
+        logger.addHandler(file_handler)
+        os.chdir(OSS_FUZZ_DIR)
+
+        logger.info(f"🚀 Starting test -> Project: {project_name}, Target: {target_name}")
+
+        # Prepare environment for input capture if requested
+        env = Nothing
+        instrumented_file = None
+
+        if record_inputs:
+            instrumented_file = prepare_target_for_input_capture(project_name, target_name)
+            if not instrumented_file:
+                logger.error(f"❌ Failed to instrument {target_name}")
+                return False, project_name, target_name
+
+            logger.info(f"🔧 Instrumented {target_name} for input capture")
+
+            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+            input_dir = INPUT_DIR / project_name / target_name / timestamp
+            input_dir.mkdir(parents=True, exist_ok=True)
+
+            env_dict = os.environ.copy()
+            env_dict["FUZZ_INPUT_DIR"] = str(input_dir)
+            env = Some(env_dict)
+
+            logger.info(f"📁 Inputs will be stored in: {input_dir}")
+
+        # Prepare command
+        target_to_run = instrumented_file.name if instrumented_file else target_name
+        cmd = f"python3 infra/helper.py run_fuzzer {project_name} {target_to_run} -- -max_total_time={timeout}"
+
+        # Execute command
+        success = run_command(
+            cmd,
+            f"Running Target '{target_name}' (timeout={timeout}s)",
+            logger,
+            allowed_exit_codes=Some([1, 124]),
+            timeout=timeout + 300,
+            env=env
+        )
+
+        if success:
+            logger.info(f"✅ Target '{target_name}' completed successfully.")
+        else:
+            logger.error(f"❌ Target '{target_name}' failed.")
+
+        return success, project_name, target_name
+
+    except Exception as e:
+        logger.exception(f"💥 Unexpected error occurred while running target '{target_name}': {e}")
+        return False, project_name, target_name
+
+    finally:
+        for handler in logger.handlers[:]:
+            handler.close()
+            logger.removeHandler(handler)
+
+        if record_inputs and env.is_just:
+            try:
+                input_dir = Path(env.unwrap()["FUZZ_INPUT_DIR"])
+                if input_dir.exists():
+                    input_count = len(list(input_dir.glob("*")))
+                    logger.info(f"📥 Captured {input_count} inputs for {target_name}")
+            except Exception as e:
+                logger.warning(f"⚠️ Failed to summarize captured inputs: {e}")
+
+    # Final fallback (defensive)
+    return False, project_name, target_name
+
+
+def main():
+    # Configure main process logging
+    logging.basicConfig(
+        level=logging.INFO,
+        format="[%(levelname)s] %(message)s",
+        stream=sys.stdout
+    )
+    logger = logging.getLogger("Main")
+
+    # Set up command line argument parsing
+    parser = argparse.ArgumentParser(description="OSS-Fuzz Parallel Fuzz Testing Tool with Input Capture")
+    parser.add_argument("project_list", help="File path containing list of project names")
+    parser.add_argument("--timeout", type=int, default=60, help="Timeout duration per Fuzz Target (seconds)")
+    parser.add_argument("--workers", type=int, default=cpu_count(), help="Number of parallel worker processes")
+    parser.add_argument("--record-inputs", action="store_true", help="Enable input capture during fuzzing")
+    args = parser.parse_args()
+
+    # 1. Read project list file
+    try:
+        project_path = Path(args.project_list)
+        with open(project_path, "r", encoding="utf-8") as f:
+            projects = [line.strip() for line in f if line.strip()]
+        logger.info(f"📋 Loaded {len(projects)} projects from {project_path.name}.")
+    except FileNotFoundError:
+        logger.error(f"❌ Project list file not found: {args.project_list}")
+        sys.exit(1)
+    except (OSError, PermissionError) as e:
+        logger.exception(f"💥 Error occurred while reading project list: {e}")
+        sys.exit(1)
+
+    # 2. Discovery phase: Collect all fuzz targets
+    logger.info("\n" + "=" * 20 + " Phase 1: Discover all Fuzz Targets " + "=" * 20)
+    all_fuzz_tasks = []  # Use built-in list type
+    
+    try:
+        original_cwd = Path.cwd()  # Save current working directory
+        os.chdir(OSS_FUZZ_DIR)     # Switch to OSS-Fuzz directory
+        for project_name in projects:
+            targets = discover_targets(project_name, logger)
+            if targets:
+                logger.info(f"🔍 Discovered {len(targets)} targets in project '{project_name}': {', '.join(targets)}")
+                for target in targets:
+                    all_fuzz_tasks.append((project_name, target))
+            else:
+                logger.warning(f"⚠️ No Fuzz Targets found in project '{project_name}'.")
+        os.chdir(original_cwd)  # Restore original working directory
+    except FileNotFoundError:
+        logger.error(f"❌ OSS-Fuzz directory does not exist: {OSS_FUZZ_DIR}")
+        sys.exit(1)
+    except Exception as e:
+        logger.exception(f"💥 Unknown error occurred during discovery phase: {e}")
+        sys.exit(1)
+
+    # Check if any valid targets were found
+    if not all_fuzz_tasks:
+        logger.info("🤷 No executable Fuzz Targets found. Program exits.")
+        sys.exit(0)
+
+    # 3. Execution phase: Parallel fuzz testing with input capture
+    logger.info(f"\n✅ Discovery phase completed. Found a total of {len(all_fuzz_tasks)} fuzz tasks.")
+    if args.record_inputs:
+        logger.info("🔔 Input capture is ENABLED. All fuzzing inputs will be recorded.")
+        logger.info(f"📁 Inputs will be stored in: {INPUT_DIR}")
+    
+    logger.info("=" * 20 + " Phase 2: Parallel Fuzzing Execution " + "=" * 23)
+    logger.info(f"🚀 Starting parallel testing with {args.workers} worker processes (timeout per target: {args.timeout}s)...")
+
+    # Prepare task parameters (project, target, timeout, record_inputs)
+    tasks_with_args = [(p, t, args.timeout, args.record_inputs) for p, t in all_fuzz_tasks]
+    results: list[tuple[bool, str, str]] = []  # Store results (success, project, target)
+    
+    # Execute in parallel using process pool
+    with Pool(args.workers) as pool:
+        try:
+            results = pool.starmap(run_single_target, tasks_with_args)
+        except Exception as e:
+            logger.error(f"💥 Critical error occurred during parallel execution: {e}")
+            pool.terminate()
+            pool.join()
+
+    # 4. Result summary and reporting
+    logger.info("\n" + "=" * 20 + " Phase 3: Result Summary " + "=" * 28)
+    failed_tasks = [(p, t) for success, p, t in results if not success]  # List of failed tasks
+    total_tasks = len(all_fuzz_tasks)
+    failed_count = len(failed_tasks)
+    success_count = total_tasks - failed_count
+
+    # Output statistical summary
+    logger.info(f"📊 Fuzzing completed: Success {success_count}/{total_tasks}, Failed {failed_count}/{total_tasks}")
+    
+    if args.record_inputs:
+        total_inputs = sum(len(list((INPUT_DIR / p / t).glob("*/*.bin"))) for p, t in all_fuzz_tasks)
+        logger.info(f"📥 Total inputs captured: {total_inputs}")
+        logger.info(f"💾 Inputs stored at: {INPUT_DIR}")
+    
+    if failed_tasks:
+        logger.error("❌ The following Fuzz Targets failed:")
+        for project, target in failed_tasks:
+            logger.error(f"  - Project: {project}, Target: {target}")  # List detailed failures
+
+
+if __name__ == "__main__":
+    try:
+        main()
+    except KeyboardInterrupt:
+        print("\n🛑 Operation interrupted by user.")
+        sys.exit(1)
+    except Exception as e:
+        print(f"\n💥 Fatal error in main program: {e}")
+        sys.exit(1)
\ No newline at end of file

From 510cbe7958cbc4d9a7afee9f325cb29032113483 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Fri, 25 Jul 2025 00:16:37 +0000
Subject: [PATCH 046/134] Add script that inserts "print(data)" into each
 fuzz_*.py

---
 fuzz/modify_fuzz_files.py | 66 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)
 create mode 100644 fuzz/modify_fuzz_files.py

diff --git a/fuzz/modify_fuzz_files.py b/fuzz/modify_fuzz_files.py
new file mode 100644
index 0000000..eb7b37e
--- /dev/null
+++ b/fuzz/modify_fuzz_files.py
@@ -0,0 +1,66 @@
+import os
+import re
+
+def add_print_to_testoneinput(file_path):
+    """Return the text of `file_path` with `print(data)` inserted as the first
+    statement of each `TestOneInput(data)` / `TestInput(data)` function.
+
+    The file on disk is not modified. A function whose first non-blank body
+    line is already `print(data)` is left unchanged.
+    """
+    with open(file_path, 'r') as f:
+        content = f.read()
+
+    # Group 1 is the function header, group 3 the body: the run of indented or
+    # blank lines that follows it.
+    pattern = r'(\bdef\s+(TestOneInput|TestInput)\(data\):\s*\n)((?:[ \t]+.*\n|\s*\n)*)'
+    matches = re.finditer(pattern, content, re.MULTILINE)
+
+    new_content = content
+    # Splice back to front so offsets of earlier matches remain valid.
+    for match in reversed(list(matches)):
+        # The first non-blank body line supplies the indentation to reuse.
+        first_line = re.search(r'^([ \t]+)\S.*$', match.group(3), re.MULTILINE)
+        if first_line is None or first_line.group(0).strip() == 'print(data)':
+            continue  # empty body, or already instrumented
+
+        # Inserting before (not after) that line keeps compound statements
+        # such as `try:` or `with ...:` attached to their own block.
+        insert_at = match.start(3) + first_line.start()
+        new_print = first_line.group(1) + 'print(data)\n'
+        new_content = new_content[:insert_at] + new_print + new_content[insert_at:]
+
+    return new_content
+
+def main():
+    """Write an instrumented `<name>_print.py` copy beside every `fuzz_*.py` file
+    found under each project directory listed in `valid_projects.txt`."""
+    projects_path = "/home/jiayiguo/FuzzAug/fuzz/oss-fuzz/projects"
+    valid_projects_file = "valid_projects.txt"
+    with open(valid_projects_file, 'r') as f:
+        projects = [line.strip() for line in f if line.strip()]
+
+    for project in projects:
+        project_dir = os.path.join(projects_path, project)
+        if not os.path.isdir(project_dir):
+            continue  # listed project has no directory; nothing to process
+        for root, _, files in os.walk(project_dir):
+            for file in files:
+                if not (file.startswith('fuzz_') and file.endswith('.py')):
+                    continue
+                # A `*_print.py` whose source sits beside it came from an earlier
+                # run; skipping it stops reruns from creating `*_print_print.py`.
+                if file.endswith('_print.py') and file[:-len('_print.py')] + '.py' in files:
+                    continue
+                file_path = os.path.join(root, file)
+                try:
+                    new_content = add_print_to_testoneinput(file_path)
+                    new_file_path = file_path.rsplit('.', 1)[0] + '_print.py'
+                    with open(new_file_path, 'w') as f:
+                        f.write(new_content)
+                    print(f"Processed: {file_path} -> {new_file_path}")
+                except Exception as e:
+                    print(f"Error processing {file_path}: {str(e)}")
+
+if __name__ == "__main__":  # run only when executed as a script, not when imported
+    main()
\ No newline at end of file

From 61009ab0f91a57080a4e15c107619ab6bc173365 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Fri, 25 Jul 2025 00:29:27 +0000
Subject: [PATCH 047/134] build_fuzzer script

---
 fuzz/build_fuzzers.py | 246 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 246 insertions(+)
 create mode 100644 fuzz/build_fuzzers.py

diff --git a/fuzz/build_fuzzers.py b/fuzz/build_fuzzers.py
new file mode 100644
index 0000000..1dfd825
--- /dev/null
+++ b/fuzz/build_fuzzers.py
@@ -0,0 +1,246 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""
+build_fuzzers.py
+
+Parallel build of OSS-Fuzz fuzzers.
+Requires Docker images to be built first (using build_images.py).
+
+Usage: python3 build_fuzzers.py [project_list_file] --oss-fuzz-dir /path/to/oss-fuzz \
+    --image-results image_build_results.json \
+    [--sanitizer type] [--workers N]
+Example: python3 fuzz/build_fuzzers.py data/valid_projects.txt \
+    --oss-fuzz-dir ./fuzz/oss-fuzz \
+    --image-results image_build_results.json \
+    --sanitizer address \
+    --workers 8
+"""
+
+import os
+import sys
+import subprocess
+import argparse
+import logging
+import json
+from pathlib import Path
+from typing import List, Optional, Tuple
+from multiprocessing import Pool, cpu_count
+
+class BuildError(Exception):
+    """Base exception for build failures; records the affected project and exit code"""
+    def __init__(self, message: str, project: str = "", exit_code: Optional[int] = None):
+        super().__init__(message)
+        self.project = project  # project name, "" when the caller did not supply one
+        self.exit_code = exit_code  # exit status passed by the raiser, None when not given
+
+class CommandError(BuildError):
+    """Exception for command execution failures (raised by run_command)"""
+    pass
+
+class PathError(BuildError):
+    """Exception for missing paths or files (e.g. infra/helper.py not found)"""
+    pass
+
+class ConfigError(BuildError):
+    """Exception for configuration errors (empty project list, malformed image results)"""
+    pass
+
+def run_command(
+    cmd: str,
+    oss_fuzz_dir: Path,
+    project: str = "",
+    allowed_exit_codes: Optional[List[int]] = None
+) -> int:
+    """Run `cmd` through the shell inside `oss_fuzz_dir` and return its exit code.
+
+    `project` is only used to label errors. `allowed_exit_codes` lists the exit
+    codes treated as success and defaults to [0].
+
+    Raises:
+        CommandError: the exit code is not allowed, or the process could not be
+            started; captured stderr/stdout are appended to the message.
+    """
+    allowed_exit_codes = allowed_exit_codes or [0]
+    logging.info(f"▶️ Executing command: {cmd}")
+
+    try:
+        completed = subprocess.run(
+            cmd, shell=True, cwd=str(oss_fuzz_dir),
+            stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
+        )
+    except FileNotFoundError as e:
+        # With shell=True the shell reports a missing program through its exit
+        # status, so this means the working directory (or the shell) is missing.
+        raise CommandError(f"Working directory or shell not found: {e}", project=project) from e
+    except OSError as e:
+        raise CommandError(f"System error: {e}", project=project) from e
+    except subprocess.SubprocessError as e:
+        raise CommandError(f"Subprocess error: {e}", project=project) from e
+
+    exit_code = completed.returncode
+    if exit_code in allowed_exit_codes:
+        return exit_code
+
+    # Build a detailed error message
+    error_msg = f"Command failed (exit code: {exit_code})"
+    if project:
+        error_msg += f" for project: {project}"
+    if completed.stderr.strip():
+        error_msg += f"\nError output:\n{completed.stderr.strip()}"
+    if completed.stdout.strip():
+        error_msg += f"\nOutput:\n{completed.stdout.strip()}"
+    raise CommandError(error_msg, project=project, exit_code=exit_code)
+
+def build_fuzzers(project_name: str, sanitizer: str, oss_fuzz_dir: Path) -> Tuple[bool, str]:
+    """Build one project's fuzzers via `infra/helper.py build_fuzzers` and return
+    `(success, project_name)`; build failures are logged and reflected in the flag."""
+    import shlex  # local import: only needed to quote the command-line arguments
+    try:
+        logging.info("=" * 60)
+        logging.info(f"🔧 Building fuzzers for: {project_name}")
+        logging.info(f"📁 OSS-Fuzz directory: {oss_fuzz_dir}")
+        logging.info("=" * 60)
+        helper_script = oss_fuzz_dir / "infra" / "helper.py"
+        if not helper_script.exists():
+            raise PathError(f"Missing helper script: {helper_script}", project=project_name)
+
+        # run_command goes through the shell, so quote the values that come from
+        # the project list / CLI to keep spaces and metacharacters literal.
+        run_command(
+            "python3 infra/helper.py build_fuzzers "
+            f"--sanitizer {shlex.quote(sanitizer)} {shlex.quote(project_name)}",
+            oss_fuzz_dir,
+            project=project_name
+        )
+        logging.info(f"✅ Fuzzers built: {project_name}")
+        return (True, project_name)
+    except BuildError as e:
+        logging.error(f"❌ Build failed: {project_name}")
+        logging.error(f"   Reason: {str(e)}")
+        return (False, project_name)
+    except Exception as e:
+        logging.error(f"🔥 Unhandled exception: {project_name}")
+        logging.exception(f"   Exception details: {e}")
+        return (False, project_name)
+
+def main():
+    """Parse the CLI arguments, build fuzzers in parallel for every listed project
+    whose Docker image build succeeded, and log a per-project status report."""
+    parser = argparse.ArgumentParser(description="OSS-Fuzz Fuzzer Builder")
+    parser.add_argument("project_list", help="Project list file path")
+    parser.add_argument("--oss-fuzz-dir", required=True, type=str,
+                        help="OSS-Fuzz directory path")
+    parser.add_argument("--sanitizer", default="address",
+                        choices=["address", "memory", "undefined"],
+                        help="Fuzzer sanitizer type")
+    parser.add_argument("--workers", type=int, default=cpu_count(),
+                        help="Number of parallel worker processes")
+    parser.add_argument("--image-results", required=True,
+                        help="JSON file with image build results from build_images.py")
+    args = parser.parse_args()
+    if args.workers < 1:
+        # Pool() rejects non-positive sizes; fail early with a usage error instead.
+        parser.error("--workers must be a positive integer")
+
+    logging.basicConfig(level=logging.INFO, format='[%(levelname)s] %(message)s')
+
+    # Process paths
+    oss_fuzz_dir = Path(args.oss_fuzz_dir).resolve()
+    logging.info(f"📁 Using OSS-Fuzz directory: {oss_fuzz_dir}")
+
+    # Read project list (one project name per non-blank line)
+    try:
+        project_file = Path(args.project_list)
+        if not project_file.exists():
+            raise FileNotFoundError(f"Project list file not found: {project_file}")
+
+        with open(project_file, "r", encoding="utf-8") as f:
+            all_projects = [line.strip() for line in f if line.strip()]
+
+        if not all_projects:
+            raise ConfigError("Project list is empty")
+
+        logging.info(f"📋 Loaded {len(all_projects)} projects")
+    except Exception as e:
+        logging.error(f"❌ Failed to read project list: {e}")
+        sys.exit(1)
+
+    # Load image build results (JSON object: project name -> truthy on success)
+    try:
+        image_results_file = Path(args.image_results)
+        if not image_results_file.exists():
+            raise FileNotFoundError(f"Image results file not found: {image_results_file}")
+
+        with open(image_results_file, "r") as f:
+            image_results = json.load(f)
+
+        if not isinstance(image_results, dict):
+            raise ConfigError("Image results should be a JSON object")
+
+        logging.info(f"📋 Loaded image build results: {args.image_results}")
+    except json.JSONDecodeError as e:
+        logging.error(f"❌ Failed to parse image build results: {e}")
+        sys.exit(1)
+    except Exception as e:
+        logging.error(f"❌ Failed to load image build results: {e}")
+        sys.exit(1)
+
+    # Filter projects with successful image builds
+    projects_to_build = [p for p in all_projects if image_results.get(p)]
+    image_failures = [p for p in all_projects if not image_results.get(p)]
+
+    if not projects_to_build:
+        logging.error("❌ No projects with successful image builds")
+        if image_failures:
+            logging.error(f"   Projects with image build failures: {', '.join(image_failures[:10])}{'...' if len(image_failures) > 10 else ''}")
+        sys.exit(1)
+
+    skipped = len(all_projects) - len(projects_to_build)
+    logging.info(f"🔍 Building {len(projects_to_build)} projects (skipped {skipped} due to image failures)")
+
+    # Parallel fuzzer builds
+    with Pool(args.workers) as pool:
+        results = pool.starmap(
+            build_fuzzers,
+            [(p, args.sanitizer, oss_fuzz_dir) for p in projects_to_build]
+        )
+
+    # Output results
+    fuzzer_results = {project: success for success, project in results}
+    failed = [p for p in projects_to_build if not fuzzer_results[p]]
+
+    success_count = len(projects_to_build) - len(failed)
+    logging.info(f"\n📊 Build completed: {success_count}/{len(projects_to_build)}")
+
+    if failed:
+        logging.error(f"❌ Failed builds ({len(failed)} projects):")
+        for project in failed:
+            logging.error(f"   - {project}")
+
+    # Generate overall status report
+    overall_results = {}
+    for project in all_projects:
+        if not image_results.get(project):
+            status = "❌ (image)"
+        elif project not in fuzzer_results:
+            status = "❌ (not built)"
+        elif fuzzer_results[project]:
+            status = "✅"
+        else:
+            status = "❌ (fuzzer)"
+        overall_results[project] = status
+
+    logging.info("\n📊 Overall status:")
+    for project, status in overall_results.items():
+        logging.info(f"  {project}: {status}")
+
+if __name__ == "__main__":  # entry point; fatal messages go to stderr like the log output
+    try:
+        main()
+    except KeyboardInterrupt:
+        print("\n🛑 Operation interrupted", file=sys.stderr)
+        sys.exit(1)
+    except Exception as e:
+        print(f"💥 Critical error: {e}", file=sys.stderr)
+        sys.exit(1)
\ No newline at end of file

From d3bbc132c0a7a4dff6acbf8079097aa3efebcb32 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Fri, 25 Jul 2025 18:07:51 +0000
Subject: [PATCH 048/134] modify tuple dict list

---
 fuzz/build_fuzz.py | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/fuzz/build_fuzz.py b/fuzz/build_fuzz.py
index 51a1a79..96e1f18 100644
--- a/fuzz/build_fuzz.py
+++ b/fuzz/build_fuzz.py
@@ -33,7 +33,6 @@
 from pathlib import Path
 from returns.maybe import Maybe
 from multiprocessing import Pool
-from typing import Dict, List, Tuple
 
 # ========================================================================================
 # Custom Exceptions
@@ -64,7 +63,7 @@ def run_command(
     cmd: str,
     oss_fuzz_dir: Path,
     project: str = "",
-    allowed_exit_codes: Maybe[List[int]] = Maybe.empty,
+    allowed_exit_codes: Maybe[list[int]] = Maybe.empty,
     skip_yes: bool = False
 ) -> int:
     """Execute a command and return the exit code"""
@@ -110,7 +109,7 @@ def run_command(
 # ========================================================================================
 # Build Functions
 # ========================================================================================
-def build_image(project_name: str, oss_fuzz_dir: Path) -> Tuple[bool, str]:
+def build_image(project_name: str, oss_fuzz_dir: Path) -> tuple[bool, str]:
     """Docker image build workflow"""
     try:
         logging.info(f"Building Docker image: {project_name}")
@@ -137,7 +136,7 @@ def build_image(project_name: str, oss_fuzz_dir: Path) -> Tuple[bool, str]:
         logging.error(f"💥 Unhandled exception: {project_name} - {repr(e)}")
         return (False, project_name)
 
-def build_fuzzers(project_name: str, sanitizer: str, oss_fuzz_dir: Path) -> Tuple[bool, str]:
+def build_fuzzers(project_name: str, sanitizer: str, oss_fuzz_dir: Path) -> tuple[bool, str]:
     """Fuzzer build workflow"""
     try:
         logging.info(f"Building fuzzers: {project_name} ({sanitizer} sanitizer)")
@@ -168,7 +167,7 @@ def build_fuzzers(project_name: str, sanitizer: str, oss_fuzz_dir: Path) -> Tupl
 # ========================================================================================
 # Main Execution
 # ========================================================================================
-def load_projects(file_path: Path) -> List[str]:
+def load_projects(file_path: Path) -> list[str]:
     """Load project list from file"""
     if not file_path.exists():
         raise FileNotFoundError(f"Project list not found: {file_path}")
@@ -184,13 +183,13 @@ def load_projects(file_path: Path) -> List[str]:
 
 def execute_builds(
     func,
-    args_list: List[Tuple],
+    args_list: list[tuple],
     worker_count: int,
     success_msg: str,
     failure_msg: str
-) -> Tuple[Dict[str, bool], List[str]]:
+) -> tuple[dict[str, bool], list[str]]:
     """Execute build tasks in parallel and return results"""
-    results: Dict[str, bool] = {}
+    results = {}
     with Pool(worker_count) as pool:
         for success, project in pool.starmap(func, args_list):
             results[project] = success
@@ -246,7 +245,7 @@ def main():
         sys.exit(1)
 
     # Image building workflow
-    image_results: Dict[str, bool] = {}
+    image_results = {}
     if args.mode in ['image', 'both']:
         logging.info("\n" + "="*60)
         logging.info(f"Starting Docker image builds for {len(projects)} projects")
@@ -270,8 +269,8 @@ def main():
             logging.error(f"❌ Failed to save image results: {e}")
 
     # Fuzzer building workflow
-    fuzzer_results: Dict[str, bool] = {}
-    fuzz_projects: List[str] = []
+    fuzzer_results = {}
+    fuzz_projects = []
     if args.mode in ['fuzzer', 'both']:
         logging.info("\n" + "="*60)
         logging.info(f"Starting fuzzer builds ({args.sanitizer} sanitizer)")

From 62847f359d2d55255a68b043df6805a17933a935 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Fri, 25 Jul 2025 18:22:32 +0000
Subject: [PATCH 049/134] remove stdout stderr in build fuzz

---
 fuzz/build_fuzz.py | 20 ++++----------------
 1 file changed, 4 insertions(+), 16 deletions(-)

diff --git a/fuzz/build_fuzz.py b/fuzz/build_fuzz.py
index 96e1f18..3f0910c 100644
--- a/fuzz/build_fuzz.py
+++ b/fuzz/build_fuzz.py
@@ -18,7 +18,7 @@
     python3 build_fuzz.py --mode both [project_list] --oss-fuzz-dir /path/to/oss-fuzz --sanitizer address
 
 Example:
-    python3 ./fuzz/build_fuzz.py --mode both data/valid_projects.txt \
+    python3 ./fuzz/build_fuzz.py --mode both data/valid_projects2.txt \
         --oss-fuzz-dir ./fuzz/oss-fuzz \
         --sanitizer address \
         --workers 8
@@ -66,7 +66,7 @@ def run_command(
     allowed_exit_codes: Maybe[list[int]] = Maybe.empty,
     skip_yes: bool = False
 ) -> int:
-    """Execute a command and return the exit code"""
+    """Execute a command and return the exit code (no stdout/stderr capture)"""
     allowed_codes = allowed_exit_codes.value_or([0])
     cmd_str = f"yes | {cmd}" if not skip_yes else cmd
     logging.debug(f"Executing command [{project}]: {cmd_str}")
@@ -75,14 +75,9 @@ def run_command(
         process = subprocess.Popen(
             cmd_str,
             shell=True,
-            cwd=str(oss_fuzz_dir),
-            stdout=subprocess.PIPE if skip_yes else subprocess.DEVNULL,
-            stderr=subprocess.PIPE if skip_yes else subprocess.DEVNULL,
-            text=True
+            cwd=str(oss_fuzz_dir)
         )
-
-        stdout, stderr = process.communicate()
-        exit_code = process.returncode
+        exit_code = process.wait()
 
         if exit_code in allowed_codes:
             return exit_code
@@ -90,13 +85,6 @@ def run_command(
         error_msg = f"Command failed (exit code: {exit_code})"
         if project:
             error_msg += f" for project: {project}"
-
-        if stderr and stderr.strip():
-            error_msg += f"\nError output:\n{stderr.strip()}"
-
-        if stdout and stdout.strip():
-            error_msg += f"\nOutput:\n{stdout.strip()}"
-
         raise CommandError(error_msg, project=project, exit_code=exit_code)
 
     except FileNotFoundError as e:

From 4a5f9ff3346ac7b8333c995e67349798558fc534 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Fri, 25 Jul 2025 19:46:57 +0000
Subject: [PATCH 050/134] test successfully

---
 fuzz/build_fuzz.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fuzz/build_fuzz.py b/fuzz/build_fuzz.py
index 3f0910c..481fba9 100644
--- a/fuzz/build_fuzz.py
+++ b/fuzz/build_fuzz.py
@@ -18,7 +18,7 @@
     python3 build_fuzz.py --mode both [project_list] --oss-fuzz-dir /path/to/oss-fuzz --sanitizer address
 
 Example:
-    python3 ./fuzz/build_fuzz.py --mode both data/valid_projects2.txt \
+    python3 ./fuzz/build_fuzz.py --mode both data/valid_projects3.txt \
         --oss-fuzz-dir ./fuzz/oss-fuzz \
         --sanitizer address \
         --workers 8

From 3f736cb00b57ee2f0124120ead196a27f0d3da9b Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Fri, 25 Jul 2025 22:56:58 +0000
Subject: [PATCH 051/134] rename run fuzz ds to run fuzz print1

---
 fuzz/{run_fuzz_ds.py => run_fuzz_print1.py} | 226 ++++----------------
 1 file changed, 42 insertions(+), 184 deletions(-)
 rename fuzz/{run_fuzz_ds.py => run_fuzz_print1.py} (53%)

diff --git a/fuzz/run_fuzz_ds.py b/fuzz/run_fuzz_print1.py
similarity index 53%
rename from fuzz/run_fuzz_ds.py
rename to fuzz/run_fuzz_print1.py
index b0c4c59..640d527 100644
--- a/fuzz/run_fuzz_ds.py
+++ b/fuzz/run_fuzz_print1.py
@@ -2,23 +2,17 @@
 # -*- coding: utf-8 -*-
 
 """
-run_fuzz_all_targets_input.py
-
-Enhanced with input instrumentation to capture fuzzing inputs.
+run_fuzz_print1.py
 
 This script employs a two-phase approach for fuzz testing:
-1. Discovery phase: Traverse all specified projects to collect all executable fuzz targets in each project
+1. Discovery phase: Traverse all specified projects to collect all executable fuzz targets which start with "fuzz_" and end with "print1" in each project
 2. Execution phase: Create a task pool containing all (project, target) pairs and execute them in parallel using multiprocessing
-3. Input capture: Instrument fuzz targets to record all inputs during fuzzing
 
-Key Enhancements:
-- Added input instrumentation to capture fuzzing inputs
-- Created dedicated input storage directory structure
-- Added AST-based function instrumentation
-- Added input recording and analysis capabilities
+This approach maximizes CPU utilization and provides clear overall progress.
+
+Usage: python3 run_fuzz_print1.py [project_list_file] [--oss-fuzz-dir /your/custom/path/to/oss-fuzz] [--timeout seconds] [--workers N]
+Example: python3 fuzz/run_fuzz_print1.py data/valid_projects3.txt --oss-fuzz-dir /home/jiayiguo/FuzzAug/fuzz/oss-fuzz --timeout 60 --workers 4
 
-Usage: python3 run_fuzz_all_targets_input [project_list_file] [--timeout seconds] [--workers N] [--record-inputs]
-Example: python3 fuzz/run_fuzz_all_targets_input.py data/valid_projects.txt --timeout 60 --workers 4 --record-inputs
 """
 
 import os
@@ -27,120 +21,29 @@
 import argparse
 import logging
 import time
-import ast
-import astor
-import shutil
 from datetime import datetime
 from pathlib import Path
 from multiprocessing import Pool, cpu_count
 from returns.maybe import Maybe, Nothing, Some
 
-# --- Global configuration ---
-HOME_DIR = Path.home()
-OSS_FUZZ_DIR = HOME_DIR / "FuzzAug" / "fuzz" / "oss-fuzz"
-LOG_DIR = OSS_FUZZ_DIR / "run_logs3"
-INPUT_DIR = OSS_FUZZ_DIR / "fuzz_inputs"  # Directory to store captured inputs
-
-class FunctionInstrumenter(ast.NodeTransformer):
-    def __init__(self, target_functions: list[str]):
-        self.target_functions = target_functions
-
-    def visit_FunctionDef(self, node):
-        if node.name in self.target_functions:
-            print_stmt = ast.parse(f'print("INPUT_CAPTURE: {node.name} called")').body[0]
-            node.body.insert(0, print_stmt)
-        return self.generic_visit(node)
-
-def instrument_code(source_code: str, target_functions: list[str]) -> str:
-    tree = ast.parse(source_code)
-    instrumenter = FunctionInstrumenter(target_functions)
-    instrumented_tree = instrumenter.visit(tree)
-    ast.fix_missing_locations(instrumented_tree)
-    return ast.unparse(instrumented_tree)
-def prepare_target_for_input_capture(project_name: str, target_name: str) -> Path:
-    """
-    Prepare a Python fuzz target for input capture by instrumenting its code
-    
-    Args:
-        project_name: Name of the project
-        target_name: Name of the target to instrument
-        
-    Returns:
-        Path to the instrumented Python script
-    """
-    try:
-        # Create project-specific input directory
-        project_input_dir = INPUT_DIR / project_name
-        project_input_dir.mkdir(parents=True, exist_ok=True)
-        
-        # Create target-specific input directory
-        target_input_dir = project_input_dir / target_name
-        target_input_dir.mkdir(exist_ok=True)
-        
-        logging.info(f"📁 Created input directory: {target_input_dir}")
-        
-        # Locate Python source file (support multiple extensions)
-        possible_extensions = [".py", ".pyw"]
-        source_file = None
-        
-        # Try possible file extensions
-        for ext in possible_extensions:
-            candidate = OSS_FUZZ_DIR / "projects" / project_name / f"{target_name}{ext}"
-            if candidate.exists():
-                source_file = candidate
-                break
-        
-        if not source_file:
-            logging.warning(f"⚠️ Python source file not found for: {target_name}")
-            return None
-        
-        # Backup original source file
-        backup_file = source_file.with_name(f"{target_name}_original{source_file.suffix}")
-        if not backup_file.exists():
-            shutil.copy2(source_file, backup_file)
-            logging.info(f"💾 Backed up original file: {backup_file}")
-        
-        # Read source code
-        with open(source_file, "r") as f:
-            source_code = f.read()
-        
-        # Instrument the code - use Python-specific entry function
-        possible_entry_functions = ["TestInput", "TestOneInput"]
-        instrumented_code = instrument_code(source_code, possible_entry_functions)
-
-        
-        # Write instrumented code to a new file with same extension
-        instrumented_file = source_file.with_name(f"{target_name}_instrumented{source_file.suffix}")
-        with open(instrumented_file, "w") as f:
-            f.write(instrumented_code)
-        
-        logging.info(f"🔧 Instrumented {target_name} for input capture")
-        
-        # Python doesn't need rebuilding - return instrumented script path
-        return instrumented_file
-        
-    except Exception as e:
-        logging.error(f"❌ Failed to instrument {target_name}: {str(e)}")
-        return None
+
+
+
 
 def run_command(
     cmd: str,
     log_msg: str,
     logger: logging.Logger,
     allowed_exit_codes: Maybe[list[int]] = Nothing,
-    timeout: int = 3600,  # Default 1-hour timeout
-    env: Maybe[dict] = Nothing  # Use Maybe instead of Optional
+    timeout: int = 3600  # Default 1-hour timeout
 ) -> bool:
-    """Execute commands with real-time logging, precise error handling, and input capture"""
+    """Execute commands with real-time logging and precise error handling"""
     allowed_codes = allowed_exit_codes.value_or([])
     logger.info(f"▶️ {log_msg}...")
     logger.debug(f"   $ {cmd}")
 
     process = None
     try:
-        # Convert Maybe[dict] to actual environment or None
-        env_dict = env.value_or(None)
-        
         process = subprocess.Popen(
             cmd,
             shell=True,
@@ -148,8 +51,7 @@ def run_command(
             stderr=subprocess.STDOUT,
             text=True,
             encoding="utf-8",
-            errors="replace",
-            env=env_dict
+            errors="replace"
         )
 
         start_time = time.time()
@@ -166,11 +68,7 @@ def run_command(
             if process.stdout:
                 line = process.stdout.readline()
                 if line:
-                    # Capture input data when detected
-                    if "INPUT_CAPTURE:" in line:
-                        logger.debug(f"📥 {line.strip()}")
-                    else:
-                        logger.debug(line.strip())
+                    logger.debug(line.strip())
             else:
                 time.sleep(0.1)
 
@@ -200,10 +98,11 @@ def run_command(
             except Exception:
                 pass
 
-def discover_targets(project_name: str, logger: logging.Logger) -> list[str]:
+
+def discover_targets(project_name: str, oss_fuzz_dir: Path, logger: logging.Logger) -> list[str]:
     """Discover fuzz targets for a project (starting with 'fuzz_', no extension, and executable)"""
-    out_dir = OSS_FUZZ_DIR / "build" / "out" / project_name
-    targets = []  # Use built-in list type
+    out_dir = oss_fuzz_dir / "build" / "out" / project_name
+    targets: list[str] = []
 
     if not out_dir.is_dir():
         logger.warning(f"Build output directory for project {project_name} does not exist: {out_dir}")
@@ -215,6 +114,7 @@ def discover_targets(project_name: str, logger: logging.Logger) -> list[str]:
                 if (f.is_file() and
                         f.name.startswith("fuzz_") and
                         '.' not in f.name and
+                        f.name.endswith("print1") and
                         os.access(f, os.X_OK)):
                     targets.append(f.name)
             except OSError as e:
@@ -227,11 +127,12 @@ def discover_targets(project_name: str, logger: logging.Logger) -> list[str]:
 
     return targets
 
-def run_single_target(project_name: str, target_name: str, timeout: int, record_inputs: bool = False) -> tuple[bool, str, str]:
-    """Execute fuzz testing workflow for a single (project, target) pair with input capture"""
+
+def run_single_target(project_name: str, target_name: str, timeout: int, oss_fuzz_dir: Path) -> tuple[bool, str, str]:
+    """Execute fuzz testing workflow for a single (project, target) pair"""
     task_id = f"{project_name}_{target_name}"
     logger = logging.getLogger(task_id)
-
+    LOG_DIR = oss_fuzz_dir / "run_pj3_logs"
     try:
         logger.setLevel(logging.DEBUG)
         LOG_DIR.mkdir(parents=True, exist_ok=True)
@@ -244,44 +145,21 @@ def run_single_target(project_name: str, target_name: str, timeout: int, record_
         )
         file_handler.setFormatter(formatter)
         logger.addHandler(file_handler)
-        os.chdir(OSS_FUZZ_DIR)
-
-        logger.info(f"🚀 Starting test -> Project: {project_name}, Target: {target_name}")
-
-        # Prepare environment for input capture if requested
-        env = Nothing
-        instrumented_file = None
-
-        if record_inputs:
-            instrumented_file = prepare_target_for_input_capture(project_name, target_name)
-            if not instrumented_file:
-                logger.error(f"❌ Failed to instrument {target_name}")
-                return False, project_name, target_name
-
-            logger.info(f"🔧 Instrumented {target_name} for input capture")
-
-            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-            input_dir = INPUT_DIR / project_name / target_name / timestamp
-            input_dir.mkdir(parents=True, exist_ok=True)
+        os.chdir(oss_fuzz_dir)
 
-            env_dict = os.environ.copy()
-            env_dict["FUZZ_INPUT_DIR"] = str(input_dir)
-            env = Some(env_dict)
-
-            logger.info(f"📁 Inputs will be stored in: {input_dir}")
-
-        # Prepare command
-        target_to_run = instrumented_file.name if instrumented_file else target_name
-        cmd = f"python3 infra/helper.py run_fuzzer {project_name} {target_to_run} -- -max_total_time={timeout}"
+    except (OSError, PermissionError) as e:
+        print(f"❌ Critical error occurred during initialization of task {task_id}: {e}")
+        return False, project_name, target_name
 
-        # Execute command
+    logger.info(f"🚀 Starting test -> Project: {project_name}, Target: {target_name}")
+    try:
+        cmd = f"python3 infra/helper.py run_fuzzer {project_name} {target_name} -- -max_total_time={timeout}"
         success = run_command(
             cmd,
             f"Running Target '{target_name}' (timeout={timeout}s)",
             logger,
-            allowed_exit_codes=Some([1, 124]),
-            timeout=timeout + 300,
-            env=env
+            allowed_exit_codes=Some([1, 124]),  # 1=Crashes found, 124=Timeout
+            timeout=timeout + 300
         )
 
         if success:
@@ -294,24 +172,11 @@ def run_single_target(project_name: str, target_name: str, timeout: int, record_
     except Exception as e:
         logger.exception(f"💥 Unexpected error occurred while running target '{target_name}': {e}")
         return False, project_name, target_name
-
     finally:
         for handler in logger.handlers[:]:
             handler.close()
             logger.removeHandler(handler)
 
-        if record_inputs and env.is_just:
-            try:
-                input_dir = Path(env.unwrap()["FUZZ_INPUT_DIR"])
-                if input_dir.exists():
-                    input_count = len(list(input_dir.glob("*")))
-                    logger.info(f"📥 Captured {input_count} inputs for {target_name}")
-            except Exception as e:
-                logger.warning(f"⚠️ Failed to summarize captured inputs: {e}")
-
-    # Final fallback (defensive)
-    return False, project_name, target_name
-
 
 def main():
     # Configure main process logging
@@ -323,13 +188,16 @@ def main():
     logger = logging.getLogger("Main")
 
     # Set up command line argument parsing
-    parser = argparse.ArgumentParser(description="OSS-Fuzz Parallel Fuzz Testing Tool with Input Capture")
+    parser = argparse.ArgumentParser(description="OSS-Fuzz Parallel Fuzz Testing Tool")
     parser.add_argument("project_list", help="File path containing list of project names")
+    parser.add_argument("--oss-fuzz-dir", type=Path, required=True, help="Path to the oss-fuzz directory (e.g., /path/to/oss-fuzz)")
     parser.add_argument("--timeout", type=int, default=60, help="Timeout duration per Fuzz Target (seconds)")
     parser.add_argument("--workers", type=int, default=cpu_count(), help="Number of parallel worker processes")
-    parser.add_argument("--record-inputs", action="store_true", help="Enable input capture during fuzzing")
     args = parser.parse_args()
 
+    OSS_FUZZ_DIR = args.oss_fuzz_dir.resolve()
+    LOG_DIR = OSS_FUZZ_DIR / "run_ds_logs"
+
     # 1. Read project list file
     try:
         project_path = Path(args.project_list)
@@ -345,13 +213,13 @@ def main():
 
     # 2. Discovery phase: Collect all fuzz targets
     logger.info("\n" + "=" * 20 + " Phase 1: Discover all Fuzz Targets " + "=" * 20)
-    all_fuzz_tasks = []  # Use built-in list type
-    
+    all_fuzz_tasks: list[tuple[str, str]] = []  # Store (project, target) tuples
     try:
         original_cwd = Path.cwd()  # Save current working directory
         os.chdir(OSS_FUZZ_DIR)     # Switch to OSS-Fuzz directory
         for project_name in projects:
-            targets = discover_targets(project_name, logger)
+            targets = discover_targets(project_name, OSS_FUZZ_DIR, logger)
+
             if targets:
                 logger.info(f"🔍 Discovered {len(targets)} targets in project '{project_name}': {', '.join(targets)}")
                 for target in targets:
@@ -371,17 +239,13 @@ def main():
         logger.info("🤷 No executable Fuzz Targets found. Program exits.")
         sys.exit(0)
 
-    # 3. Execution phase: Parallel fuzz testing with input capture
+    # 3. Execution phase: Parallel fuzz testing
     logger.info(f"\n✅ Discovery phase completed. Found a total of {len(all_fuzz_tasks)} fuzz tasks.")
-    if args.record_inputs:
-        logger.info("🔔 Input capture is ENABLED. All fuzzing inputs will be recorded.")
-        logger.info(f"📁 Inputs will be stored in: {INPUT_DIR}")
-    
     logger.info("=" * 20 + " Phase 2: Parallel Fuzzing Execution " + "=" * 23)
     logger.info(f"🚀 Starting parallel testing with {args.workers} worker processes (timeout per target: {args.timeout}s)...")
 
-    # Prepare task parameters (project, target, timeout, record_inputs)
-    tasks_with_args = [(p, t, args.timeout, args.record_inputs) for p, t in all_fuzz_tasks]
+    # Prepare task parameters (project, target, timeout)
+    tasks_with_args = [(p, t, args.timeout, OSS_FUZZ_DIR) for p, t in all_fuzz_tasks]
     results: list[tuple[bool, str, str]] = []  # Store results (success, project, target)
     
     # Execute in parallel using process pool
@@ -402,12 +266,6 @@ def main():
 
     # Output statistical summary
     logger.info(f"📊 Fuzzing completed: Success {success_count}/{total_tasks}, Failed {failed_count}/{total_tasks}")
-    
-    if args.record_inputs:
-        total_inputs = sum(len(list((INPUT_DIR / p / t).glob("*/*.bin"))) for p, t in all_fuzz_tasks)
-        logger.info(f"📥 Total inputs captured: {total_inputs}")
-        logger.info(f"💾 Inputs stored at: {INPUT_DIR}")
-    
     if failed_tasks:
         logger.error("❌ The following Fuzz Targets failed:")
         for project, target in failed_tasks:

From d5c2b2710641d7708168f60385e42cea4fc4b26f Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Fri, 25 Jul 2025 22:57:56 +0000
Subject: [PATCH 052/134] Add print(data) to fuzz targets and save the modified
 files with a "_print1" suffix

---
 fuzz/modify_fuzz_files.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fuzz/modify_fuzz_files.py b/fuzz/modify_fuzz_files.py
index eb7b37e..de8333f 100644
--- a/fuzz/modify_fuzz_files.py
+++ b/fuzz/modify_fuzz_files.py
@@ -34,7 +34,7 @@ def add_print_to_testoneinput(file_path):
 
 def main():
     projects_path = "/home/jiayiguo/FuzzAug/fuzz/oss-fuzz/projects"
-    valid_projects_file = "valid_projects.txt"
+    valid_projects_file = "data/valid_projects.txt"
 
     with open(valid_projects_file, 'r') as f:
         projects = [line.strip() for line in f if line.strip()]
@@ -54,7 +54,7 @@ def main():
                         new_content = add_print_to_testoneinput(file_path)
                         
                         # 保存修改后的文件（添加_print后缀）
-                        new_file_path = file_path.rsplit('.', 1)[0] + '_print.py'
+                        new_file_path = file_path.rsplit('.', 1)[0] + '_print1.py'
                         with open(new_file_path, 'w') as f:
                             f.write(new_content)
                         print(f"Processed: {file_path} -> {new_file_path}")

From d74a6bffcbbfa2e569f31f32e2c92910a9bd7b14 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Fri, 25 Jul 2025 23:11:14 +0000
Subject: [PATCH 053/134] Update the oss-fuzz submodule commit

---
 fuzz/oss-fuzz | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fuzz/oss-fuzz b/fuzz/oss-fuzz
index f73b405..4bbbeb5 160000
--- a/fuzz/oss-fuzz
+++ b/fuzz/oss-fuzz
@@ -1 +1 @@
-Subproject commit f73b405d84e886bac90f8b15200230f08a2709c9
+Subproject commit 4bbbeb59599ad38b7984191e2e83bc9a61f7fd4b

From c6a6c23e1e9e5a1f1fa7cba92e6a677243de9a85 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Fri, 25 Jul 2025 23:14:47 +0000
Subject: [PATCH 054/134] Rename run_fuzz_print1.py to run_fuzz_all_targets_print1.py

---
 fuzz/{run_fuzz_print1.py => run_fuzz_all_targets_print1.py} | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
 rename fuzz/{run_fuzz_print1.py => run_fuzz_all_targets_print1.py} (97%)

diff --git a/fuzz/run_fuzz_print1.py b/fuzz/run_fuzz_all_targets_print1.py
similarity index 97%
rename from fuzz/run_fuzz_print1.py
rename to fuzz/run_fuzz_all_targets_print1.py
index 640d527..7f1b5c3 100644
--- a/fuzz/run_fuzz_print1.py
+++ b/fuzz/run_fuzz_all_targets_print1.py
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 
 """
-run_fuzz_print1.py
+run_fuzz_all_targets_print1.py
 
 This script employs a two-phase approach for fuzz testing:
 1. Discovery phase: Traverse all specified projects to collect all executable fuzz targets which start with "fuzz_" and end with "print1" in each project
@@ -10,8 +10,8 @@
 
 This approach maximizes CPU utilization and provides clear overall progress[2](@ref).
 
-Usage: python3 run_fuzz_print1.py [project_list_file] [--oss-fuzz-dir /your/custom/path/to/oss-fuzz] [--timeout seconds] [--workers N]
-Example: python3 fuzz/run_fuzz_print1.py data/valid_projects3.txt --oss-fuzz-dir /home/jiayiguo/FuzzAug/fuzz/oss-fuzz --timeout 60 --workers 4
+Usage: python3 run_fuzz_all_targets_print1.py [project_list_file] [--oss-fuzz-dir /your/custom/path/to/oss-fuzz] [--timeout seconds] [--workers N]
+Example: python3 fuzz/run_fuzz_all_targets_print1.py data/valid_projects3.txt --oss-fuzz-dir /home/jiayiguo/FuzzAug/fuzz/oss-fuzz --timeout 60 --workers 4
 
 """
 

From 9d5c9b636d7ceb6368fe4be9c4cca79602fa1487 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Fri, 25 Jul 2025 23:19:31 +0000
Subject: [PATCH 055/134] Update the discover_targets docstring

---
 fuzz/run_fuzz_all_targets_print1.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fuzz/run_fuzz_all_targets_print1.py b/fuzz/run_fuzz_all_targets_print1.py
index 7f1b5c3..ce557c3 100644
--- a/fuzz/run_fuzz_all_targets_print1.py
+++ b/fuzz/run_fuzz_all_targets_print1.py
@@ -100,7 +100,7 @@ def run_command(
 
 
 def discover_targets(project_name: str, oss_fuzz_dir: Path, logger: logging.Logger) -> list[str]:
-    """Discover fuzz targets for a project (starting with 'fuzz_', no extension, and executable)"""
+    """Discover fuzz targets for a project (starting with 'fuzz_', ending with 'print1', no extension, and executable)"""
     out_dir = oss_fuzz_dir / "build" / "out" / project_name
     targets: list[str] = []
 

From 521045ea3a0d075de47ec461779241f5620862a4 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Fri, 25 Jul 2025 23:44:13 +0000
Subject: [PATCH 056/134] Add --oss-fuzz-dir option and remove run_fuzz_all_targets_input.py

---
 fuzz/run_fuzz_all_targets.py       |  31 +-
 fuzz/run_fuzz_all_targets_input.py | 463 -----------------------------
 2 files changed, 18 insertions(+), 476 deletions(-)
 delete mode 100644 fuzz/run_fuzz_all_targets_input.py

diff --git a/fuzz/run_fuzz_all_targets.py b/fuzz/run_fuzz_all_targets.py
index 74bed97..fe147d8 100644
--- a/fuzz/run_fuzz_all_targets.py
+++ b/fuzz/run_fuzz_all_targets.py
@@ -10,8 +10,9 @@
 
 This approach maximizes CPU utilization and provides clear overall progress[2](@ref).
 
-Usage: python3 run_fuzz_all_targets.py [project_list_file] [--timeout seconds] [--workers N]
-Example: python3 fuzz/run_fuzz_all_targets.py data/valid_projects.txt --timeout 60 --workers 4
+Usage: python3 run_fuzz_all_targets.py [project_list_file] [--oss-fuzz-dir /your/custom/path/to/oss-fuzz] [--timeout seconds] [--workers N]
+Example: python3 fuzz/run_fuzz_all_targets.py data/valid_projects3.txt --oss-fuzz-dir /home/jiayiguo/FuzzAug/fuzz/oss-fuzz --timeout 60 --workers 4
+
 """
 
 import os
@@ -25,10 +26,8 @@
 from multiprocessing import Pool, cpu_count
 from returns.maybe import Maybe, Nothing, Some
 
-# --- Global configuration ---
-HOME_DIR = Path.home()
-OSS_FUZZ_DIR = HOME_DIR / "FuzzAug" / "fuzz" / "oss-fuzz"
-LOG_DIR = OSS_FUZZ_DIR / "run_logs2"
+
+
 
 
 def run_command(
@@ -100,9 +99,9 @@ def run_command(
                 pass
 
 
-def discover_targets(project_name: str, logger: logging.Logger) -> list[str]:
+def discover_targets(project_name: str, oss_fuzz_dir: Path, logger: logging.Logger) -> list[str]:
     """Discover fuzz targets for a project (starting with 'fuzz_', no extension, and executable)"""
-    out_dir = OSS_FUZZ_DIR / "build" / "out" / project_name
+    out_dir = oss_fuzz_dir / "build" / "out" / project_name
     targets: list[str] = []
 
     if not out_dir.is_dir():
@@ -115,6 +114,7 @@ def discover_targets(project_name: str, logger: logging.Logger) -> list[str]:
                 if (f.is_file() and
                         f.name.startswith("fuzz_") and
                         '.' not in f.name and
+                        # f.name.endswith("print1") and
                         os.access(f, os.X_OK)):
                     targets.append(f.name)
             except OSError as e:
@@ -128,11 +128,11 @@ def discover_targets(project_name: str, logger: logging.Logger) -> list[str]:
     return targets
 
 
-def run_single_target(project_name: str, target_name: str, timeout: int) -> tuple[bool, str, str]:
+def run_single_target(project_name: str, target_name: str, timeout: int, oss_fuzz_dir: Path) -> tuple[bool, str, str]:
     """Execute fuzz testing workflow for a single (project, target) pair"""
     task_id = f"{project_name}_{target_name}"
     logger = logging.getLogger(task_id)
-
+    LOG_DIR = oss_fuzz_dir / "run_pj3_logs"
     try:
         logger.setLevel(logging.DEBUG)
         LOG_DIR.mkdir(parents=True, exist_ok=True)
@@ -145,7 +145,7 @@ def run_single_target(project_name: str, target_name: str, timeout: int) -> tupl
         )
         file_handler.setFormatter(formatter)
         logger.addHandler(file_handler)
-        os.chdir(OSS_FUZZ_DIR)
+        os.chdir(oss_fuzz_dir)
 
     except (OSError, PermissionError) as e:
         print(f"❌ Critical error occurred during initialization of task {task_id}: {e}")
@@ -190,10 +190,14 @@ def main():
     # Set up command line argument parsing
     parser = argparse.ArgumentParser(description="OSS-Fuzz Parallel Fuzz Testing Tool")
     parser.add_argument("project_list", help="File path containing list of project names")
+    parser.add_argument("--oss-fuzz-dir", type=Path, required=True, help="Path to the oss-fuzz directory (e.g., /path/to/oss-fuzz)")
     parser.add_argument("--timeout", type=int, default=60, help="Timeout duration per Fuzz Target (seconds)")
     parser.add_argument("--workers", type=int, default=cpu_count(), help="Number of parallel worker processes")
     args = parser.parse_args()
 
+    OSS_FUZZ_DIR = args.oss_fuzz_dir.resolve()
+    LOG_DIR = OSS_FUZZ_DIR / "run_fuzz_all_targets_logs"
+
     # 1. Read project list file
     try:
         project_path = Path(args.project_list)
@@ -214,7 +218,8 @@ def main():
         original_cwd = Path.cwd()  # Save current working directory
         os.chdir(OSS_FUZZ_DIR)     # Switch to OSS-Fuzz directory
         for project_name in projects:
-            targets = discover_targets(project_name, logger)
+            targets = discover_targets(project_name, OSS_FUZZ_DIR, logger)
+
             if targets:
                 logger.info(f"🔍 Discovered {len(targets)} targets in project '{project_name}': {', '.join(targets)}")
                 for target in targets:
@@ -240,7 +245,7 @@ def main():
     logger.info(f"🚀 Starting parallel testing with {args.workers} worker processes (timeout per target: {args.timeout}s)...")
 
     # Prepare task parameters (project, target, timeout)
-    tasks_with_args = [(p, t, args.timeout) for p, t in all_fuzz_tasks]
+    tasks_with_args = [(p, t, args.timeout, OSS_FUZZ_DIR) for p, t in all_fuzz_tasks]
     results: list[tuple[bool, str, str]] = []  # Store results (success, project, target)
     
     # Execute in parallel using process pool
diff --git a/fuzz/run_fuzz_all_targets_input.py b/fuzz/run_fuzz_all_targets_input.py
deleted file mode 100644
index 4d38ffb..0000000
--- a/fuzz/run_fuzz_all_targets_input.py
+++ /dev/null
@@ -1,463 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-"""
-run_fuzz_all_targets_input.py
-
-Enhanced with input instrumentation to capture fuzzing inputs.
-
-This script employs a two-phase approach for fuzz testing:
-1. Discovery phase: Traverse all specified projects to collect all executable fuzz targets in each project
-2. Execution phase: Create a task pool containing all (project, target) pairs and execute them in parallel using multiprocessing
-3. Input capture: Instrument fuzz targets to record all inputs during fuzzing
-
-Key Enhancements:
-- Added input instrumentation to capture fuzzing inputs
-- Created dedicated input storage directory structure
-- Added AST-based function instrumentation
-- Added input recording and analysis capabilities
-
-Usage: python3 run_fuzz_all_targets_input [project_list_file] [--timeout seconds] [--workers N] [--record-inputs]
-Example: python3 fuzz/run_fuzz_all_targets_input.py data/valid_projects.txt --timeout 60 --workers 4 --record-inputs
-"""
-
-import os
-import sys
-import subprocess
-import argparse
-import logging
-import time
-import ast
-import astor
-import shutil
-from datetime import datetime
-from pathlib import Path
-from multiprocessing import Pool, cpu_count
-from returns.maybe import Maybe, Nothing, Some
-
-# --- Global configuration ---
-HOME_DIR = Path.home()
-OSS_FUZZ_DIR = HOME_DIR / "FuzzAug" / "fuzz" / "oss-fuzz"
-LOG_DIR = OSS_FUZZ_DIR / "run_logs3"
-INPUT_DIR = OSS_FUZZ_DIR / "fuzz_inputs"  # Directory to store captured inputs
-
-class FunctionInstrumenter(ast.NodeTransformer):
-    """AST transformer to instrument function entries for input recording"""
-    def __init__(self, target_functions: list[str]):
-        self.target_functions = target_functions
-        super().__init__()
-    
-    def visit_FunctionDef(self, node):
-        """Instrument function definition to add input recording"""
-        # 只对目标函数进行插桩
-        if node.name in self.target_functions:
-            # Add print statement at the beginning of the function
-            input_record_stmt = ast.Expr(
-                value=ast.Call(
-                    func=ast.Name(id='print', ctx=ast.Load()),
-                    args=[ast.Constant(value=f"INPUT_CAPTURE: {node.name} received input: {{data}}")],
-                    keywords=[]
-                )
-            )
-            
-            # Insert the print statement at the top of the function body
-            if node.body:
-                node.body.insert(0, input_record_stmt)
-        
-        return node
-
-
-def instrument_code(source_code: str, target_function: list[str]) -> str:
-    """
-    Instrument source code to record inputs for specific function
-    
-    Args:
-        source_code: Original source code
-        target_function: Name of the function to instrument
-        
-    Returns:
-        Instrumented source code
-    """
-    try:
-        # Parse the source code into an AST
-        tree = ast.parse(source_code)
-        
-        # Create instrumenter and apply transformations
-        instrumenter = FunctionInstrumenter(target_functions)
-        modified_tree = instrumenter.visit(tree)
-        
-        # Add missing location information for generated nodes
-        ast.fix_missing_locations(modified_tree)
-        
-        # Generate the modified source code
-        return astor.to_source(modified_tree)
-    except Exception as e:
-        logging.error(f"🔧 Code instrumentation failed: {str(e)}")
-        return source_code  # Return original if instrumentation fails
-
-def prepare_target_for_input_capture(project_name: str, target_name: str) -> Path:
-    """
-    Prepare a Python fuzz target for input capture by instrumenting its code
-    
-    Args:
-        project_name: Name of the project
-        target_name: Name of the target to instrument
-        
-    Returns:
-        Path to the instrumented Python script
-    """
-    try:
-        # Create project-specific input directory
-        project_input_dir = INPUT_DIR / project_name
-        project_input_dir.mkdir(parents=True, exist_ok=True)
-        
-        # Create target-specific input directory
-        target_input_dir = project_input_dir / target_name
-        target_input_dir.mkdir(exist_ok=True)
-        
-        logging.info(f"📁 Created input directory: {target_input_dir}")
-        
-        # Locate Python source file (support multiple extensions)
-        possible_extensions = [".py", ".pyw"]
-        source_file = None
-        
-        # Try possible file extensions
-        for ext in possible_extensions:
-            candidate = OSS_FUZZ_DIR / "projects" / project_name / f"{target_name}{ext}"
-            if candidate.exists():
-                source_file = candidate
-                break
-        
-        if not source_file:
-            logging.warning(f"⚠️ Python source file not found for: {target_name}")
-            return None
-        
-        # Backup original source file
-        backup_file = source_file.with_name(f"{target_name}_original{source_file.suffix}")
-        if not backup_file.exists():
-            shutil.copy2(source_file, backup_file)
-            logging.info(f"💾 Backed up original file: {backup_file}")
-        
-        # Read source code
-        with open(source_file, "r") as f:
-            source_code = f.read()
-        
-        # Instrument the code - use Python-specific entry function
-        possible_entry_functions = ["TestInput", "TestOneInput"]
-        instrumented_code = instrument_code(source_code, possible_entry_functions)
-
-        
-        # Write instrumented code to a new file with same extension
-        instrumented_file = source_file.with_name(f"{target_name}_instrumented{source_file.suffix}")
-        with open(instrumented_file, "w") as f:
-            f.write(instrumented_code)
-        
-        logging.info(f"🔧 Instrumented {target_name} for input capture")
-        
-        # Python doesn't need rebuilding - return instrumented script path
-        return instrumented_file
-        
-    except Exception as e:
-        logging.error(f"❌ Failed to instrument {target_name}: {str(e)}")
-        return None
-
-def run_command(
-    cmd: str,
-    log_msg: str,
-    logger: logging.Logger,
-    allowed_exit_codes: Maybe[list[int]] = Nothing,
-    timeout: int = 3600,  # Default 1-hour timeout
-    env: Maybe[dict] = Nothing  # Use Maybe instead of Optional
-) -> bool:
-    """Execute commands with real-time logging, precise error handling, and input capture"""
-    allowed_codes = allowed_exit_codes.value_or([])
-    logger.info(f"▶️ {log_msg}...")
-    logger.debug(f"   $ {cmd}")
-
-    process = None
-    try:
-        # Convert Maybe[dict] to actual environment or None
-        env_dict = env.value_or(None)
-        
-        process = subprocess.Popen(
-            cmd,
-            shell=True,
-            stdout=subprocess.PIPE,
-            stderr=subprocess.STDOUT,
-            text=True,
-            encoding="utf-8",
-            errors="replace",
-            env=env_dict
-        )
-
-        start_time = time.time()
-        while process.poll() is None:
-            if time.time() - start_time > timeout:
-                logger.error(f"⌛ Command timed out after {timeout} seconds")
-                process.terminate()
-                try:
-                    process.wait(timeout=5)
-                except subprocess.TimeoutExpired:
-                    process.kill()
-                return False
-
-            if process.stdout:
-                line = process.stdout.readline()
-                if line:
-                    # Capture input data when detected
-                    if "INPUT_CAPTURE:" in line:
-                        logger.debug(f"📥 {line.strip()}")
-                    else:
-                        logger.debug(line.strip())
-            else:
-                time.sleep(0.1)
-
-        exit_code = process.returncode
-        if exit_code not in [0, *allowed_codes]:
-            logger.error(f"❌ Command execution failed, exit code: {exit_code}")
-            return False
-        return True
-
-    except FileNotFoundError:
-        logger.error(f"🔍 Command not found: {cmd.split()[0]}")
-        return False
-    except PermissionError:
-        logger.error(f"🔒 Insufficient permissions to execute command: {cmd}")
-        return False
-    except subprocess.SubprocessError as e:
-        logger.exception(f"💥 Subprocess error: {e}")
-        return False
-    except OSError as e:
-        logger.exception(f"💥 Operating system error during command execution: {e}")
-        return False
-    finally:
-        if process and process.poll() is None:
-            try:
-                process.terminate()
-                process.wait(timeout=5)
-            except Exception:
-                pass
-
-def discover_targets(project_name: str, logger: logging.Logger) -> list[str]:
-    """Discover fuzz targets for a project (starting with 'fuzz_', no extension, and executable)"""
-    out_dir = OSS_FUZZ_DIR / "build" / "out" / project_name
-    targets = []  # Use built-in list type
-
-    if not out_dir.is_dir():
-        logger.warning(f"Build output directory for project {project_name} does not exist: {out_dir}")
-        return targets
-
-    try:
-        for f in out_dir.iterdir():
-            try:
-                if (f.is_file() and
-                        f.name.startswith("fuzz_") and
-                        '.' not in f.name and
-                        os.access(f, os.X_OK)):
-                    targets.append(f.name)
-            except OSError as e:
-                logger.warning(f"⚠️ Error while checking file {f.name}, skipped: {e}")
-
-    except PermissionError:
-        logger.error(f"🔒 Insufficient permissions to access directory: {out_dir}")
-    except OSError as e:
-        logger.exception(f"💥 Operating system error occurred while discovering targets: {e}")
-
-    return targets
-
-def run_single_target(project_name: str, target_name: str, timeout: int, record_inputs: bool = False) -> tuple[bool, str, str]:
-    """Execute fuzz testing workflow for a single (project, target) pair with input capture"""
-    task_id = f"{project_name}_{target_name}"
-    logger = logging.getLogger(task_id)
-
-    try:
-        logger.setLevel(logging.DEBUG)
-        LOG_DIR.mkdir(parents=True, exist_ok=True)
-        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-        log_file = LOG_DIR / f"run_{task_id}_{timestamp}.log"
-        file_handler = logging.FileHandler(log_file, encoding="utf-8")
-        formatter = logging.Formatter(
-            "%(asctime)s [PID:%(process)d] %(levelname)s: %(message)s",
-            datefmt="%Y-%m-%d %H:%M:%S"
-        )
-        file_handler.setFormatter(formatter)
-        logger.addHandler(file_handler)
-        os.chdir(OSS_FUZZ_DIR)
-
-        logger.info(f"🚀 Starting test -> Project: {project_name}, Target: {target_name}")
-
-        # Prepare environment for input capture if requested
-        env = Nothing
-        instrumented_file = None
-
-        if record_inputs:
-            instrumented_file = prepare_target_for_input_capture(project_name, target_name)
-            if not instrumented_file:
-                logger.error(f"❌ Failed to instrument {target_name}")
-                return False, project_name, target_name
-
-            logger.info(f"🔧 Instrumented {target_name} for input capture")
-
-            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-            input_dir = INPUT_DIR / project_name / target_name / timestamp
-            input_dir.mkdir(parents=True, exist_ok=True)
-
-            env_dict = os.environ.copy()
-            env_dict["FUZZ_INPUT_DIR"] = str(input_dir)
-            env = Some(env_dict)
-
-            logger.info(f"📁 Inputs will be stored in: {input_dir}")
-
-        # Prepare command
-        target_to_run = instrumented_file.name if instrumented_file else target_name
-        cmd = f"python3 infra/helper.py run_fuzzer {project_name} {target_to_run} -- -max_total_time={timeout}"
-
-        # Execute command
-        success = run_command(
-            cmd,
-            f"Running Target '{target_name}' (timeout={timeout}s)",
-            logger,
-            allowed_exit_codes=Some([1, 124]),
-            timeout=timeout + 300,
-            env=env
-        )
-
-        if success:
-            logger.info(f"✅ Target '{target_name}' completed successfully.")
-        else:
-            logger.error(f"❌ Target '{target_name}' failed.")
-
-        return success, project_name, target_name
-
-    except Exception as e:
-        logger.exception(f"💥 Unexpected error occurred while running target '{target_name}': {e}")
-        return False, project_name, target_name
-
-    finally:
-        for handler in logger.handlers[:]:
-            handler.close()
-            logger.removeHandler(handler)
-
-        if record_inputs and env.is_just:
-            try:
-                input_dir = Path(env.unwrap()["FUZZ_INPUT_DIR"])
-                if input_dir.exists():
-                    input_count = len(list(input_dir.glob("*")))
-                    logger.info(f"📥 Captured {input_count} inputs for {target_name}")
-            except Exception as e:
-                logger.warning(f"⚠️ Failed to summarize captured inputs: {e}")
-
-    # Final fallback (defensive)
-    return False, project_name, target_name
-
-
-def main():
-    # Configure main process logging
-    logging.basicConfig(
-        level=logging.INFO,
-        format="[%(levelname)s] %(message)s",
-        stream=sys.stdout
-    )
-    logger = logging.getLogger("Main")
-
-    # Set up command line argument parsing
-    parser = argparse.ArgumentParser(description="OSS-Fuzz Parallel Fuzz Testing Tool with Input Capture")
-    parser.add_argument("project_list", help="File path containing list of project names")
-    parser.add_argument("--timeout", type=int, default=60, help="Timeout duration per Fuzz Target (seconds)")
-    parser.add_argument("--workers", type=int, default=cpu_count(), help="Number of parallel worker processes")
-    parser.add_argument("--record-inputs", action="store_true", help="Enable input capture during fuzzing")
-    args = parser.parse_args()
-
-    # 1. Read project list file
-    try:
-        project_path = Path(args.project_list)
-        with open(project_path, "r", encoding="utf-8") as f:
-            projects = [line.strip() for line in f if line.strip()]
-        logger.info(f"📋 Loaded {len(projects)} projects from {project_path.name}.")
-    except FileNotFoundError:
-        logger.error(f"❌ Project list file not found: {args.project_list}")
-        sys.exit(1)
-    except (OSError, PermissionError) as e:
-        logger.exception(f"💥 Error occurred while reading project list: {e}")
-        sys.exit(1)
-
-    # 2. Discovery phase: Collect all fuzz targets
-    logger.info("\n" + "=" * 20 + " Phase 1: Discover all Fuzz Targets " + "=" * 20)
-    all_fuzz_tasks = []  # Use built-in list type
-    
-    try:
-        original_cwd = Path.cwd()  # Save current working directory
-        os.chdir(OSS_FUZZ_DIR)     # Switch to OSS-Fuzz directory
-        for project_name in projects:
-            targets = discover_targets(project_name, logger)
-            if targets:
-                logger.info(f"🔍 Discovered {len(targets)} targets in project '{project_name}': {', '.join(targets)}")
-                for target in targets:
-                    all_fuzz_tasks.append((project_name, target))
-            else:
-                logger.warning(f"⚠️ No Fuzz Targets found in project '{project_name}'.")
-        os.chdir(original_cwd)  # Restore original working directory
-    except FileNotFoundError:
-        logger.error(f"❌ OSS-Fuzz directory does not exist: {OSS_FUZZ_DIR}")
-        sys.exit(1)
-    except Exception as e:
-        logger.exception(f"💥 Unknown error occurred during discovery phase: {e}")
-        sys.exit(1)
-
-    # Check if any valid targets were found
-    if not all_fuzz_tasks:
-        logger.info("🤷 No executable Fuzz Targets found. Program exits.")
-        sys.exit(0)
-
-    # 3. Execution phase: Parallel fuzz testing with input capture
-    logger.info(f"\n✅ Discovery phase completed. Found a total of {len(all_fuzz_tasks)} fuzz tasks.")
-    if args.record_inputs:
-        logger.info("🔔 Input capture is ENABLED. All fuzzing inputs will be recorded.")
-        logger.info(f"📁 Inputs will be stored in: {INPUT_DIR}")
-    
-    logger.info("=" * 20 + " Phase 2: Parallel Fuzzing Execution " + "=" * 23)
-    logger.info(f"🚀 Starting parallel testing with {args.workers} worker processes (timeout per target: {args.timeout}s)...")
-
-    # Prepare task parameters (project, target, timeout, record_inputs)
-    tasks_with_args = [(p, t, args.timeout, args.record_inputs) for p, t in all_fuzz_tasks]
-    results: list[tuple[bool, str, str]] = []  # Store results (success, project, target)
-    
-    # Execute in parallel using process pool
-    with Pool(args.workers) as pool:
-        try:
-            results = pool.starmap(run_single_target, tasks_with_args)
-        except Exception as e:
-            logger.error(f"💥 Critical error occurred during parallel execution: {e}")
-            pool.terminate()
-            pool.join()
-
-    # 4. Result summary and reporting
-    logger.info("\n" + "=" * 20 + " Phase 3: Result Summary " + "=" * 28)
-    failed_tasks = [(p, t) for success, p, t in results if not success]  # List of failed tasks
-    total_tasks = len(all_fuzz_tasks)
-    failed_count = len(failed_tasks)
-    success_count = total_tasks - failed_count
-
-    # Output statistical summary
-    logger.info(f"📊 Fuzzing completed: Success {success_count}/{total_tasks}, Failed {failed_count}/{total_tasks}")
-    
-    if args.record_inputs:
-        total_inputs = sum(len(list((INPUT_DIR / p / t).glob("*/*.bin"))) for p, t in all_fuzz_tasks)
-        logger.info(f"📥 Total inputs captured: {total_inputs}")
-        logger.info(f"💾 Inputs stored at: {INPUT_DIR}")
-    
-    if failed_tasks:
-        logger.error("❌ The following Fuzz Targets failed:")
-        for project, target in failed_tasks:
-            logger.error(f"  - Project: {project}, Target: {target}")  # List detailed failures
-
-
-if __name__ == "__main__":
-    try:
-        main()
-    except KeyboardInterrupt:
-        print("\n🛑 Operation interrupted by user.")
-        sys.exit(1)
-    except Exception as e:
-        print(f"\n💥 Fatal error in main program: {e}")
-        sys.exit(1)
\ No newline at end of file

From 6c18832ee0dcf906d917ba5513f202088e627337 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Fri, 25 Jul 2025 23:44:56 +0000
Subject: [PATCH 057/134] modify log name

---
 fuzz/run_fuzz_all_targets_print1.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fuzz/run_fuzz_all_targets_print1.py b/fuzz/run_fuzz_all_targets_print1.py
index ce557c3..8b5c31d 100644
--- a/fuzz/run_fuzz_all_targets_print1.py
+++ b/fuzz/run_fuzz_all_targets_print1.py
@@ -132,7 +132,7 @@ def run_single_target(project_name: str, target_name: str, timeout: int, oss_fuz
     """Execute fuzz testing workflow for a single (project, target) pair"""
     task_id = f"{project_name}_{target_name}"
     logger = logging.getLogger(task_id)
-    LOG_DIR = oss_fuzz_dir / "run_pj3_logs"
+    LOG_DIR = oss_fuzz_dir / "run_fuzz_all_targets_logs"
     try:
         logger.setLevel(logging.DEBUG)
         LOG_DIR.mkdir(parents=True, exist_ok=True)

From 679ec482923bd8d8897df40dec4f9860d50fb0c0 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Sat, 26 Jul 2025 00:00:08 +0000
Subject: [PATCH 058/134] Fix exit_code type annotation in BuildError

---
 fuzz/build_fuzzers.py    |  4 ++--
 image_build_results.json | 11 ++++++++++-
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/fuzz/build_fuzzers.py b/fuzz/build_fuzzers.py
index 1dfd825..8a8a0a4 100644
--- a/fuzz/build_fuzzers.py
+++ b/fuzz/build_fuzzers.py
@@ -29,7 +29,7 @@
 
 class BuildError(Exception):
     """Base exception for build failures"""
-    def __init__(self, message: str, project: str = "", exit_code: int = None):
+    def __init__(self, message: str, project: str = "", exit_code: Optional[int] = None):
         super().__init__(message)
         self.project = project
         self.exit_code = exit_code
@@ -72,7 +72,7 @@ def run_command(
         if exit_code in allowed_exit_codes:
             return exit_code
             
-        # 构建详细的错误信息
+        # Build detailed error message
         error_msg = f"Command failed (exit code: {exit_code})"
         if project:
             error_msg += f" for project: {project}"
diff --git a/image_build_results.json b/image_build_results.json
index 4af9787..2e47cc5 100644
--- a/image_build_results.json
+++ b/image_build_results.json
@@ -1 +1,10 @@
-{"abseil-py": true, "adal": true, "aiohttp": true, "aniso8601": true, "ansible": true, "argcomplete": true, "arrow-py": true, "asn1crypto": true, "asteval": true, "astroid": true, "asttokens": true, "attrs": true, "autoflake": true, "autopep8": true, "azure-sdk-for-python": true, "babel": true, "black": true, "botocore": true, "bottleneck": true, "bz2file": true, "cachetools": true, "cffi": true, "chardet": true, "charset_normalizer": true, "click": true, "cloud-custodian": true, "configparser": true, "connexion": true, "coveragepy": true, "croniter": true, "cryptography": true, "cssselect": true, "dask": true, "decorator": true, "defusedxml": true, "digest": true, "dill": true, "distlib": true, "dnspython": true, "docutils": true, "ecdsa-python": true, "et-xmlfile": true, "face": true, "filelock": true, "filesystem_spec": true, "flask": true, "flask-jwt-extended": true, "flask-restx": true, "flask-wtf": true, "fonttools": true, "ftfy": true, "g-api-auth-httplib2": true, "g-api-auth-library-python": true, "g-api-pubsub": true, "g-api-py-api-common-protos": true, "g-api-py-oauthlib": true, "g-api-python-bigquery-storage": true, "g-api-python-client": true, "g-api-python-cloud-core": true, "g-api-python-firestore": true, "g-api-python-tasks": true, "g-api-resource-manager": true, "g-api-resumable-media-python": true, "g-api-secret-manager": true, "g-apis-py-api-core": true, "gast": true, "gc-iam": true, "gcloud-error-py": true, "g-cloud-logging-py": true, "gcp-python-cloud-storage": true, "genshi": true, "gitdb": true, "glom": true, "gprof2dot": true, "g-py-bigquery": true, "g-py-crc32c": true, "grpc-py": true, "gunicorn": true, "h11": true, "h5py": true, "hiredis-py": true, "html2text": true, "html5lib-python": true, "httpcore": true, "httpretty": true, "httpx": true, "idna": true, "ijson": true, "importlib_metadata": true, "iniconfig": true, "ipaddress": true, "ipykernel": true, "ipython": true, "isodate": true, "itsdangerous": true, "jedi": true, "jinja2": true, 
"jmespathpy": true, "joblib": true, "jsmin": true, "jupyter-nbconvert": true, "jupyter_server": true, "kafka": true, "keras": false, "kiwisolver": true, "lark-parser": true, "libcst": true, "looker-sdk": true, "lxml": true, "mako": true, "markupsafe": true, "matplotlib": true, "mccabe": true, "mdit-py-plugins": true, "mdurl": true, "more-itertools": true, "mrab-regex": true, "msal": true, "msgpack-python": true, "multidict": true, "mutagen": true, "nbclassic": true, "nbformat": true, "netaddr-py": true, "networkx": true, "ntlm2": true, "ntlm-auth": true, "numexpr": true, "numpy": true, "oauth2": true, "oauthlib": true, "olefile": true, "openapi-schema-validator": true, "opencensus-python": true, "openpyxl": true, "opt_einsum": true, "oracle-py-cx": true, "orjson": true, "oscrypto": true, "packaging": true, "pandas": true, "paramiko": true, "parse": true, "parsimonious": true, "pasta": true, "pathlib2": true, "pdoc": true, "pem": true, "pendulum": true, "pip": true, "ply": true, "protobuf-python": true, "proto-plus-python": true, "psqlparse": true, "psutil": true, "psycopg2": true, "pyasn1": true, "pyasn1-modules": true, "pycparser": true, "pycrypto": true, "pydantic": true, "pydateutil": true, "pygments": true, "pyjson5": true, "pyjwt": true, "pymysql": true, "pynacl": true, "pyodbc": true, "pyparsing": true, "pyrsistent": true, "py-serde": true, "pytables": true, "pytest-py": true, "python3-openid": true, "python-ecdsa": true, "python-email-validator": true, "python-fastjsonschema": true, "python-future": true, "python-graphviz": true, "python-hyperlink": true, "python-jose": true, "python-lz4": true, "python-markdown": true, "python-markdownify": true, "python-nameparser": true, "python-nvd3": true, "python-pathspec": true, "python-prompt-toolkit": true, "python-pypdf": true, "python-rison": true, "python-rsa": true, "python-tabulate": true, "pytz": true, "pyxdg": true, "pyyaml": true, "pyzmq": true, "redis-py": true, "requests": true, "retry": true, "rfc3967": 
true, "rich": true, "sacremoses": true, "scikit-learn": true, "scipy": true, "setuptools": true, "sigstore-python": true, "simplejson": true, "six": true, "smart_open": true, "soupsieve": true, "sqlalchemy_jsonfield": true, "sqlalchemy-utils": true, "sqlparse": true, "stack_data": true, "tensorflow-addons": false, "tinycss2": true, "toml": true, "tomlkit": true, "toolbelt": true, "toolz": true, "tqdm": true, "typing_extensions": true, "underscore": true, "uritemplate": true, "urlextract": true, "urllib3": true, "validators": true, "w3lib": true, "websocket-client": true, "wheel": true, "wtforms": true, "xlrd": true, "yarl": true, "zipp": true}
\ No newline at end of file
+{
+    "asteval": true,
+    "astroid": true,
+    "asttokens": true,
+    "attrs": true,
+    "autoflake": true,
+    "autopep8": true,
+    "azure-sdk-for-python": true,
+    "babel": true
+}
\ No newline at end of file

From b4d18115368c2ed0277dae73ca61733f6fbbe6b6 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Sat, 26 Jul 2025 00:05:39 +0000
Subject: [PATCH 059/134] Use built-in list/tuple generics in build_fuzzers.py

---
 fuzz/build_fuzzers.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fuzz/build_fuzzers.py b/fuzz/build_fuzzers.py
index 8a8a0a4..65fa93a 100644
--- a/fuzz/build_fuzzers.py
+++ b/fuzz/build_fuzzers.py
@@ -24,7 +24,7 @@
 import logging
 import json
 from pathlib import Path
-from typing import List, Optional, Tuple
+from typing import Optional
 from multiprocessing import Pool, cpu_count
 
 class BuildError(Exception):
@@ -50,7 +50,7 @@ def run_command(
     cmd: str,
     oss_fuzz_dir: Path,
     project: str = "",
-    allowed_exit_codes: Optional[List[int]] = None
+    allowed_exit_codes: Optional[list[int]] = None
 ) -> int:
     """Execute a command and return the exit code"""
     allowed_exit_codes = allowed_exit_codes or [0]
@@ -92,7 +92,7 @@ def run_command(
     except subprocess.SubprocessError as e:
         raise CommandError(f"Subprocess error: {e}", project=project) from e
 
-def build_fuzzers(project_name: str, sanitizer: str, oss_fuzz_dir: Path) -> Tuple[bool, str]:
+def build_fuzzers(project_name: str, sanitizer: str, oss_fuzz_dir: Path) -> tuple[bool, str]:
     """Fuzzer build workflow"""
     try:
         logging.info("=" * 60)

From d030841f68ef8762f78584f06c9a5e35668f0c7e Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Sat, 26 Jul 2025 00:09:54 +0000
Subject: [PATCH 060/134] Add type annotations to result dicts in build_fuzz.py

---
 fuzz/build_fuzz.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fuzz/build_fuzz.py b/fuzz/build_fuzz.py
index 481fba9..3cbb505 100644
--- a/fuzz/build_fuzz.py
+++ b/fuzz/build_fuzz.py
@@ -233,7 +233,7 @@ def main():
         sys.exit(1)
 
     # Image building workflow
-    image_results = {}
+    image_results: dict[str, bool] = {} 
     if args.mode in ['image', 'both']:
         logging.info("\n" + "="*60)
         logging.info(f"Starting Docker image builds for {len(projects)} projects")
@@ -257,7 +257,7 @@ def main():
             logging.error(f"❌ Failed to save image results: {e}")
 
     # Fuzzer building workflow
-    fuzzer_results = {}
+    fuzzer_results: dict[str, bool] = {}
     fuzz_projects = []
     if args.mode in ['fuzzer', 'both']:
         logging.info("\n" + "="*60)

From 3f9df9d3a6ba35c2bbdef5692ba90b1b5b86f9f6 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Mon, 28 Jul 2025 23:30:30 +0000
Subject: [PATCH 061/134] Move shared exception classes into fuzz/errors.py

---
 fuzz/build_fuzz.py    | 24 ++----------------------
 fuzz/build_fuzzers.py | 20 +-------------------
 fuzz/errors.py        | 20 ++++++++++++++++++++
 3 files changed, 23 insertions(+), 41 deletions(-)
 create mode 100644 fuzz/errors.py

diff --git a/fuzz/build_fuzz.py b/fuzz/build_fuzz.py
index 3cbb505..5db274e 100644
--- a/fuzz/build_fuzz.py
+++ b/fuzz/build_fuzz.py
@@ -18,7 +18,7 @@
     python3 build_fuzz.py --mode both [project_list] --oss-fuzz-dir /path/to/oss-fuzz --sanitizer address
 
 Example:
-    python3 ./fuzz/build_fuzz.py --mode both data/valid_projects3.txt \
+    python3 fuzz/build_fuzz.py --mode both data/valid_projects3.txt \
         --oss-fuzz-dir ./fuzz/oss-fuzz \
         --sanitizer address \
         --workers 8
@@ -33,28 +33,8 @@
 from pathlib import Path
 from returns.maybe import Maybe
 from multiprocessing import Pool
+from errors import BuildError, CommandError, PathError, ConfigError
 
-# ========================================================================================
-# Custom Exceptions
-# ========================================================================================
-class BuildError(Exception):
-    """Base exception for build failures"""
-    def __init__(self, message: str, project: str = "", exit_code: int | None = None):
-        super().__init__(message)
-        self.project = project
-        self.exit_code = exit_code
-
-class CommandError(BuildError):
-    """Exception for command execution failures"""
-    pass
-
-class PathError(BuildError):
-    """Exception for missing paths or files"""
-    pass
-
-class ConfigError(BuildError):
-    """Exception for configuration errors"""
-    pass
 
 # ========================================================================================
 # Helper Functions
diff --git a/fuzz/build_fuzzers.py b/fuzz/build_fuzzers.py
index 65fa93a..a252dbd 100644
--- a/fuzz/build_fuzzers.py
+++ b/fuzz/build_fuzzers.py
@@ -26,25 +26,7 @@
 from pathlib import Path
 from typing import Optional
 from multiprocessing import Pool, cpu_count
-
-class BuildError(Exception):
-    """Base exception for build failures"""
-    def __init__(self, message: str, project: str = "", exit_code: Optional[int] = None):
-        super().__init__(message)
-        self.project = project
-        self.exit_code = exit_code
-
-class CommandError(BuildError):
-    """Exception for command execution failures"""
-    pass
-
-class PathError(BuildError):
-    """Exception for missing paths or files"""
-    pass
-
-class ConfigError(BuildError):
-    """Exception for configuration errors"""
-    pass
+from errors import BuildError, CommandError, PathError, ConfigError
 
 def run_command(
     cmd: str,
diff --git a/fuzz/errors.py b/fuzz/errors.py
new file mode 100644
index 0000000..294e642
--- /dev/null
+++ b/fuzz/errors.py
@@ -0,0 +1,20 @@
+# fuzz/errors.py
+
+class BuildError(Exception):
+    """Base exception for build failures"""
+    def __init__(self, message: str, project: str = "", exit_code: int | None = None):
+        super().__init__(message)
+        self.project = project
+        self.exit_code = exit_code
+
+class CommandError(BuildError):
+    """Exception for command execution failures"""
+    pass
+
+class PathError(BuildError):
+    """Exception for missing paths or files"""
+    pass
+
+class ConfigError(BuildError):
+    """Exception for configuration errors"""
+    pass

From 85b7904e9dd0700b7e00c830bde1562c356783c5 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Mon, 28 Jul 2025 23:45:10 +0000
Subject: [PATCH 062/134] Add command_util module with run_command

---
 fuzz/command_util.py | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)
 create mode 100644 fuzz/command_util.py

diff --git a/fuzz/command_util.py b/fuzz/command_util.py
new file mode 100644
index 0000000..2f0c65a
--- /dev/null
+++ b/fuzz/command_util.py
@@ -0,0 +1,42 @@
+# command_util.py
+
+import subprocess
+import logging
+from returns.maybe import Maybe
+from errors import CommandError
+from pathlib import Path
+
+def run_command(
+    cmd: str,
+    oss_fuzz_dir: Path,
+    project: str = "",
+    allowed_exit_codes: Maybe[list[int]] = Maybe.empty,
+    skip_yes: bool = False
+) -> int:
+    """Execute a command and return the exit code (no stdout/stderr capture)"""
+    allowed_codes = allowed_exit_codes.value_or([0])
+    cmd_str = f"yes | {cmd}" if not skip_yes else cmd
+    logging.debug(f"Executing command [{project}]: {cmd_str}")
+
+    try:
+        process = subprocess.Popen(
+            cmd_str,
+            shell=True,
+            cwd=str(oss_fuzz_dir)
+        )
+        exit_code = process.wait()
+
+        if exit_code in allowed_codes:
+            return exit_code
+
+        error_msg = f"Command failed (exit code: {exit_code})"
+        if project:
+            error_msg += f" for project: {project}"
+        raise CommandError(error_msg, project=project, exit_code=exit_code)
+
+    except FileNotFoundError as e:
+        raise CommandError(f"Command not found: {cmd.split()[0]}", project=project) from e
+    except OSError as e:
+        raise CommandError(f"System error: {e}", project=project) from e
+    except subprocess.SubprocessError as e:
+        raise CommandError(f"Subprocess error: {e}", project=project) from e

From 40102b44e237374f5e374c76de0d68ae61ccbf30 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Mon, 28 Jul 2025 23:52:08 +0000
Subject: [PATCH 063/134] Consolidate run_command variants into command_util.py

---
 fuzz/build_fuzz.py                  |   2 +
 fuzz/command_util.py                | 114 +++++++++++++++++++++++-----
 fuzz/run_fuzz_all_targets_print1.py |   2 +-
 image_build_results.json            |   4 -
 4 files changed, 96 insertions(+), 26 deletions(-)

diff --git a/fuzz/build_fuzz.py b/fuzz/build_fuzz.py
index 5db274e..c1f7aeb 100644
--- a/fuzz/build_fuzz.py
+++ b/fuzz/build_fuzz.py
@@ -34,6 +34,8 @@
 from returns.maybe import Maybe
 from multiprocessing import Pool
 from errors import BuildError, CommandError, PathError, ConfigError
+from command_util import run_command_build_fuzz as run_command
+
 
 
 # ========================================================================================
diff --git a/fuzz/command_util.py b/fuzz/command_util.py
index 2f0c65a..7455af2 100644
--- a/fuzz/command_util.py
+++ b/fuzz/command_util.py
@@ -6,37 +6,109 @@
 from errors import CommandError
 from pathlib import Path
 
-def run_command(
+import subprocess
+import time
+import logging
+from pathlib import Path
+from returns.maybe import Maybe
+from errors import CommandError
+
+def _run_subprocess(
+    cmd: str,
+    cwd: Path = None,
+    capture_output: bool = False,
+    timeout: int = None,
+    logger: logging.Logger = None,
+) -> tuple[int, list[str]]:
+    """
+    低层执行子进程命令
+    - capture_output=True：捕获 stdout，返回输出列表
+    - timeout 秒超时（无超时则None）
+    - logger 用于实时打印输出
+    返回：(退出码, 输出行列表)
+    """
+    process = subprocess.Popen(
+        cmd,
+        shell=True,
+        cwd=str(cwd) if cwd else None,
+        stdout=subprocess.PIPE if capture_output else None,
+        stderr=subprocess.STDOUT if capture_output else None,
+        text=True,
+        encoding="utf-8",
+        errors="replace",
+    )
+
+    output_lines = []
+    start_time = time.time()
+
+    try:
+        if capture_output:
+            while True:
+                line = process.stdout.readline()
+                if line:
+                    output_lines.append(line.rstrip())
+                    if logger:
+                        logger.debug(line.rstrip())
+                elif process.poll() is not None:
+                    break
+
+                if timeout and (time.time() - start_time) > timeout:
+                    if logger:
+                        logger.error(f"⌛ Command timed out after {timeout} seconds")
+                    process.terminate()
+                    try:
+                        process.wait(timeout=5)
+                    except subprocess.TimeoutExpired:
+                        process.kill()
+                    return -1, output_lines
+                time.sleep(0.05)
+        else:
+            # 不捕获输出，直接等待结束
+            process.wait(timeout=timeout)
+
+    except Exception as e:
+        if logger:
+            logger.exception(f"Error during command execution: {e}")
+        process.kill()
+        raise e
+
+    return process.returncode, output_lines
+
+
+def run_command_build_fuzz(
     cmd: str,
     oss_fuzz_dir: Path,
     project: str = "",
     allowed_exit_codes: Maybe[list[int]] = Maybe.empty,
     skip_yes: bool = False
 ) -> int:
-    """Execute a command and return the exit code (no stdout/stderr capture)"""
+    """build_fuzz.py 中使用的 run_command，简化版，抛异常"""
     allowed_codes = allowed_exit_codes.value_or([0])
     cmd_str = f"yes | {cmd}" if not skip_yes else cmd
-    logging.debug(f"Executing command [{project}]: {cmd_str}")
-
-    try:
-        process = subprocess.Popen(
-            cmd_str,
-            shell=True,
-            cwd=str(oss_fuzz_dir)
-        )
-        exit_code = process.wait()
-
-        if exit_code in allowed_codes:
-            return exit_code
-
+    exit_code, _ = _run_subprocess(cmd_str, cwd=oss_fuzz_dir)
+    if exit_code not in allowed_codes:
         error_msg = f"Command failed (exit code: {exit_code})"
         if project:
             error_msg += f" for project: {project}"
         raise CommandError(error_msg, project=project, exit_code=exit_code)
+    return exit_code
+
+
+def run_command_fuzz_all_targets(
+    cmd: str,
+    log_msg: str,
+    logger: logging.Logger,
+    allowed_exit_codes: Maybe[list[int]] = Maybe.empty,
+    timeout: int = 3600,
+) -> bool:
+    """run_fuzz_all_targets_print1.py 中使用，带实时日志与超时，返回bool"""
+    logger.info(f"▶️ {log_msg}...")
+    logger.debug(f"   $ {cmd}")
+
+    allowed_codes = allowed_exit_codes.value_or([])
+    exit_code, _ = _run_subprocess(cmd, capture_output=True, timeout=timeout, logger=logger)
+    if exit_code not in [0, *allowed_codes]:
+        logger.error(f"❌ Command execution failed, exit code: {exit_code}")
+        return False
+    return True
 
-    except FileNotFoundError as e:
-        raise CommandError(f"Command not found: {cmd.split()[0]}", project=project) from e
-    except OSError as e:
-        raise CommandError(f"System error: {e}", project=project) from e
-    except subprocess.SubprocessError as e:
-        raise CommandError(f"Subprocess error: {e}", project=project) from e
diff --git a/fuzz/run_fuzz_all_targets_print1.py b/fuzz/run_fuzz_all_targets_print1.py
index 8b5c31d..c5e15eb 100644
--- a/fuzz/run_fuzz_all_targets_print1.py
+++ b/fuzz/run_fuzz_all_targets_print1.py
@@ -25,7 +25,7 @@
 from pathlib import Path
 from multiprocessing import Pool, cpu_count
 from returns.maybe import Maybe, Nothing, Some
-
+from command_util import run_command_fuzz_all_targets as run_command
 
 
 
diff --git a/image_build_results.json b/image_build_results.json
index 2e47cc5..72b9fe6 100644
--- a/image_build_results.json
+++ b/image_build_results.json
@@ -1,8 +1,4 @@
 {
-    "asteval": true,
-    "astroid": true,
-    "asttokens": true,
-    "attrs": true,
     "autoflake": true,
     "autopep8": true,
     "azure-sdk-for-python": true,

From e0c7740f66e636e8ac2af786fc7a93fbff26538d Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Tue, 29 Jul 2025 00:05:28 +0000
Subject: [PATCH 064/134] Remove local run_command definitions superseded by command_util

---
 fuzz/build_fuzz.py                  | 40 -----------------
 fuzz/run_fuzz_all_targets_print1.py | 70 -----------------------------
 2 files changed, 110 deletions(-)

diff --git a/fuzz/build_fuzz.py b/fuzz/build_fuzz.py
index c1f7aeb..00f8af6 100644
--- a/fuzz/build_fuzz.py
+++ b/fuzz/build_fuzz.py
@@ -36,46 +36,6 @@
 from errors import BuildError, CommandError, PathError, ConfigError
 from command_util import run_command_build_fuzz as run_command
 
-
-
-# ========================================================================================
-# Helper Functions
-# ========================================================================================
-def run_command(
-    cmd: str,
-    oss_fuzz_dir: Path,
-    project: str = "",
-    allowed_exit_codes: Maybe[list[int]] = Maybe.empty,
-    skip_yes: bool = False
-) -> int:
-    """Execute a command and return the exit code (no stdout/stderr capture)"""
-    allowed_codes = allowed_exit_codes.value_or([0])
-    cmd_str = f"yes | {cmd}" if not skip_yes else cmd
-    logging.debug(f"Executing command [{project}]: {cmd_str}")
-
-    try:
-        process = subprocess.Popen(
-            cmd_str,
-            shell=True,
-            cwd=str(oss_fuzz_dir)
-        )
-        exit_code = process.wait()
-
-        if exit_code in allowed_codes:
-            return exit_code
-
-        error_msg = f"Command failed (exit code: {exit_code})"
-        if project:
-            error_msg += f" for project: {project}"
-        raise CommandError(error_msg, project=project, exit_code=exit_code)
-
-    except FileNotFoundError as e:
-        raise CommandError(f"Command not found: {cmd.split()[0]}", project=project) from e
-    except OSError as e:
-        raise CommandError(f"System error: {e}", project=project) from e
-    except subprocess.SubprocessError as e:
-        raise CommandError(f"Subprocess error: {e}", project=project) from e
-
 # ========================================================================================
 # Build Functions
 # ========================================================================================
diff --git a/fuzz/run_fuzz_all_targets_print1.py b/fuzz/run_fuzz_all_targets_print1.py
index c5e15eb..04400c6 100644
--- a/fuzz/run_fuzz_all_targets_print1.py
+++ b/fuzz/run_fuzz_all_targets_print1.py
@@ -29,76 +29,6 @@
 
 
 
-
-def run_command(
-    cmd: str,
-    log_msg: str,
-    logger: logging.Logger,
-    allowed_exit_codes: Maybe[list[int]] = Nothing,
-    timeout: int = 3600  # Default 1-hour timeout
-) -> bool:
-    """Execute commands with real-time logging and precise error handling"""
-    allowed_codes = allowed_exit_codes.value_or([])
-    logger.info(f"▶️ {log_msg}...")
-    logger.debug(f"   $ {cmd}")
-
-    process = None
-    try:
-        process = subprocess.Popen(
-            cmd,
-            shell=True,
-            stdout=subprocess.PIPE,
-            stderr=subprocess.STDOUT,
-            text=True,
-            encoding="utf-8",
-            errors="replace"
-        )
-
-        start_time = time.time()
-        while process.poll() is None:
-            if time.time() - start_time > timeout:
-                logger.error(f"⌛ Command timed out after {timeout} seconds")
-                process.terminate()
-                try:
-                    process.wait(timeout=5)
-                except subprocess.TimeoutExpired:
-                    process.kill()
-                return False
-
-            if process.stdout:
-                line = process.stdout.readline()
-                if line:
-                    logger.debug(line.strip())
-            else:
-                time.sleep(0.1)
-
-        exit_code = process.returncode
-        if exit_code not in [0, *allowed_codes]:
-            logger.error(f"❌ Command execution failed, exit code: {exit_code}")
-            return False
-        return True
-
-    except FileNotFoundError:
-        logger.error(f"🔍 Command not found: {cmd.split()[0]}")
-        return False
-    except PermissionError:
-        logger.error(f"🔒 Insufficient permissions to execute command: {cmd}")
-        return False
-    except subprocess.SubprocessError as e:
-        logger.exception(f"💥 Subprocess error: {e}")
-        return False
-    except OSError as e:
-        logger.exception(f"💥 Operating system error during command execution: {e}")
-        return False
-    finally:
-        if process and process.poll() is None:
-            try:
-                process.terminate()
-                process.wait(timeout=5)
-            except Exception:
-                pass
-
-
 def discover_targets(project_name: str, oss_fuzz_dir: Path, logger: logging.Logger) -> list[str]:
     """Discover fuzz targets for a project (starting with 'fuzz_', ending with 'print1', no extension, and executable)"""
     out_dir = oss_fuzz_dir / "build" / "out" / project_name

From 0631504097b8184f1e534a97b83e4bbf3e89d4fb Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Tue, 29 Jul 2025 00:16:33 +0000
Subject: [PATCH 065/134] modify

---
 fuzz/build_fuzzers.py        |  77 +++++++++----------
 fuzz/run_fuzz_all_targets.py | 141 +++++++++++++++++------------------
 2 files changed, 108 insertions(+), 110 deletions(-)

diff --git a/fuzz/build_fuzzers.py b/fuzz/build_fuzzers.py
index a252dbd..ab8ac7d 100644
--- a/fuzz/build_fuzzers.py
+++ b/fuzz/build_fuzzers.py
@@ -27,52 +27,53 @@
 from typing import Optional
 from multiprocessing import Pool, cpu_count
 from errors import BuildError, CommandError, PathError, ConfigError
-
-def run_command(
-    cmd: str,
-    oss_fuzz_dir: Path,
-    project: str = "",
-    allowed_exit_codes: Optional[list[int]] = None
-) -> int:
-    """Execute a command and return the exit code"""
-    allowed_exit_codes = allowed_exit_codes or [0]
-    logging.info(f"▶️ Executing command: {cmd}")
+from command_util import run_command_build_fuzz as run_command
+
+# def run_command(
+#     cmd: str,
+#     oss_fuzz_dir: Path,
+#     project: str = "",
+#     allowed_exit_codes: Optional[list[int]] = None
+# ) -> int:
+#     """Execute a command and return the exit code"""
+#     allowed_exit_codes = allowed_exit_codes or [0]
+#     logging.info(f"▶️ Executing command: {cmd}")
     
-    try:
-        process = subprocess.Popen(
-            cmd,
-            shell=True,
-            cwd=str(oss_fuzz_dir),
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
-            text=True
-        )
+#     try:
+#         process = subprocess.Popen(
+#             cmd,
+#             shell=True,
+#             cwd=str(oss_fuzz_dir),
+#             stdout=subprocess.PIPE,
+#             stderr=subprocess.PIPE,
+#             text=True
+#         )
         
-        stdout, stderr = process.communicate()
-        exit_code = process.returncode
+#         stdout, stderr = process.communicate()
+#         exit_code = process.returncode
         
-        if exit_code in allowed_exit_codes:
-            return exit_code
+#         if exit_code in allowed_exit_codes:
+#             return exit_code
             
-        # Build detailed error message
-        error_msg = f"Command failed (exit code: {exit_code})"
-        if project:
-            error_msg += f" for project: {project}"
+#         # Build detailed error message
+#         error_msg = f"Command failed (exit code: {exit_code})"
+#         if project:
+#             error_msg += f" for project: {project}"
             
-        if stderr.strip():
-            error_msg += f"\nError output:\n{stderr.strip()}"
+#         if stderr.strip():
+#             error_msg += f"\nError output:\n{stderr.strip()}"
             
-        if stdout.strip():
-            error_msg += f"\nOutput:\n{stdout.strip()}"
+#         if stdout.strip():
+#             error_msg += f"\nOutput:\n{stdout.strip()}"
             
-        raise CommandError(error_msg, project=project, exit_code=exit_code)
+#         raise CommandError(error_msg, project=project, exit_code=exit_code)
     
-    except FileNotFoundError as e:
-        raise CommandError(f"Command not found: {cmd.split()[0]}", project=project) from e
-    except OSError as e:
-        raise CommandError(f"System error: {e}", project=project) from e
-    except subprocess.SubprocessError as e:
-        raise CommandError(f"Subprocess error: {e}", project=project) from e
+#     except FileNotFoundError as e:
+#         raise CommandError(f"Command not found: {cmd.split()[0]}", project=project) from e
+#     except OSError as e:
+#         raise CommandError(f"System error: {e}", project=project) from e
+#     except subprocess.SubprocessError as e:
+#         raise CommandError(f"Subprocess error: {e}", project=project) from e
 
 def build_fuzzers(project_name: str, sanitizer: str, oss_fuzz_dir: Path) -> tuple[bool, str]:
     """Fuzzer build workflow"""
diff --git a/fuzz/run_fuzz_all_targets.py b/fuzz/run_fuzz_all_targets.py
index fe147d8..372e13a 100644
--- a/fuzz/run_fuzz_all_targets.py
+++ b/fuzz/run_fuzz_all_targets.py
@@ -25,78 +25,75 @@
 from pathlib import Path
 from multiprocessing import Pool, cpu_count
 from returns.maybe import Maybe, Nothing, Some
-
-
-
-
-
-def run_command(
-    cmd: str,
-    log_msg: str,
-    logger: logging.Logger,
-    allowed_exit_codes: Maybe[list[int]] = Nothing,
-    timeout: int = 3600  # Default 1-hour timeout
-) -> bool:
-    """Execute commands with real-time logging and precise error handling"""
-    allowed_codes = allowed_exit_codes.value_or([])
-    logger.info(f"▶️ {log_msg}...")
-    logger.debug(f"   $ {cmd}")
-
-    process = None
-    try:
-        process = subprocess.Popen(
-            cmd,
-            shell=True,
-            stdout=subprocess.PIPE,
-            stderr=subprocess.STDOUT,
-            text=True,
-            encoding="utf-8",
-            errors="replace"
-        )
-
-        start_time = time.time()
-        while process.poll() is None:
-            if time.time() - start_time > timeout:
-                logger.error(f"⌛ Command timed out after {timeout} seconds")
-                process.terminate()
-                try:
-                    process.wait(timeout=5)
-                except subprocess.TimeoutExpired:
-                    process.kill()
-                return False
-
-            if process.stdout:
-                line = process.stdout.readline()
-                if line:
-                    logger.debug(line.strip())
-            else:
-                time.sleep(0.1)
-
-        exit_code = process.returncode
-        if exit_code not in [0, *allowed_codes]:
-            logger.error(f"❌ Command execution failed, exit code: {exit_code}")
-            return False
-        return True
-
-    except FileNotFoundError:
-        logger.error(f"🔍 Command not found: {cmd.split()[0]}")
-        return False
-    except PermissionError:
-        logger.error(f"🔒 Insufficient permissions to execute command: {cmd}")
-        return False
-    except subprocess.SubprocessError as e:
-        logger.exception(f"💥 Subprocess error: {e}")
-        return False
-    except OSError as e:
-        logger.exception(f"💥 Operating system error during command execution: {e}")
-        return False
-    finally:
-        if process and process.poll() is None:
-            try:
-                process.terminate()
-                process.wait(timeout=5)
-            except Exception:
-                pass
+from command_util import run_command_fuzz_all_targets as run_command
+
+# def run_command(
+#     cmd: str,
+#     log_msg: str,
+#     logger: logging.Logger,
+#     allowed_exit_codes: Maybe[list[int]] = Nothing,
+#     timeout: int = 3600  # Default 1-hour timeout
+# ) -> bool:
+#     """Execute commands with real-time logging and precise error handling"""
+#     allowed_codes = allowed_exit_codes.value_or([])
+#     logger.info(f"▶️ {log_msg}...")
+#     logger.debug(f"   $ {cmd}")
+
+#     process = None
+#     try:
+#         process = subprocess.Popen(
+#             cmd,
+#             shell=True,
+#             stdout=subprocess.PIPE,
+#             stderr=subprocess.STDOUT,
+#             text=True,
+#             encoding="utf-8",
+#             errors="replace"
+#         )
+
+#         start_time = time.time()
+#         while process.poll() is None:
+#             if time.time() - start_time > timeout:
+#                 logger.error(f"⌛ Command timed out after {timeout} seconds")
+#                 process.terminate()
+#                 try:
+#                     process.wait(timeout=5)
+#                 except subprocess.TimeoutExpired:
+#                     process.kill()
+#                 return False
+
+#             if process.stdout:
+#                 line = process.stdout.readline()
+#                 if line:
+#                     logger.debug(line.strip())
+#             else:
+#                 time.sleep(0.1)
+
+#         exit_code = process.returncode
+#         if exit_code not in [0, *allowed_codes]:
+#             logger.error(f"❌ Command execution failed, exit code: {exit_code}")
+#             return False
+#         return True
+
+#     except FileNotFoundError:
+#         logger.error(f"🔍 Command not found: {cmd.split()[0]}")
+#         return False
+#     except PermissionError:
+#         logger.error(f"🔒 Insufficient permissions to execute command: {cmd}")
+#         return False
+#     except subprocess.SubprocessError as e:
+#         logger.exception(f"💥 Subprocess error: {e}")
+#         return False
+#     except OSError as e:
+#         logger.exception(f"💥 Operating system error during command execution: {e}")
+#         return False
+#     finally:
+#         if process and process.poll() is None:
+#             try:
+#                 process.terminate()
+#                 process.wait(timeout=5)
+#             except Exception:
+#                 pass
 
 
 def discover_targets(project_name: str, oss_fuzz_dir: Path, logger: logging.Logger) -> list[str]:

From 5e28b5c989949f087dfc47cd8b638cb353ed2bef Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Tue, 29 Jul 2025 00:28:46 +0000
Subject: [PATCH 066/134] mypy type check

---
 fuzz/command_util.py | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

diff --git a/fuzz/command_util.py b/fuzz/command_util.py
index 7455af2..bb8e0dd 100644
--- a/fuzz/command_util.py
+++ b/fuzz/command_util.py
@@ -1,24 +1,20 @@
-# command_util.py
-
-import subprocess
-import logging
-from returns.maybe import Maybe
-from errors import CommandError
-from pathlib import Path
+# fuzz/command_util.py
 
 import subprocess
 import time
 import logging
 from pathlib import Path
+from typing import Optional
 from returns.maybe import Maybe
 from errors import CommandError
 
+
 def _run_subprocess(
     cmd: str,
-    cwd: Path = None,
+    cwd: Optional[Path] = None,
     capture_output: bool = False,
-    timeout: int = None,
-    logger: logging.Logger = None,
+    timeout: Optional[int] = None,
+    logger: Optional[logging.Logger] = None,
 ) -> tuple[int, list[str]]:
     """
     低层执行子进程命令
@@ -43,6 +39,7 @@ def _run_subprocess(
 
     try:
         if capture_output:
+            assert process.stdout is not None  # ✅ MyPy static check
             while True:
                 line = process.stdout.readline()
                 if line:
@@ -79,7 +76,7 @@ def run_command_build_fuzz(
     cmd: str,
     oss_fuzz_dir: Path,
     project: str = "",
-    allowed_exit_codes: Maybe[list[int]] = Maybe.empty,
+    allowed_exit_codes: Maybe[list[int]] = Maybe.empty(),
     skip_yes: bool = False
 ) -> int:
     """build_fuzz.py 中使用的 run_command，简化版，抛异常"""
@@ -98,7 +95,7 @@ def run_command_fuzz_all_targets(
     cmd: str,
     log_msg: str,
     logger: logging.Logger,
-    allowed_exit_codes: Maybe[list[int]] = Maybe.empty,
+    allowed_exit_codes: Maybe[list[int]] = Maybe.empty(),
     timeout: int = 3600,
 ) -> bool:
     """run_fuzz_all_targets_print1.py 中使用，带实时日志与超时，返回bool"""
@@ -111,4 +108,3 @@ def run_command_fuzz_all_targets(
         logger.error(f"❌ Command execution failed, exit code: {exit_code}")
         return False
     return True
-

From 368c0e4699a48e3d19d5859c7f4636f5e3edb383 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Tue, 29 Jul 2025 00:45:10 +0000
Subject: [PATCH 067/134] mypy type check

---
 fuzz/command_util.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/fuzz/command_util.py b/fuzz/command_util.py
index bb8e0dd..0d04364 100644
--- a/fuzz/command_util.py
+++ b/fuzz/command_util.py
@@ -4,11 +4,10 @@
 import time
 import logging
 from pathlib import Path
-from typing import Optional
+from typing import Optional, cast
 from returns.maybe import Maybe
 from errors import CommandError
 
-
 def _run_subprocess(
     cmd: str,
     cwd: Optional[Path] = None,
@@ -39,9 +38,13 @@ def _run_subprocess(
 
     try:
         if capture_output:
-            assert process.stdout is not None  # ✅ MyPy static check
+            # 类型断言确保 stdout 不为 None
+            stdout = cast(Optional[subprocess.PIPE], process.stdout)
+            if stdout is None:
+                raise RuntimeError("Stdout should not be None when capture_output is True")
+                
             while True:
-                line = process.stdout.readline()
+                line = stdout.readline()
                 if line:
                     output_lines.append(line.rstrip())
                     if logger:
@@ -49,7 +52,7 @@ def _run_subprocess(
                 elif process.poll() is not None:
                     break
 
-                if timeout and (time.time() - start_time) > timeout:
+                if timeout is not None and (time.time() - start_time) > timeout:
                     if logger:
                         logger.error(f"⌛ Command timed out after {timeout} seconds")
                     process.terminate()
@@ -107,4 +110,4 @@ def run_command_fuzz_all_targets(
     if exit_code not in [0, *allowed_codes]:
         logger.error(f"❌ Command execution failed, exit code: {exit_code}")
         return False
-    return True
+    return True
\ No newline at end of file

From 1be1dfa648f56fd4547d7390e718c7b7efc90bcc Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Tue, 29 Jul 2025 00:49:18 +0000
Subject: [PATCH 068/134] mypy type check

---
 fuzz/command_util.py | 59 +++++++++++++++++++++++++-------------------
 1 file changed, 33 insertions(+), 26 deletions(-)

diff --git a/fuzz/command_util.py b/fuzz/command_util.py
index 0d04364..76032ec 100644
--- a/fuzz/command_util.py
+++ b/fuzz/command_util.py
@@ -1,24 +1,24 @@
-# fuzz/command_util.py
+# command_util.py
 
 import subprocess
-import time
 import logging
+import time
 from pathlib import Path
-from typing import Optional, cast
+from typing import Optional, Tuple, List
 from returns.maybe import Maybe
 from errors import CommandError
 
 def _run_subprocess(
     cmd: str,
-    cwd: Optional[Path] = None,
+    cwd: Optional[Path] = None,  # 修复：添加 Optional 类型
     capture_output: bool = False,
-    timeout: Optional[int] = None,
-    logger: Optional[logging.Logger] = None,
-) -> tuple[int, list[str]]:
+    timeout: Optional[int] = None,  # 修复：添加 Optional 类型
+    logger: Optional[logging.Logger] = None,  # 修复：添加 Optional 类型
+) -> Tuple[int, List[str]]:  # 建议使用 Tuple 替代 tuple
     """
     低层执行子进程命令
     - capture_output=True：捕获 stdout，返回输出列表
-    - timeout 秒超时（无超时则None）
+    - timeout 秒超时（无超时则 None）
     - logger 用于实时打印输出
     返回：(退出码, 输出行列表)
     """
@@ -33,28 +33,29 @@ def _run_subprocess(
         errors="replace",
     )
 
-    output_lines = []
+    output_lines: List[str] = []
     start_time = time.time()
 
     try:
         if capture_output:
-            # 类型断言确保 stdout 不为 None
-            stdout = cast(Optional[subprocess.PIPE], process.stdout)
-            if stdout is None:
-                raise RuntimeError("Stdout should not be None when capture_output is True")
+            # 确保 stdout 不是 None
+            if process.stdout is None:
+                raise RuntimeError("stdout is unexpectedly None in capture mode")
                 
             while True:
-                line = stdout.readline()
+                line = process.stdout.readline()
                 if line:
-                    output_lines.append(line.rstrip())
+                    line_stripped = line.rstrip()
+                    output_lines.append(line_stripped)
                     if logger:
-                        logger.debug(line.rstrip())
+                        logger.debug(line_stripped)
                 elif process.poll() is not None:
                     break
 
-                if timeout is not None and (time.time() - start_time) > timeout:
+                # 处理超时逻辑
+                if timeout and (time.time() - start_time) > timeout:
                     if logger:
-                        logger.error(f"⌛ Command timed out after {timeout} seconds")
+                        logger.error(f"⌛ 命令超时，耗时 {timeout} 秒")
                     process.terminate()
                     try:
                         process.wait(timeout=5)
@@ -68,9 +69,9 @@ def _run_subprocess(
 
     except Exception as e:
         if logger:
-            logger.exception(f"Error during command execution: {e}")
+            logger.exception(f"执行命令时出错：{e}")
         process.kill()
-        raise e
+        raise CommandError(f"Command failed: {str(e)}") from e
 
     return process.returncode, output_lines
 
@@ -79,7 +80,7 @@ def run_command_build_fuzz(
     cmd: str,
     oss_fuzz_dir: Path,
     project: str = "",
-    allowed_exit_codes: Maybe[list[int]] = Maybe.empty(),
+    allowed_exit_codes: Maybe[List[int]] = Maybe.empty,
     skip_yes: bool = False
 ) -> int:
     """build_fuzz.py 中使用的 run_command，简化版，抛异常"""
@@ -87,7 +88,7 @@ def run_command_build_fuzz(
     cmd_str = f"yes | {cmd}" if not skip_yes else cmd
     exit_code, _ = _run_subprocess(cmd_str, cwd=oss_fuzz_dir)
     if exit_code not in allowed_codes:
-        error_msg = f"Command failed (exit code: {exit_code})"
+        error_msg = f"命令失败（退出码：{exit_code}）"
         if project:
             error_msg += f" for project: {project}"
         raise CommandError(error_msg, project=project, exit_code=exit_code)
@@ -98,16 +99,22 @@ def run_command_fuzz_all_targets(
     cmd: str,
     log_msg: str,
     logger: logging.Logger,
-    allowed_exit_codes: Maybe[list[int]] = Maybe.empty(),
+    allowed_exit_codes: Maybe[List[int]] = Maybe.empty,
     timeout: int = 3600,
 ) -> bool:
-    """run_fuzz_all_targets_print1.py 中使用，带实时日志与超时，返回bool"""
+    """run_fuzz_all_targets_print1.py 中使用，带实时日志与超时，返回 bool"""
     logger.info(f"▶️ {log_msg}...")
     logger.debug(f"   $ {cmd}")
 
     allowed_codes = allowed_exit_codes.value_or([])
-    exit_code, _ = _run_subprocess(cmd, capture_output=True, timeout=timeout, logger=logger)
+    exit_code, _ = _run_subprocess(
+        cmd, 
+        capture_output=True, 
+        timeout=timeout, 
+        logger=logger
+    )
+    
     if exit_code not in [0, *allowed_codes]:
-        logger.error(f"❌ Command execution failed, exit code: {exit_code}")
+        logger.error(f"❌ 命令执行失败，退出码：{exit_code}")
         return False
     return True
\ No newline at end of file

From 09ba145886f9cff5ef295553f93ff0fd508c18da Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Tue, 29 Jul 2025 00:58:19 +0000
Subject: [PATCH 069/134] mytype

---
 fuzz/command_util.py | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/fuzz/command_util.py b/fuzz/command_util.py
index 76032ec..63e2c76 100644
--- a/fuzz/command_util.py
+++ b/fuzz/command_util.py
@@ -4,17 +4,17 @@
 import logging
 import time
 from pathlib import Path
-from typing import Optional, Tuple, List
+from typing import Optional
 from returns.maybe import Maybe
 from errors import CommandError
 
 def _run_subprocess(
     cmd: str,
-    cwd: Optional[Path] = None,  # 修复：添加 Optional 类型
+    cwd: Optional[Path] = None,  
     capture_output: bool = False,
-    timeout: Optional[int] = None,  # 修复：添加 Optional 类型
-    logger: Optional[logging.Logger] = None,  # 修复：添加 Optional 类型
-) -> Tuple[int, List[str]]:  # 建议使用 Tuple 替代 tuple
+    timeout: Optional[int] = None,  
+    logger: Optional[logging.Logger] = None,  
+) -> tuple[int, list[str]]:  
     """
     低层执行子进程命令
     - capture_output=True：捕获 stdout，返回输出列表
@@ -33,12 +33,12 @@ def _run_subprocess(
         errors="replace",
     )
 
-    output_lines: List[str] = []
+    output_lines: list[str] = []
     start_time = time.time()
 
     try:
         if capture_output:
-            # 确保 stdout 不是 None
+           
             if process.stdout is None:
                 raise RuntimeError("stdout is unexpectedly None in capture mode")
                 
@@ -52,7 +52,7 @@ def _run_subprocess(
                 elif process.poll() is not None:
                     break
 
-                # 处理超时逻辑
+                # Handle timeout logic
                 if timeout and (time.time() - start_time) > timeout:
                     if logger:
                         logger.error(f"⌛ 命令超时，耗时 {timeout} 秒")
@@ -64,7 +64,7 @@ def _run_subprocess(
                     return -1, output_lines
                 time.sleep(0.05)
         else:
-            # 不捕获输出，直接等待结束
+        
             process.wait(timeout=timeout)
 
     except Exception as e:
@@ -80,7 +80,7 @@ def run_command_build_fuzz(
     cmd: str,
     oss_fuzz_dir: Path,
     project: str = "",
-    allowed_exit_codes: Maybe[List[int]] = Maybe.empty,
+    allowed_exit_codes: Maybe[list[int]] = Maybe.empty,
     skip_yes: bool = False
 ) -> int:
     """build_fuzz.py 中使用的 run_command，简化版，抛异常"""
@@ -99,7 +99,7 @@ def run_command_fuzz_all_targets(
     cmd: str,
     log_msg: str,
     logger: logging.Logger,
-    allowed_exit_codes: Maybe[List[int]] = Maybe.empty,
+    allowed_exit_codes: Maybe[list[int]] = Maybe.empty,
     timeout: int = 3600,
 ) -> bool:
     """run_fuzz_all_targets_print1.py 中使用，带实时日志与超时，返回 bool"""

From 7a1f248f2079437809d5ecb89d48037c10bb9d28 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Tue, 29 Jul 2025 01:05:34 +0000
Subject: [PATCH 070/134] translate

---
 fuzz/command_util.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/fuzz/command_util.py b/fuzz/command_util.py
index 63e2c76..198de4b 100644
--- a/fuzz/command_util.py
+++ b/fuzz/command_util.py
@@ -16,11 +16,11 @@ def _run_subprocess(
     logger: Optional[logging.Logger] = None,  
 ) -> tuple[int, list[str]]:  
     """
-    低层执行子进程命令
-    - capture_output=True：捕获 stdout，返回输出列表
-    - timeout 秒超时（无超时则 None）
-    - logger 用于实时打印输出
-    返回：(退出码, 输出行列表)
+    Execute child process commands at a lower level
+    - capture_output=True: Captures stdout, returns a list of outputs
+    - timeout (None without timeout)
+    Logger for real-time printouts
+    Return: (Exit Code, Output Line List)
     """
     process = subprocess.Popen(
         cmd,
@@ -83,12 +83,12 @@ def run_command_build_fuzz(
     allowed_exit_codes: Maybe[list[int]] = Maybe.empty,
     skip_yes: bool = False
 ) -> int:
-    """build_fuzz.py 中使用的 run_command，简化版，抛异常"""
+    """run_command used in build_fuzz.py, build_fuzzers.py """
     allowed_codes = allowed_exit_codes.value_or([0])
     cmd_str = f"yes | {cmd}" if not skip_yes else cmd
     exit_code, _ = _run_subprocess(cmd_str, cwd=oss_fuzz_dir)
     if exit_code not in allowed_codes:
-        error_msg = f"命令失败（退出码：{exit_code}）"
+        error_msg = f"The command failed（exit code：{exit_code}）"
         if project:
             error_msg += f" for project: {project}"
         raise CommandError(error_msg, project=project, exit_code=exit_code)
@@ -102,7 +102,7 @@ def run_command_fuzz_all_targets(
     allowed_exit_codes: Maybe[list[int]] = Maybe.empty,
     timeout: int = 3600,
 ) -> bool:
-    """run_fuzz_all_targets_print1.py 中使用，带实时日志与超时，返回 bool"""
+    """run_command used in run_fuzz_all_targets_print1.py, run_fuzz_all_targets.py"""
     logger.info(f"▶️ {log_msg}...")
     logger.debug(f"   $ {cmd}")
 
@@ -115,6 +115,6 @@ def run_command_fuzz_all_targets(
     )
     
     if exit_code not in [0, *allowed_codes]:
-        logger.error(f"❌ 命令执行失败，退出码：{exit_code}")
+        logger.error(f"❌ The command failed， exit code：{exit_code}")
         return False
     return True
\ No newline at end of file

From 6a062bb590c376663aa10f7e61a549337658bca3 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Tue, 29 Jul 2025 01:07:02 +0000
Subject: [PATCH 071/134] remove run command

---
 fuzz/run_fuzz_all_targets.py | 69 ------------------------------------
 1 file changed, 69 deletions(-)

diff --git a/fuzz/run_fuzz_all_targets.py b/fuzz/run_fuzz_all_targets.py
index 372e13a..8990e33 100644
--- a/fuzz/run_fuzz_all_targets.py
+++ b/fuzz/run_fuzz_all_targets.py
@@ -27,75 +27,6 @@
 from returns.maybe import Maybe, Nothing, Some
 from command_util import run_command_fuzz_all_targets as run_command
 
-# def run_command(
-#     cmd: str,
-#     log_msg: str,
-#     logger: logging.Logger,
-#     allowed_exit_codes: Maybe[list[int]] = Nothing,
-#     timeout: int = 3600  # Default 1-hour timeout
-# ) -> bool:
-#     """Execute commands with real-time logging and precise error handling"""
-#     allowed_codes = allowed_exit_codes.value_or([])
-#     logger.info(f"▶️ {log_msg}...")
-#     logger.debug(f"   $ {cmd}")
-
-#     process = None
-#     try:
-#         process = subprocess.Popen(
-#             cmd,
-#             shell=True,
-#             stdout=subprocess.PIPE,
-#             stderr=subprocess.STDOUT,
-#             text=True,
-#             encoding="utf-8",
-#             errors="replace"
-#         )
-
-#         start_time = time.time()
-#         while process.poll() is None:
-#             if time.time() - start_time > timeout:
-#                 logger.error(f"⌛ Command timed out after {timeout} seconds")
-#                 process.terminate()
-#                 try:
-#                     process.wait(timeout=5)
-#                 except subprocess.TimeoutExpired:
-#                     process.kill()
-#                 return False
-
-#             if process.stdout:
-#                 line = process.stdout.readline()
-#                 if line:
-#                     logger.debug(line.strip())
-#             else:
-#                 time.sleep(0.1)
-
-#         exit_code = process.returncode
-#         if exit_code not in [0, *allowed_codes]:
-#             logger.error(f"❌ Command execution failed, exit code: {exit_code}")
-#             return False
-#         return True
-
-#     except FileNotFoundError:
-#         logger.error(f"🔍 Command not found: {cmd.split()[0]}")
-#         return False
-#     except PermissionError:
-#         logger.error(f"🔒 Insufficient permissions to execute command: {cmd}")
-#         return False
-#     except subprocess.SubprocessError as e:
-#         logger.exception(f"💥 Subprocess error: {e}")
-#         return False
-#     except OSError as e:
-#         logger.exception(f"💥 Operating system error during command execution: {e}")
-#         return False
-#     finally:
-#         if process and process.poll() is None:
-#             try:
-#                 process.terminate()
-#                 process.wait(timeout=5)
-#             except Exception:
-#                 pass
-
-
 def discover_targets(project_name: str, oss_fuzz_dir: Path, logger: logging.Logger) -> list[str]:
     """Discover fuzz targets for a project (starting with 'fuzz_', no extension, and executable)"""
     out_dir = oss_fuzz_dir / "build" / "out" / project_name

From 26eceebe4ec971a2cc1345e51e4cc6609d17a617 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Tue, 29 Jul 2025 18:49:16 +0000
Subject: [PATCH 072/134] timeout - shell instrument

---
 fuzz/command_util.py | 43 +++++++++++++++++++++++--------------------
 1 file changed, 23 insertions(+), 20 deletions(-)

diff --git a/fuzz/command_util.py b/fuzz/command_util.py
index 198de4b..f29c9a3 100644
--- a/fuzz/command_util.py
+++ b/fuzz/command_util.py
@@ -18,10 +18,16 @@ def _run_subprocess(
     """
     Execute child process commands at a lower level
     - capture_output=True: Captures stdout, returns a list of outputs
-    - timeout (None without timeout)
-    Logger for real-time printouts
+    - timeout: Uses shell's `timeout` command for timeout handling
+    - logger: For real-time printouts
     Return: (Exit Code, Output Line List)
     """
+    # 如果有超时要求，使用 shell 的 timeout 命令
+    if timeout and timeout > 0:
+        cmd = f"timeout {timeout} {cmd}"
+        if logger:
+            logger.debug(f"⌛ Adding timeout ({timeout}s) to command")
+
     process = subprocess.Popen(
         cmd,
         shell=True,
@@ -34,7 +40,6 @@ def _run_subprocess(
     )
 
     output_lines: list[str] = []
-    start_time = time.time()
 
     try:
         if capture_output:
@@ -51,21 +56,9 @@ def _run_subprocess(
                         logger.debug(line_stripped)
                 elif process.poll() is not None:
                     break
-
-                # Handle timeout logic
-                if timeout and (time.time() - start_time) > timeout:
-                    if logger:
-                        logger.error(f"⌛ 命令超时，耗时 {timeout} 秒")
-                    process.terminate()
-                    try:
-                        process.wait(timeout=5)
-                    except subprocess.TimeoutExpired:
-                        process.kill()
-                    return -1, output_lines
                 time.sleep(0.05)
         else:
-        
-            process.wait(timeout=timeout)
+            process.wait()
 
     except Exception as e:
         if logger:
@@ -83,12 +76,16 @@ def run_command_build_fuzz(
     allowed_exit_codes: Maybe[list[int]] = Maybe.empty,
     skip_yes: bool = False
 ) -> int:
-    """run_command used in build_fuzz.py, build_fuzzers.py """
+    """run_command used in build_fuzz.py, build_fuzzers.py"""
     allowed_codes = allowed_exit_codes.value_or([0])
     cmd_str = f"yes | {cmd}" if not skip_yes else cmd
     exit_code, _ = _run_subprocess(cmd_str, cwd=oss_fuzz_dir)
+    
+    # 处理 timeout 的特殊退出码 (124)
+    exit_code = 124 if exit_code == 124 else exit_code
+    
     if exit_code not in allowed_codes:
-        error_msg = f"The command failed（exit code：{exit_code}）"
+        error_msg = f"The command failed (exit code: {exit_code})"
         if project:
             error_msg += f" for project: {project}"
         raise CommandError(error_msg, project=project, exit_code=exit_code)
@@ -106,7 +103,9 @@ def run_command_fuzz_all_targets(
     logger.info(f"▶️ {log_msg}...")
     logger.debug(f"   $ {cmd}")
 
-    allowed_codes = allowed_exit_codes.value_or([])
+    # 允许超时退出码 124
+    allowed_codes = allowed_exit_codes.value_or([]) + [124]
+    
     exit_code, _ = _run_subprocess(
         cmd, 
         capture_output=True, 
@@ -114,7 +113,11 @@ def run_command_fuzz_all_targets(
         logger=logger
     )
     
+    # 返回 124 表示超时
+    if exit_code == 124:
+        logger.warning(f"⌛ Command timed out after {timeout} seconds")
+    
     if exit_code not in [0, *allowed_codes]:
-        logger.error(f"❌ The command failed， exit code：{exit_code}")
+        logger.error(f"❌ The command failed, exit code: {exit_code}")
         return False
     return True
\ No newline at end of file

From 7e91c6c5c2c5e6f32bcd4d54955dfb4dc24e056d Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Tue, 29 Jul 2025 21:09:39 +0000
Subject: [PATCH 073/134] correct in out error and return Popen directly

---
 fuzz/command_util.py | 190 +++++++++++++++++++++++++++++--------------
 1 file changed, 128 insertions(+), 62 deletions(-)

diff --git a/fuzz/command_util.py b/fuzz/command_util.py
index f29c9a3..8270d64 100644
--- a/fuzz/command_util.py
+++ b/fuzz/command_util.py
@@ -3,31 +3,36 @@
 import subprocess
 import logging
 import time
+import os
+import pty
+import tty
+import termios
 from pathlib import Path
-from typing import Optional
+from typing import Optional, Tuple, List
 from returns.maybe import Maybe
 from errors import CommandError
 
-def _run_subprocess(
+def create_popen_object(
     cmd: str,
-    cwd: Optional[Path] = None,  
+    cwd: Optional[Path] = None,
     capture_output: bool = False,
-    timeout: Optional[int] = None,  
-    logger: Optional[logging.Logger] = None,  
-) -> tuple[int, list[str]]:  
+    timeout: Optional[int] = None,
+    logger: Optional[logging.Logger] = None,
+) -> subprocess.Popen:
     """
-    Execute child process commands at a lower level
-    - capture_output=True: Captures stdout, returns a list of outputs
-    - timeout: Uses shell's `timeout` command for timeout handling
-    - logger: For real-time printouts
-    Return: (Exit Code, Output Line List)
+    创建并返回 Popen 对象，不等待进程结束
+    - capture_output: 是否捕获输出
+    - timeout: 使用 shell 的 timeout 命令处理超时
+    - logger: 用于实时打印输出
+    返回: Popen 对象
     """
-    # 如果有超时要求，使用 shell 的 timeout 命令
+    # 添加超时命令
     if timeout and timeout > 0:
-        cmd = f"timeout {timeout} {cmd}"
+        cmd = f"timeout {timeout}s {cmd}"
         if logger:
             logger.debug(f"⌛ Adding timeout ({timeout}s) to command")
 
+    # 创建 Popen 对象
     process = subprocess.Popen(
         cmd,
         shell=True,
@@ -38,36 +43,97 @@ def _run_subprocess(
         encoding="utf-8",
         errors="replace",
     )
+    
+    return process
 
-    output_lines: list[str] = []
+def run_subprocess_with_pty(
+    cmd: str,
+    cwd: Optional[Path] = None,
+    timeout: Optional[int] = None,
+    logger: Optional[logging.Logger] = None,
+) -> Tuple[int, List[str]]:
+    """
+    使用伪终端执行命令，解决终端设置问题
+    - timeout: 使用 shell 的 timeout 命令处理超时
+    - logger: 用于实时打印输出
+    返回: (退出码, 输出行列表)
+    """
+    # 添加超时命令
+    if timeout and timeout > 0:
+        cmd = f"timeout {timeout}s {cmd}"
+        if logger:
+            logger.debug(f"⌛ Adding timeout ({timeout}s) to command")
 
+    # 使用伪终端执行命令
+    master_fd, slave_fd = pty.openpty()
+    
+    # 设置伪终端为原始模式
+    old_settings = termios.tcgetattr(master_fd)
+    tty.setraw(master_fd)
+    
+    process = subprocess.Popen(
+        cmd,
+        shell=True,
+        cwd=str(cwd) if cwd else None,
+        stdin=slave_fd,
+        stdout=slave_fd,
+        stderr=slave_fd,
+        close_fds=True,
+        start_new_session=True
+    )
+    
+    os.close(slave_fd)
+    
+    output_lines = []
     try:
-        if capture_output:
-           
-            if process.stdout is None:
-                raise RuntimeError("stdout is unexpectedly None in capture mode")
-                
-            while True:
-                line = process.stdout.readline()
-                if line:
-                    line_stripped = line.rstrip()
-                    output_lines.append(line_stripped)
-                    if logger:
-                        logger.debug(line_stripped)
-                elif process.poll() is not None:
+        while True:
+            try:
+                data = os.read(master_fd, 1024)
+                if not data:
                     break
-                time.sleep(0.05)
-        else:
-            process.wait()
-
-    except Exception as e:
-        if logger:
-            logger.exception(f"执行命令时出错：{e}")
-        process.kill()
-        raise CommandError(f"Command failed: {str(e)}") from e
-
+                decoded = data.decode("utf-8", "replace")
+                output_lines.append(decoded.strip())
+                if logger:
+                    logger.debug(decoded.strip())
+            except OSError:
+                break
+    finally:
+        # 恢复终端设置
+        termios.tcsetattr(master_fd, termios.TCSADRAIN, old_settings)
+        os.close(master_fd)
+        process.wait()
+    
     return process.returncode, output_lines
 
+def run_command_fuzz_all_targets(
+    cmd: str,
+    log_msg: str,
+    logger: logging.Logger,
+    allowed_exit_codes: Maybe[list[int]] = Maybe.empty,
+    timeout: int = 3600,
+) -> bool:
+    """run_command used in run_fuzz_all_targets_print1.py, run_fuzz_all_targets.py"""
+    logger.info(f"▶️ {log_msg}...")
+    logger.debug(f"   $ {cmd}")
+
+    # 允许超时退出码 124
+    allowed_codes = allowed_exit_codes.value_or([]) + [124]
+    
+    # 使用伪终端解决终端设置问题
+    exit_code, _ = run_subprocess_with_pty(
+        cmd, 
+        timeout=timeout, 
+        logger=logger
+    )
+    
+    # 返回 124 表示超时
+    if exit_code == 124:
+        logger.warning(f"⌛ Command timed out after {timeout} seconds")
+    
+    if exit_code not in [0, *allowed_codes]:
+        logger.error(f"❌ The command failed, exit code: {exit_code}")
+        return False
+    return True
 
 def run_command_build_fuzz(
     cmd: str,
@@ -92,32 +158,32 @@ def run_command_build_fuzz(
     return exit_code
 
 
-def run_command_fuzz_all_targets(
-    cmd: str,
-    log_msg: str,
-    logger: logging.Logger,
-    allowed_exit_codes: Maybe[list[int]] = Maybe.empty,
-    timeout: int = 3600,
-) -> bool:
-    """run_command used in run_fuzz_all_targets_print1.py, run_fuzz_all_targets.py"""
-    logger.info(f"▶️ {log_msg}...")
-    logger.debug(f"   $ {cmd}")
+# def run_command_fuzz_all_targets(
+#     cmd: str,
+#     log_msg: str,
+#     logger: logging.Logger,
+#     allowed_exit_codes: Maybe[list[int]] = Maybe.empty,
+#     timeout: int = 3600,
+# ) -> bool:
+#     """run_command used in run_fuzz_all_targets_print1.py, run_fuzz_all_targets.py"""
+#     logger.info(f"▶️ {log_msg}...")
+#     logger.debug(f"   $ {cmd}")
 
-    # 允许超时退出码 124
-    allowed_codes = allowed_exit_codes.value_or([]) + [124]
+#     # 允许超时退出码 124
+#     allowed_codes = allowed_exit_codes.value_or([]) + [124]
     
-    exit_code, _ = _run_subprocess(
-        cmd, 
-        capture_output=True, 
-        timeout=timeout, 
-        logger=logger
-    )
+#     exit_code, _ = _run_subprocess(
+#         cmd, 
+#         capture_output=True, 
+#         timeout=timeout, 
+#         logger=logger
+#     )
     
-    # 返回 124 表示超时
-    if exit_code == 124:
-        logger.warning(f"⌛ Command timed out after {timeout} seconds")
+#     # 返回 124 表示超时
+#     if exit_code == 124:
+#         logger.warning(f"⌛ Command timed out after {timeout} seconds")
     
-    if exit_code not in [0, *allowed_codes]:
-        logger.error(f"❌ The command failed, exit code: {exit_code}")
-        return False
-    return True
\ No newline at end of file
+#     if exit_code not in [0, *allowed_codes]:
+#         logger.error(f"❌ The command failed, exit code: {exit_code}")
+#         return False
+#     return True
\ No newline at end of file

From f9edfafa40b39549f2951d502a79a26a04a9fc97 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Sat, 2 Aug 2025 02:15:32 +0000
Subject: [PATCH 074/134] ready to change from rust script

---
 fuzz/collect_fuzz_python.py | 267 ++++++++++++++++++++++++++++++++++++
 1 file changed, 267 insertions(+)
 create mode 100644 fuzz/collect_fuzz_python.py

diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py
new file mode 100644
index 0000000..bcc4b5e
--- /dev/null
+++ b/fuzz/collect_fuzz_python.py
@@ -0,0 +1,267 @@
+"""script for rust fuzzing and transforming test_template"""
+
+import logging
+from typing import Optional
+import fire
+import os
+from UniTSyn.frontend.util import wrap_repo, parallel_subprocess
+import subprocess
+from os.path import join as pjoin, abspath
+from tqdm import tqdm
+from pathos.multiprocessing import ProcessingPool
+import random
+from difflib import SequenceMatcher
+from itertools import islice
+
+
+def transform_repos(repos: list[str], jobs: int):
+    def transform_one_repo(repo_path: str):
+        return subprocess.Popen(
+            ["rust-fuzzer-gen", repo_path],
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+        )
+
+    logging.info(f"Running rust-fuzz-gen on {len(repos)} repos")
+    parallel_subprocess(repos, jobs, transform_one_repo, on_exit=None)
+
+
+def get_target_list(p: subprocess.Popen):
+    match p.stdout:
+        case None:
+            return []
+        case _:
+            return p.stdout.read().decode("utf-8").split("\n")
+
+
+def fuzz_one_target(target: tuple[str, str], timeout):
+    repo_path, target_name = target
+    with open(pjoin(repo_path, "fuzz_inputs", target_name), "w") as f:
+        return subprocess.Popen(
+            # todo: find out why -max_total_time doesn't work
+            # ["cargo", "fuzz", "run", target_name, "--", f"-max_total_time={timeout}"],
+            [
+                "bash",
+                "-c",
+                f"timeout {timeout} cargo fuzz run {target_name}",
+            ],
+            cwd=repo_path,
+            stdout=f,
+            stderr=subprocess.DEVNULL,
+        )
+
+
+def build(repos: list[str], jobs: int):
+    logging.info(f"Building fuzzing targets in {len(repos)} repos")
+    _ = parallel_subprocess(
+        repos,
+        jobs,
+        lambda path: subprocess.Popen(
+            ["cargo", "fuzz", "build"],
+            cwd=path,
+            stdout=subprocess.DEVNULL,
+            stderr=subprocess.DEVNULL,
+        ),
+        on_exit=None,
+    )
+
+
+def fuzz_repos(repos: list[str], jobs: int, timeout: int = 60):
+    logging.info("Collecting all fuzz targets")
+
+    target_map = parallel_subprocess(
+        repos,
+        jobs,
+        lambda path: subprocess.Popen(
+            ["cargo", "fuzz", "list"], cwd=path, stdout=subprocess.PIPE
+        ),
+        on_exit=get_target_list,
+    )
+    targets: list[tuple[str, str]] = [
+        (k, v) for k, vs in target_map.items() for v in vs if len(v) > 0
+    ]
+    for repo in repos:
+        os.makedirs(pjoin(repo, "fuzz_inputs"), exist_ok=True)
+
+    logging.info(f"Running cargo fuzz on {len(targets)} targets for {timeout} seconds")
+    parallel_subprocess(
+        targets, jobs, lambda p: fuzz_one_target(p, timeout), on_exit=None
+    )
+
+
+def substitute_input(template: str, input_data: str, idx: int) -> str:
+    return template.replace(
+        '[] ; # [doc = "This is a test template"]', f"{input_data} ; "
+    ).replace("fn test_something ()", f"fn test_{idx} ()")
+
+
+def has_similar(selected: list[str], x: str, thresh: float = 0.8) -> bool:
+    def similar(a, b):
+        return SequenceMatcher(None, a, b).ratio()
+
+    return any(map(lambda y: similar(x, y) > thresh, selected))
+
+
+def substitute_one_repo(
+    repo: str,
+    targets: list[str],
+    n_fuzz: int,
+    strategy: str,
+    max_len: int,
+    sim_thresh: float,
+):
+    template_dir = pjoin(repo, "tests-gen")
+    input_dir = pjoin(repo, "fuzz_inputs")
+    for t in targets:
+        if t == "":
+            continue
+
+        # format template before loading
+        template_path = pjoin(template_dir, t + ".rs")
+        try:
+            with open(template_path) as f_template:
+                template = f_template.read()
+            with open(pjoin(input_dir, t), "r") as f_input:
+                all_inputs = [i for i in f_input.read().splitlines() if i != "[]"]
+
+            inputs: list[str]
+            if strategy == "shuffle":
+                random.shuffle(all_inputs)
+                inputs = list(
+                    islice(filter(lambda x: len(x) < max_len, all_inputs), n_fuzz)
+                )
+            elif strategy == "reverse":
+                inputs = []
+                for x in reversed(all_inputs):
+                    if len(inputs) >= n_fuzz:
+                        break
+                    if len(x) > max_len or has_similar(inputs, x, sim_thresh):
+                        continue
+                    inputs.append(x)
+
+            else:
+                inputs = all_inputs[:n_fuzz]
+
+            tests = [
+                substitute_input(template, input_data, i)
+                for i, input_data in enumerate(inputs)
+            ]
+            generated_test_path = pjoin(template_dir, f"{t}.inputs.rs")
+            with open(generated_test_path, "w") as f_template:
+                f_template.write("\n".join(tests))
+
+            # format generated tests
+            subprocess.run(["rustfmt", str(generated_test_path)], check=False)
+        except FileNotFoundError:
+            logging.debug(f"Template {template_path} not found")
+
+
+def testgen_repos(
+    repos: list[str],
+    jobs: int,
+    n_fuzz: int = 100,
+    strategy: str = "shuffle",
+    max_len: int = 100,
+    sim_thresh: float = 0.8,
+):
+    """Generate tests from fuzz inputs
+
+    Args:
+        repos (list[str]): list of repo paths
+        jobs (int): number of parallel jobs to use
+        n_fuzz (int, optional): number of fuzz data to use. Defaults to 100.
+    """
+    target_map = parallel_subprocess(
+        repos,
+        jobs,
+        lambda path: subprocess.Popen(
+            ["cargo", "fuzz", "list"], cwd=path, stdout=subprocess.PIPE
+        ),
+        on_exit=get_target_list,
+        use_tqdm=False,
+    )
+    logging.info("Substitute fuzz data to test templates")
+    with ProcessingPool(jobs) as p:
+        _ = list(
+            tqdm(
+                p.map(
+                    lambda item: substitute_one_repo(
+                        item[0], item[1], n_fuzz, strategy, max_len, sim_thresh
+                    ),
+                    target_map.items(),
+                )
+            )
+        )
+
+
+def main(
+    repo_id: str = "image-rs/image-png",
+    repo_root: str = "data/rust_repos/",
+    timeout: int = 60,
+    jobs: int = 80,
+    limits: Optional[int] = None,
+    pipeline: str = "transform",
+    n_fuzz: int = 100,
+    strategy: str = "shuffle",
+    max_len: int = 100,
+    sim_thresh: float = 0.8,
+):
+    """collect fuzzing data from rust repos
+
+    Args:
+        repo_id (str, optional): repo id. Defaults to "marshallpierce/rust-base64".
+        repo_root (str, optional): directory contains all the repos. Defaults to "data/rust_repos/".
+        timeout (int, optional): max_total_time to fuzz. Defaults to 60.
+        jobs (int, optional): number of parallel jobs to use. Defaults to CORES.
+        limits (Optional[int], optional): number of repos to process, None if use all of them.
+        pipeline (str, optional): what to do. Defaults to "transform".
+
+        --- below only needed for testgen pipeline ---
+        n_fuzz (int, optional): number of fuzz data to use. Defaults to 100.
+        strategy (str, optional): shuffle or reverse,
+        max_len (int, optional): maximum length for fuzzing inputs
+        sim_thresh (float, optional), similarity threshold for fuzzing inputs
+    """
+    try:
+        repo_id_list = [
+            ll for line in open(repo_id, "r").readlines() if len(ll := line.strip()) > 0
+        ]
+    except FileNotFoundError:
+        repo_id_list = [repo_id]
+    if limits is not None:
+        repo_id_list = repo_id_list[:limits]
+    logging.info(f"Loaded {len(repo_id_list)} repos to be processed")
+
+    logging.info("Collecting all rust repos")
+    repos = []
+    for repo_id in repo_id_list:
+        repo_path = os.path.join(repo_root, wrap_repo(repo_id))
+        if os.path.exists(repo_path) and os.path.isdir(repo_path):
+            subdirectories = [
+                os.path.join(repo_path, d)
+                for d in os.listdir(repo_path)
+                if os.path.isdir(os.path.join(repo_path, d))
+            ]
+            repos.append(abspath(subdirectories[0]))
+
+    match pipeline:
+        case "transform":
+            transform_repos(repos, jobs)
+        case "build":
+            build(repos, jobs)
+        case "fuzz":
+            fuzz_repos(repos, jobs, timeout=timeout)
+        case "testgen":
+            testgen_repos(repos, jobs, n_fuzz, strategy, max_len, sim_thresh)
+        case "all":
+            transform_repos(repos, jobs)
+            build(repos, jobs)
+            fuzz_repos(repos, jobs, timeout=timeout)
+            testgen_repos(repos, jobs, n_fuzz, strategy, max_len, sim_thresh)
+        case _:
+            logging.error(f"Unknown pipeline {pipeline}")
+
+
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO)
+    fire.Fire(main)

From 3821133bcaac57a578908117a0aeaa53f616ec76 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Mon, 4 Aug 2025 19:08:15 +0000
Subject: [PATCH 075/134] =?UTF-8?q?=E4=BF=AE=E6=94=B9build=5Fimage?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 fuzz/collect_fuzz_python.py | 265 ++++++++++++++++++++++++++++--------
 1 file changed, 209 insertions(+), 56 deletions(-)

diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py
index bcc4b5e..6073b34 100644
--- a/fuzz/collect_fuzz_python.py
+++ b/fuzz/collect_fuzz_python.py
@@ -1,104 +1,214 @@
-"""script for rust fuzzing and transforming test_template"""
-
+"""
+用于 python 项目模糊测试（fuzzing）和测试模板转换的脚本
+"""
+# 导入日志记录模块，用于输出程序运行信息
 import logging
+# 从 typing 模块导入 Optional，用于类型提示，表示一个值可以是某个类型或者 None
 from typing import Optional
+# 导入 fire 库，用于快速创建命令行界面
 import fire
+# 导入 os 模块，用于与操作系统交互，如文件路径操作
 import os
+# 从自定义的 UniTSyn 工具库中导入辅助函数
 from UniTSyn.frontend.util import wrap_repo, parallel_subprocess
+# 导入 subprocess 模块，用于创建和管理子进程
 import subprocess
+# 从 os.path 中导入 join 和 abspath，分别用于拼接路径和获取绝对路径
 from os.path import join as pjoin, abspath
+# 导入 tqdm 库，用于显示进度条
 from tqdm import tqdm
+# 从 pathos.multiprocessing 导入 ProcessingPool，用于创建进程池以实现并行处理
 from pathos.multiprocessing import ProcessingPool
+# 导入 random 模块，用于生成随机数
 import random
+# 从 difflib 导入 SequenceMatcher，用于比较序列（如字符串）的相似度
 from difflib import SequenceMatcher
+# 从 itertools 导入 islice，用于对迭代器进行切片操作
 from itertools import islice
 
 
-def transform_repos(repos: list[str], jobs: int):
-    def transform_one_repo(repo_path: str):
-        return subprocess.Popen(
-            ["rust-fuzzer-gen", repo_path],
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
-        )
-
-    logging.info(f"Running rust-fuzz-gen on {len(repos)} repos")
-    parallel_subprocess(repos, jobs, transform_one_repo, on_exit=None)
+# def transform_repos(repos: list[str], jobs: int):
+#     """
+#     对一组仓库执行 `rust-fuzzer-gen` 命令，以生成模糊测试模板。
+
+#     Args:
+#         repos (list[str]): 包含多个仓库路径的列表。
+#         jobs (int): 并行执行的任务数量。
+#     """
+#     def transform_one_repo(repo_path: str):
+#         """
+#         对单个仓库启动 `rust-fuzzer-gen` 进程。
+
+#         Args:
+#             repo_path (str): 单个仓库的路径。
+        
+#         Returns:
+#             subprocess.Popen: 启动的子进程对象。
+#         """
+#         # 启动一个子进程来执行 `rust-fuzzer-gen` 命令
+#         return subprocess.Popen(
+#             ["rust-fuzzer-gen", repo_path],
+#             stdout=subprocess.PIPE,  # 捕获标准输出
+#             stderr=subprocess.PIPE,  # 捕获标准错误
+#         )
+
+#     # 记录日志，说明正在对多少个仓库进行操作
+#     logging.info(f"Running rust-fuzz-gen on {len(repos)} repos")
+#     # 使用并行处理工具来同时对多个仓库执行 transform_one_repo 函数
+#     parallel_subprocess(repos, jobs, transform_one_repo, on_exit=None)
 
 
 def get_target_list(p: subprocess.Popen):
+    """
+    从子进程的输出中解析出模糊测试目标列表。
+
+    Args:
+        p (subprocess.Popen): 一个已完成的子进程对象。
+    
+    Returns:
+        list[str]: 模糊测试目标的名称列表。
+    """
+    # 使用 match 语句检查子进程的标准输出
     match p.stdout:
+        # 如果标准输出为 None，则返回空列表
         case None:
             return []
+        # 否则，读取标准输出，解码为 UTF-8 字符串，并按换行符分割成列表
         case _:
             return p.stdout.read().decode("utf-8").split("\n")
 
 
 def fuzz_one_target(target: tuple[str, str], timeout):
+    """
+    对单个模糊测试目标执行 `cargo fuzz run` 命令。
+
+    Args:
+        target (tuple[str, str]): 一个元组，包含仓库路径和目标名称。
+        timeout (int): 模糊测试的超时时间（秒）。
+    
+    Returns:
+        subprocess.Popen: 启动的模糊测试子进程对象。
+    """
+    # 解包元组，获取仓库路径和目标名称
     repo_path, target_name = target
+    # 创建一个文件用于存放该目标的模糊测试输入
     with open(pjoin(repo_path, "fuzz_inputs", target_name), "w") as f:
+        # 启动一个子进程来执行模糊测试命令
         return subprocess.Popen(
-            # todo: find out why -max_total_time doesn't work
+            # todo: 研究为什么 -max_total_time 参数不起作用
             # ["cargo", "fuzz", "run", target_name, "--", f"-max_total_time={timeout}"],
+            # 使用 bash -c 和 timeout 命令来强制实现超时功能
             [
                 "bash",
                 "-c",
                 f"timeout {timeout} cargo fuzz run {target_name}",
             ],
-            cwd=repo_path,
-            stdout=f,
-            stderr=subprocess.DEVNULL,
+            cwd=repo_path,  # 在指定的仓库路径下执行命令
+            stdout=f,  # 将标准输出重定向到文件
+            stderr=subprocess.DEVNULL,  # 丢弃标准错误输出
         )
 
 
-def build(repos: list[str], jobs: int):
-    logging.info(f"Building fuzzing targets in {len(repos)} repos")
+def build_image(repos: list[str], jobs: int):
+    """
+    构建每个仓库对应的 OSS-Fuzz 项目的 Docker 镜像。
+
+    Args:
+        repos (list[str]): 仓库路径列表（每个应包含一个已接入 OSS-Fuzz 的项目）。
+        jobs (int): 并行任务数。
+    """
+    logging.info(f"Building Docker images for {len(repos)} OSS-Fuzz projects")
+
+    def _build_cmd(path: str):
+        # 获取 OSS-Fuzz 项目名（例如路径最后一段或自定义映射）
+        project_name = os.path.basename(path.rstrip("/"))
+        return subprocess.Popen(
+            ["python3", "infra/helper.py", "build_image", project_name],
+            cwd=os.path.abspath(os.path.join(path, "../../")),  # 仓库路径的上上一级
+            stdout=subprocess.DEVNULL,
+            stderr=subprocess.DEVNULL,
+        )
+
     _ = parallel_subprocess(
         repos,
         jobs,
-        lambda path: subprocess.Popen(
-            ["cargo", "fuzz", "build"],
-            cwd=path,
-            stdout=subprocess.DEVNULL,
-            stderr=subprocess.DEVNULL,
-        ),
+        _build_cmd,
         on_exit=None,
     )
 
-
 def fuzz_repos(repos: list[str], jobs: int, timeout: int = 60):
+    """
+    对一组仓库执行模糊测试。
+
+    Args:
+        repos (list[str]): 仓库路径列表。
+        jobs (int): 并行任务数。
+        timeout (int, optional): 每个目标的模糊测试超时时间（秒）。默认为 60。
+    """
+    # 记录日志，说明正在收集所有模糊测试目标
     logging.info("Collecting all fuzz targets")
 
+    # 并行执行 `cargo fuzz list` 来获取所有仓库的模糊测试目标
     target_map = parallel_subprocess(
         repos,
         jobs,
         lambda path: subprocess.Popen(
             ["cargo", "fuzz", "list"], cwd=path, stdout=subprocess.PIPE
         ),
-        on_exit=get_target_list,
+        on_exit=get_target_list, # 使用 get_target_list 函数处理每个子进程的输出
     )
+    # 将 `target_map` 整理成一个 (仓库路径, 目标名称) 的元组列表
     targets: list[tuple[str, str]] = [
         (k, v) for k, vs in target_map.items() for v in vs if len(v) > 0
     ]
+    # 为每个仓库创建存放模糊测试输入的目录
     for repo in repos:
         os.makedirs(pjoin(repo, "fuzz_inputs"), exist_ok=True)
 
+    # 记录日志，说明即将开始模糊测试
     logging.info(f"Running cargo fuzz on {len(targets)} targets for {timeout} seconds")
+    # 并行执行模糊测试
     parallel_subprocess(
         targets, jobs, lambda p: fuzz_one_target(p, timeout), on_exit=None
     )
 
 
 def substitute_input(template: str, input_data: str, idx: int) -> str:
+    """
+    将模糊测试的输入数据替换到测试模板中。
+
+    Args:
+        template (str): 测试模板字符串。
+        input_data (str): 单条模糊测试输入数据。
+        idx (int): 测试用例的索引号。
+    
+    Returns:
+        str: 替换完成后的测试代码字符串。
+    """
+    # 替换模板中的占位符为实际的输入数据
     return template.replace(
         '[] ; # [doc = "This is a test template"]', f"{input_data} ; "
+    # 替换模板中的函数名为唯一的测试函数名
     ).replace("fn test_something ()", f"fn test_{idx} ()")
 
 
 def has_similar(selected: list[str], x: str, thresh: float = 0.8) -> bool:
+    """
+    检查字符串 `x` 是否与 `selected` 列表中的任何字符串足够相似。
+
+    Args:
+        selected (list[str]): 已选择的字符串列表。
+        x (str): 待检查的字符串。
+        thresh (float, optional): 相似度阈值。默认为 0.8。
+    
+    Returns:
+        bool: 如果存在相似字符串，则返回 True，否则返回 False。
+    """
+    # 定义一个内部函数来计算两个字符串的相似度
     def similar(a, b):
         return SequenceMatcher(None, a, b).ratio()
 
+    # 检查列表中是否有任何一个字符串与 x 的相似度超过阈值
     return any(map(lambda y: similar(x, y) > thresh, selected))
 
 
@@ -110,49 +220,73 @@ def substitute_one_repo(
     max_len: int,
     sim_thresh: float,
 ):
+    """
+    处理单个仓库，将其模糊测试输入替换到测试模板中以生成测试文件。
+
+    Args:
+        repo (str): 仓库路径。
+        targets (list[str]): 该仓库的模糊测试目标列表。
+        n_fuzz (int): 要使用的模糊测试输入数量。
+        strategy (str): 选择输入的策略（"shuffle", "reverse" 等）。
+        max_len (int): 模糊测试输入的最大长度。
+        sim_thresh (float): 用于去重的相似度阈值。
+    """
+    # 定义模板目录和输入目录的路径
     template_dir = pjoin(repo, "tests-gen")
     input_dir = pjoin(repo, "fuzz_inputs")
+    # 遍历该仓库的所有模糊测试目标
     for t in targets:
-        if t == "":
+        if t == "": # 跳过空目标
             continue
 
-        # format template before loading
         template_path = pjoin(template_dir, t + ".rs")
         try:
+            # 读取测试模板文件
             with open(template_path) as f_template:
                 template = f_template.read()
+            # 读取对应的模糊测试输入文件
             with open(pjoin(input_dir, t), "r") as f_input:
+                # 过滤掉空的输入行
                 all_inputs = [i for i in f_input.read().splitlines() if i != "[]"]
 
             inputs: list[str]
+            # 根据不同的策略来选择输入数据
             if strategy == "shuffle":
+                # 随机打乱所有输入
                 random.shuffle(all_inputs)
+                # 过滤掉过长的输入，并取前 n_fuzz 个
                 inputs = list(
                     islice(filter(lambda x: len(x) < max_len, all_inputs), n_fuzz)
                 )
             elif strategy == "reverse":
+                # 从后往前选择，同时进行去重和长度过滤
                 inputs = []
                 for x in reversed(all_inputs):
-                    if len(inputs) >= n_fuzz:
+                    if len(inputs) >= n_fuzz: # 如果已选够，则停止
                         break
+                    # 如果输入过长或与已选输入相似，则跳过
                     if len(x) > max_len or has_similar(inputs, x, sim_thresh):
                         continue
                     inputs.append(x)
-
             else:
+                # 默认策略：直接取前 n_fuzz 个输入
                 inputs = all_inputs[:n_fuzz]
 
+            # 使用选定的输入数据生成测试用例代码
             tests = [
                 substitute_input(template, input_data, i)
                 for i, input_data in enumerate(inputs)
             ]
+            # 定义生成的测试文件的路径
             generated_test_path = pjoin(template_dir, f"{t}.inputs.rs")
+            # 将生成的测试代码写入文件
             with open(generated_test_path, "w") as f_template:
                 f_template.write("\n".join(tests))
 
-            # format generated tests
+            # 使用 rustfmt 工具格式化生成的测试文件
             subprocess.run(["rustfmt", str(generated_test_path)], check=False)
         except FileNotFoundError:
+            # 如果找不到模板文件，则记录一条调试信息
             logging.debug(f"Template {template_path} not found")
 
 
@@ -164,13 +298,18 @@ def testgen_repos(
     max_len: int = 100,
     sim_thresh: float = 0.8,
 ):
-    """Generate tests from fuzz inputs
+    """
+    从模糊测试的输入数据生成最终的测试用例。
 
     Args:
-        repos (list[str]): list of repo paths
-        jobs (int): number of parallel jobs to use
-        n_fuzz (int, optional): number of fuzz data to use. Defaults to 100.
+        repos (list[str]): 仓库路径列表。
+        jobs (int): 并行任务数。
+        n_fuzz (int, optional): 每个目标要使用的模糊测试输入数量。默认为 100。
+        strategy (str, optional): 选择输入的策略。默认为 "shuffle"。
+        max_len (int, optional): 输入的最大长度。默认为 100。
+        sim_thresh (float, optional): 输入的相似度阈值。默认为 0.8。
     """
+    # 首先，获取所有仓库的模糊测试目标
     target_map = parallel_subprocess(
         repos,
         jobs,
@@ -178,13 +317,17 @@ def testgen_repos(
             ["cargo", "fuzz", "list"], cwd=path, stdout=subprocess.PIPE
         ),
         on_exit=get_target_list,
-        use_tqdm=False,
+        use_tqdm=False, # 不在此处显示进度条
     )
+    # 记录日志
     logging.info("Substitute fuzz data to test templates")
+    # 使用进程池并行处理每个仓库的替换任务
     with ProcessingPool(jobs) as p:
+        # 使用 tqdm 显示总体进度
         _ = list(
             tqdm(
                 p.map(
+                    # 对 target_map 中的每个项（仓库及其目标列表）调用 substitute_one_repo
                     lambda item: substitute_one_repo(
                         item[0], item[1], n_fuzz, strategy, max_len, sim_thresh
                     ),
@@ -206,37 +349,43 @@ def main(
     max_len: int = 100,
     sim_thresh: float = 0.8,
 ):
-    """collect fuzzing data from rust repos
+    """
+    从 Rust 仓库中收集模糊测试数据的主函数。
 
     Args:
-        repo_id (str, optional): repo id. Defaults to "marshallpierce/rust-base64".
-        repo_root (str, optional): directory contains all the repos. Defaults to "data/rust_repos/".
-        timeout (int, optional): max_total_time to fuzz. Defaults to 60.
-        jobs (int, optional): number of parallel jobs to use. Defaults to CORES.
-        limits (Optional[int], optional): number of repos to process, None if use all of them.
-        pipeline (str, optional): what to do. Defaults to "transform".
-
-        --- below only needed for testgen pipeline ---
-        n_fuzz (int, optional): number of fuzz data to use. Defaults to 100.
-        strategy (str, optional): shuffle or reverse,
-        max_len (int, optional): maximum length for fuzzing inputs
-        sim_thresh (float, optional), similarity threshold for fuzzing inputs
+        repo_id (str, optional): 单个仓库 ID 或包含多个仓库 ID 的文件路径。
+        repo_root (str, optional): 存放所有仓库的根目录。
+        timeout (int, optional): 模糊测试的超时时间。
+        jobs (int, optional): 并行任务数。
+        limits (Optional[int], optional): 要处理的仓库数量上限，None 表示处理所有。
+        pipeline (str, optional): 要执行的流程（"transform", "build", "fuzz", "testgen", "all"）。
+        --- 以下参数仅用于 testgen 流程 ---
+        n_fuzz (int, optional): 使用的模糊测试输入数量。
+        strategy (str, optional): 选择输入的策略。
+        max_len (int, optional): 输入的最大长度。
+        sim_thresh (float, optional): 输入的相似度阈值。
     """
     try:
+        # 尝试将 repo_id 作为一个文件路径打开，读取仓库 ID 列表
         repo_id_list = [
             ll for line in open(repo_id, "r").readlines() if len(ll := line.strip()) > 0
         ]
     except FileNotFoundError:
+        # 如果文件不存在，则认为 repo_id 就是单个仓库的 ID
         repo_id_list = [repo_id]
+    
+    # 如果设置了数量限制，则对列表进行切片
     if limits is not None:
         repo_id_list = repo_id_list[:limits]
     logging.info(f"Loaded {len(repo_id_list)} repos to be processed")
 
     logging.info("Collecting all rust repos")
     repos = []
+    # 遍历仓库 ID 列表，构建完整的本地路径
     for repo_id in repo_id_list:
         repo_path = os.path.join(repo_root, wrap_repo(repo_id))
         if os.path.exists(repo_path) and os.path.isdir(repo_path):
+            # 找到仓库下的第一个子目录（通常是项目本身）
             subdirectories = [
                 os.path.join(repo_path, d)
                 for d in os.listdir(repo_path)
@@ -244,17 +393,18 @@ def main(
             ]
             repos.append(abspath(subdirectories[0]))
 
+    # 根据 `pipeline` 参数选择要执行的流程
     match pipeline:
-        case "transform":
-            transform_repos(repos, jobs)
-        case "build":
-            build(repos, jobs)
+        # case "transform":
+        #     transform_repos(repos, jobs)
+        case "build_image":
+            build_image(repos, jobs)
         case "fuzz":
             fuzz_repos(repos, jobs, timeout=timeout)
         case "testgen":
             testgen_repos(repos, jobs, n_fuzz, strategy, max_len, sim_thresh)
-        case "all":
-            transform_repos(repos, jobs)
+        case "all": # 执行全部流程
+            # transform_repos(repos, jobs)
             build(repos, jobs)
             fuzz_repos(repos, jobs, timeout=timeout)
             testgen_repos(repos, jobs, n_fuzz, strategy, max_len, sim_thresh)
@@ -262,6 +412,9 @@ def main(
             logging.error(f"Unknown pipeline {pipeline}")
 
 
+# 当脚本作为主程序执行时
 if __name__ == "__main__":
+    # 配置日志记录的基本设置，级别为 INFO
     logging.basicConfig(level=logging.INFO)
-    fire.Fire(main)
+    # 使用 fire 库将 main 函数暴露为命令行接口
+    fire.Fire(main)
\ No newline at end of file

From 6e5c7644ba6f64c0148e02ac9ac3bdd07bfba949 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Mon, 4 Aug 2025 19:13:24 +0000
Subject: [PATCH 076/134] y/n

---
 fuzz/collect_fuzz_python.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py
index 6073b34..d590fa5 100644
--- a/fuzz/collect_fuzz_python.py
+++ b/fuzz/collect_fuzz_python.py
@@ -111,7 +111,7 @@ def fuzz_one_target(target: tuple[str, str], timeout):
 
 def build_image(repos: list[str], jobs: int):
     """
-    构建每个仓库对应的 OSS-Fuzz 项目的 Docker 镜像。
+    构建每个仓库对应的 OSS-Fuzz 项目的 Docker 镜像，并自动回答 y/N 提示为 y。
 
     Args:
         repos (list[str]): 仓库路径列表（每个应包含一个已接入 OSS-Fuzz 的项目）。
@@ -120,13 +120,13 @@ def build_image(repos: list[str], jobs: int):
     logging.info(f"Building Docker images for {len(repos)} OSS-Fuzz projects")
 
     def _build_cmd(path: str):
-        # 获取 OSS-Fuzz 项目名（例如路径最后一段或自定义映射）
         project_name = os.path.basename(path.rstrip("/"))
         return subprocess.Popen(
-            ["python3", "infra/helper.py", "build_image", project_name],
-            cwd=os.path.abspath(os.path.join(path, "../../")),  # 仓库路径的上上一级
+            ["yes", "|", "python3", "infra/helper.py", "build_image", project_name],
+            cwd=os.path.abspath(os.path.join(path, "../../")),
             stdout=subprocess.DEVNULL,
             stderr=subprocess.DEVNULL,
+            shell=True  # 使用 shell 来执行带管道的命令
         )
 
     _ = parallel_subprocess(
@@ -136,6 +136,7 @@ def _build_cmd(path: str):
         on_exit=None,
     )
 
+
 def fuzz_repos(repos: list[str], jobs: int, timeout: int = 60):
     """
     对一组仓库执行模糊测试。

From 00290598dc9d888fff8c861ae2bd835f1bf9a5a5 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Mon, 4 Aug 2025 19:28:28 +0000
Subject: [PATCH 077/134] correct repo_id and repo_name in main

---
 fuzz/collect_fuzz_python.py | 62 ++++++++++++++-----------------------
 1 file changed, 24 insertions(+), 38 deletions(-)

diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py
index d590fa5..c1ebd44 100644
--- a/fuzz/collect_fuzz_python.py
+++ b/fuzz/collect_fuzz_python.py
@@ -339,8 +339,8 @@ def testgen_repos(
 
 
 def main(
-    repo_id: str = "image-rs/image-png",
-    repo_root: str = "data/rust_repos/",
+    repo_id: str = "../data/valid_projects3.txt",
+    repo_root: str = "oss-fuzz/projects/",
     timeout: int = 60,
     jobs: int = 80,
     limits: Optional[int] = None,
@@ -351,68 +351,54 @@ def main(
     sim_thresh: float = 0.8,
 ):
     """
-    从 Rust 仓库中收集模糊测试数据的主函数。
+    从多个 OSS-Fuzz 项目中执行指定阶段（构建镜像、模糊测试、测试生成等）的主函数。
 
     Args:
-        repo_id (str, optional): 单个仓库 ID 或包含多个仓库 ID 的文件路径。
-        repo_root (str, optional): 存放所有仓库的根目录。
-        timeout (int, optional): 模糊测试的超时时间。
-        jobs (int, optional): 并行任务数。
-        limits (Optional[int], optional): 要处理的仓库数量上限，None 表示处理所有。
-        pipeline (str, optional): 要执行的流程（"transform", "build", "fuzz", "testgen", "all"）。
-        --- 以下参数仅用于 testgen 流程 ---
-        n_fuzz (int, optional): 使用的模糊测试输入数量。
-        strategy (str, optional): 选择输入的策略。
-        max_len (int, optional): 输入的最大长度。
-        sim_thresh (float, optional): 输入的相似度阈值。
+        repo_id (str): 文件路径，包含 OSS-Fuzz 项目名称（每行一个）。
+        repo_root (str): 所有 OSS-Fuzz 项目所在的根目录。
+        timeout (int): 模糊测试的超时时间。
+        jobs (int): 并行任务数。
+        limits (Optional[int]): 处理项目数量的上限。
+        pipeline (str): 执行阶段：build_image, fuzz, testgen, all。
+        n_fuzz, strategy, max_len, sim_thresh: testgen 参数。
     """
     try:
-        # 尝试将 repo_id 作为一个文件路径打开，读取仓库 ID 列表
-        repo_id_list = [
-            ll for line in open(repo_id, "r").readlines() if len(ll := line.strip()) > 0
-        ]
+        with open(repo_id, "r") as f:
+            repo_id_list = [line.strip() for line in f if line.strip()]
     except FileNotFoundError:
-        # 如果文件不存在，则认为 repo_id 就是单个仓库的 ID
         repo_id_list = [repo_id]
-    
-    # 如果设置了数量限制，则对列表进行切片
+
     if limits is not None:
         repo_id_list = repo_id_list[:limits]
+
     logging.info(f"Loaded {len(repo_id_list)} repos to be processed")
 
-    logging.info("Collecting all rust repos")
+    logging.info("Collecting all OSS-Fuzz project directories")
     repos = []
-    # 遍历仓库 ID 列表，构建完整的本地路径
     for repo_id in repo_id_list:
-        repo_path = os.path.join(repo_root, wrap_repo(repo_id))
-        if os.path.exists(repo_path) and os.path.isdir(repo_path):
-            # 找到仓库下的第一个子目录（通常是项目本身）
-            subdirectories = [
-                os.path.join(repo_path, d)
-                for d in os.listdir(repo_path)
-                if os.path.isdir(os.path.join(repo_path, d))
-            ]
-            repos.append(abspath(subdirectories[0]))
+        repo_path = abspath(os.path.join(repo_root, repo_id))
+        if os.path.isdir(repo_path):
+            repos.append(repo_path)
 
-    # 根据 `pipeline` 参数选择要执行的流程
     match pipeline:
-        # case "transform":
-        #     transform_repos(repos, jobs)
         case "build_image":
             build_image(repos, jobs)
         case "fuzz":
             fuzz_repos(repos, jobs, timeout=timeout)
         case "testgen":
             testgen_repos(repos, jobs, n_fuzz, strategy, max_len, sim_thresh)
-        case "all": # 执行全部流程
-            # transform_repos(repos, jobs)
-            build(repos, jobs)
+        case "all":
+            build_image(repos, jobs)
             fuzz_repos(repos, jobs, timeout=timeout)
             testgen_repos(repos, jobs, n_fuzz, strategy, max_len, sim_thresh)
         case _:
             logging.error(f"Unknown pipeline {pipeline}")
 
 
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO)
+    fire.Fire(main)
+
 # 当脚本作为主程序执行时
 if __name__ == "__main__":
     # 配置日志记录的基本设置，级别为 INFO

From 1d815ebd8d75a3c739cc253737346cc822c73bee Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Mon, 4 Aug 2025 19:48:25 +0000
Subject: [PATCH 078/134] =?UTF-8?q?test=20build=5Fimage=20=E6=9E=84?=
 =?UTF-8?q?=E5=BB=BA=E6=97=A5=E5=BF=97?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 fuzz/collect_fuzz_python.py | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py
index c1ebd44..32152ab 100644
--- a/fuzz/collect_fuzz_python.py
+++ b/fuzz/collect_fuzz_python.py
@@ -109,9 +109,11 @@ def fuzz_one_target(target: tuple[str, str], timeout):
         )
 
 
+from datetime import datetime
+
 def build_image(repos: list[str], jobs: int):
     """
-    构建每个仓库对应的 OSS-Fuzz 项目的 Docker 镜像，并自动回答 y/N 提示为 y。
+    构建每个仓库对应的 OSS-Fuzz 项目的 Docker 镜像，并将构建日志写入 fuzz_pipeline_log 目录。
 
     Args:
         repos (list[str]): 仓库路径列表（每个应包含一个已接入 OSS-Fuzz 的项目）。
@@ -119,14 +121,21 @@ def build_image(repos: list[str], jobs: int):
     """
     logging.info(f"Building Docker images for {len(repos)} OSS-Fuzz projects")
 
+    log_dir = os.path.abspath("fuzz_pipeline_log")
+    os.makedirs(log_dir, exist_ok=True)
+
     def _build_cmd(path: str):
         project_name = os.path.basename(path.rstrip("/"))
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        log_file = os.path.join(log_dir, f"{project_name}_{timestamp}.log")
+
+        logging.info(f"Start building {project_name}, logging to {log_file}")
         return subprocess.Popen(
-            ["yes", "|", "python3", "infra/helper.py", "build_image", project_name],
+            f"yes | python3 infra/helper.py build_image {project_name}",
             cwd=os.path.abspath(os.path.join(path, "../../")),
-            stdout=subprocess.DEVNULL,
-            stderr=subprocess.DEVNULL,
-            shell=True  # 使用 shell 来执行带管道的命令
+            stdout=open(log_file, "w"),
+            stderr=subprocess.STDOUT,
+            shell=True,
         )
 
     _ = parallel_subprocess(
@@ -339,8 +348,8 @@ def testgen_repos(
 
 
 def main(
-    repo_id: str = "../data/valid_projects3.txt",
-    repo_root: str = "oss-fuzz/projects/",
+    repo_id: str = "data/valid_projects3.txt",
+    repo_root: str = "fuzz/oss-fuzz/projects/",
     timeout: int = 60,
     jobs: int = 80,
     limits: Optional[int] = None,

From 4333456e89ba6e9ffd537dc2000379bea5908daf Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Mon, 4 Aug 2025 19:59:30 +0000
Subject: [PATCH 079/134] add build_fuzzer

---
 fuzz/collect_fuzz_python.py | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py
index 32152ab..8dc5350 100644
--- a/fuzz/collect_fuzz_python.py
+++ b/fuzz/collect_fuzz_python.py
@@ -144,6 +144,39 @@ def _build_cmd(path: str):
         _build_cmd,
         on_exit=None,
     )
+def build_fuzzer(repos: list[str], jobs: int):
+    """
+    对构建成功的项目并行构建模糊测试器
+    
+    Args:
+        repos (list[str]): 仓库路径列表（每个应包含一个已接入 OSS-Fuzz 的项目）。
+        jobs (int): 并行任务数。
+    """
+    logging.info(f"Building fuzzers for {len(repos)} OSS-Fuzz projects")
+    
+    log_dir = os.path.abspath("fuzz_pipeline_log")
+    os.makedirs(log_dir, exist_ok=True)
+
+    def _build_cmd(path: str):
+        project_name = os.path.basename(path.rstrip("/"))
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        log_file = os.path.join(log_dir, f"{project_name}_fuzzer_{timestamp}.log")
+
+        logging.info(f"Start building fuzzers for {project_name}, logging to {log_file}")
+        return subprocess.Popen(
+            f"python3 infra/helper.py build_fuzzers --sanitizer address {project_name}",
+            cwd=os.path.abspath(os.path.join(path, "../../")),
+            stdout=open(log_file, "w"),
+            stderr=subprocess.STDOUT,
+            shell=True,
+        )
+
+    _ = parallel_subprocess(
+        repos,
+        jobs,
+        _build_cmd,
+        on_exit=None,
+    )
 
 
 def fuzz_repos(repos: list[str], jobs: int, timeout: int = 60):
@@ -392,18 +425,22 @@ def main(
     match pipeline:
         case "build_image":
             build_image(repos, jobs)
+        case "build_fuzzer":
+            build_fuzzer(repos, jobs)
         case "fuzz":
             fuzz_repos(repos, jobs, timeout=timeout)
         case "testgen":
             testgen_repos(repos, jobs, n_fuzz, strategy, max_len, sim_thresh)
         case "all":
             build_image(repos, jobs)
+            build_fuzzer(repos, jobs)  # 在构建镜像后添加构建模糊测试器阶段
             fuzz_repos(repos, jobs, timeout=timeout)
             testgen_repos(repos, jobs, n_fuzz, strategy, max_len, sim_thresh)
         case _:
             logging.error(f"Unknown pipeline {pipeline}")
 
 
+
 if __name__ == "__main__":
     logging.basicConfig(level=logging.INFO)
     fire.Fire(main)

From 3a0565f62a9effc0900f537a84420a051bc3c166 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Mon, 4 Aug 2025 23:35:37 +0000
Subject: [PATCH 080/134] fuzz and testgen

---
 fuzz/collect_fuzz_python.py | 385 ++++++++++++++++++++++++------------
 1 file changed, 258 insertions(+), 127 deletions(-)

diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py
index 8dc5350..7b0256c 100644
--- a/fuzz/collect_fuzz_python.py
+++ b/fuzz/collect_fuzz_python.py
@@ -1,6 +1,9 @@
 """
 用于 python 项目模糊测试（fuzzing）和测试模板转换的脚本
+
+PYTHONPATH=. python3 fuzz/collect_fuzz_python.py --pipeline build_fuzzers
 """
+from pathlib import Path
 # 导入日志记录模块，用于输出程序运行信息
 import logging
 # 从 typing 模块导入 Optional，用于类型提示，表示一个值可以是某个类型或者 None
@@ -25,6 +28,7 @@
 from difflib import SequenceMatcher
 # 从 itertools 导入 islice，用于对迭代器进行切片操作
 from itertools import islice
+from datetime import datetime
 
 
 # def transform_repos(repos: list[str], jobs: int):
@@ -78,38 +82,38 @@ def get_target_list(p: subprocess.Popen):
             return p.stdout.read().decode("utf-8").split("\n")
 
 
-def fuzz_one_target(target: tuple[str, str], timeout):
-    """
-    对单个模糊测试目标执行 `cargo fuzz run` 命令。
+# def fuzz_one_target(target: tuple[str, str], timeout):
+#     """
+#     对单个模糊测试目标执行 `cargo fuzz run` 命令。
 
-    Args:
-        target (tuple[str, str]): 一个元组，包含仓库路径和目标名称。
-        timeout (int): 模糊测试的超时时间（秒）。
+#     Args:
+#         target (tuple[str, str]): 一个元组，包含仓库路径和目标名称。
+#         timeout (int): 模糊测试的超时时间（秒）。
     
-    Returns:
-        subprocess.Popen: 启动的模糊测试子进程对象。
-    """
-    # 解包元组，获取仓库路径和目标名称
-    repo_path, target_name = target
-    # 创建一个文件用于存放该目标的模糊测试输入
-    with open(pjoin(repo_path, "fuzz_inputs", target_name), "w") as f:
-        # 启动一个子进程来执行模糊测试命令
-        return subprocess.Popen(
-            # todo: 研究为什么 -max_total_time 参数不起作用
-            # ["cargo", "fuzz", "run", target_name, "--", f"-max_total_time={timeout}"],
-            # 使用 bash -c 和 timeout 命令来强制实现超时功能
-            [
-                "bash",
-                "-c",
-                f"timeout {timeout} cargo fuzz run {target_name}",
-            ],
-            cwd=repo_path,  # 在指定的仓库路径下执行命令
-            stdout=f,  # 将标准输出重定向到文件
-            stderr=subprocess.DEVNULL,  # 丢弃标准错误输出
-        )
+#     Returns:
+#         subprocess.Popen: 启动的模糊测试子进程对象。
+#     """
+#     # 解包元组，获取仓库路径和目标名称
+#     repo_path, target_name = target
+#     # 创建一个文件用于存放该目标的模糊测试输入
+#     with open(pjoin(repo_path, "fuzz_inputs", target_name), "w") as f:
+#         # 启动一个子进程来执行模糊测试命令
+#         return subprocess.Popen(
+#             # todo: 研究为什么 -max_total_time 参数不起作用
+#             # ["cargo", "fuzz", "run", target_name, "--", f"-max_total_time={timeout}"],
+#             # 使用 bash -c 和 timeout 命令来强制实现超时功能
+#             [
+#                 "bash",
+#                 "-c",
+#                 f"timeout {timeout} python3 infra/helper.py run_fuzzer {target_name}",
+#             ],
+#             cwd=repo_path,  # 在指定的仓库路径下执行命令
+#             stdout=f,  # 将标准输出重定向到文件
+#             stderr=subprocess.DEVNULL,  # 丢弃标准错误输出
+#         )
 
 
-from datetime import datetime
+# from datetime import datetime
 
 def build_image(repos: list[str], jobs: int):
     """
@@ -179,82 +183,213 @@ def _build_cmd(path: str):
     )
 
 
+# 添加新的目标发现函数
+def discover_targets(project_name: str, oss_fuzz_dir: Path, logger: logging.Logger) -> list[str]:
+    """Discover fuzz targets for a project (starting with 'fuzz_', ending with 'print1', no extension, and executable)"""
+    out_dir = oss_fuzz_dir / "build" / "out" / project_name
+    targets: list[str] = []
+    logger = logger.getChild("discover_targets")  # 使用子日志器
+
+    if not out_dir.is_dir():
+        logger.warning(f"Build output directory for project {project_name} does not exist: {out_dir}")
+        return targets
+
+    try:
+        for f in out_dir.iterdir():
+            try:
+                if (f.is_file() and
+                        f.name.startswith("fuzz_") and
+                        '.' not in f.name and
+                        f.name.endswith("print1") and
+                        os.access(f, os.X_OK)):
+                    logger.info(f"🔍 Discovered target: {f.name}")
+                    targets.append(f.name)
+            except OSError as e:
+                logger.warning(f"⚠️ Error while checking file {f.name}, skipped: {e}")
+        logger.info(f"🎯 Found {len(targets)} valid targets for {project_name}")
+
+    except PermissionError:
+        logger.error(f"🔒 Insufficient permissions to access directory: {out_dir}")
+    except OSError as e:
+        logger.exception(f"💥 Operating system error occurred while discovering targets: {e}")
+    
+    return targets
+
+
+# 重写fuzz_repos函数使用新的目标发现机制
 def fuzz_repos(repos: list[str], jobs: int, timeout: int = 60):
     """
-    对一组仓库执行模糊测试。
-
+    对一组仓库执行模糊测试（使用新的目标发现机制和infra/helper.py run_fuzzer）。
+    
     Args:
         repos (list[str]): 仓库路径列表。
         jobs (int): 并行任务数。
-        timeout (int, optional): 每个目标的模糊测试超时时间（秒）。默认为 60。
+        timeout (int, optional): 每个目标的模糊测试超时时间（秒）。默认为 60.
     """
+    # 删除有问题的旧代码（f"timeout {timeout} cargo fuzz run {target_name}"）
+    
     # 记录日志，说明正在收集所有模糊测试目标
-    logging.info("Collecting all fuzz targets")
-
-    # 并行执行 `cargo fuzz list` 来获取所有仓库的模糊测试目标
-    target_map = parallel_subprocess(
-        repos,
-        jobs,
-        lambda path: subprocess.Popen(
-            ["cargo", "fuzz", "list"], cwd=path, stdout=subprocess.PIPE
-        ),
-        on_exit=get_target_list, # 使用 get_target_list 函数处理每个子进程的输出
-    )
-    # 将 `target_map` 整理成一个 (仓库路径, 目标名称) 的元组列表
+    logging.info("🔍 使用infra/helper.py方法发现模糊测试目标")
+    
+    # 确保日志目录存在
+    log_dir = Path("fuzz_run_logs")
+    log_dir.mkdir(exist_ok=True, parents=True)
+    
+    # 定义获取目标列表的函数
+    def get_targets_for_repo(repo: str) -> list[str]:
+        """获取单个仓库的目标列表"""
+        project_name = os.path.basename(repo)
+        oss_fuzz_dir = Path(repo).parent.parent
+        logger = logging.getLogger(f"targets.{project_name}")
+        return discover_targets(project_name, oss_fuzz_dir, logger)
+    
+    # 并行获取目标列表
+    with ProcessingPool(jobs) as p:
+        targets_list = list(tqdm(
+            p.map(get_targets_for_repo, repos),
+            total=len(repos),
+            desc="Discovering targets"
+        ))
+    
+    # 创建目标映射
+    target_map = {repo: targets for repo, targets in zip(repos, targets_list)}
+    
+    # 将目标映射整理成元组列表
     targets: list[tuple[str, str]] = [
         (k, v) for k, vs in target_map.items() for v in vs if len(v) > 0
     ]
+    
+    # 记录发现的目标数量
+    logging.info(f"🎯 在 {len(repos)} 个项目中发现了 {len(targets)} 个目标")
+    
     # 为每个仓库创建存放模糊测试输入的目录
     for repo in repos:
-        os.makedirs(pjoin(repo, "fuzz_inputs"), exist_ok=True)
-
-    # 记录日志，说明即将开始模糊测试
-    logging.info(f"Running cargo fuzz on {len(targets)} targets for {timeout} seconds")
+        inputs_dir = pjoin(repo, "fuzz_inputs")
+        os.makedirs(inputs_dir, exist_ok=True)
+    
+    # 启动模糊测试
+    logging.info(f"🚀 开始在 {len(targets)} 个目标上运行模糊测试（每个目标 {timeout} 秒）")
+    
     # 并行执行模糊测试
-    parallel_subprocess(
-        targets, jobs, lambda p: fuzz_one_target(p, timeout), on_exit=None
+    results = parallel_subprocess(
+        targets, 
+        jobs, 
+        lambda p: fuzz_one_target(p, timeout), 
+        on_exit=None
+        # desc="Running fuzzers"
     )
+    
+    # 记录结果统计
+    failed = sum(1 for r in results if r is None)
+    logging.info(f"✅ 模糊测试完成：成功 {len(results) - failed} 个目标，失败 {failed} 个目标")
 
 
-def substitute_input(template: str, input_data: str, idx: int) -> str:
+# 修改fuzz_one_target函数使用infra/helper.py
+def fuzz_one_target(target: tuple[str, str], timeout: int):
     """
-    将模糊测试的输入数据替换到测试模板中。
+    对单个模糊测试目标执行模糊测试命令。
 
     Args:
-        template (str): 测试模板字符串。
-        input_data (str): 单条模糊测试输入数据。
-        idx (int): 测试用例的索引号。
+        target (tuple[str, str]): 一个元组，包含仓库路径和目标名称。
+        timeout (int): 模糊测试的超时时间（秒）。
     
     Returns:
-        str: 替换完成后的测试代码字符串。
+        subprocess.Popen: 启动的模糊测试子进程对象。
     """
-    # 替换模板中的占位符为实际的输入数据
-    return template.replace(
-        '[] ; # [doc = "This is a test template"]', f"{input_data} ; "
-    # 替换模板中的函数名为唯一的测试函数名
-    ).replace("fn test_something ()", f"fn test_{idx} ()")
-
+    # 解包元组，获取仓库路径和目标名称
+    repo_path, target_name = target
+    project_name = os.path.basename(repo_path)
+    oss_fuzz_root = os.path.dirname(os.path.dirname(repo_path))
+    
+    # 创建一个文件用于存放该目标的模糊测试输入
+    input_file_path = pjoin(repo_path, "fuzz_inputs", target_name)
+    os.makedirs(os.path.dirname(input_file_path), exist_ok=True)
+    
+    # 创建日志文件路径
+    log_dir = os.path.abspath("fuzz_run_logs")
+    os.makedirs(log_dir, exist_ok=True)
+    log_file_path = pjoin(log_dir, f"{project_name}_{target_name}.log")
+    
+    try:
+        # 启动一个子进程来执行模糊测试命令
+        return subprocess.Popen(
+            [
+                "bash",
+                "-c",
+                f"timeout {timeout} python3 infra/helper.py run_fuzzer {project_name} {target_name}"
+            ],
+            cwd=oss_fuzz_root,  # 在OSS-Fuzz根目录下执行命令
+            stdout=open(input_file_path, "w"),  # 将模糊测试输入重定向到文件
+            stderr=open(log_file_path, "w"),    # 将日志输出重定向到日志文件
+        )
+    except Exception as e:
+        logging.error(f"Error starting fuzzer for target {target_name} in project {project_name}: {e}")
+        return None
 
-def has_similar(selected: list[str], x: str, thresh: float = 0.8) -> bool:
+# 修改testgen_repos函数使用新的目标发现方法
+def testgen_repos(
+    repos: list[str],
+    jobs: int,
+    n_fuzz: int = 100,
+    strategy: str = "shuffle",
+    max_len: int = 100,
+    sim_thresh: float = 0.8,
+):
     """
-    检查字符串 `x` 是否与 `selected` 列表中的任何字符串足够相似。
+    从模糊测试的输入数据生成最终的测试用例（使用新的目标发现方法）。
 
     Args:
-        selected (list[str]): 已选择的字符串列表。
-        x (str): 待检查的字符串。
-        thresh (float, optional): 相似度阈值。默认为 0.8。
-    
-    Returns:
-        bool: 如果存在相似字符串，则返回 True，否则返回 False。
+        repos (list[str]): 仓库路径列表。
+        jobs (int): 并行任务数。
+        n_fuzz (int, optional): 每个目标要使用的模糊测试输入数量。默认为 100。
+        strategy (str, optional): 选择输入的策略。默认为 "shuffle"。
+        max_len (int, optional): 输入的最大长度。默认为 100。
+        sim_thresh (float, optional): 输入的相似度阈值。默认为 0.8。
     """
-    # 定义一个内部函数来计算两个字符串的相似度
-    def similar(a, b):
-        return SequenceMatcher(None, a, b).ratio()
-
-    # 检查列表中是否有任何一个字符串与 x 的相似度超过阈值
-    return any(map(lambda y: similar(x, y) > thresh, selected))
+    # 使用新的目标发现方法
+    def get_targets_for_repo(repo: str) -> list[str]:
+        """获取单个仓库的目标列表"""
+        project_name = os.path.basename(repo)
+        oss_fuzz_dir = Path(repo).parent.parent
+        logger = logging.getLogger(f"testgen.{project_name}")
+        return discover_targets(project_name, oss_fuzz_dir, logger)
+    
+    # 并行获取目标列表
+    with ProcessingPool(jobs) as p:
+        targets_list = list(tqdm(
+            p.map(get_targets_for_repo, repos),
+            total=len(repos),
+            desc="Discovering targets for testgen"
+        ))
+    
+    # 创建目标映射
+    target_map = {repo: targets for repo, targets in zip(repos, targets_list)}
+    
+    # 记录日志
+    logging.info("📝 Substitute fuzz data to test templates")
+    
+    # 使用进程池并行处理每个仓库的替换任务
+    with ProcessingPool(jobs) as p:
+        # 使用 tqdm 显示总体进度
+        results = list(
+            tqdm(
+                p.imap(
+                    lambda item: substitute_one_repo(
+                        item[0], item[1], n_fuzz, strategy, max_len, sim_thresh
+                    ),
+                    target_map.items(),
+                ),
+                total=len(target_map),
+                desc="Generating tests"
+            )
+        )
+    
+    # 记录完成情况
+    successful_repos = sum(1 for r in results if r is not None)
+    logging.info(f"✅ Completed test generation for {successful_repos}/{len(repos)} projects")
 
 
+# 修改substitute_one_repo以返回状态
 def substitute_one_repo(
     repo: str,
     targets: list[str],
@@ -262,7 +397,7 @@ def substitute_one_repo(
     strategy: str,
     max_len: int,
     sim_thresh: float,
-):
+) -> Optional[int]:
     """
     处理单个仓库，将其模糊测试输入替换到测试模板中以生成测试文件。
 
@@ -273,24 +408,49 @@ def substitute_one_repo(
         strategy (str): 选择输入的策略（"shuffle", "reverse" 等）。
         max_len (int): 模糊测试输入的最大长度。
         sim_thresh (float): 用于去重的相似度阈值。
+
+    Returns:
+        int: 成功处理的目标数量，或出错时为None
     """
+    logger = logging.getLogger(f"substitute.{os.path.basename(repo)}")
     # 定义模板目录和输入目录的路径
     template_dir = pjoin(repo, "tests-gen")
     input_dir = pjoin(repo, "fuzz_inputs")
+    
+    success_count = 0
+    
     # 遍历该仓库的所有模糊测试目标
     for t in targets:
-        if t == "": # 跳过空目标
+        if not t:  # 跳过空目标
             continue
 
         template_path = pjoin(template_dir, t + ".rs")
+        input_path = pjoin(input_dir, t)
+        
         try:
+            # 检查文件是否存在
+            if not os.path.exists(template_path):
+                logger.warning(f"📄 Template file not found: {template_path}")
+                continue
+                
+            if not os.path.exists(input_path):
+                logger.warning(f"📄 Input file not found: {input_path}")
+                continue
+                
             # 读取测试模板文件
             with open(template_path) as f_template:
                 template = f_template.read()
+                
             # 读取对应的模糊测试输入文件
-            with open(pjoin(input_dir, t), "r") as f_input:
+            with open(input_path, "r") as f_input:
                 # 过滤掉空的输入行
                 all_inputs = [i for i in f_input.read().splitlines() if i != "[]"]
+                
+            if not all_inputs:
+                logger.warning(f"⚠️ No valid inputs found for {t}")
+                continue
+                
+            logger.info(f"📥 Loaded {len(all_inputs)} inputs for {t}")
 
             inputs: list[str]
             # 根据不同的策略来选择输入数据
@@ -315,69 +475,40 @@ def substitute_one_repo(
                 # 默认策略：直接取前 n_fuzz 个输入
                 inputs = all_inputs[:n_fuzz]
 
+            logger.info(f"✅ Selected {len(inputs)} inputs after {strategy} strategy")
+
             # 使用选定的输入数据生成测试用例代码
             tests = [
                 substitute_input(template, input_data, i)
                 for i, input_data in enumerate(inputs)
             ]
+            
             # 定义生成的测试文件的路径
             generated_test_path = pjoin(template_dir, f"{t}.inputs.rs")
+            
             # 将生成的测试代码写入文件
             with open(generated_test_path, "w") as f_template:
                 f_template.write("\n".join(tests))
+                
+            logger.info(f"📝 Generated test file: {generated_test_path}")
 
             # 使用 rustfmt 工具格式化生成的测试文件
-            subprocess.run(["rustfmt", str(generated_test_path)], check=False)
-        except FileNotFoundError:
-            # 如果找不到模板文件，则记录一条调试信息
-            logging.debug(f"Template {template_path} not found")
-
+            fmt_result = subprocess.run(["rustfmt", generated_test_path], capture_output=True, text=True)
+            if fmt_result.returncode != 0:
+                logger.warning(f"⚠️ rustfmt failed for {generated_test_path}: {fmt_result.stderr}")
+            else:
+                logger.info("✨ Formatted with rustfmt")
+                
+            success_count += 1
+            
+        except FileNotFoundError as e:
+            logger.error(f"❌ File not found: {e}")
+        except Exception as e:
+            logger.exception(f"💥 Unexpected error processing {t}: {e}")
+            
+    return success_count if success_count > 0 else None
 
-def testgen_repos(
-    repos: list[str],
-    jobs: int,
-    n_fuzz: int = 100,
-    strategy: str = "shuffle",
-    max_len: int = 100,
-    sim_thresh: float = 0.8,
-):
-    """
-    从模糊测试的输入数据生成最终的测试用例。
 
-    Args:
-        repos (list[str]): 仓库路径列表。
-        jobs (int): 并行任务数。
-        n_fuzz (int, optional): 每个目标要使用的模糊测试输入数量。默认为 100。
-        strategy (str, optional): 选择输入的策略。默认为 "shuffle"。
-        max_len (int, optional): 输入的最大长度。默认为 100。
-        sim_thresh (float, optional): 输入的相似度阈值。默认为 0.8。
-    """
-    # 首先，获取所有仓库的模糊测试目标
-    target_map = parallel_subprocess(
-        repos,
-        jobs,
-        lambda path: subprocess.Popen(
-            ["cargo", "fuzz", "list"], cwd=path, stdout=subprocess.PIPE
-        ),
-        on_exit=get_target_list,
-        use_tqdm=False, # 不在此处显示进度条
-    )
-    # 记录日志
-    logging.info("Substitute fuzz data to test templates")
-    # 使用进程池并行处理每个仓库的替换任务
-    with ProcessingPool(jobs) as p:
-        # 使用 tqdm 显示总体进度
-        _ = list(
-            tqdm(
-                p.map(
-                    # 对 target_map 中的每个项（仓库及其目标列表）调用 substitute_one_repo
-                    lambda item: substitute_one_repo(
-                        item[0], item[1], n_fuzz, strategy, max_len, sim_thresh
-                    ),
-                    target_map.items(),
-                )
-            )
-        )
 
 
 def main(

From 779fb408a2ef8b651260715a15a653af147bb123 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Tue, 5 Aug 2025 01:04:27 +0000
Subject: [PATCH 081/134] correct run_one_target

---
 fuzz/collect_fuzz_python.py | 44 ++++++++++++++-----------------------
 1 file changed, 16 insertions(+), 28 deletions(-)

diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py
index 7b0256c..d4cecb8 100644
--- a/fuzz/collect_fuzz_python.py
+++ b/fuzz/collect_fuzz_python.py
@@ -284,47 +284,41 @@ def get_targets_for_repo(repo: str) -> list[str]:
     logging.info(f"✅ 模糊测试完成：成功 {len(results) - failed} 个目标，失败 {failed} 个目标")
 
 
-# 修改fuzz_one_target函数使用infra/helper.py
 def fuzz_one_target(target: tuple[str, str], timeout: int):
-    """
-    对单个模糊测试目标执行模糊测试命令。
-
-    Args:
-        target (tuple[str, str]): 一个元组，包含仓库路径和目标名称。
-        timeout (int): 模糊测试的超时时间（秒）。
-    
-    Returns:
-        subprocess.Popen: 启动的模糊测试子进程对象。
-    """
-    # 解包元组，获取仓库路径和目标名称
     repo_path, target_name = target
     project_name = os.path.basename(repo_path)
     oss_fuzz_root = os.path.dirname(os.path.dirname(repo_path))
     
-    # 创建一个文件用于存放该目标的模糊测试输入
+    # 创建输入文件路径
     input_file_path = pjoin(repo_path, "fuzz_inputs", target_name)
     os.makedirs(os.path.dirname(input_file_path), exist_ok=True)
     
     # 创建日志文件路径
-    log_dir = os.path.abspath("fuzz_run_logs")
-    os.makedirs(log_dir, exist_ok=True)
-    log_file_path = pjoin(log_dir, f"{project_name}_{target_name}.log")
+    log_dir = Path("fuzz_run_logs")
+    log_dir.mkdir(exist_ok=True, parents=True)
+    log_file_path = log_dir / f"{project_name}_{target_name}.log"
     
     try:
-        # 启动一个子进程来执行模糊测试命令
-        return subprocess.Popen(
+        # 打开输入文件和日志文件
+        input_file = open(input_file_path, "w")
+        log_file = open(log_file_path, "w")
+        
+        # 启动子进程
+        proc = subprocess.Popen(
             [
                 "bash",
                 "-c",
                 f"timeout {timeout} python3 infra/helper.py run_fuzzer {project_name} {target_name}"
             ],
-            cwd=oss_fuzz_root,  # 在OSS-Fuzz根目录下执行命令
-            stdout=open(input_file_path, "w"),  # 将模糊测试输入重定向到文件
-            stderr=open(log_file_path, "w"),    # 将日志输出重定向到日志文件
+            cwd=oss_fuzz_root,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
         )
+        return proc
     except Exception as e:
-        logging.error(f"Error starting fuzzer for target {target_name} in project {project_name}: {e}")
+        logging.error(f"Error starting fuzzer for {project_name}/{target_name}: {e}")
         return None
+    
 
 # 修改testgen_repos函数使用新的目标发现方法
 def testgen_repos(
@@ -576,9 +570,3 @@ def main(
     logging.basicConfig(level=logging.INFO)
     fire.Fire(main)
 
-# 当脚本作为主程序执行时
-if __name__ == "__main__":
-    # 配置日志记录的基本设置，级别为 INFO
-    logging.basicConfig(level=logging.INFO)
-    # 使用 fire 库将 main 函数暴露为命令行接口
-    fire.Fire(main)
\ No newline at end of file

From 0afec3b4edd24268d411f8ce5a99bda4801dcbd9 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Tue, 5 Aug 2025 04:15:21 +0000
Subject: [PATCH 082/134] fuzz ok

---
 fuzz/collect_fuzz_python.py | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py
index d4cecb8..1fafe0d 100644
--- a/fuzz/collect_fuzz_python.py
+++ b/fuzz/collect_fuzz_python.py
@@ -280,9 +280,9 @@ def get_targets_for_repo(repo: str) -> list[str]:
     )
     
     # 记录结果统计
-    failed = sum(1 for r in results if r is None)
-    logging.info(f"✅ 模糊测试完成：成功 {len(results) - failed} 个目标，失败 {failed} 个目标")
-
+    failed = sum(1 for r in results.values() if r != 0)
+    success = len(results) - failed
+    logging.info(f"✅ 模糊测试完成：成功 {success} 个目标，失败 {failed} 个目标")
 
 def fuzz_one_target(target: tuple[str, str], timeout: int):
     repo_path, target_name = target
@@ -294,10 +294,10 @@ def fuzz_one_target(target: tuple[str, str], timeout: int):
     os.makedirs(os.path.dirname(input_file_path), exist_ok=True)
     
     # 创建日志文件路径
-    log_dir = Path("fuzz_run_logs")
+    log_dir = Path("fuzz_run_logs3")
     log_dir.mkdir(exist_ok=True, parents=True)
     log_file_path = log_dir / f"{project_name}_{target_name}.log"
-    
+    logging.info(f"[START] Fuzzing: project={project_name}, target={target_name}, timeout={timeout}s")
     try:
         # 打开输入文件和日志文件
         input_file = open(input_file_path, "w")
@@ -308,16 +308,19 @@ def fuzz_one_target(target: tuple[str, str], timeout: int):
             [
                 "bash",
                 "-c",
-                f"timeout {timeout} python3 infra/helper.py run_fuzzer {project_name} {target_name}"
+                f"python3 infra/helper.py run_fuzzer {project_name} {target_name} -- -max_total_time={timeout}"
             ],
             cwd=oss_fuzz_root,
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
+            stdout=input_file,
+            stderr=log_file,
         )
+        logging.info(f"[RUNNING] Subprocess started for {project_name}/{target_name} (PID: {proc.pid})")
         return proc
     except Exception as e:
-        logging.error(f"Error starting fuzzer for {project_name}/{target_name}: {e}")
+        logging.error(f"[ERROR] Failed to start fuzzer for {project_name}/{target_name}: {e}")
         return None
+    finally:
+        logging.info(f"[END] Fuzzing launch attempt completed for {project_name}/{target_name}")
     
 
 # 修改testgen_repos函数使用新的目标发现方法
@@ -418,7 +421,7 @@ def substitute_one_repo(
         if not t:  # 跳过空目标
             continue
 
-        template_path = pjoin(template_dir, t + ".rs")
+        template_path = pjoin(template_dir, t + ".py")
         input_path = pjoin(input_dir, t)
         
         try:
@@ -509,7 +512,7 @@ def main(
     repo_id: str = "data/valid_projects3.txt",
     repo_root: str = "fuzz/oss-fuzz/projects/",
     timeout: int = 60,
-    jobs: int = 80,
+    jobs: int = 2,
     limits: Optional[int] = None,
     pipeline: str = "transform",
     n_fuzz: int = 100,

From ba61ca1c8eb825cfcbfd3b7fdc0aa4e0b6b80aaf Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Tue, 5 Aug 2025 04:24:58 +0000
Subject: [PATCH 083/134] transform

---
 fuzz/collect_fuzz_python.py | 589 ++++++++++--------------------------
 1 file changed, 164 insertions(+), 425 deletions(-)

diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py
index 1fafe0d..796386c 100644
--- a/fuzz/collect_fuzz_python.py
+++ b/fuzz/collect_fuzz_python.py
@@ -1,130 +1,24 @@
 """
-用于 python 项目模糊测试（fuzzing）和测试模板转换的脚本
-
-PYTHONPATH=. python3 fuzz/collect_fuzz_python.py --pipeline build_fuzzers
+用于Python项目模糊测试(fuzzing)和测试模板转换的脚本
 """
 from pathlib import Path
-# 导入日志记录模块，用于输出程序运行信息
 import logging
-# 从 typing 模块导入 Optional，用于类型提示，表示一个值可以是某个类型或者 None
-from typing import Optional
-# 导入 fire 库，用于快速创建命令行界面
+from typing import Optional, List, Tuple
 import fire
-# 导入 os 模块，用于与操作系统交互，如文件路径操作
 import os
-# 从自定义的 UniTSyn 工具库中导入辅助函数
 from UniTSyn.frontend.util import wrap_repo, parallel_subprocess
-# 导入 subprocess 模块，用于创建和管理子进程
 import subprocess
-# 从 os.path 中导入 join 和 abspath，分别用于拼接路径和获取绝对路径
 from os.path import join as pjoin, abspath
-# 导入 tqdm 库，用于显示进度条
 from tqdm import tqdm
-# 从 pathos.multiprocessing 导入 ProcessingPool，用于创建进程池以实现并行处理
 from pathos.multiprocessing import ProcessingPool
-# 导入 random 模块，用于生成随机数
 import random
-# 从 difflib 导入 SequenceMatcher，用于比较序列（如字符串）的相似度
 from difflib import SequenceMatcher
-# 从 itertools 导入 islice，用于对迭代器进行切片操作
 from itertools import islice
 from datetime import datetime
 
-
-# def transform_repos(repos: list[str], jobs: int):
-#     """
-#     对一组仓库执行 `rust-fuzzer-gen` 命令，以生成模糊测试模板。
-
-#     Args:
-#         repos (list[str]): 包含多个仓库路径的列表。
-#         jobs (int): 并行执行的任务数量。
-#     """
-#     def transform_one_repo(repo_path: str):
-#         """
-#         对单个仓库启动 `rust-fuzzer-gen` 进程。
-
-#         Args:
-#             repo_path (str): 单个仓库的路径。
-        
-#         Returns:
-#             subprocess.Popen: 启动的子进程对象。
-#         """
-#         # 启动一个子进程来执行 `rust-fuzzer-gen` 命令
-#         return subprocess.Popen(
-#             ["rust-fuzzer-gen", repo_path],
-#             stdout=subprocess.PIPE,  # 捕获标准输出
-#             stderr=subprocess.PIPE,  # 捕获标准错误
-#         )
-
-#     # 记录日志，说明正在对多少个仓库进行操作
-#     logging.info(f"Running rust-fuzz-gen on {len(repos)} repos")
-#     # 使用并行处理工具来同时对多个仓库执行 transform_one_repo 函数
-#     parallel_subprocess(repos, jobs, transform_one_repo, on_exit=None)
-
-
-def get_target_list(p: subprocess.Popen):
-    """
-    从子进程的输出中解析出模糊测试目标列表。
-
-    Args:
-        p (subprocess.Popen): 一个已完成的子进程对象。
-    
-    Returns:
-        list[str]: 模糊测试目标的名称列表。
-    """
-    # 使用 match 语句检查子进程的标准输出
-    match p.stdout:
-        # 如果标准输出为 None，则返回空列表
-        case None:
-            return []
-        # 否则，读取标准输出，解码为 UTF-8 字符串，并按换行符分割成列表
-        case _:
-            return p.stdout.read().decode("utf-8").split("\n")
-
-
-# def fuzz_one_target(target: tuple[str, str], timeout):
-#     """
-#     对单个模糊测试目标执行 `cargo fuzz run` 命令。
-
-#     Args:
-#         target (tuple[str, str]): 一个元组，包含仓库路径和目标名称。
-#         timeout (int): 模糊测试的超时时间（秒）。
-    
-#     Returns:
-#         subprocess.Popen: 启动的模糊测试子进程对象。
-#     """
-#     # 解包元组，获取仓库路径和目标名称
-#     repo_path, target_name = target
-#     # 创建一个文件用于存放该目标的模糊测试输入
-#     with open(pjoin(repo_path, "fuzz_inputs", target_name), "w") as f:
-#         # 启动一个子进程来执行模糊测试命令
-#         return subprocess.Popen(
-#             # todo: 研究为什么 -max_total_time 参数不起作用
-#             # ["cargo", "fuzz", "run", target_name, "--", f"-max_total_time={timeout}"],
-#             # 使用 bash -c 和 timeout 命令来强制实现超时功能
-#             [
-#                 "bash",
-#                 "-c",
-#                 f"timeout {timeout} python3 infra/helper.py run_fuzzer {target_name}",
-#             ],
-#             cwd=repo_path,  # 在指定的仓库路径下执行命令
-#             stdout=f,  # 将标准输出重定向到文件
-#             stderr=subprocess.DEVNULL,  # 丢弃标准错误输出
-#         )
-
-
-# from datetime import datetime
-
 def build_image(repos: list[str], jobs: int):
-    """
-    构建每个仓库对应的 OSS-Fuzz 项目的 Docker 镜像，并将构建日志写入 fuzz_pipeline_log 目录。
-
-    Args:
-        repos (list[str]): 仓库路径列表（每个应包含一个已接入 OSS-Fuzz 的项目）。
-        jobs (int): 并行任务数。
-    """
+    """构建每个仓库对应的OSS-Fuzz项目的Docker镜像"""
     logging.info(f"Building Docker images for {len(repos)} OSS-Fuzz projects")
-
     log_dir = os.path.abspath("fuzz_pipeline_log")
     os.makedirs(log_dir, exist_ok=True)
 
@@ -132,8 +26,6 @@ def _build_cmd(path: str):
         project_name = os.path.basename(path.rstrip("/"))
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
         log_file = os.path.join(log_dir, f"{project_name}_{timestamp}.log")
-
-        logging.info(f"Start building {project_name}, logging to {log_file}")
         return subprocess.Popen(
             f"yes | python3 infra/helper.py build_image {project_name}",
             cwd=os.path.abspath(os.path.join(path, "../../")),
@@ -142,22 +34,11 @@ def _build_cmd(path: str):
             shell=True,
         )
 
-    _ = parallel_subprocess(
-        repos,
-        jobs,
-        _build_cmd,
-        on_exit=None,
-    )
+    _ = parallel_subprocess(repos, jobs, _build_cmd, on_exit=None)
+
 def build_fuzzer(repos: list[str], jobs: int):
-    """
-    对构建成功的项目并行构建模糊测试器
-    
-    Args:
-        repos (list[str]): 仓库路径列表（每个应包含一个已接入 OSS-Fuzz 的项目）。
-        jobs (int): 并行任务数。
-    """
+    """对构建成功的项目并行构建模糊测试器"""
     logging.info(f"Building fuzzers for {len(repos)} OSS-Fuzz projects")
-    
     log_dir = os.path.abspath("fuzz_pipeline_log")
     os.makedirs(log_dir, exist_ok=True)
 
@@ -165,8 +46,6 @@ def _build_cmd(path: str):
         project_name = os.path.basename(path.rstrip("/"))
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
         log_file = os.path.join(log_dir, f"{project_name}_fuzzer_{timestamp}.log")
-
-        logging.info(f"Start building fuzzers for {project_name}, logging to {log_file}")
         return subprocess.Popen(
             f"python3 infra/helper.py build_fuzzers --sanitizer address {project_name}",
             cwd=os.path.abspath(os.path.join(path, "../../")),
@@ -175,218 +54,132 @@ def _build_cmd(path: str):
             shell=True,
         )
 
-    _ = parallel_subprocess(
-        repos,
-        jobs,
-        _build_cmd,
-        on_exit=None,
-    )
-
+    _ = parallel_subprocess(repos, jobs, _build_cmd, on_exit=None)
 
-# 添加新的目标发现函数
-def discover_targets(project_name: str, oss_fuzz_dir: Path, logger: logging.Logger) -> list[str]:
-    """Discover fuzz targets for a project (starting with 'fuzz_', ending with 'print1', no extension, and executable)"""
+def discover_targets(project_name: str, oss_fuzz_dir: Path) -> list[str]:
+    """发现模糊测试目标"""
     out_dir = oss_fuzz_dir / "build" / "out" / project_name
-    targets: list[str] = []
-    logger = logger.getChild("discover_targets")  # 使用子日志器
-
+    targets = []
+    
     if not out_dir.is_dir():
-        logger.warning(f"Build output directory for project {project_name} does not exist: {out_dir}")
+        logging.warning(f"Build output directory for {project_name} does not exist")
         return targets
 
     try:
         for f in out_dir.iterdir():
-            try:
-                if (f.is_file() and
-                        f.name.startswith("fuzz_") and
-                        '.' not in f.name and
-                        f.name.endswith("print1") and
-                        os.access(f, os.X_OK)):
-                    logger.info(f"🔍 Discovered target: {f.name}")
-                    targets.append(f.name)
-            except OSError as e:
-                logger.warning(f"⚠️ Error while checking file {f.name}, skipped: {e}")
-        logger.info(f"🎯 Found {len(targets)} valid targets for {project_name}")
-
-    except PermissionError:
-        logger.error(f"🔒 Insufficient permissions to access directory: {out_dir}")
-    except OSError as e:
-        logger.exception(f"💥 Operating system error occurred while discovering targets: {e}")
+            if (f.is_file() and f.name.startswith("fuzz_") and 
+                '.' not in f.name and f.name.endswith("print1") and 
+                os.access(f, os.X_OK)):
+                targets.append(f.name)
+    except Exception as e:
+        logging.error(f"Error discovering targets: {e}")
     
     return targets
 
-
-# 重写fuzz_repos函数使用新的目标发现机制
-def fuzz_repos(repos: list[str], jobs: int, timeout: int = 60):
-    """
-    对一组仓库执行模糊测试（使用新的目标发现机制和infra/helper.py run_fuzzer）。
-    
-    Args:
-        repos (list[str]): 仓库路径列表。
-        jobs (int): 并行任务数。
-        timeout (int, optional): 每个目标的模糊测试超时时间（秒）。默认为 60.
-    """
-    # 删除有问题的旧代码（f"timeout {timeout} cargo fuzz run {target_name}"）
+def fuzz_one_target(target: tuple[str, str], timeout: int):
+    """对单个模糊测试目标执行模糊测试"""
+    repo_path, target_name = target
+    project_name = os.path.basename(repo_path)
+    oss_fuzz_root = os.path.dirname(os.path.dirname(repo_path))
     
-    # 记录日志，说明正在收集所有模糊测试目标
-    logging.info("🔍 使用infra/helper.py方法发现模糊测试目标")
+    # 创建输入文件路径
+    input_file_path = pjoin(repo_path, "fuzz_inputs", target_name)
+    os.makedirs(os.path.dirname(input_file_path), exist_ok=True)
     
-    # 确保日志目录存在
-    log_dir = Path("fuzz_run_logs")
-    log_dir.mkdir(exist_ok=True, parents=True)
+    try:
+        with open(input_file_path, "w") as input_file:
+            return subprocess.Popen(
+                [
+                    "bash",
+                    "-c",
+                    f"timeout {timeout} python3 infra/helper.py run_fuzzer {project_name} {target_name}"
+                ],
+                cwd=oss_fuzz_root,
+                stdout=input_file,
+                stderr=subprocess.DEVNULL,
+            )
+    except Exception as e:
+        logging.error(f"Error starting fuzzer: {e}")
+        return None
+
+def fuzz_repos(repos: list[str], jobs: int, timeout: int = 60):
+    """对一组仓库执行模糊测试"""
+    logging.info("Discovering fuzz targets")
     
-    # 定义获取目标列表的函数
-    def get_targets_for_repo(repo: str) -> list[str]:
-        """获取单个仓库的目标列表"""
+    # 获取所有目标
+    targets_list = []
+    for repo in repos:
         project_name = os.path.basename(repo)
         oss_fuzz_dir = Path(repo).parent.parent
-        logger = logging.getLogger(f"targets.{project_name}")
-        return discover_targets(project_name, oss_fuzz_dir, logger)
-    
-    # 并行获取目标列表
-    with ProcessingPool(jobs) as p:
-        targets_list = list(tqdm(
-            p.map(get_targets_for_repo, repos),
-            total=len(repos),
-            desc="Discovering targets"
-        ))
+        targets = discover_targets(project_name, oss_fuzz_dir)
+        targets_list.append(targets)
     
     # 创建目标映射
     target_map = {repo: targets for repo, targets in zip(repos, targets_list)}
-    
-    # 将目标映射整理成元组列表
     targets: list[tuple[str, str]] = [
-        (k, v) for k, vs in target_map.items() for v in vs if len(v) > 0
+        (k, v) for k, vs in target_map.items() for v in vs
     ]
     
-    # 记录发现的目标数量
-    logging.info(f"🎯 在 {len(repos)} 个项目中发现了 {len(targets)} 个目标")
+    logging.info(f"Running fuzzing on {len(targets)} targets")
     
-    # 为每个仓库创建存放模糊测试输入的目录
+    # 创建输入目录
     for repo in repos:
-        inputs_dir = pjoin(repo, "fuzz_inputs")
-        os.makedirs(inputs_dir, exist_ok=True)
-    
-    # 启动模糊测试
-    logging.info(f"🚀 开始在 {len(targets)} 个目标上运行模糊测试（每个目标 {timeout} 秒）")
+        os.makedirs(pjoin(repo, "fuzz_inputs"), exist_ok=True)
     
     # 并行执行模糊测试
-    results = parallel_subprocess(
-        targets, 
-        jobs, 
-        lambda p: fuzz_one_target(p, timeout), 
-        on_exit=None
-        # desc="Running fuzzers"
-    )
-    
-    # 记录结果统计
-    failed = sum(1 for r in results.values() if r != 0)
-    success = len(results) - failed
-    logging.info(f"✅ 模糊测试完成：成功 {success} 个目标，失败 {failed} 个目标")
+    parallel_subprocess(targets, jobs, lambda p: fuzz_one_target(p, timeout), on_exit=None)
 
-def fuzz_one_target(target: tuple[str, str], timeout: int):
-    repo_path, target_name = target
-    project_name = os.path.basename(repo_path)
-    oss_fuzz_root = os.path.dirname(os.path.dirname(repo_path))
+def generate_test_template(target_name: str, repo_path: str):
+    """为单个目标生成测试模板"""
+    template_dir = pjoin(repo_path, "tests-gen")
+    os.makedirs(template_dir, exist_ok=True)
+    template_path = pjoin(template_dir, f"{target_name}.rs")
     
-    # 创建输入文件路径
-    input_file_path = pjoin(repo_path, "fuzz_inputs", target_name)
-    os.makedirs(os.path.dirname(input_file_path), exist_ok=True)
+    # 基本测试模板
+    template = f"""
+    #[test]
+    fn test_{target_name}() {{
+        // 测试逻辑将在这里生成
+        let input = []; // 模糊测试输入将替换这里
+        let result = process_input(&input);
+        assert!(result.is_ok());
+    }}
+    """
     
-    # 创建日志文件路径
-    log_dir = Path("fuzz_run_logs3")
-    log_dir.mkdir(exist_ok=True, parents=True)
-    log_file_path = log_dir / f"{project_name}_{target_name}.log"
-    logging.info(f"[START] Fuzzing: project={project_name}, target={target_name}, timeout={timeout}s")
-    try:
-        # 打开输入文件和日志文件
-        input_file = open(input_file_path, "w")
-        log_file = open(log_file_path, "w")
-        
-        # 启动子进程
-        proc = subprocess.Popen(
-            [
-                "bash",
-                "-c",
-                f"python3 infra/helper.py run_fuzzer {project_name} {target_name} -- -max_total_time={timeout}"
-            ],
-            cwd=oss_fuzz_root,
-            stdout=input_file,
-            stderr=log_file,
-        )
-        logging.info(f"[RUNNING] Subprocess started for {project_name}/{target_name} (PID: {proc.pid})")
-        return proc
-    except Exception as e:
-        logging.error(f"[ERROR] Failed to start fuzzer for {project_name}/{target_name}: {e}")
-        return None
-    finally:
-        logging.info(f"[END] Fuzzing launch attempt completed for {project_name}/{target_name}")
+    with open(template_path, "w") as f:
+        f.write(template)
     
+    return template_path
 
-# 修改testgen_repos函数使用新的目标发现方法
-def testgen_repos(
-    repos: list[str],
-    jobs: int,
-    n_fuzz: int = 100,
-    strategy: str = "shuffle",
-    max_len: int = 100,
-    sim_thresh: float = 0.8,
-):
-    """
-    从模糊测试的输入数据生成最终的测试用例（使用新的目标发现方法）。
-
-    Args:
-        repos (list[str]): 仓库路径列表。
-        jobs (int): 并行任务数。
-        n_fuzz (int, optional): 每个目标要使用的模糊测试输入数量。默认为 100。
-        strategy (str, optional): 选择输入的策略。默认为 "shuffle"。
-        max_len (int, optional): 输入的最大长度。默认为 100。
-        sim_thresh (float, optional): 输入的相似度阈值。默认为 0.8。
-    """
-    # 使用新的目标发现方法
-    def get_targets_for_repo(repo: str) -> list[str]:
-        """获取单个仓库的目标列表"""
+def transform_repos(repos: list[str], jobs: int):
+    """为所有目标生成测试模板"""
+    logging.info("Generating test templates")
+    
+    def _transform_repo(repo: str):
         project_name = os.path.basename(repo)
         oss_fuzz_dir = Path(repo).parent.parent
-        logger = logging.getLogger(f"testgen.{project_name}")
-        return discover_targets(project_name, oss_fuzz_dir, logger)
+        targets = discover_targets(project_name, oss_fuzz_dir)
+        return [generate_test_template(t, repo) for t in targets]
     
-    # 并行获取目标列表
     with ProcessingPool(jobs) as p:
-        targets_list = list(tqdm(
-            p.map(get_targets_for_repo, repos),
-            total=len(repos),
-            desc="Discovering targets for testgen"
-        ))
-    
-    # 创建目标映射
-    target_map = {repo: targets for repo, targets in zip(repos, targets_list)}
-    
-    # 记录日志
-    logging.info("📝 Substitute fuzz data to test templates")
-    
-    # 使用进程池并行处理每个仓库的替换任务
-    with ProcessingPool(jobs) as p:
-        # 使用 tqdm 显示总体进度
-        results = list(
-            tqdm(
-                p.imap(
-                    lambda item: substitute_one_repo(
-                        item[0], item[1], n_fuzz, strategy, max_len, sim_thresh
-                    ),
-                    target_map.items(),
-                ),
-                total=len(target_map),
-                desc="Generating tests"
-            )
-        )
-    
-    # 记录完成情况
-    successful_repos = sum(1 for r in results if r is not None)
-    logging.info(f"✅ Completed test generation for {successful_repos}/{len(repos)} projects")
+        return list(p.map(_transform_repo, repos))
+
+def substitute_input(template: str, input_data: str, idx: int) -> str:
+    """将模糊测试输入替换到测试模板中"""
+    return template.replace(
+        'let input = []; // 模糊测试输入将替换这里',
+        f"let input = {input_data};"
+    ).replace(
+        f"fn test_",
+        f"fn test_{idx}_"
+    )
 
+def has_similar(selected: list[str], x: str, thresh: float = 0.8) -> bool:
+    """检查字符串是否与已选列表中的任何字符串足够相似"""
+    def similar(a, b):
+        return SequenceMatcher(None, a, b).ratio()
+    return any(similar(x, y) > thresh for y in selected)
 
-# 修改substitute_one_repo以返回状态
 def substitute_one_repo(
     repo: str,
     targets: list[str],
@@ -394,182 +187,128 @@ def substitute_one_repo(
     strategy: str,
     max_len: int,
     sim_thresh: float,
-) -> Optional[int]:
-    """
-    处理单个仓库，将其模糊测试输入替换到测试模板中以生成测试文件。
-
-    Args:
-        repo (str): 仓库路径。
-        targets (list[str]): 该仓库的模糊测试目标列表。
-        n_fuzz (int): 要使用的模糊测试输入数量。
-        strategy (str): 选择输入的策略（"shuffle", "reverse" 等）。
-        max_len (int): 模糊测试输入的最大长度。
-        sim_thresh (float): 用于去重的相似度阈值。
-
-    Returns:
-        int: 成功处理的目标数量，或出错时为None
-    """
-    logger = logging.getLogger(f"substitute.{os.path.basename(repo)}")
-    # 定义模板目录和输入目录的路径
+):
+    """处理单个仓库，将模糊测试输入替换到测试模板中"""
     template_dir = pjoin(repo, "tests-gen")
     input_dir = pjoin(repo, "fuzz_inputs")
     
-    success_count = 0
-    
-    # 遍历该仓库的所有模糊测试目标
     for t in targets:
-        if not t:  # 跳过空目标
-            continue
-
-        template_path = pjoin(template_dir, t + ".py")
+        template_path = pjoin(template_dir, f"{t}.rs")
         input_path = pjoin(input_dir, t)
         
         try:
-            # 检查文件是否存在
-            if not os.path.exists(template_path):
-                logger.warning(f"📄 Template file not found: {template_path}")
-                continue
-                
-            if not os.path.exists(input_path):
-                logger.warning(f"📄 Input file not found: {input_path}")
-                continue
-                
-            # 读取测试模板文件
             with open(template_path) as f_template:
                 template = f_template.read()
                 
-            # 读取对应的模糊测试输入文件
             with open(input_path, "r") as f_input:
-                # 过滤掉空的输入行
-                all_inputs = [i for i in f_input.read().splitlines() if i != "[]"]
-                
-            if not all_inputs:
-                logger.warning(f"⚠️ No valid inputs found for {t}")
-                continue
-                
-            logger.info(f"📥 Loaded {len(all_inputs)} inputs for {t}")
-
-            inputs: list[str]
-            # 根据不同的策略来选择输入数据
+                all_inputs = [i for i in f_input.read().splitlines() if i]
+            
+            # 选择输入策略
             if strategy == "shuffle":
-                # 随机打乱所有输入
                 random.shuffle(all_inputs)
-                # 过滤掉过长的输入，并取前 n_fuzz 个
-                inputs = list(
-                    islice(filter(lambda x: len(x) < max_len, all_inputs), n_fuzz)
-                )
+                inputs = list(islice(
+                    (x for x in all_inputs if len(x) < max_len), n_fuzz))
             elif strategy == "reverse":
-                # 从后往前选择，同时进行去重和长度过滤
                 inputs = []
                 for x in reversed(all_inputs):
-                    if len(inputs) >= n_fuzz: # 如果已选够，则停止
+                    if len(inputs) >= n_fuzz:
                         break
-                    # 如果输入过长或与已选输入相似，则跳过
                     if len(x) > max_len or has_similar(inputs, x, sim_thresh):
                         continue
                     inputs.append(x)
             else:
-                # 默认策略：直接取前 n_fuzz 个输入
                 inputs = all_inputs[:n_fuzz]
-
-            logger.info(f"✅ Selected {len(inputs)} inputs after {strategy} strategy")
-
-            # 使用选定的输入数据生成测试用例代码
+            
+            # 生成测试用例
             tests = [
                 substitute_input(template, input_data, i)
                 for i, input_data in enumerate(inputs)
             ]
             
-            # 定义生成的测试文件的路径
-            generated_test_path = pjoin(template_dir, f"{t}.inputs.rs")
-            
-            # 将生成的测试代码写入文件
-            with open(generated_test_path, "w") as f_template:
-                f_template.write("\n".join(tests))
-                
-            logger.info(f"📝 Generated test file: {generated_test_path}")
-
-            # 使用 rustfmt 工具格式化生成的测试文件
-            fmt_result = subprocess.run(["rustfmt", generated_test_path], capture_output=True, text=True)
-            if fmt_result.returncode != 0:
-                logger.warning(f"⚠️ rustfmt failed for {generated_test_path}: {fmt_result.stderr}")
-            else:
-                logger.info("✨ Formatted with rustfmt")
+            # 写入生成的测试文件
+            generated_path = pjoin(template_dir, f"{t}.inputs.rs")
+            with open(generated_path, "w") as f:
+                f.write("\n".join(tests))
                 
-            success_count += 1
+            # 格式化代码
+            subprocess.run(["rustfmt", generated_path], check=False)
             
-        except FileNotFoundError as e:
-            logger.error(f"❌ File not found: {e}")
         except Exception as e:
-            logger.exception(f"💥 Unexpected error processing {t}: {e}")
-            
-    return success_count if success_count > 0 else None
-
-
+            logging.error(f"Error processing {t}: {e}")
 
+def testgen_repos(
+    repos: list[str],
+    jobs: int,
+    n_fuzz: int = 100,
+    strategy: str = "shuffle",
+    max_len: int = 100,
+    sim_thresh: float = 0.8,
+):
+    """从模糊测试输入生成测试用例"""
+    # 首先获取所有目标
+    targets_list = []
+    for repo in repos:
+        project_name = os.path.basename(repo)
+        oss_fuzz_dir = Path(repo).parent.parent
+        targets = discover_targets(project_name, oss_fuzz_dir)
+        targets_list.append(targets)
+    
+    target_map = {repo: targets for repo, targets in zip(repos, targets_list)}
+    
+    # 并行处理每个仓库
+    with ProcessingPool(jobs) as p:
+        list(p.map(
+            lambda item: substitute_one_repo(
+                item[0], item[1], n_fuzz, strategy, max_len, sim_thresh
+            ),
+            target_map.items()
+        ))
 
 def main(
     repo_id: str = "data/valid_projects3.txt",
     repo_root: str = "fuzz/oss-fuzz/projects/",
     timeout: int = 60,
-    jobs: int = 2,
-    limits: Optional[int] = None,
-    pipeline: str = "transform",
+    jobs: int = 4,
+    pipeline: str = "all",
     n_fuzz: int = 100,
     strategy: str = "shuffle",
     max_len: int = 100,
     sim_thresh: float = 0.8,
 ):
-    """
-    从多个 OSS-Fuzz 项目中执行指定阶段（构建镜像、模糊测试、测试生成等）的主函数。
-
-    Args:
-        repo_id (str): 文件路径，包含 OSS-Fuzz 项目名称（每行一个）。
-        repo_root (str): 所有 OSS-Fuzz 项目所在的根目录。
-        timeout (int): 模糊测试的超时时间。
-        jobs (int): 并行任务数。
-        limits (Optional[int]): 处理项目数量的上限。
-        pipeline (str): 执行阶段：build_image, fuzz, testgen, all。
-        n_fuzz, strategy, max_len, sim_thresh: testgen 参数。
-    """
+    """主函数，控制整个模糊测试流程"""
     try:
         with open(repo_id, "r") as f:
             repo_id_list = [line.strip() for line in f if line.strip()]
     except FileNotFoundError:
         repo_id_list = [repo_id]
 
-    if limits is not None:
-        repo_id_list = repo_id_list[:limits]
-
-    logging.info(f"Loaded {len(repo_id_list)} repos to be processed")
-
-    logging.info("Collecting all OSS-Fuzz project directories")
+    # 收集仓库路径
     repos = []
     for repo_id in repo_id_list:
         repo_path = abspath(os.path.join(repo_root, repo_id))
         if os.path.isdir(repo_path):
             repos.append(repo_path)
 
-    match pipeline:
-        case "build_image":
-            build_image(repos, jobs)
-        case "build_fuzzer":
-            build_fuzzer(repos, jobs)
-        case "fuzz":
-            fuzz_repos(repos, jobs, timeout=timeout)
-        case "testgen":
-            testgen_repos(repos, jobs, n_fuzz, strategy, max_len, sim_thresh)
-        case "all":
-            build_image(repos, jobs)
-            build_fuzzer(repos, jobs)  # 在构建镜像后添加构建模糊测试器阶段
-            fuzz_repos(repos, jobs, timeout=timeout)
-            testgen_repos(repos, jobs, n_fuzz, strategy, max_len, sim_thresh)
-        case _:
-            logging.error(f"Unknown pipeline {pipeline}")
-
-
+    # 执行指定流程
+    if pipeline == "build_image":
+        build_image(repos, jobs)
+    elif pipeline == "build_fuzzer":
+        build_fuzzer(repos, jobs)
+    elif pipeline == "fuzz":
+        fuzz_repos(repos, jobs, timeout)
+    elif pipeline == "testgen":
+        testgen_repos(repos, jobs, n_fuzz, strategy, max_len, sim_thresh)
+    elif pipeline == "transform":
+        transform_repos(repos, jobs)
+    elif pipeline == "all":
+        build_image(repos, jobs)
+        build_fuzzer(repos, jobs)
+        transform_repos(repos, jobs)  # 关键添加：模板生成
+        fuzz_repos(repos, jobs, timeout)
+        testgen_repos(repos, jobs, n_fuzz, strategy, max_len, sim_thresh)
+    else:
+        logging.error(f"Unknown pipeline: {pipeline}")
 
 if __name__ == "__main__":
     logging.basicConfig(level=logging.INFO)
     fire.Fire(main)
-

From 58d1f76e3a6539e17a059abb7ce6f57eaa7e31fd Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Tue, 5 Aug 2025 04:37:46 +0000
Subject: [PATCH 084/134] testgen: generated tests still need the rustc fix "help: add `;` here"

---
 fuzz/collect_fuzz_python.py | 209 +++++++++++++++++++++++++++++++-----
 1 file changed, 181 insertions(+), 28 deletions(-)

diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py
index 796386c..ab94676 100644
--- a/fuzz/collect_fuzz_python.py
+++ b/fuzz/collect_fuzz_python.py
@@ -15,9 +15,16 @@
 from difflib import SequenceMatcher
 from itertools import islice
 from datetime import datetime
+import re
 
 def build_image(repos: list[str], jobs: int):
-    """构建每个仓库对应的OSS-Fuzz项目的Docker镜像"""
+    """
+    构建每个仓库对应的OSS-Fuzz项目的Docker镜像
+    
+    Args:
+        repos (list[str]): 仓库路径列表
+        jobs (int): 并行任务数
+    """
     logging.info(f"Building Docker images for {len(repos)} OSS-Fuzz projects")
     log_dir = os.path.abspath("fuzz_pipeline_log")
     os.makedirs(log_dir, exist_ok=True)
@@ -26,6 +33,8 @@ def _build_cmd(path: str):
         project_name = os.path.basename(path.rstrip("/"))
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
         log_file = os.path.join(log_dir, f"{project_name}_{timestamp}.log")
+
+        logging.info(f"Start building {project_name}, logging to {log_file}")
         return subprocess.Popen(
             f"yes | python3 infra/helper.py build_image {project_name}",
             cwd=os.path.abspath(os.path.join(path, "../../")),
@@ -37,7 +46,13 @@ def _build_cmd(path: str):
     _ = parallel_subprocess(repos, jobs, _build_cmd, on_exit=None)
 
 def build_fuzzer(repos: list[str], jobs: int):
-    """对构建成功的项目并行构建模糊测试器"""
+    """
+    对构建成功的项目并行构建模糊测试器
+    
+    Args:
+        repos (list[str]): 仓库路径列表
+        jobs (int): 并行任务数
+    """
     logging.info(f"Building fuzzers for {len(repos)} OSS-Fuzz projects")
     log_dir = os.path.abspath("fuzz_pipeline_log")
     os.makedirs(log_dir, exist_ok=True)
@@ -46,6 +61,8 @@ def _build_cmd(path: str):
         project_name = os.path.basename(path.rstrip("/"))
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
         log_file = os.path.join(log_dir, f"{project_name}_fuzzer_{timestamp}.log")
+
+        logging.info(f"Start building fuzzers for {project_name}, logging to {log_file}")
         return subprocess.Popen(
             f"python3 infra/helper.py build_fuzzers --sanitizer address {project_name}",
             cwd=os.path.abspath(os.path.join(path, "../../")),
@@ -57,7 +74,16 @@ def _build_cmd(path: str):
     _ = parallel_subprocess(repos, jobs, _build_cmd, on_exit=None)
 
 def discover_targets(project_name: str, oss_fuzz_dir: Path) -> list[str]:
-    """发现模糊测试目标"""
+    """
+    发现模糊测试目标
+    
+    Args:
+        project_name (str): 项目名称
+        oss_fuzz_dir (Path): OSS-Fuzz根目录
+        
+    Returns:
+        list[str]: 目标名称列表
+    """
     out_dir = oss_fuzz_dir / "build" / "out" / project_name
     targets = []
     
@@ -77,7 +103,16 @@ def discover_targets(project_name: str, oss_fuzz_dir: Path) -> list[str]:
     return targets
 
 def fuzz_one_target(target: tuple[str, str], timeout: int):
-    """对单个模糊测试目标执行模糊测试"""
+    """
+    对单个模糊测试目标执行模糊测试
+    
+    Args:
+        target (tuple[str, str]): (仓库路径, 目标名称)
+        timeout (int): 超时时间(秒)
+        
+    Returns:
+        subprocess.Popen: 子进程对象
+    """
     repo_path, target_name = target
     project_name = os.path.basename(repo_path)
     oss_fuzz_root = os.path.dirname(os.path.dirname(repo_path))
@@ -103,7 +138,14 @@ def fuzz_one_target(target: tuple[str, str], timeout: int):
         return None
 
 def fuzz_repos(repos: list[str], jobs: int, timeout: int = 60):
-    """对一组仓库执行模糊测试"""
+    """
+    对一组仓库执行模糊测试
+    
+    Args:
+        repos (list[str]): 仓库路径列表
+        jobs (int): 并行任务数
+        timeout (int): 超时时间(秒)
+    """
     logging.info("Discovering fuzz targets")
     
     # 获取所有目标
@@ -130,17 +172,26 @@ def fuzz_repos(repos: list[str], jobs: int, timeout: int = 60):
     parallel_subprocess(targets, jobs, lambda p: fuzz_one_target(p, timeout), on_exit=None)
 
 def generate_test_template(target_name: str, repo_path: str):
-    """为单个目标生成测试模板"""
+    """
+    为单个目标生成测试模板
+    
+    Args:
+        target_name (str): 目标名称
+        repo_path (str): 仓库路径
+        
+    Returns:
+        str: 模板文件路径
+    """
     template_dir = pjoin(repo_path, "tests-gen")
     os.makedirs(template_dir, exist_ok=True)
     template_path = pjoin(template_dir, f"{target_name}.rs")
     
-    # 基本测试模板
+    # 基本测试模板 - 使用字节数组而不是字节字符
     template = f"""
     #[test]
     fn test_{target_name}() {{
         // 测试逻辑将在这里生成
-        let input = []; // 模糊测试输入将替换这里
+        let input = b""; // 模糊测试输入将替换这里
         let result = process_input(&input);
         assert!(result.is_ok());
     }}
@@ -152,7 +203,13 @@ def generate_test_template(target_name: str, repo_path: str):
     return template_path
 
 def transform_repos(repos: list[str], jobs: int):
-    """为所有目标生成测试模板"""
+    """
+    为所有目标生成测试模板
+    
+    Args:
+        repos (list[str]): 仓库路径列表
+        jobs (int): 并行任务数
+    """
     logging.info("Generating test templates")
     
     def _transform_repo(repo: str):
@@ -164,18 +221,67 @@ def _transform_repo(repo: str):
     with ProcessingPool(jobs) as p:
         return list(p.map(_transform_repo, repos))
 
-def substitute_input(template: str, input_data: str, idx: int) -> str:
-    """将模糊测试输入替换到测试模板中"""
-    return template.replace(
-        'let input = []; // 模糊测试输入将替换这里',
-        f"let input = {input_data};"
-    ).replace(
-        f"fn test_",
-        f"fn test_{idx}_"
+def escape_special_chars(input_data: str) -> str:
+    """
+    转义输入数据中的特殊字符
+    
+    Args:
+        input_data (str): 原始输入数据
+        
+    Returns:
+        str: 转义后的输入数据
+    """
+    # 转义反斜杠和双引号
+    escaped = input_data.replace('\\', '\\\\').replace('"', '\\"')
+    
+    # 处理非ASCII字符
+    if any(ord(c) > 127 for c in escaped):
+        # 如果包含非ASCII字符，使用字节数组表示
+        byte_array = [str(b) for b in input_data.encode()]
+        return f"b\"\" // Original: {input_data}\n        let input = vec![{', '.join(byte_array)}];"
+    
+    return f"b\"{escaped}\""
+
+def substitute_input(template: str, input_data: str, idx: int, target_name: str) -> str:
+    """
+    将模糊测试输入替换到测试模板中
+    
+    Args:
+        template (str): 模板内容
+        input_data (str): 输入数据
+        idx (int): 测试索引
+        target_name (str): 目标名称
+        
+    Returns:
+        str: 替换后的测试代码
+    """
+    # 转义特殊字符并处理非ASCII字符
+    escaped_input = escape_special_chars(input_data)
+    
+    # 替换输入占位符
+    new_template = template.replace(
+        'let input = b""; // 模糊测试输入将替换这里',
+        escaped_input
+    )
+    
+    # 替换函数名避免重复
+    return new_template.replace(
+        f"fn test_{target_name}()",
+        f"fn test_{target_name}_{idx}()"
     )
 
 def has_similar(selected: list[str], x: str, thresh: float = 0.8) -> bool:
-    """检查字符串是否与已选列表中的任何字符串足够相似"""
+    """
+    检查字符串是否与已选列表中的任何字符串足够相似
+    
+    Args:
+        selected (list[str]): 已选字符串列表
+        x (str): 待检查字符串
+        thresh (float): 相似度阈值
+        
+    Returns:
+        bool: 是否相似
+    """
     def similar(a, b):
         return SequenceMatcher(None, a, b).ratio()
     return any(similar(x, y) > thresh for y in selected)
@@ -188,21 +294,45 @@ def substitute_one_repo(
     max_len: int,
     sim_thresh: float,
 ):
-    """处理单个仓库，将模糊测试输入替换到测试模板中"""
+    """
+    处理单个仓库，将模糊测试输入替换到测试模板中
+    
+    Args:
+        repo (str): 仓库路径
+        targets (list[str]): 目标列表
+        n_fuzz (int): 使用的输入数量
+        strategy (str): 选择策略
+        max_len (int): 最大长度
+        sim_thresh (float): 相似度阈值
+    """
     template_dir = pjoin(repo, "tests-gen")
     input_dir = pjoin(repo, "fuzz_inputs")
     
-    for t in targets:
-        template_path = pjoin(template_dir, f"{t}.rs")
-        input_path = pjoin(input_dir, t)
+    for target_name in targets:  # 使用target_name作为循环变量
+        template_path = pjoin(template_dir, f"{target_name}.rs")
+        input_path = pjoin(input_dir, target_name)
         
         try:
+            if not os.path.exists(template_path):
+                logging.warning(f"Template file not found: {template_path}")
+                continue
+                
+            if not os.path.exists(input_path):
+                logging.warning(f"Input file not found: {input_path}")
+                continue
+                
             with open(template_path) as f_template:
                 template = f_template.read()
                 
             with open(input_path, "r") as f_input:
-                all_inputs = [i for i in f_input.read().splitlines() if i]
+                all_inputs = [line.strip() for line in f_input if line.strip()]
             
+            if not all_inputs:
+                logging.warning(f"No valid inputs found for {target_name}")
+                continue
+                
+            logging.info(f"Loaded {len(all_inputs)} inputs for {target_name}")
+
             # 选择输入策略
             if strategy == "shuffle":
                 random.shuffle(all_inputs)
@@ -221,12 +351,12 @@ def substitute_one_repo(
             
             # 生成测试用例
             tests = [
-                substitute_input(template, input_data, i)
+                substitute_input(template, input_data, i, target_name)  # 传递target_name
                 for i, input_data in enumerate(inputs)
             ]
             
             # 写入生成的测试文件
-            generated_path = pjoin(template_dir, f"{t}.inputs.rs")
+            generated_path = pjoin(template_dir, f"{target_name}.inputs.rs")
             with open(generated_path, "w") as f:
                 f.write("\n".join(tests))
                 
@@ -234,7 +364,7 @@ def substitute_one_repo(
             subprocess.run(["rustfmt", generated_path], check=False)
             
         except Exception as e:
-            logging.error(f"Error processing {t}: {e}")
+            logging.error(f"Error processing {target_name}: {e}")
 
 def testgen_repos(
     repos: list[str],
@@ -244,7 +374,17 @@ def testgen_repos(
     max_len: int = 100,
     sim_thresh: float = 0.8,
 ):
-    """从模糊测试输入生成测试用例"""
+    """
+    从模糊测试输入生成测试用例
+    
+    Args:
+        repos (list[str]): 仓库路径列表
+        jobs (int): 并行任务数
+        n_fuzz (int): 使用的输入数量
+        strategy (str): 选择策略
+        max_len (int): 最大长度
+        sim_thresh (float): 相似度阈值
+    """
     # 首先获取所有目标
     targets_list = []
     for repo in repos:
@@ -275,7 +415,20 @@ def main(
     max_len: int = 100,
     sim_thresh: float = 0.8,
 ):
-    """主函数，控制整个模糊测试流程"""
+    """
+    主函数，控制整个模糊测试流程
+    
+    Args:
+        repo_id (str): 项目ID文件路径
+        repo_root (str): 项目根目录
+        timeout (int): 超时时间
+        jobs (int): 并行任务数
+        pipeline (str): 流程类型
+        n_fuzz (int): 使用的输入数量
+        strategy (str): 选择策略
+        max_len (int): 最大长度
+        sim_thresh (float): 相似度阈值
+    """
     try:
         with open(repo_id, "r") as f:
             repo_id_list = [line.strip() for line in f if line.strip()]

From 92368225ed98260aecbb363ac75b2d7a3a1ee59a Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Tue, 5 Aug 2025 06:10:36 +0000
Subject: [PATCH 085/134] test successful

---
 fuzz/collect_fuzz_python.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py
index ab94676..7ae4933 100644
--- a/fuzz/collect_fuzz_python.py
+++ b/fuzz/collect_fuzz_python.py
@@ -238,9 +238,9 @@ def escape_special_chars(input_data: str) -> str:
     if any(ord(c) > 127 for c in escaped):
         # 如果包含非ASCII字符，使用字节数组表示
         byte_array = [str(b) for b in input_data.encode()]
-        return f"b\"\" // Original: {input_data}\n        let input = vec![{', '.join(byte_array)}];"
+        return f"let input = vec![{', '.join(byte_array)}];"
     
-    return f"b\"{escaped}\""
+    return f"let input = b\"{escaped}\";"
 
 def substitute_input(template: str, input_data: str, idx: int, target_name: str) -> str:
     """
@@ -408,7 +408,7 @@ def main(
     repo_id: str = "data/valid_projects3.txt",
     repo_root: str = "fuzz/oss-fuzz/projects/",
     timeout: int = 60,
-    jobs: int = 4,
+    jobs: int = 80,
     pipeline: str = "all",
     n_fuzz: int = 100,
     strategy: str = "shuffle",

From 310b079dcccc0bdce8937fc47e3cb637b068e124 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Tue, 5 Aug 2025 06:13:33 +0000
Subject: [PATCH 086/134] example output project

---
 UniTSyn       | 2 +-
 fuzz/oss-fuzz | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/UniTSyn b/UniTSyn
index 45c7bd1..0d9e0df 160000
--- a/UniTSyn
+++ b/UniTSyn
@@ -1 +1 @@
-Subproject commit 45c7bd1152ce420781d4b5ce6d4bf8b1e6c7b3ca
+Subproject commit 0d9e0df455655773eaf0acabd9008aa34f0e3f73
diff --git a/fuzz/oss-fuzz b/fuzz/oss-fuzz
index 4bbbeb5..8f6559b 160000
--- a/fuzz/oss-fuzz
+++ b/fuzz/oss-fuzz
@@ -1 +1 @@
-Subproject commit 4bbbeb59599ad38b7984191e2e83bc9a61f7fd4b
+Subproject commit 8f6559b916e0d7ca6e7f974394ce6f651783c163

From 5ceb873c885960c7a3651f3a1214a327154bb17a Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Tue, 5 Aug 2025 06:25:47 +0000
Subject: [PATCH 087/134] type error

---
 fuzz/collect_fuzz.py                |   1 -
 fuzz/collect_fuzz_python.py         |  10 +-
 fuzz/command_util.py                | 179 +++++++++-------------------
 fuzz/run_fuzz_all_targets_print1.py | 143 ++++++++++++++--------
 4 files changed, 155 insertions(+), 178 deletions(-)

diff --git a/fuzz/collect_fuzz.py b/fuzz/collect_fuzz.py
index bcc4b5e..95bba86 100644
--- a/fuzz/collect_fuzz.py
+++ b/fuzz/collect_fuzz.py
@@ -1,5 +1,4 @@
 """script for rust fuzzing and transforming test_template"""
-
 import logging
 from typing import Optional
 import fire
diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py
index 7ae4933..816cd63 100644
--- a/fuzz/collect_fuzz_python.py
+++ b/fuzz/collect_fuzz_python.py
@@ -85,7 +85,7 @@ def discover_targets(project_name: str, oss_fuzz_dir: Path) -> list[str]:
         list[str]: 目标名称列表
     """
     out_dir = oss_fuzz_dir / "build" / "out" / project_name
-    targets = []
+    targets: list[str] = []  
     
     if not out_dir.is_dir():
         logging.warning(f"Build output directory for {project_name} does not exist")
@@ -158,18 +158,18 @@ def fuzz_repos(repos: list[str], jobs: int, timeout: int = 60):
     
     # 创建目标映射
     target_map = {repo: targets for repo, targets in zip(repos, targets_list)}
-    targets: list[tuple[str, str]] = [
+    all_targets: list[tuple[str, str]] = [  # 修复: 重命名变量避免冲突
         (k, v) for k, vs in target_map.items() for v in vs
     ]
     
-    logging.info(f"Running fuzzing on {len(targets)} targets")
+    logging.info(f"Running fuzzing on {len(all_targets)} targets")
     
     # 创建输入目录
     for repo in repos:
         os.makedirs(pjoin(repo, "fuzz_inputs"), exist_ok=True)
     
     # 并行执行模糊测试
-    parallel_subprocess(targets, jobs, lambda p: fuzz_one_target(p, timeout), on_exit=None)
+    parallel_subprocess(all_targets, jobs, lambda p: fuzz_one_target(p, timeout), on_exit=None)
 
 def generate_test_template(target_name: str, repo_path: str):
     """
@@ -405,7 +405,7 @@ def testgen_repos(
         ))
 
 def main(
-    repo_id: str = "data/valid_projects3.txt",
+    repo_id: str = "data/valid_projects.txt",
     repo_root: str = "fuzz/oss-fuzz/projects/",
     timeout: int = 60,
     jobs: int = 80,
diff --git a/fuzz/command_util.py b/fuzz/command_util.py
index 8270d64..e446f42 100644
--- a/fuzz/command_util.py
+++ b/fuzz/command_util.py
@@ -1,14 +1,8 @@
-# command_util.py
-
 import subprocess
 import logging
 import time
-import os
-import pty
-import tty
-import termios
 from pathlib import Path
-from typing import Optional, Tuple, List
+from typing import Optional
 from returns.maybe import Maybe
 from errors import CommandError
 
@@ -46,94 +40,65 @@ def create_popen_object(
     
     return process
 
-def run_subprocess_with_pty(
-    cmd: str,
-    cwd: Optional[Path] = None,
-    timeout: Optional[int] = None,
-    logger: Optional[logging.Logger] = None,
-) -> Tuple[int, List[str]]:
+def parallel_subprocess(
+    tasks: list[tuple[str, Path, Optional[int], logging.Logger]]
+) -> list[tuple[subprocess.Popen, str, Path]]:
     """
-    使用伪终端执行命令，解决终端设置问题
-    - timeout: 使用 shell 的 timeout 命令处理超时
-    - logger: 用于实时打印输出
-    返回: (退出码, 输出行列表)
+    并行执行多个子进程
+    - tasks: 任务列表，每个任务是元组 (cmd, cwd, timeout, logger)
+    返回: 包含 (Popen对象, 命令, 工作目录) 的列表
     """
-    # 添加超时命令
-    if timeout and timeout > 0:
-        cmd = f"timeout {timeout}s {cmd}"
-        if logger:
-            logger.debug(f"⌛ Adding timeout ({timeout}s) to command")
-
-    # 使用伪终端执行命令
-    master_fd, slave_fd = pty.openpty()
-    
-    # 设置伪终端为原始模式
-    old_settings = termios.tcgetattr(master_fd)
-    tty.setraw(master_fd)
-    
-    process = subprocess.Popen(
-        cmd,
-        shell=True,
-        cwd=str(cwd) if cwd else None,
-        stdin=slave_fd,
-        stdout=slave_fd,
-        stderr=slave_fd,
-        close_fds=True,
-        start_new_session=True
-    )
-    
-    os.close(slave_fd)
+    processes = []
+    for cmd, cwd, timeout, logger in tasks:
+        process = create_popen_object(
+            cmd,
+            cwd=cwd,
+            capture_output=True,
+            timeout=timeout,
+            logger=logger
+        )
+        processes.append((process, cmd, cwd))
     
-    output_lines = []
-    try:
-        while True:
-            try:
-                data = os.read(master_fd, 1024)
-                if not data:
-                    break
-                decoded = data.decode("utf-8", "replace")
-                output_lines.append(decoded.strip())
-                if logger:
-                    logger.debug(decoded.strip())
-            except OSError:
-                break
-    finally:
-        # 恢复终端设置
-        termios.tcsetattr(master_fd, termios.TCSADRAIN, old_settings)
-        os.close(master_fd)
-        process.wait()
-    
-    return process.returncode, output_lines
+    return processes
 
-def run_command_fuzz_all_targets(
-    cmd: str,
-    log_msg: str,
+def wait_for_processes(
+    processes: list[tuple[subprocess.Popen, str, Path]],
     logger: logging.Logger,
-    allowed_exit_codes: Maybe[list[int]] = Maybe.empty,
-    timeout: int = 3600,
-) -> bool:
-    """run_command used in run_fuzz_all_targets_print1.py, run_fuzz_all_targets.py"""
-    logger.info(f"▶️ {log_msg}...")
-    logger.debug(f"   $ {cmd}")
-
-    # 允许超时退出码 124
-    allowed_codes = allowed_exit_codes.value_or([]) + [124]
-    
-    # 使用伪终端解决终端设置问题
-    exit_code, _ = run_subprocess_with_pty(
-        cmd, 
-        timeout=timeout, 
-        logger=logger
-    )
+    allowed_exit_codes: list[int] = [0]
+) -> list[tuple[bool, str, Path]]:
+    """
+    等待所有进程完成并处理结果
+    - processes: 进程列表
+    - logger: 日志记录器
+    - allowed_exit_codes: 允许的退出码列表
+    返回: 结果列表 (成功状态, 命令, 工作目录)
+    """
+    results = []
     
-    # 返回 124 表示超时
-    if exit_code == 124:
-        logger.warning(f"⌛ Command timed out after {timeout} seconds")
+    for process, cmd, cwd in processes:
+        # 实时读取输出
+        output_lines = []
+        while True:
+            line = process.stdout.readline()
+            if not line and process.poll() is not None:
+                break
+            if line:
+                stripped_line = line.strip()
+                output_lines.append(stripped_line)
+                if logger:
+                    logger.debug(stripped_line)
+        
+        exit_code = process.returncode
+        
+        # 检查是否超时 (124 是 timeout 命令的退出码)
+        if exit_code == 124:
+            logger.warning(f"⌛ Command timed out: {cmd}")
+        
+        # 检查是否成功
+        success = exit_code in allowed_exit_codes
+        results.append((success, cmd, cwd))
     
-    if exit_code not in [0, *allowed_codes]:
-        logger.error(f"❌ The command failed, exit code: {exit_code}")
-        return False
-    return True
+    return results
 
 def run_command_build_fuzz(
     cmd: str,
@@ -145,45 +110,15 @@ def run_command_build_fuzz(
     """run_command used in build_fuzz.py, build_fuzzers.py"""
     allowed_codes = allowed_exit_codes.value_or([0])
     cmd_str = f"yes | {cmd}" if not skip_yes else cmd
-    exit_code, _ = _run_subprocess(cmd_str, cwd=oss_fuzz_dir)
+    process = create_popen_object(cmd_str, cwd=oss_fuzz_dir)
+    process.wait()
     
     # 处理 timeout 的特殊退出码 (124)
-    exit_code = 124 if exit_code == 124 else exit_code
+    exit_code = 124 if process.returncode == 124 else process.returncode
     
     if exit_code not in allowed_codes:
         error_msg = f"The command failed (exit code: {exit_code})"
         if project:
             error_msg += f" for project: {project}"
         raise CommandError(error_msg, project=project, exit_code=exit_code)
-    return exit_code
-
-
-# def run_command_fuzz_all_targets(
-#     cmd: str,
-#     log_msg: str,
-#     logger: logging.Logger,
-#     allowed_exit_codes: Maybe[list[int]] = Maybe.empty,
-#     timeout: int = 3600,
-# ) -> bool:
-#     """run_command used in run_fuzz_all_targets_print1.py, run_fuzz_all_targets.py"""
-#     logger.info(f"▶️ {log_msg}...")
-#     logger.debug(f"   $ {cmd}")
-
-#     # 允许超时退出码 124
-#     allowed_codes = allowed_exit_codes.value_or([]) + [124]
-    
-#     exit_code, _ = _run_subprocess(
-#         cmd, 
-#         capture_output=True, 
-#         timeout=timeout, 
-#         logger=logger
-#     )
-    
-#     # 返回 124 表示超时
-#     if exit_code == 124:
-#         logger.warning(f"⌛ Command timed out after {timeout} seconds")
-    
-#     if exit_code not in [0, *allowed_codes]:
-#         logger.error(f"❌ The command failed, exit code: {exit_code}")
-#         return False
-#     return True
\ No newline at end of file
+    return exit_code
\ No newline at end of file
diff --git a/fuzz/run_fuzz_all_targets_print1.py b/fuzz/run_fuzz_all_targets_print1.py
index 04400c6..ea45ddb 100644
--- a/fuzz/run_fuzz_all_targets_print1.py
+++ b/fuzz/run_fuzz_all_targets_print1.py
@@ -8,7 +8,7 @@
 1. Discovery phase: Traverse all specified projects to collect all executable fuzz targets which start with "fuzz_" and end with "print1" in each project
 2. Execution phase: Create a task pool containing all (project, target) pairs and execute them in parallel using multiprocessing
 
-This approach maximizes CPU utilization and provides clear overall progress[2](@ref).
+This approach maximizes CPU utilization and provides clear overall progress.
 
 Usage: python3 run_fuzz_all_targets_print1.py [project_list_file] [--oss-fuzz-dir /your/custom/path/to/oss-fuzz] [--timeout seconds] [--workers N]
 Example: python3 fuzz/run_fuzz_all_targets_print1.py data/valid_projects3.txt --oss-fuzz-dir /home/jiayiguo/FuzzAug/fuzz/oss-fuzz --timeout 60 --workers 4
@@ -17,16 +17,18 @@
 
 import os
 import sys
+fuzzaug_root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
+sys.path.insert(0, fuzzaug_root)
 import subprocess
 import argparse
 import logging
 import time
 from datetime import datetime
 from pathlib import Path
-from multiprocessing import Pool, cpu_count
+from multiprocessing import cpu_count
 from returns.maybe import Maybe, Nothing, Some
-from command_util import run_command_fuzz_all_targets as run_command
-
+from UniTSyn.frontend.util import parallel_subprocess
+    
 
 
 def discover_targets(project_name: str, oss_fuzz_dir: Path, logger: logging.Logger) -> list[str]:
@@ -57,12 +59,12 @@ def discover_targets(project_name: str, oss_fuzz_dir: Path, logger: logging.Logg
 
     return targets
 
-
-def run_single_target(project_name: str, target_name: str, timeout: int, oss_fuzz_dir: Path) -> tuple[bool, str, str]:
-    """Execute fuzz testing workflow for a single (project, target) pair"""
+def setup_task_logger(project_name: str, target_name: str, oss_fuzz_dir: Path) -> logging.Logger:
+    """为单个任务设置日志记录器"""
     task_id = f"{project_name}_{target_name}"
     logger = logging.getLogger(task_id)
-    LOG_DIR = oss_fuzz_dir / "run_fuzz_all_targets_logs"
+    LOG_DIR = oss_fuzz_dir / "run1_fuzz_all_targets_logs_print1_parallel"
+    
     try:
         logger.setLevel(logging.DEBUG)
         LOG_DIR.mkdir(parents=True, exist_ok=True)
@@ -75,38 +77,64 @@ def run_single_target(project_name: str, target_name: str, timeout: int, oss_fuz
         )
         file_handler.setFormatter(formatter)
         logger.addHandler(file_handler)
-        os.chdir(oss_fuzz_dir)
-
+        return logger
     except (OSError, PermissionError) as e:
         print(f"❌ Critical error occurred during initialization of task {task_id}: {e}")
-        return False, project_name, target_name
-
-    logger.info(f"🚀 Starting test -> Project: {project_name}, Target: {target_name}")
-    try:
-        cmd = f"python3 infra/helper.py run_fuzzer {project_name} {target_name} -- -max_total_time={timeout}"
-        success = run_command(
-            cmd,
-            f"Running Target '{target_name}' (timeout={timeout}s)",
-            logger,
-            allowed_exit_codes=Some([1, 124]),  # 1=Crashes found, 124=Timeout
-            timeout=timeout + 300
-        )
-
-        if success:
-            logger.info(f"✅ Target '{target_name}' completed successfully.")
-        else:
-            logger.error(f"❌ Target '{target_name}' failed.")
+        return None
 
-        return success, project_name, target_name
+def create_task_command(project_name: str, target_name: str, timeout: int) -> str:
+    """创建任务命令字符串"""
+    return f"python3 infra/helper.py run_fuzzer {project_name} {target_name} -- -max_total_time={timeout}"
 
-    except Exception as e:
-        logger.exception(f"💥 Unexpected error occurred while running target '{target_name}': {e}")
-        return False, project_name, target_name
-    finally:
-        for handler in logger.handlers[:]:
-            handler.close()
-            logger.removeHandler(handler)
+def create_subprocess(task: tuple) -> subprocess.Popen:
+    """为每个任务创建子进程"""
+    project_name, target_name, timeout, oss_fuzz_dir, logger = task
+    cmd = create_task_command(project_name, target_name, timeout)
+    
+    # 设置日志文件，存放测试过程的输出
+    task_id = f"{project_name}_{target_name}"
+    LOG_DIR = oss_fuzz_dir / "run2_fuzz_all_targets_logs"
+    LOG_DIR.mkdir(parents=True, exist_ok=True)
+    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+    log_file = LOG_DIR / f"run_{task_id}_{timestamp}.log"
+    
+    # 创建并返回 Popen 对象
+    process = subprocess.Popen(
+        cmd,
+        shell=True,
+        cwd=str(oss_fuzz_dir),
+        stdout=open(log_file, 'w'),
+        stderr=subprocess.STDOUT,
+        text=True,
+        encoding="utf-8",
+        errors="replace",
+    )
+    
+    # 将任务数据附加到进程对象以便后续使用
+    process.task_data = task
+    return process
 
+def on_process_exit(process: subprocess.Popen) -> tuple[bool, str, str]:
+    """处理进程退出"""
+    project_name, target_name, _, oss_fuzz_dir, logger = process.task_data
+    
+    # 等待进程结束
+    process.wait()
+    exit_code = process.returncode
+    
+    # 记录结果
+    if exit_code == 124:
+        logger.warning(f"⌛ Command timed out: {project_name}/{target_name}")
+    
+    # 检查是否成功
+    success = exit_code in [0, 1, 124]  # 0=成功, 1=发现崩溃, 124=超时
+    
+    if success:
+        logger.info(f"✅ Target '{target_name}' completed successfully.")
+    else:
+        logger.error(f"❌ Target '{target_name}' failed with exit code: {exit_code}")
+    
+    return success, project_name, target_name
 
 def main():
     # Configure main process logging
@@ -126,7 +154,6 @@ def main():
     args = parser.parse_args()
 
     OSS_FUZZ_DIR = args.oss_fuzz_dir.resolve()
-    LOG_DIR = OSS_FUZZ_DIR / "run_ds_logs"
 
     # 1. Read project list file
     try:
@@ -174,25 +201,41 @@ def main():
     logger.info("=" * 20 + " Phase 2: Parallel Fuzzing Execution " + "=" * 23)
     logger.info(f"🚀 Starting parallel testing with {args.workers} worker processes (timeout per target: {args.timeout}s)...")
 
-    # Prepare task parameters (project, target, timeout)
-    tasks_with_args = [(p, t, args.timeout, OSS_FUZZ_DIR) for p, t in all_fuzz_tasks]
-    results: list[tuple[bool, str, str]] = []  # Store results (success, project, target)
-    
-    # Execute in parallel using process pool
-    with Pool(args.workers) as pool:
-        try:
-            results = pool.starmap(run_single_target, tasks_with_args)
-        except Exception as e:
-            logger.error(f"💥 Critical error occurred during parallel execution: {e}")
-            pool.terminate()
-            pool.join()
+    # 准备任务列表
+    tasks = []
+    for project, target in all_fuzz_tasks:
+        task_logger = setup_task_logger(project, target, OSS_FUZZ_DIR)
+        if task_logger:
+            # 每个任务包含: (project, target, timeout, oss_fuzz_dir, logger)
+            task = (project, target, args.timeout, OSS_FUZZ_DIR, task_logger)
+            tasks.append(task)
+            task_logger.info(f"🚀 Starting test -> Project: {project}, Target: {target}")
+
+    # 使用 parallel_subprocess 并行执行
+    results = parallel_subprocess(
+        iterable=tasks,
+        jobs=args.workers,
+        subprocess_creator=create_subprocess,
+        on_exit=on_process_exit,
+        use_tqdm=True,
+        tqdm_leave=True,
+        tqdm_msg="Running fuzz targets"
+    )
 
     # 4. Result summary and reporting
     logger.info("\n" + "=" * 20 + " Phase 3: Result Summary " + "=" * 28)
-    failed_tasks = [(p, t) for success, p, t in results if not success]  # List of failed tasks
+    success_count = 0
+    failed_tasks = []
+    
+    for task, result in results.items():
+        success, project, target = result
+        if success:
+            success_count += 1
+        else:
+            failed_tasks.append((project, target))
+    
     total_tasks = len(all_fuzz_tasks)
     failed_count = len(failed_tasks)
-    success_count = total_tasks - failed_count
 
     # Output statistical summary
     logger.info(f"📊 Fuzzing completed: Success {success_count}/{total_tasks}, Failed {failed_count}/{total_tasks}")

From d31a33352c57c462c9b60d52b424a9f740693ea9 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Tue, 5 Aug 2025 06:29:54 +0000
Subject: [PATCH 088/134] English ver

---
 fuzz/collect_fuzz_python.py | 187 ++++++++++++++++++------------------
 1 file changed, 94 insertions(+), 93 deletions(-)

diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py
index 816cd63..c772975 100644
--- a/fuzz/collect_fuzz_python.py
+++ b/fuzz/collect_fuzz_python.py
@@ -1,5 +1,6 @@
 """
-用于Python项目模糊测试(fuzzing)和测试模板转换的脚本
+Script for Python project fuzzing and test template conversion
+usage: PYTHONPATH=. python3 fuzz/collect_fuzz_python.py --pipeline all
 """
 from pathlib import Path
 import logging
@@ -19,11 +20,11 @@
 
 def build_image(repos: list[str], jobs: int):
     """
-    构建每个仓库对应的OSS-Fuzz项目的Docker镜像
+    Build Docker images for OSS-Fuzz projects corresponding to each repository
     
     Args:
-        repos (list[str]): 仓库路径列表
-        jobs (int): 并行任务数
+        repos (list[str]): List of repository paths
+        jobs (int): Number of parallel tasks
     """
     logging.info(f"Building Docker images for {len(repos)} OSS-Fuzz projects")
     log_dir = os.path.abspath("fuzz_pipeline_log")
@@ -47,11 +48,11 @@ def _build_cmd(path: str):
 
 def build_fuzzer(repos: list[str], jobs: int):
     """
-    对构建成功的项目并行构建模糊测试器
+    Build fuzzers in parallel for successfully built projects
     
     Args:
-        repos (list[str]): 仓库路径列表
-        jobs (int): 并行任务数
+        repos (list[str]): List of repository paths
+        jobs (int): Number of parallel tasks
     """
     logging.info(f"Building fuzzers for {len(repos)} OSS-Fuzz projects")
     log_dir = os.path.abspath("fuzz_pipeline_log")
@@ -75,17 +76,17 @@ def _build_cmd(path: str):
 
 def discover_targets(project_name: str, oss_fuzz_dir: Path) -> list[str]:
     """
-    发现模糊测试目标
+    Discover fuzzing targets
     
     Args:
-        project_name (str): 项目名称
-        oss_fuzz_dir (Path): OSS-Fuzz根目录
+        project_name (str): Project name
+        oss_fuzz_dir (Path): OSS-Fuzz root directory
         
     Returns:
-        list[str]: 目标名称列表
+        list[str]: List of target names
     """
     out_dir = oss_fuzz_dir / "build" / "out" / project_name
-    targets: list[str] = []  
+    targets: list[str] = []  # Fix: Add type annotation
     
     if not out_dir.is_dir():
         logging.warning(f"Build output directory for {project_name} does not exist")
@@ -104,20 +105,20 @@ def discover_targets(project_name: str, oss_fuzz_dir: Path) -> list[str]:
 
 def fuzz_one_target(target: tuple[str, str], timeout: int):
     """
-    对单个模糊测试目标执行模糊测试
+    Perform fuzzing on a single fuzzing target
     
     Args:
-        target (tuple[str, str]): (仓库路径, 目标名称)
-        timeout (int): 超时时间(秒)
+        target (tuple[str, str]): (Repository path, target name)
+        timeout (int): Timeout duration (seconds)
         
     Returns:
-        subprocess.Popen: 子进程对象
+        subprocess.Popen: Subprocess object
     """
     repo_path, target_name = target
     project_name = os.path.basename(repo_path)
     oss_fuzz_root = os.path.dirname(os.path.dirname(repo_path))
     
-    # 创建输入文件路径
+    # Create input file path
     input_file_path = pjoin(repo_path, "fuzz_inputs", target_name)
     os.makedirs(os.path.dirname(input_file_path), exist_ok=True)
     
@@ -139,16 +140,16 @@ def fuzz_one_target(target: tuple[str, str], timeout: int):
 
 def fuzz_repos(repos: list[str], jobs: int, timeout: int = 60):
     """
-    对一组仓库执行模糊测试
+    Perform fuzzing on a set of repositories
     
     Args:
-        repos (list[str]): 仓库路径列表
-        jobs (int): 并行任务数
-        timeout (int): 超时时间(秒)
+        repos (list[str]): List of repository paths
+        jobs (int): Number of parallel tasks
+        timeout (int): Timeout duration (seconds)
     """
     logging.info("Discovering fuzz targets")
     
-    # 获取所有目标
+    # Get all targets
     targets_list = []
     for repo in repos:
         project_name = os.path.basename(repo)
@@ -156,42 +157,42 @@ def fuzz_repos(repos: list[str], jobs: int, timeout: int = 60):
         targets = discover_targets(project_name, oss_fuzz_dir)
         targets_list.append(targets)
     
-    # 创建目标映射
+    # Create target mapping
     target_map = {repo: targets for repo, targets in zip(repos, targets_list)}
-    all_targets: list[tuple[str, str]] = [  # 修复: 重命名变量避免冲突
+    all_targets: list[tuple[str, str]] = [  # Fix: Rename variable to avoid conflict
         (k, v) for k, vs in target_map.items() for v in vs
     ]
     
     logging.info(f"Running fuzzing on {len(all_targets)} targets")
     
-    # 创建输入目录
+    # Create input directory
     for repo in repos:
         os.makedirs(pjoin(repo, "fuzz_inputs"), exist_ok=True)
     
-    # 并行执行模糊测试
+    # Execute fuzzing in parallel
     parallel_subprocess(all_targets, jobs, lambda p: fuzz_one_target(p, timeout), on_exit=None)
 
 def generate_test_template(target_name: str, repo_path: str):
     """
-    为单个目标生成测试模板
+    Generate test template for a single target
     
     Args:
-        target_name (str): 目标名称
-        repo_path (str): 仓库路径
+        target_name (str): Target name
+        repo_path (str): Repository path
         
     Returns:
-        str: 模板文件路径
+        str: Template file path
     """
     template_dir = pjoin(repo_path, "tests-gen")
     os.makedirs(template_dir, exist_ok=True)
     template_path = pjoin(template_dir, f"{target_name}.rs")
     
-    # 基本测试模板 - 使用字节数组而不是字节字符
+    # Basic test template - use byte array instead of byte characters
     template = f"""
     #[test]
     fn test_{target_name}() {{
-        // 测试逻辑将在这里生成
-        let input = b""; // 模糊测试输入将替换这里
+        // Test logic will be generated here
+        let input = b""; // Fuzzing input will be replaced here
         let result = process_input(&input);
         assert!(result.is_ok());
     }}
@@ -204,11 +205,11 @@ def generate_test_template(target_name: str, repo_path: str):
 
 def transform_repos(repos: list[str], jobs: int):
     """
-    为所有目标生成测试模板
+    Generate test templates for all targets
     
     Args:
-        repos (list[str]): 仓库路径列表
-        jobs (int): 并行任务数
+        repos (list[str]): List of repository paths
+        jobs (int): Number of parallel tasks
     """
     logging.info("Generating test templates")
     
@@ -223,20 +224,20 @@ def _transform_repo(repo: str):
 
 def escape_special_chars(input_data: str) -> str:
     """
-    转义输入数据中的特殊字符
+    Escape special characters in input data
     
     Args:
-        input_data (str): 原始输入数据
+        input_data (str): Raw input data
         
     Returns:
-        str: 转义后的输入数据
+        str: Input data with escaped characters
     """
-    # 转义反斜杠和双引号
+    # Escape backslashes and double quotes
     escaped = input_data.replace('\\', '\\\\').replace('"', '\\"')
     
-    # 处理非ASCII字符
+    # Handle non-ASCII characters
     if any(ord(c) > 127 for c in escaped):
-        # 如果包含非ASCII字符，使用字节数组表示
+        # If containing non-ASCII characters, use byte array representation
         byte_array = [str(b) for b in input_data.encode()]
         return f"let input = vec![{', '.join(byte_array)}];"
     
@@ -244,27 +245,27 @@ def escape_special_chars(input_data: str) -> str:
 
 def substitute_input(template: str, input_data: str, idx: int, target_name: str) -> str:
     """
-    将模糊测试输入替换到测试模板中
+    Replace fuzzing input into test template
     
     Args:
-        template (str): 模板内容
-        input_data (str): 输入数据
-        idx (int): 测试索引
-        target_name (str): 目标名称
+        template (str): Template content
+        input_data (str): Input data
+        idx (int): Test index
+        target_name (str): Target name
         
     Returns:
-        str: 替换后的测试代码
+        str: Test code after substitution
     """
-    # 转义特殊字符并处理非ASCII字符
+    # Escape special characters and handle non-ASCII characters
     escaped_input = escape_special_chars(input_data)
     
-    # 替换输入占位符
+    # Replace input placeholder
     new_template = template.replace(
-        'let input = b""; // 模糊测试输入将替换这里',
+        'let input = b""; // Fuzzing input will be replaced here',
         escaped_input
     )
     
-    # 替换函数名避免重复
+    # Replace function name to avoid duplication
     return new_template.replace(
         f"fn test_{target_name}()",
         f"fn test_{target_name}_{idx}()"
@@ -272,15 +273,15 @@ def substitute_input(template: str, input_data: str, idx: int, target_name: str)
 
 def has_similar(selected: list[str], x: str, thresh: float = 0.8) -> bool:
     """
-    检查字符串是否与已选列表中的任何字符串足够相似
+    Check if a string is sufficiently similar to any string in the selected list
     
     Args:
-        selected (list[str]): 已选字符串列表
-        x (str): 待检查字符串
-        thresh (float): 相似度阈值
+        selected (list[str]): List of selected strings
+        x (str): String to check
+        thresh (float): Similarity threshold
         
     Returns:
-        bool: 是否相似
+        bool: Whether they are similar
     """
     def similar(a, b):
         return SequenceMatcher(None, a, b).ratio()
@@ -295,20 +296,20 @@ def substitute_one_repo(
     sim_thresh: float,
 ):
     """
-    处理单个仓库，将模糊测试输入替换到测试模板中
+    Process a single repository, replace fuzzing inputs into test templates
     
     Args:
-        repo (str): 仓库路径
-        targets (list[str]): 目标列表
-        n_fuzz (int): 使用的输入数量
-        strategy (str): 选择策略
-        max_len (int): 最大长度
-        sim_thresh (float): 相似度阈值
+        repo (str): Repository path
+        targets (list[str]): List of targets
+        n_fuzz (int): Number of inputs to use
+        strategy (str): Selection strategy
+        max_len (int): Maximum length
+        sim_thresh (float): Similarity threshold
     """
     template_dir = pjoin(repo, "tests-gen")
     input_dir = pjoin(repo, "fuzz_inputs")
     
-    for target_name in targets:  # 使用target_name作为循环变量
+    for target_name in targets:  # Use target_name as loop variable
         template_path = pjoin(template_dir, f"{target_name}.rs")
         input_path = pjoin(input_dir, target_name)
         
@@ -333,7 +334,7 @@ def substitute_one_repo(
                 
             logging.info(f"Loaded {len(all_inputs)} inputs for {target_name}")
 
-            # 选择输入策略
+            # Input selection strategy
             if strategy == "shuffle":
                 random.shuffle(all_inputs)
                 inputs = list(islice(
@@ -349,18 +350,18 @@ def substitute_one_repo(
             else:
                 inputs = all_inputs[:n_fuzz]
             
-            # 生成测试用例
+            # Generate test cases
             tests = [
-                substitute_input(template, input_data, i, target_name)  # 传递target_name
+                substitute_input(template, input_data, i, target_name)  # Pass target_name
                 for i, input_data in enumerate(inputs)
             ]
             
-            # 写入生成的测试文件
+            # Write generated test file
             generated_path = pjoin(template_dir, f"{target_name}.inputs.rs")
             with open(generated_path, "w") as f:
                 f.write("\n".join(tests))
                 
-            # 格式化代码
+            # Format code
             subprocess.run(["rustfmt", generated_path], check=False)
             
         except Exception as e:
@@ -375,17 +376,17 @@ def testgen_repos(
     sim_thresh: float = 0.8,
 ):
     """
-    从模糊测试输入生成测试用例
+    Generate test cases from fuzzing inputs
     
     Args:
-        repos (list[str]): 仓库路径列表
-        jobs (int): 并行任务数
-        n_fuzz (int): 使用的输入数量
-        strategy (str): 选择策略
-        max_len (int): 最大长度
-        sim_thresh (float): 相似度阈值
+        repos (list[str]): List of repository paths
+        jobs (int): Number of parallel tasks
+        n_fuzz (int): Number of inputs to use
+        strategy (str): Selection strategy
+        max_len (int): Maximum length
+        sim_thresh (float): Similarity threshold
     """
-    # 首先获取所有目标
+    # First get all targets
     targets_list = []
     for repo in repos:
         project_name = os.path.basename(repo)
@@ -395,7 +396,7 @@ def testgen_repos(
     
     target_map = {repo: targets for repo, targets in zip(repos, targets_list)}
     
-    # 并行处理每个仓库
+    # Process each repository in parallel
     with ProcessingPool(jobs) as p:
         list(p.map(
             lambda item: substitute_one_repo(
@@ -416,18 +417,18 @@ def main(
     sim_thresh: float = 0.8,
 ):
     """
-    主函数，控制整个模糊测试流程
+    Main function, controlling the entire fuzzing process
     
     Args:
-        repo_id (str): 项目ID文件路径
-        repo_root (str): 项目根目录
-        timeout (int): 超时时间
-        jobs (int): 并行任务数
-        pipeline (str): 流程类型
-        n_fuzz (int): 使用的输入数量
-        strategy (str): 选择策略
-        max_len (int): 最大长度
-        sim_thresh (float): 相似度阈值
+        repo_id (str): Project ID file path
+        repo_root (str): Project root directory
+        timeout (int): Timeout duration
+        jobs (int): Number of parallel tasks
+        pipeline (str): Pipeline type
+        n_fuzz (int): Number of inputs to use
+        strategy (str): Selection strategy
+        max_len (int): Maximum length
+        sim_thresh (float): Similarity threshold
     """
     try:
         with open(repo_id, "r") as f:
@@ -435,14 +436,14 @@ def main(
     except FileNotFoundError:
         repo_id_list = [repo_id]
 
-    # 收集仓库路径
+    # Collect repository paths
     repos = []
     for repo_id in repo_id_list:
         repo_path = abspath(os.path.join(repo_root, repo_id))
         if os.path.isdir(repo_path):
             repos.append(repo_path)
 
-    # 执行指定流程
+    # Execute specified pipeline
     if pipeline == "build_image":
         build_image(repos, jobs)
     elif pipeline == "build_fuzzer":
@@ -456,7 +457,7 @@ def main(
     elif pipeline == "all":
         build_image(repos, jobs)
         build_fuzzer(repos, jobs)
-        transform_repos(repos, jobs)  # 关键添加：模板生成
+        transform_repos(repos, jobs)  # Key addition: Template generation
         fuzz_repos(repos, jobs, timeout)
         testgen_repos(repos, jobs, n_fuzz, strategy, max_len, sim_thresh)
     else:
@@ -464,4 +465,4 @@ def main(
 
 if __name__ == "__main__":
     logging.basicConfig(level=logging.INFO)
-    fire.Fire(main)
+    fire.Fire(main)
\ No newline at end of file

From 6717dad9a67b1b3c607fc9906f88c3041633ceb4 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Tue, 5 Aug 2025 06:35:12 +0000
Subject: [PATCH 089/134] delete previous scripts

---
 fuzz/build_fuzz.py                  | 259 ----------------------------
 fuzz/build_fuzzers.py               | 229 ------------------------
 fuzz/command_util.py                | 124 -------------
 fuzz/errors.py                      |  20 ---
 fuzz/run_fuzz_all_targets.py        | 211 ----------------------
 fuzz/run_fuzz_all_targets_print1.py | 256 ---------------------------
 6 files changed, 1099 deletions(-)
 delete mode 100644 fuzz/build_fuzz.py
 delete mode 100644 fuzz/build_fuzzers.py
 delete mode 100644 fuzz/command_util.py
 delete mode 100644 fuzz/errors.py
 delete mode 100644 fuzz/run_fuzz_all_targets.py
 delete mode 100644 fuzz/run_fuzz_all_targets_print1.py

diff --git a/fuzz/build_fuzz.py b/fuzz/build_fuzz.py
deleted file mode 100644
index 00f8af6..0000000
--- a/fuzz/build_fuzz.py
+++ /dev/null
@@ -1,259 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-"""
-OSS-Fuzz Build System
-
-Combines Docker image building and fuzzer building capabilities.
-Supports three modes: 'image', 'fuzzer', or 'both'.
-
-Usage:
-  Build images:
-    python3 build_fuzz.py --mode image [project_list] --oss-fuzz-dir /path/to/oss-fuzz
-
-  Build fuzzers:
-    python3 build_fuzz.py --mode fuzzer [project_list] --oss-fuzz-dir /path/to/oss-fuzz --image-results results.json
-
-  Build both:
-    python3 build_fuzz.py --mode both [project_list] --oss-fuzz-dir /path/to/oss-fuzz --sanitizer address
-
-Example:
-    python3 fuzz/build_fuzz.py --mode both data/valid_projects3.txt \
-        --oss-fuzz-dir ./fuzz/oss-fuzz \
-        --sanitizer address \
-        --workers 8
-"""
-
-import os
-import sys
-import subprocess
-import argparse
-import logging
-import json
-from pathlib import Path
-from returns.maybe import Maybe
-from multiprocessing import Pool
-from errors import BuildError, CommandError, PathError, ConfigError
-from command_util import run_command_build_fuzz as run_command
-
-# ========================================================================================
-# Build Functions
-# ========================================================================================
-def build_image(project_name: str, oss_fuzz_dir: Path) -> tuple[bool, str]:
-    """Docker image build workflow"""
-    try:
-        logging.info(f"Building Docker image: {project_name}")
-
-        # Validate paths
-        helper_script = oss_fuzz_dir / "infra" / "helper.py"
-        if not helper_script.exists():
-            raise PathError(f"Missing helper script: {helper_script}", project=project_name)
-
-        # Execute image build command
-        run_command(
-            f"python3 infra/helper.py build_image {project_name}",
-            oss_fuzz_dir,
-            project=project_name
-        )
-
-        logging.info(f"✅ Docker image built: {project_name}")
-        return (True, project_name)
-
-    except CommandError as e:
-        logging.error(f"❌ Docker build failed: {project_name} - {str(e)}")
-        return (False, project_name)
-    except Exception as e:
-        logging.error(f"💥 Unhandled exception: {project_name} - {repr(e)}")
-        return (False, project_name)
-
-def build_fuzzers(project_name: str, sanitizer: str, oss_fuzz_dir: Path) -> tuple[bool, str]:
-    """Fuzzer build workflow"""
-    try:
-        logging.info(f"Building fuzzers: {project_name} ({sanitizer} sanitizer)")
-
-        # Validate paths
-        helper_script = oss_fuzz_dir / "infra" / "helper.py"
-        if not helper_script.exists():
-            raise PathError(f"Missing helper script: {helper_script}", project=project_name)
-
-        # Execute fuzzer build command
-        run_command(
-            f"python3 infra/helper.py build_fuzzers --sanitizer {sanitizer} {project_name}",
-            oss_fuzz_dir,
-            project=project_name,
-            skip_yes=True
-        )
-
-        logging.info(f"✅ Fuzzers built: {project_name}")
-        return (True, project_name)
-
-    except BuildError as e:
-        logging.error(f"❌ Fuzzer build failed: {project_name} - {str(e)}")
-        return (False, project_name)
-    except Exception as e:
-        logging.error(f"💥 Unhandled exception: {project_name} - {repr(e)}")
-        return (False, project_name)
-
-# ========================================================================================
-# Main Execution
-# ========================================================================================
-def load_projects(file_path: Path) -> list[str]:
-    """Load project list from file"""
-    if not file_path.exists():
-        raise FileNotFoundError(f"Project list not found: {file_path}")
-
-    with open(file_path, "r", encoding="utf-8") as f:
-        projects = [line.strip() for line in f if line.strip()]
-
-    if not projects:
-        raise ConfigError("Project list is empty")
-
-    logging.info(f"Loaded {len(projects)} projects from {file_path.name}")
-    return projects
-
-def execute_builds(
-    func,
-    args_list: list[tuple],
-    worker_count: int,
-    success_msg: str,
-    failure_msg: str
-) -> tuple[dict[str, bool], list[str]]:
-    """Execute build tasks in parallel and return results"""
-    results = {}
-    with Pool(worker_count) as pool:
-        for success, project in pool.starmap(func, args_list):
-            results[project] = success
-
-    failed = [p for p, success in results.items() if not success]
-    success_count = len(results) - len(failed)
-
-    if failed:
-        logging.error(f"\n❌ {failure_msg}: {len(failed)}/{len(results)} projects")
-    logging.info(f"\n📊 {success_msg}: {success_count}/{len(results)} projects")
-
-    return results, failed
-
-def main():
-    parser = argparse.ArgumentParser(description="OSS-Fuzz Build System")
-    parser.add_argument("project_list", help="Project list file path")
-    parser.add_argument("--oss-fuzz-dir", required=True, type=str,
-                        help="OSS-Fuzz directory path")
-    parser.add_argument("--mode", choices=['image', 'fuzzer', 'both'], default='both',
-                        help="Build mode: 'image', 'fuzzer', or 'both'")
-    parser.add_argument("--workers", type=int, default=os.cpu_count(),
-                        help="Number of parallel worker processes")
-    parser.add_argument("--sanitizer", default="address",
-                        choices=["address", "memory", "undefined"],
-                        help="Fuzzer sanitizer type")
-    parser.add_argument("--image-results", default="image_build_results.json",
-                        help="Image build results file (JSON)")
-    parser.add_argument("--log-level", default="INFO",
-                        choices=["DEBUG", "INFO", "WARNING", "ERROR"],
-                        help="Logging detail level")
-    args = parser.parse_args()
-
-    # Configure logging
-    logging.basicConfig(
-        level=getattr(logging, args.log_level),
-        format='[%(levelname)s] [PID:%(process)d] %(message)s'
-    )
-
-    oss_fuzz_dir = Path(args.oss_fuzz_dir).resolve()
-    project_file = Path(args.project_list).resolve()
-    output_file = Path(args.image_results)
-
-    # Sanity checks
-    if not oss_fuzz_dir.exists():
-        logging.critical(f"OSS-Fuzz directory not found: {oss_fuzz_dir}")
-        sys.exit(1)
-
-    # Load projects
-    try:
-        projects = load_projects(project_file)
-    except Exception as e:
-        logging.critical(f"❌ Failed to load projects: {e}")
-        sys.exit(1)
-
-    # Image building workflow
-    image_results: dict[str, bool] = {} 
-    if args.mode in ['image', 'both']:
-        logging.info("\n" + "="*60)
-        logging.info(f"Starting Docker image builds for {len(projects)} projects")
-        logging.info("="*60 + "\n")
-
-        image_args = [(p, oss_fuzz_dir) for p in projects]
-        image_results, image_failures = execute_builds(
-            build_image,
-            image_args,
-            args.workers,
-            "✅ Docker image builds succeeded",
-            "🚫 Docker image builds failed"
-        )
-
-        # Save image build results
-        try:
-            with output_file.open("w") as f:
-                json.dump(image_results, f, indent=4)
-            logging.info(f"💾 Image build results saved to: {output_file}")
-        except Exception as e:
-            logging.error(f"❌ Failed to save image results: {e}")
-
-    # Fuzzer building workflow
-    fuzzer_results: dict[str, bool] = {}
-    fuzz_projects = []
-    if args.mode in ['fuzzer', 'both']:
-        logging.info("\n" + "="*60)
-        logging.info(f"Starting fuzzer builds ({args.sanitizer} sanitizer)")
-        logging.info("="*60 + "\n")
-
-        # Load image results for fuzzer mode
-        if args.mode == 'fuzzer':
-            try:
-                with output_file.open("r") as f:
-                    image_results = json.load(f)
-                logging.info(f"📋 Loaded image build results from: {output_file}")
-            except Exception as e:
-                logging.critical(f"❌ Failed to load image results: {e}")
-                sys.exit(1)
-
-        # Filter projects with successful image builds
-        fuzz_projects = [p for p in projects if image_results.get(p, False)]
-        if not fuzz_projects:
-            logging.critical("❌ No projects with successful image builds to fuzz.")
-            sys.exit(0)
-        
-        logging.info(f"Attempting to build fuzzers for {len(fuzz_projects)} projects with successful image builds.")
-
-        fuzzer_args = [(p, args.sanitizer, oss_fuzz_dir) for p in fuzz_projects]
-        fuzzer_results, fuzzer_failures = execute_builds(
-            build_fuzzers,
-            fuzzer_args,
-            args.workers,
-            "✅ Fuzzer builds succeeded",
-            "🚫 Fuzzer builds failed"
-        )
-
-    # Final summary
-    logging.info("\n" + "="*60)
-    logging.info("Build Summary")
-    logging.info("="*60)
-
-    if args.mode in ['image', 'both']:
-        image_success = sum(1 for r in image_results.values() if r)
-        logging.info(f"📦 Docker Images: {image_success}/{len(projects)} succeeded")
-
-    if args.mode in ['fuzzer', 'both'] and fuzz_projects:
-        fuzzer_success = sum(1 for r in fuzzer_results.values() if r)
-        logging.info(f"🔧 Fuzzers: {fuzzer_success}/{len(fuzz_projects)} succeeded")
-
-    logging.info("="*60)
-
-if __name__ == "__main__":
-    try:
-        main()
-    except KeyboardInterrupt:
-        print("\n🛑 Operation interrupted by user")
-        sys.exit(1)
-    except Exception as e:
-        logging.critical(f"💥 Critical error: {str(e)}")
-        sys.exit(1)
\ No newline at end of file
diff --git a/fuzz/build_fuzzers.py b/fuzz/build_fuzzers.py
deleted file mode 100644
index ab8ac7d..0000000
--- a/fuzz/build_fuzzers.py
+++ /dev/null
@@ -1,229 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-"""
-build_fuzzers.py
-
-Parallel build of OSS-Fuzz fuzzers.
-Requires Docker images to be built first (using build_images.py).
-
-Usage: python3 build_fuzzers.py [project_list_file] --oss-fuzz-dir /path/to/oss-fuzz \
-    --image-results image_build_results.json \
-    [--sanitizer type] [--workers N]
-Example: python3 fuzz/build_fuzzers.py data/valid_projects.txt \
-    --oss-fuzz-dir ./fuzz/oss-fuzz \
-    --image-results image_build_results.json \
-    --sanitizer address \
-    --workers 8
-"""
-
-import os
-import sys
-import subprocess
-import argparse
-import logging
-import json
-from pathlib import Path
-from typing import Optional
-from multiprocessing import Pool, cpu_count
-from errors import BuildError, CommandError, PathError, ConfigError
-from command_util import run_command_build_fuzz as run_command
-
-# def run_command(
-#     cmd: str,
-#     oss_fuzz_dir: Path,
-#     project: str = "",
-#     allowed_exit_codes: Optional[list[int]] = None
-# ) -> int:
-#     """Execute a command and return the exit code"""
-#     allowed_exit_codes = allowed_exit_codes or [0]
-#     logging.info(f"▶️ Executing command: {cmd}")
-    
-#     try:
-#         process = subprocess.Popen(
-#             cmd,
-#             shell=True,
-#             cwd=str(oss_fuzz_dir),
-#             stdout=subprocess.PIPE,
-#             stderr=subprocess.PIPE,
-#             text=True
-#         )
-        
-#         stdout, stderr = process.communicate()
-#         exit_code = process.returncode
-        
-#         if exit_code in allowed_exit_codes:
-#             return exit_code
-            
-#         # Build detailed error message
-#         error_msg = f"Command failed (exit code: {exit_code})"
-#         if project:
-#             error_msg += f" for project: {project}"
-            
-#         if stderr.strip():
-#             error_msg += f"\nError output:\n{stderr.strip()}"
-            
-#         if stdout.strip():
-#             error_msg += f"\nOutput:\n{stdout.strip()}"
-            
-#         raise CommandError(error_msg, project=project, exit_code=exit_code)
-    
-#     except FileNotFoundError as e:
-#         raise CommandError(f"Command not found: {cmd.split()[0]}", project=project) from e
-#     except OSError as e:
-#         raise CommandError(f"System error: {e}", project=project) from e
-#     except subprocess.SubprocessError as e:
-#         raise CommandError(f"Subprocess error: {e}", project=project) from e
-
-def build_fuzzers(project_name: str, sanitizer: str, oss_fuzz_dir: Path) -> tuple[bool, str]:
-    """Fuzzer build workflow"""
-    try:
-        logging.info("=" * 60)
-        logging.info(f"🔧 Building fuzzers for: {project_name}")
-        logging.info(f"📁 OSS-Fuzz directory: {oss_fuzz_dir}")
-        logging.info("=" * 60)
-        
-        # Validate paths
-        helper_script = oss_fuzz_dir / "infra" / "helper.py"
-        if not helper_script.exists():
-            raise PathError(f"Missing helper script: {helper_script}", project=project_name)
-        
-        # Execute fuzzer build command
-        run_command(
-            f"python3 infra/helper.py build_fuzzers --sanitizer {sanitizer} {project_name}",
-            oss_fuzz_dir,
-            project=project_name
-        )
-        
-        logging.info(f"✅ Fuzzers built: {project_name}")
-        return (True, project_name)
-    
-    except BuildError as e:
-        logging.error(f"❌ Build failed: {project_name}")
-        logging.error(f"   Reason: {str(e)}")
-        return (False, project_name)
-    except Exception as e:
-        logging.error(f"🔥 Unhandled exception: {project_name}")
-        logging.exception(f"   Exception details: {e}")
-        return (False, project_name)
-
-def main():
-    parser = argparse.ArgumentParser(description="OSS-Fuzz Fuzzer Builder")
-    parser.add_argument("project_list", help="Project list file path")
-    parser.add_argument("--oss-fuzz-dir", required=True, type=str, 
-                        help="OSS-Fuzz directory path")
-    parser.add_argument("--sanitizer", default="address", 
-                        choices=["address", "memory", "undefined"],
-                        help="Fuzzer sanitizer type")
-    parser.add_argument("--workers", type=int, default=cpu_count(),
-                        help="Number of parallel worker processes")
-    parser.add_argument("--image-results", required=True,
-                        help="JSON file with image build results from build_images.py")
-    args = parser.parse_args()
-
-    logging.basicConfig(
-        level=logging.INFO,
-        format='[%(levelname)s] %(message)s'
-    )
-
-    # Process paths
-    oss_fuzz_dir = Path(args.oss_fuzz_dir).resolve()
-    logging.info(f"📁 Using OSS-Fuzz directory: {oss_fuzz_dir}")
-
-    # Read project list
-    try:
-        project_file = Path(args.project_list)
-        if not project_file.exists():
-            raise FileNotFoundError(f"Project list file not found: {project_file}")
-            
-        with open(project_file, "r", encoding="utf-8") as f:
-            all_projects = [line.strip() for line in f if line.strip()]
-            
-        if not all_projects:
-            raise ConfigError("Project list is empty")
-            
-        logging.info(f"📋 Loaded {len(all_projects)} projects")
-    except Exception as e:
-        logging.error(f"❌ Failed to read project list: {e}")
-        sys.exit(1)
-
-    # Load image build results
-    try:
-        image_results_file = Path(args.image_results)
-        if not image_results_file.exists():
-            raise FileNotFoundError(f"Image results file not found: {image_results_file}")
-            
-        with open(image_results_file, "r") as f:
-            image_results = json.load(f)
-            
-        if not isinstance(image_results, dict):
-            raise ConfigError("Image results should be a JSON object")
-            
-        logging.info(f"📋 Loaded image build results: {args.image_results}")
-    except json.JSONDecodeError as e:
-        logging.error(f"❌ Failed to parse image build results: {e}")
-        sys.exit(1)
-    except Exception as e:
-        logging.error(f"❌ Failed to load image build results: {e}")
-        sys.exit(1)
-
-    # Filter projects with successful image builds
-    projects_to_build = [p for p in all_projects if p in image_results and image_results[p]]
-    image_failures = [p for p in all_projects if p not in image_results or not image_results[p]]
-    
-    if not projects_to_build:
-        logging.error("❌ No projects with successful image builds")
-        if image_failures:
-            logging.error(f"   Projects with image build failures: {', '.join(image_failures[:10])}{'...' if len(image_failures) > 10 else ''}")
-        sys.exit(1)
-        
-    skipped = len(all_projects) - len(projects_to_build)
-    logging.info(f"🔍 Building {len(projects_to_build)} projects (skipped {skipped} due to image failures)")
-
-    # Parallel fuzzer builds
-    with Pool(args.workers) as pool:
-        results = pool.starmap(
-            build_fuzzers, 
-            [(p, args.sanitizer, oss_fuzz_dir) for p in projects_to_build]
-        )
-
-    # Output results
-    fuzzer_results = {project: success for success, project in results}
-    failed = [p for p in projects_to_build if not fuzzer_results[p]]
-    
-    success_count = len(projects_to_build) - len(failed)
-    logging.info(f"\n📊 Build completed: {success_count}/{len(projects_to_build)}")
-    
-    if failed:
-        logging.error(f"❌ Failed builds ({len(failed)} projects):")
-        for project in failed:
-            logging.error(f"   - {project}")
-
-    # Generate overall status report
-    overall_results = {}
-    for project in all_projects:
-        status = "❌"
-        if project in image_results and image_results[project]:
-            if project in fuzzer_results and fuzzer_results[project]:
-                status = "✅"
-            elif project in fuzzer_results:
-                status = "❌ (fuzzer)"
-            else:
-                status = "❌ (not built)"
-        else:
-            status = "❌ (image)"
-        overall_results[project] = status
-
-    logging.info("\n📊 Overall status:")
-    for project, status in overall_results.items():
-        logging.info(f"  {project}: {status}")
-
-if __name__ == "__main__":
-    try:
-        main()
-    except KeyboardInterrupt:
-        print("\n🛑 Operation interrupted")
-        sys.exit(1)
-    except Exception as e:
-        print(f"💥 Critical error: {e}")
-        sys.exit(1)
\ No newline at end of file
diff --git a/fuzz/command_util.py b/fuzz/command_util.py
deleted file mode 100644
index e446f42..0000000
--- a/fuzz/command_util.py
+++ /dev/null
@@ -1,124 +0,0 @@
-import subprocess
-import logging
-import time
-from pathlib import Path
-from typing import Optional
-from returns.maybe import Maybe
-from errors import CommandError
-
-def create_popen_object(
-    cmd: str,
-    cwd: Optional[Path] = None,
-    capture_output: bool = False,
-    timeout: Optional[int] = None,
-    logger: Optional[logging.Logger] = None,
-) -> subprocess.Popen:
-    """
-    创建并返回 Popen 对象，不等待进程结束
-    - capture_output: 是否捕获输出
-    - timeout: 使用 shell 的 timeout 命令处理超时
-    - logger: 用于实时打印输出
-    返回: Popen 对象
-    """
-    # 添加超时命令
-    if timeout and timeout > 0:
-        cmd = f"timeout {timeout}s {cmd}"
-        if logger:
-            logger.debug(f"⌛ Adding timeout ({timeout}s) to command")
-
-    # 创建 Popen 对象
-    process = subprocess.Popen(
-        cmd,
-        shell=True,
-        cwd=str(cwd) if cwd else None,
-        stdout=subprocess.PIPE if capture_output else None,
-        stderr=subprocess.STDOUT if capture_output else None,
-        text=True,
-        encoding="utf-8",
-        errors="replace",
-    )
-    
-    return process
-
-def parallel_subprocess(
-    tasks: list[tuple[str, Path, Optional[int], logging.Logger]]
-) -> list[tuple[subprocess.Popen, str, Path]]:
-    """
-    并行执行多个子进程
-    - tasks: 任务列表，每个任务是元组 (cmd, cwd, timeout, logger)
-    返回: 包含 (Popen对象, 命令, 工作目录) 的列表
-    """
-    processes = []
-    for cmd, cwd, timeout, logger in tasks:
-        process = create_popen_object(
-            cmd,
-            cwd=cwd,
-            capture_output=True,
-            timeout=timeout,
-            logger=logger
-        )
-        processes.append((process, cmd, cwd))
-    
-    return processes
-
-def wait_for_processes(
-    processes: list[tuple[subprocess.Popen, str, Path]],
-    logger: logging.Logger,
-    allowed_exit_codes: list[int] = [0]
-) -> list[tuple[bool, str, Path]]:
-    """
-    等待所有进程完成并处理结果
-    - processes: 进程列表
-    - logger: 日志记录器
-    - allowed_exit_codes: 允许的退出码列表
-    返回: 结果列表 (成功状态, 命令, 工作目录)
-    """
-    results = []
-    
-    for process, cmd, cwd in processes:
-        # 实时读取输出
-        output_lines = []
-        while True:
-            line = process.stdout.readline()
-            if not line and process.poll() is not None:
-                break
-            if line:
-                stripped_line = line.strip()
-                output_lines.append(stripped_line)
-                if logger:
-                    logger.debug(stripped_line)
-        
-        exit_code = process.returncode
-        
-        # 检查是否超时 (124 是 timeout 命令的退出码)
-        if exit_code == 124:
-            logger.warning(f"⌛ Command timed out: {cmd}")
-        
-        # 检查是否成功
-        success = exit_code in allowed_exit_codes
-        results.append((success, cmd, cwd))
-    
-    return results
-
-def run_command_build_fuzz(
-    cmd: str,
-    oss_fuzz_dir: Path,
-    project: str = "",
-    allowed_exit_codes: Maybe[list[int]] = Maybe.empty,
-    skip_yes: bool = False
-) -> int:
-    """run_command used in build_fuzz.py, build_fuzzers.py"""
-    allowed_codes = allowed_exit_codes.value_or([0])
-    cmd_str = f"yes | {cmd}" if not skip_yes else cmd
-    process = create_popen_object(cmd_str, cwd=oss_fuzz_dir)
-    process.wait()
-    
-    # 处理 timeout 的特殊退出码 (124)
-    exit_code = 124 if process.returncode == 124 else process.returncode
-    
-    if exit_code not in allowed_codes:
-        error_msg = f"The command failed (exit code: {exit_code})"
-        if project:
-            error_msg += f" for project: {project}"
-        raise CommandError(error_msg, project=project, exit_code=exit_code)
-    return exit_code
\ No newline at end of file
diff --git a/fuzz/errors.py b/fuzz/errors.py
deleted file mode 100644
index 294e642..0000000
--- a/fuzz/errors.py
+++ /dev/null
@@ -1,20 +0,0 @@
-# fuzz/errors.py
-
-class BuildError(Exception):
-    """Base exception for build failures"""
-    def __init__(self, message: str, project: str = "", exit_code: int | None = None):
-        super().__init__(message)
-        self.project = project
-        self.exit_code = exit_code
-
-class CommandError(BuildError):
-    """Exception for command execution failures"""
-    pass
-
-class PathError(BuildError):
-    """Exception for missing paths or files"""
-    pass
-
-class ConfigError(BuildError):
-    """Exception for configuration errors"""
-    pass
diff --git a/fuzz/run_fuzz_all_targets.py b/fuzz/run_fuzz_all_targets.py
deleted file mode 100644
index 8990e33..0000000
--- a/fuzz/run_fuzz_all_targets.py
+++ /dev/null
@@ -1,211 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-"""
-run_fuzz_all_targets.py
-
-This script employs a two-phase approach for fuzz testing:
-1. Discovery phase: Traverse all specified projects to collect all executable fuzz targets in each project
-2. Execution phase: Create a task pool containing all (project, target) pairs and execute them in parallel using multiprocessing
-
-This approach maximizes CPU utilization and provides clear overall progress[2](@ref).
-
-Usage: python3 run_fuzz_all_targets.py [project_list_file] [--oss-fuzz-dir /your/custom/path/to/oss-fuzz] [--timeout seconds] [--workers N]
-Example: python3 fuzz/run_fuzz_all_targets.py data/valid_projects3.txt --oss-fuzz-dir /home/jiayiguo/FuzzAug/fuzz/oss-fuzz --timeout 60 --workers 4
-
-"""
-
-import os
-import sys
-import subprocess
-import argparse
-import logging
-import time
-from datetime import datetime
-from pathlib import Path
-from multiprocessing import Pool, cpu_count
-from returns.maybe import Maybe, Nothing, Some
-from command_util import run_command_fuzz_all_targets as run_command
-
-def discover_targets(project_name: str, oss_fuzz_dir: Path, logger: logging.Logger) -> list[str]:
-    """Discover fuzz targets for a project (starting with 'fuzz_', no extension, and executable)"""
-    out_dir = oss_fuzz_dir / "build" / "out" / project_name
-    targets: list[str] = []
-
-    if not out_dir.is_dir():
-        logger.warning(f"Build output directory for project {project_name} does not exist: {out_dir}")
-        return targets
-
-    try:
-        for f in out_dir.iterdir():
-            try:
-                if (f.is_file() and
-                        f.name.startswith("fuzz_") and
-                        '.' not in f.name and
-                        # f.name.endswith("print1") and
-                        os.access(f, os.X_OK)):
-                    targets.append(f.name)
-            except OSError as e:
-                logger.warning(f"⚠️ Error while checking file {f.name}, skipped: {e}")
-
-    except PermissionError:
-        logger.error(f"🔒 Insufficient permissions to access directory: {out_dir}")
-    except OSError as e:
-        logger.exception(f"💥 Operating system error occurred while discovering targets: {e}")
-
-    return targets
-
-
-def run_single_target(project_name: str, target_name: str, timeout: int, oss_fuzz_dir: Path) -> tuple[bool, str, str]:
-    """Execute fuzz testing workflow for a single (project, target) pair"""
-    task_id = f"{project_name}_{target_name}"
-    logger = logging.getLogger(task_id)
-    LOG_DIR = oss_fuzz_dir / "run_pj3_logs"
-    try:
-        logger.setLevel(logging.DEBUG)
-        LOG_DIR.mkdir(parents=True, exist_ok=True)
-        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-        log_file = LOG_DIR / f"run_{task_id}_{timestamp}.log"
-        file_handler = logging.FileHandler(log_file, encoding="utf-8")
-        formatter = logging.Formatter(
-            "%(asctime)s [PID:%(process)d] %(levelname)s: %(message)s",
-            datefmt="%Y-%m-%d %H:%M:%S"
-        )
-        file_handler.setFormatter(formatter)
-        logger.addHandler(file_handler)
-        os.chdir(oss_fuzz_dir)
-
-    except (OSError, PermissionError) as e:
-        print(f"❌ Critical error occurred during initialization of task {task_id}: {e}")
-        return False, project_name, target_name
-
-    logger.info(f"🚀 Starting test -> Project: {project_name}, Target: {target_name}")
-    try:
-        cmd = f"python3 infra/helper.py run_fuzzer {project_name} {target_name} -- -max_total_time={timeout}"
-        success = run_command(
-            cmd,
-            f"Running Target '{target_name}' (timeout={timeout}s)",
-            logger,
-            allowed_exit_codes=Some([1, 124]),  # 1=Crashes found, 124=Timeout
-            timeout=timeout + 300
-        )
-
-        if success:
-            logger.info(f"✅ Target '{target_name}' completed successfully.")
-        else:
-            logger.error(f"❌ Target '{target_name}' failed.")
-
-        return success, project_name, target_name
-
-    except Exception as e:
-        logger.exception(f"💥 Unexpected error occurred while running target '{target_name}': {e}")
-        return False, project_name, target_name
-    finally:
-        for handler in logger.handlers[:]:
-            handler.close()
-            logger.removeHandler(handler)
-
-
-def main():
-    # Configure main process logging
-    logging.basicConfig(
-        level=logging.INFO,
-        format="[%(levelname)s] %(message)s",
-        stream=sys.stdout
-    )
-    logger = logging.getLogger("Main")
-
-    # Set up command line argument parsing
-    parser = argparse.ArgumentParser(description="OSS-Fuzz Parallel Fuzz Testing Tool")
-    parser.add_argument("project_list", help="File path containing list of project names")
-    parser.add_argument("--oss-fuzz-dir", type=Path, required=True, help="Path to the oss-fuzz directory (e.g., /path/to/oss-fuzz)")
-    parser.add_argument("--timeout", type=int, default=60, help="Timeout duration per Fuzz Target (seconds)")
-    parser.add_argument("--workers", type=int, default=cpu_count(), help="Number of parallel worker processes")
-    args = parser.parse_args()
-
-    OSS_FUZZ_DIR = args.oss_fuzz_dir.resolve()
-    LOG_DIR = OSS_FUZZ_DIR / "run_fuzz_all_targets_logs"
-
-    # 1. Read project list file
-    try:
-        project_path = Path(args.project_list)
-        with open(project_path, "r", encoding="utf-8") as f:
-            projects = [line.strip() for line in f if line.strip()]
-        logger.info(f"📋 Loaded {len(projects)} projects from {project_path.name}.")
-    except FileNotFoundError:
-        logger.error(f"❌ Project list file not found: {args.project_list}")
-        sys.exit(1)
-    except (OSError, PermissionError) as e:
-        logger.exception(f"💥 Error occurred while reading project list: {e}")
-        sys.exit(1)
-
-    # 2. Discovery phase: Collect all fuzz targets
-    logger.info("\n" + "=" * 20 + " Phase 1: Discover all Fuzz Targets " + "=" * 20)
-    all_fuzz_tasks: list[tuple[str, str]] = []  # Store (project, target) tuples
-    try:
-        original_cwd = Path.cwd()  # Save current working directory
-        os.chdir(OSS_FUZZ_DIR)     # Switch to OSS-Fuzz directory
-        for project_name in projects:
-            targets = discover_targets(project_name, OSS_FUZZ_DIR, logger)
-
-            if targets:
-                logger.info(f"🔍 Discovered {len(targets)} targets in project '{project_name}': {', '.join(targets)}")
-                for target in targets:
-                    all_fuzz_tasks.append((project_name, target))
-            else:
-                logger.warning(f"⚠️ No Fuzz Targets found in project '{project_name}'.")
-        os.chdir(original_cwd)  # Restore original working directory
-    except FileNotFoundError:
-        logger.error(f"❌ OSS-Fuzz directory does not exist: {OSS_FUZZ_DIR}")
-        sys.exit(1)
-    except Exception as e:
-        logger.exception(f"💥 Unknown error occurred during discovery phase: {e}")
-        sys.exit(1)
-
-    # Check if any valid targets were found
-    if not all_fuzz_tasks:
-        logger.info("🤷 No executable Fuzz Targets found. Program exits.")
-        sys.exit(0)
-
-    # 3. Execution phase: Parallel fuzz testing
-    logger.info(f"\n✅ Discovery phase completed. Found a total of {len(all_fuzz_tasks)} fuzz tasks.")
-    logger.info("=" * 20 + " Phase 2: Parallel Fuzzing Execution " + "=" * 23)
-    logger.info(f"🚀 Starting parallel testing with {args.workers} worker processes (timeout per target: {args.timeout}s)...")
-
-    # Prepare task parameters (project, target, timeout)
-    tasks_with_args = [(p, t, args.timeout, OSS_FUZZ_DIR) for p, t in all_fuzz_tasks]
-    results: list[tuple[bool, str, str]] = []  # Store results (success, project, target)
-    
-    # Execute in parallel using process pool
-    with Pool(args.workers) as pool:
-        try:
-            results = pool.starmap(run_single_target, tasks_with_args)
-        except Exception as e:
-            logger.error(f"💥 Critical error occurred during parallel execution: {e}")
-            pool.terminate()
-            pool.join()
-
-    # 4. Result summary and reporting
-    logger.info("\n" + "=" * 20 + " Phase 3: Result Summary " + "=" * 28)
-    failed_tasks = [(p, t) for success, p, t in results if not success]  # List of failed tasks
-    total_tasks = len(all_fuzz_tasks)
-    failed_count = len(failed_tasks)
-    success_count = total_tasks - failed_count
-
-    # Output statistical summary
-    logger.info(f"📊 Fuzzing completed: Success {success_count}/{total_tasks}, Failed {failed_count}/{total_tasks}")
-    if failed_tasks:
-        logger.error("❌ The following Fuzz Targets failed:")
-        for project, target in failed_tasks:
-            logger.error(f"  - Project: {project}, Target: {target}")  # List detailed failures
-
-
-if __name__ == "__main__":
-    try:
-        main()
-    except KeyboardInterrupt:
-        print("\n🛑 Operation interrupted by user.")
-        sys.exit(1)
-    except Exception as e:
-        print(f"\n💥 Fatal error in main program: {e}")
-        sys.exit(1)
\ No newline at end of file
diff --git a/fuzz/run_fuzz_all_targets_print1.py b/fuzz/run_fuzz_all_targets_print1.py
deleted file mode 100644
index ea45ddb..0000000
--- a/fuzz/run_fuzz_all_targets_print1.py
+++ /dev/null
@@ -1,256 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-"""
-run_fuzz_all_targets_print1.py
-
-This script employs a two-phase approach for fuzz testing:
-1. Discovery phase: Traverse all specified projects to collect all executable fuzz targets which start with "fuzz_" and end with "print1" in each project
-2. Execution phase: Create a task pool containing all (project, target) pairs and execute them in parallel using multiprocessing
-
-This approach maximizes CPU utilization and provides clear overall progress.
-
-Usage: python3 run_fuzz_all_targets_print1.py [project_list_file] [--oss-fuzz-dir /your/custom/path/to/oss-fuzz] [--timeout seconds] [--workers N]
-Example: python3 fuzz/run_fuzz_all_targets_print1.py data/valid_projects3.txt --oss-fuzz-dir /home/jiayiguo/FuzzAug/fuzz/oss-fuzz --timeout 60 --workers 4
-
-"""
-
-import os
-import sys
-fuzzaug_root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
-sys.path.insert(0, fuzzaug_root)
-import subprocess
-import argparse
-import logging
-import time
-from datetime import datetime
-from pathlib import Path
-from multiprocessing import cpu_count
-from returns.maybe import Maybe, Nothing, Some
-from UniTSyn.frontend.util import parallel_subprocess
-    
-
-
-def discover_targets(project_name: str, oss_fuzz_dir: Path, logger: logging.Logger) -> list[str]:
-    """Discover fuzz targets for a project (starting with 'fuzz_', ending with 'print1', no extension, and executable)"""
-    out_dir = oss_fuzz_dir / "build" / "out" / project_name
-    targets: list[str] = []
-
-    if not out_dir.is_dir():
-        logger.warning(f"Build output directory for project {project_name} does not exist: {out_dir}")
-        return targets
-
-    try:
-        for f in out_dir.iterdir():
-            try:
-                if (f.is_file() and
-                        f.name.startswith("fuzz_") and
-                        '.' not in f.name and
-                        f.name.endswith("print1") and
-                        os.access(f, os.X_OK)):
-                    targets.append(f.name)
-            except OSError as e:
-                logger.warning(f"⚠️ Error while checking file {f.name}, skipped: {e}")
-
-    except PermissionError:
-        logger.error(f"🔒 Insufficient permissions to access directory: {out_dir}")
-    except OSError as e:
-        logger.exception(f"💥 Operating system error occurred while discovering targets: {e}")
-
-    return targets
-
-def setup_task_logger(project_name: str, target_name: str, oss_fuzz_dir: Path) -> logging.Logger:
-    """为单个任务设置日志记录器"""
-    task_id = f"{project_name}_{target_name}"
-    logger = logging.getLogger(task_id)
-    LOG_DIR = oss_fuzz_dir / "run1_fuzz_all_targets_logs_print1_parallel"
-    
-    try:
-        logger.setLevel(logging.DEBUG)
-        LOG_DIR.mkdir(parents=True, exist_ok=True)
-        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-        log_file = LOG_DIR / f"run_{task_id}_{timestamp}.log"
-        file_handler = logging.FileHandler(log_file, encoding="utf-8")
-        formatter = logging.Formatter(
-            "%(asctime)s [PID:%(process)d] %(levelname)s: %(message)s",
-            datefmt="%Y-%m-%d %H:%M:%S"
-        )
-        file_handler.setFormatter(formatter)
-        logger.addHandler(file_handler)
-        return logger
-    except (OSError, PermissionError) as e:
-        print(f"❌ Critical error occurred during initialization of task {task_id}: {e}")
-        return None
-
-def create_task_command(project_name: str, target_name: str, timeout: int) -> str:
-    """创建任务命令字符串"""
-    return f"python3 infra/helper.py run_fuzzer {project_name} {target_name} -- -max_total_time={timeout}"
-
-def create_subprocess(task: tuple) -> subprocess.Popen:
-    """为每个任务创建子进程"""
-    project_name, target_name, timeout, oss_fuzz_dir, logger = task
-    cmd = create_task_command(project_name, target_name, timeout)
-    
-    # 设置日志文件，存放测试过程的输出
-    task_id = f"{project_name}_{target_name}"
-    LOG_DIR = oss_fuzz_dir / "run2_fuzz_all_targets_logs"
-    LOG_DIR.mkdir(parents=True, exist_ok=True)
-    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-    log_file = LOG_DIR / f"run_{task_id}_{timestamp}.log"
-    
-    # 创建并返回 Popen 对象
-    process = subprocess.Popen(
-        cmd,
-        shell=True,
-        cwd=str(oss_fuzz_dir),
-        stdout=open(log_file, 'w'),
-        stderr=subprocess.STDOUT,
-        text=True,
-        encoding="utf-8",
-        errors="replace",
-    )
-    
-    # 将任务数据附加到进程对象以便后续使用
-    process.task_data = task
-    return process
-
-def on_process_exit(process: subprocess.Popen) -> tuple[bool, str, str]:
-    """处理进程退出"""
-    project_name, target_name, _, oss_fuzz_dir, logger = process.task_data
-    
-    # 等待进程结束
-    process.wait()
-    exit_code = process.returncode
-    
-    # 记录结果
-    if exit_code == 124:
-        logger.warning(f"⌛ Command timed out: {project_name}/{target_name}")
-    
-    # 检查是否成功
-    success = exit_code in [0, 1, 124]  # 0=成功, 1=发现崩溃, 124=超时
-    
-    if success:
-        logger.info(f"✅ Target '{target_name}' completed successfully.")
-    else:
-        logger.error(f"❌ Target '{target_name}' failed with exit code: {exit_code}")
-    
-    return success, project_name, target_name
-
-def main():
-    # Configure main process logging
-    logging.basicConfig(
-        level=logging.INFO,
-        format="[%(levelname)s] %(message)s",
-        stream=sys.stdout
-    )
-    logger = logging.getLogger("Main")
-
-    # Set up command line argument parsing
-    parser = argparse.ArgumentParser(description="OSS-Fuzz Parallel Fuzz Testing Tool")
-    parser.add_argument("project_list", help="File path containing list of project names")
-    parser.add_argument("--oss-fuzz-dir", type=Path, required=True, help="Path to the oss-fuzz directory (e.g., /path/to/oss-fuzz)")
-    parser.add_argument("--timeout", type=int, default=60, help="Timeout duration per Fuzz Target (seconds)")
-    parser.add_argument("--workers", type=int, default=cpu_count(), help="Number of parallel worker processes")
-    args = parser.parse_args()
-
-    OSS_FUZZ_DIR = args.oss_fuzz_dir.resolve()
-
-    # 1. Read project list file
-    try:
-        project_path = Path(args.project_list)
-        with open(project_path, "r", encoding="utf-8") as f:
-            projects = [line.strip() for line in f if line.strip()]
-        logger.info(f"📋 Loaded {len(projects)} projects from {project_path.name}.")
-    except FileNotFoundError:
-        logger.error(f"❌ Project list file not found: {args.project_list}")
-        sys.exit(1)
-    except (OSError, PermissionError) as e:
-        logger.exception(f"💥 Error occurred while reading project list: {e}")
-        sys.exit(1)
-
-    # 2. Discovery phase: Collect all fuzz targets
-    logger.info("\n" + "=" * 20 + " Phase 1: Discover all Fuzz Targets " + "=" * 20)
-    all_fuzz_tasks: list[tuple[str, str]] = []  # Store (project, target) tuples
-    try:
-        original_cwd = Path.cwd()  # Save current working directory
-        os.chdir(OSS_FUZZ_DIR)     # Switch to OSS-Fuzz directory
-        for project_name in projects:
-            targets = discover_targets(project_name, OSS_FUZZ_DIR, logger)
-
-            if targets:
-                logger.info(f"🔍 Discovered {len(targets)} targets in project '{project_name}': {', '.join(targets)}")
-                for target in targets:
-                    all_fuzz_tasks.append((project_name, target))
-            else:
-                logger.warning(f"⚠️ No Fuzz Targets found in project '{project_name}'.")
-        os.chdir(original_cwd)  # Restore original working directory
-    except FileNotFoundError:
-        logger.error(f"❌ OSS-Fuzz directory does not exist: {OSS_FUZZ_DIR}")
-        sys.exit(1)
-    except Exception as e:
-        logger.exception(f"💥 Unknown error occurred during discovery phase: {e}")
-        sys.exit(1)
-
-    # Check if any valid targets were found
-    if not all_fuzz_tasks:
-        logger.info("🤷 No executable Fuzz Targets found. Program exits.")
-        sys.exit(0)
-
-    # 3. Execution phase: Parallel fuzz testing
-    logger.info(f"\n✅ Discovery phase completed. Found a total of {len(all_fuzz_tasks)} fuzz tasks.")
-    logger.info("=" * 20 + " Phase 2: Parallel Fuzzing Execution " + "=" * 23)
-    logger.info(f"🚀 Starting parallel testing with {args.workers} worker processes (timeout per target: {args.timeout}s)...")
-
-    # 准备任务列表
-    tasks = []
-    for project, target in all_fuzz_tasks:
-        task_logger = setup_task_logger(project, target, OSS_FUZZ_DIR)
-        if task_logger:
-            # 每个任务包含: (project, target, timeout, oss_fuzz_dir, logger)
-            task = (project, target, args.timeout, OSS_FUZZ_DIR, task_logger)
-            tasks.append(task)
-            task_logger.info(f"🚀 Starting test -> Project: {project}, Target: {target}")
-
-    # 使用 parallel_subprocess 并行执行
-    results = parallel_subprocess(
-        iterable=tasks,
-        jobs=args.workers,
-        subprocess_creator=create_subprocess,
-        on_exit=on_process_exit,
-        use_tqdm=True,
-        tqdm_leave=True,
-        tqdm_msg="Running fuzz targets"
-    )
-
-    # 4. Result summary and reporting
-    logger.info("\n" + "=" * 20 + " Phase 3: Result Summary " + "=" * 28)
-    success_count = 0
-    failed_tasks = []
-    
-    for task, result in results.items():
-        success, project, target = result
-        if success:
-            success_count += 1
-        else:
-            failed_tasks.append((project, target))
-    
-    total_tasks = len(all_fuzz_tasks)
-    failed_count = len(failed_tasks)
-
-    # Output statistical summary
-    logger.info(f"📊 Fuzzing completed: Success {success_count}/{total_tasks}, Failed {failed_count}/{total_tasks}")
-    if failed_tasks:
-        logger.error("❌ The following Fuzz Targets failed:")
-        for project, target in failed_tasks:
-            logger.error(f"  - Project: {project}, Target: {target}")  # List detailed failures
-
-
-if __name__ == "__main__":
-    try:
-        main()
-    except KeyboardInterrupt:
-        print("\n🛑 Operation interrupted by user.")
-        sys.exit(1)
-    except Exception as e:
-        print(f"\n💥 Fatal error in main program: {e}")
-        sys.exit(1)
\ No newline at end of file

From 12666062186de1b0ad0eabb901a5884d91e75380 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Wed, 6 Aug 2025 01:12:50 +0000
Subject: [PATCH 090/134] Reduce default jobs from 80 to 4

---
 fuzz/collect_fuzz_python.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py
index c772975..98fad4f 100644
--- a/fuzz/collect_fuzz_python.py
+++ b/fuzz/collect_fuzz_python.py
@@ -409,7 +409,7 @@ def main(
     repo_id: str = "data/valid_projects.txt",
     repo_root: str = "fuzz/oss-fuzz/projects/",
     timeout: int = 60,
-    jobs: int = 80,
+    jobs: int = 4,
     pipeline: str = "all",
     n_fuzz: int = 100,
     strategy: str = "shuffle",

From d42927feef0162e2a066f760b571327796b1db91 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Wed, 6 Aug 2025 01:13:12 +0000
Subject: [PATCH 091/134] Switch generated test template from Rust to Python

---
 fuzz/collect_fuzz_python.py | 98 +++++++++++++++++++++----------------
 1 file changed, 57 insertions(+), 41 deletions(-)

diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py
index 98fad4f..9e46db1 100644
--- a/fuzz/collect_fuzz_python.py
+++ b/fuzz/collect_fuzz_python.py
@@ -86,7 +86,7 @@ def discover_targets(project_name: str, oss_fuzz_dir: Path) -> list[str]:
         list[str]: List of target names
     """
     out_dir = oss_fuzz_dir / "build" / "out" / project_name
-    targets: list[str] = []  # Fix: Add type annotation
+    targets: list[str] = []
     
     if not out_dir.is_dir():
         logging.warning(f"Build output directory for {project_name} does not exist")
@@ -159,7 +159,7 @@ def fuzz_repos(repos: list[str], jobs: int, timeout: int = 60):
     
     # Create target mapping
     target_map = {repo: targets for repo, targets in zip(repos, targets_list)}
-    all_targets: list[tuple[str, str]] = [  # Fix: Rename variable to avoid conflict
+    all_targets: list[tuple[str, str]] = [
         (k, v) for k, vs in target_map.items() for v in vs
     ]
     
@@ -174,7 +174,7 @@ def fuzz_repos(repos: list[str], jobs: int, timeout: int = 60):
 
 def generate_test_template(target_name: str, repo_path: str):
     """
-    Generate test template for a single target
+    Generate Python test template for a single target
     
     Args:
         target_name (str): Target name
@@ -185,18 +185,37 @@ def generate_test_template(target_name: str, repo_path: str):
     """
     template_dir = pjoin(repo_path, "tests-gen")
     os.makedirs(template_dir, exist_ok=True)
-    template_path = pjoin(template_dir, f"{target_name}.rs")
-    
-    # Basic test template - use byte array instead of byte characters
-    template = f"""
-    #[test]
-    fn test_{target_name}() {{
-        // Test logic will be generated here
-        let input = b""; // Fuzzing input will be replaced here
-        let result = process_input(&input);
-        assert!(result.is_ok());
-    }}
-    """
+    
+    # Use .py extension for Python test files
+    template_path = pjoin(template_dir, f"{target_name}.py")
+    
+    # Basic Python test template
+    template = f"""#!/usr/bin/env python3
+import sys
+import os
+import unittest
+
+# Add the project directory to the Python path
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')))
+
+# Import the function to test
+from {target_name.replace('fuzz_', '')} import process_input
+
+class Test{target_name.capitalize()}(unittest.TestCase):
+    def test_generated(self):
+        \"\"\"Test generated from fuzzing input\"\"\"
+        # Fuzzing input will be replaced here
+        input_data = b""
+        
+        # Call the function under test
+        result = (input_data)
+        
+        # Add assertions based on expected behavior
+        self.assertIsNotNone(result)
+
+if __name__ == '__main__':
+    unittest.main()
+"""
     
     with open(template_path, "w") as f:
         f.write(template)
@@ -224,7 +243,7 @@ def _transform_repo(repo: str):
 
 def escape_special_chars(input_data: str) -> str:
     """
-    Escape special characters in input data
+    Escape special characters in input data for Python byte strings
     
     Args:
         input_data (str): Raw input data
@@ -232,20 +251,13 @@ def escape_special_chars(input_data: str) -> str:
     Returns:
         str: Input data with escaped characters
     """
-    # Escape backslashes and double quotes
-    escaped = input_data.replace('\\', '\\\\').replace('"', '\\"')
-    
-    # Handle non-ASCII characters
-    if any(ord(c) > 127 for c in escaped):
-        # If containing non-ASCII characters, use byte array representation
-        byte_array = [str(b) for b in input_data.encode()]
-        return f"let input = vec![{', '.join(byte_array)}];"
-    
-    return f"let input = b\"{escaped}\";"
+    # For Python, we can use repr() to safely represent byte strings
+    # This will handle all special characters and non-ASCII bytes
+    return repr(input_data.encode('latin-1', 'replace'))
 
 def substitute_input(template: str, input_data: str, idx: int, target_name: str) -> str:
     """
-    Replace fuzzing input into test template
+    Replace fuzzing input into Python test template
     
     Args:
         template (str): Template content
@@ -256,19 +268,19 @@ def substitute_input(template: str, input_data: str, idx: int, target_name: str)
     Returns:
         str: Test code after substitution
     """
-    # Escape special characters and handle non-ASCII characters
+    # Escape special characters for Python
     escaped_input = escape_special_chars(input_data)
     
     # Replace input placeholder
     new_template = template.replace(
-        'let input = b""; // Fuzzing input will be replaced here',
-        escaped_input
+        'input_data = b""',
+        f'input_data = {escaped_input}'
     )
     
-    # Replace function name to avoid duplication
+    # Replace test method name to avoid duplication
     return new_template.replace(
-        f"fn test_{target_name}()",
-        f"fn test_{target_name}_{idx}()"
+        f"def test_generated(self):",
+        f"def test_{idx}(self):"
     )
 
 def has_similar(selected: list[str], x: str, thresh: float = 0.8) -> bool:
@@ -309,8 +321,9 @@ def substitute_one_repo(
     template_dir = pjoin(repo, "tests-gen")
     input_dir = pjoin(repo, "fuzz_inputs")
     
-    for target_name in targets:  # Use target_name as loop variable
-        template_path = pjoin(template_dir, f"{target_name}.rs")
+    for target_name in targets:
+        # Use .py extension for Python test files
+        template_path = pjoin(template_dir, f"{target_name}.py")
         input_path = pjoin(input_dir, target_name)
         
         try:
@@ -352,17 +365,20 @@ def substitute_one_repo(
             
             # Generate test cases
             tests = [
-                substitute_input(template, input_data, i, target_name)  # Pass target_name
+                substitute_input(template, input_data, i, target_name)
                 for i, input_data in enumerate(inputs)
             ]
             
-            # Write generated test file
-            generated_path = pjoin(template_dir, f"{target_name}.inputs.rs")
+            # Write generated test file with .py extension
+            generated_path = pjoin(template_dir, f"{target_name}.inputs.py")
             with open(generated_path, "w") as f:
                 f.write("\n".join(tests))
                 
-            # Format code
-            subprocess.run(["rustfmt", generated_path], check=False)
+            # Format Python code (optional)
+            try:
+                subprocess.run(["black", generated_path], check=False)
+            except FileNotFoundError:
+                logging.warning("Black formatter not found, skipping formatting")
             
         except Exception as e:
             logging.error(f"Error processing {target_name}: {e}")
@@ -457,7 +473,7 @@ def main(
     elif pipeline == "all":
         build_image(repos, jobs)
         build_fuzzer(repos, jobs)
-        transform_repos(repos, jobs)  # Key addition: Template generation
+        transform_repos(repos, jobs)  # Generate test templates
         fuzz_repos(repos, jobs, timeout)
         testgen_repos(repos, jobs, n_fuzz, strategy, max_len, sim_thresh)
     else:

From f8941f1c9c444fa19354c05cbedee3773466a726 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Wed, 6 Aug 2025 23:26:29 +0000
Subject: [PATCH 092/134] Fix Python test template to call the fuzz target entry point

---
 fuzz/collect_fuzz_python.py | 111 +++++++++++++++++++-----------------
 1 file changed, 60 insertions(+), 51 deletions(-)

diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py
index 9e46db1..6a974d5 100644
--- a/fuzz/collect_fuzz_python.py
+++ b/fuzz/collect_fuzz_python.py
@@ -175,51 +175,53 @@ def fuzz_repos(repos: list[str], jobs: int, timeout: int = 60):
 def generate_test_template(target_name: str, repo_path: str):
     """
     Generate Python test template for a single target
-    
+
     Args:
         target_name (str): Target name
         repo_path (str): Repository path
-        
+
     Returns:
         str: Template file path
     """
     template_dir = pjoin(repo_path, "tests-gen")
     os.makedirs(template_dir, exist_ok=True)
-    
-    # Use .py extension for Python test files
+
+    # Ensure __init__.py exists
+    init_path = pjoin(template_dir, "__init__.py")
+    if not os.path.exists(init_path):
+        with open(init_path, "w") as f:
+            f.write("")
+
     template_path = pjoin(template_dir, f"{target_name}.py")
-    
-    # Basic Python test template
+
+    # Python test template with placeholder
     template = f"""#!/usr/bin/env python3
 import sys
 import os
 import unittest
 
-# Add the project directory to the Python path
-sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')))
+# Add the parent directory to the Python path
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
 
 # Import the function to test
-from {target_name.replace('fuzz_', '')} import process_input
+try:
+    from {target_name} import TestOneInput as TestClass
+except ImportError:
+    from {target_name} import TestInput as TestClass
 
 class Test{target_name.capitalize()}(unittest.TestCase):
     def test_generated(self):
         \"\"\"Test generated from fuzzing input\"\"\"
-        # Fuzzing input will be replaced here
-        input_data = b""
-        
-        # Call the function under test
-        result = (input_data)
-        
-        # Add assertions based on expected behavior
+        input_data = b""  # FUZZ_PLACEHOLDER
+        result = TestClass(input_data)
         self.assertIsNotNone(result)
 
 if __name__ == '__main__':
     unittest.main()
 """
-    
     with open(template_path, "w") as f:
         f.write(template)
-    
+
     return template_path
 
 def transform_repos(repos: list[str], jobs: int):
@@ -309,49 +311,45 @@ def substitute_one_repo(
 ):
     """
     Process a single repository, replace fuzzing inputs into test templates
-    
-    Args:
-        repo (str): Repository path
-        targets (list[str]): List of targets
-        n_fuzz (int): Number of inputs to use
-        strategy (str): Selection strategy
-        max_len (int): Maximum length
-        sim_thresh (float): Similarity threshold
     """
     template_dir = pjoin(repo, "tests-gen")
     input_dir = pjoin(repo, "fuzz_inputs")
-    
+
+    # Ensure __init__.py exists
+    init_path = pjoin(template_dir, "__init__.py")
+    if not os.path.exists(init_path):
+        with open(init_path, "w") as f:
+            f.write("")
+
     for target_name in targets:
-        # Use .py extension for Python test files
         template_path = pjoin(template_dir, f"{target_name}.py")
         input_path = pjoin(input_dir, target_name)
-        
+
         try:
             if not os.path.exists(template_path):
                 logging.warning(f"Template file not found: {template_path}")
                 continue
-                
+
             if not os.path.exists(input_path):
                 logging.warning(f"Input file not found: {input_path}")
                 continue
-                
+
             with open(template_path) as f_template:
                 template = f_template.read()
-                
+
             with open(input_path, "r") as f_input:
                 all_inputs = [line.strip() for line in f_input if line.strip()]
-            
+
             if not all_inputs:
                 logging.warning(f"No valid inputs found for {target_name}")
                 continue
-                
+
             logging.info(f"Loaded {len(all_inputs)} inputs for {target_name}")
 
-            # Input selection strategy
+            # Input selection
             if strategy == "shuffle":
                 random.shuffle(all_inputs)
-                inputs = list(islice(
-                    (x for x in all_inputs if len(x) < max_len), n_fuzz))
+                inputs = list(islice((x for x in all_inputs if len(x) < max_len), n_fuzz))
             elif strategy == "reverse":
                 inputs = []
                 for x in reversed(all_inputs):
@@ -362,24 +360,35 @@ def substitute_one_repo(
                     inputs.append(x)
             else:
                 inputs = all_inputs[:n_fuzz]
-            
-            # Generate test cases
-            tests = [
-                substitute_input(template, input_data, i, target_name)
-                for i, input_data in enumerate(inputs)
-            ]
-            
-            # Write generated test file with .py extension
+
+            # Split header and method body
+            if "def test_generated(self):" not in template:
+                logging.error(f"Template format error in {template_path}")
+                continue
+
+            header, method = template.split("def test_generated(self):", 1)
+
+            # Generate multiple methods
+            test_methods = []
+            for i, input_data in enumerate(inputs):
+                escaped_input = escape_special_chars(input_data)
+                method_code = f"    def test_{i}(self):" + method
+                method_code = method_code.replace('input_data = b""', f"input_data = {escaped_input}")
+                test_methods.append(method_code.strip())
+
+            final_code = header + "\n\n" + "\n\n".join(test_methods)
+
+            # Write to inputs.py
             generated_path = pjoin(template_dir, f"{target_name}.inputs.py")
             with open(generated_path, "w") as f:
-                f.write("\n".join(tests))
-                
-            # Format Python code (optional)
+                f.write(final_code)
+
+            # Format (optional)
             try:
                 subprocess.run(["black", generated_path], check=False)
             except FileNotFoundError:
                 logging.warning("Black formatter not found, skipping formatting")
-            
+
         except Exception as e:
             logging.error(f"Error processing {target_name}: {e}")
 
@@ -422,10 +431,10 @@ def testgen_repos(
         ))
 
 def main(
-    repo_id: str = "data/valid_projects.txt",
+    repo_id: str = "data/valid_projects3.txt",
     repo_root: str = "fuzz/oss-fuzz/projects/",
     timeout: int = 60,
-    jobs: int = 4,
+    jobs: int = 8,
     pipeline: str = "all",
     n_fuzz: int = 100,
     strategy: str = "shuffle",

From b357a5296f9fcac8adad55b3e966cf0e26c97d08 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Thu, 7 Aug 2025 02:07:16 +0000
Subject: [PATCH 093/134] ver2 wrong template

---
 fuzz/collect_fuzz_python.py | 62 ++++++++++++++++++++++++++++++-------
 1 file changed, 51 insertions(+), 11 deletions(-)

diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py
index 6a974d5..e1f92de 100644
--- a/fuzz/collect_fuzz_python.py
+++ b/fuzz/collect_fuzz_python.py
@@ -301,6 +301,8 @@ def similar(a, b):
         return SequenceMatcher(None, a, b).ratio()
     return any(similar(x, y) > thresh for y in selected)
 
+import re
+
 def substitute_one_repo(
     repo: str,
     targets: list[str],
@@ -361,29 +363,67 @@ def substitute_one_repo(
             else:
                 inputs = all_inputs[:n_fuzz]
 
-            # Split header and method body
-            if "def test_generated(self):" not in template:
-                logging.error(f"Template format error in {template_path}")
+            # Extract structure from template
+            match = re.search(r"(class\s+Test\w+\(unittest\.TestCase\):)", template)
+            if not match:
+                logging.error(f"Class definition not found in template: {template_path}")
                 continue
 
-            header, method = template.split("def test_generated(self):", 1)
+            class_def_index = match.start()
+            before_class = template[:class_def_index]
+            class_and_after = template[class_def_index:]
+
+            method_match = re.search(r"def\s+test_generated\(self\):", class_and_after)
+            if not method_match:
+                logging.error(f"test_generated method not found in template: {template_path}")
+                continue
 
-            # Generate multiple methods
+            method_start = method_match.end()
+            class_header = class_and_after[:method_start]
+            method_indent_block = class_and_after[method_start:]
+
+            method_lines = method_indent_block.splitlines()
+            method_body = []
+            footer_lines = []
+            for line in method_lines:
+                if line.strip() == "":
+                    continue
+                if not line.startswith("        "):  # outside method block
+                    footer_lines.append(line)
+                elif not footer_lines:  # still inside method
+                    method_body.append(line)
+
+            method_body_str = "\n".join(method_body)
+            footer_str = "\n".join(footer_lines)
+
+            # Build all test methods
             test_methods = []
             for i, input_data in enumerate(inputs):
                 escaped_input = escape_special_chars(input_data)
-                method_code = f"    def test_{i}(self):" + method
-                method_code = method_code.replace('input_data = b""', f"input_data = {escaped_input}")
-                test_methods.append(method_code.strip())
+                test_func = f"    def test_{i}(self):\n"
+                test_func += "\n".join(
+                    "        " + line.lstrip().replace('input_data = b""', f"input_data = {escaped_input}")
+                    for line in method_body if line.strip()
+                )
+                test_methods.append(test_func)
+
+            if not test_methods:
+                # Fallback: generate dummy method to avoid syntax error
+                test_methods = ["    def test_placeholder(self):\n        self.assertTrue(True)"]
+
+            final_code = before_class.rstrip() + "\n" + class_header.rstrip() + "\n\n"
+            final_code += "\n\n".join(test_methods).rstrip() + "\n"
+
+            if footer_str.strip():
+                final_code += "\n\n" + footer_str.strip() + "\n"
 
-            final_code = header + "\n\n" + "\n\n".join(test_methods)
 
-            # Write to inputs.py
+            # Write to output
             generated_path = pjoin(template_dir, f"{target_name}.inputs.py")
             with open(generated_path, "w") as f:
                 f.write(final_code)
 
-            # Format (optional)
+            # Format with black
             try:
                 subprocess.run(["black", generated_path], check=False)
             except FileNotFoundError:

From 1637e23e5869c7db92135571f0291b642555d107 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Thu, 7 Aug 2025 02:24:55 +0000
Subject: [PATCH 094/134] ok

---
 fuzz/collect_fuzz_python.py | 88 ++++++++++++++-----------------------
 1 file changed, 33 insertions(+), 55 deletions(-)

diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py
index e1f92de..16ae813 100644
--- a/fuzz/collect_fuzz_python.py
+++ b/fuzz/collect_fuzz_python.py
@@ -212,9 +212,8 @@ def generate_test_template(target_name: str, repo_path: str):
 class Test{target_name.capitalize()}(unittest.TestCase):
     def test_generated(self):
         \"\"\"Test generated from fuzzing input\"\"\"
-        input_data = b""  # FUZZ_PLACEHOLDER
+        input_data = b""  
         result = TestClass(input_data)
-        self.assertIsNotNone(result)
 
 if __name__ == '__main__':
     unittest.main()
@@ -313,6 +312,7 @@ def substitute_one_repo(
 ):
     """
     Process a single repository, replace fuzzing inputs into test templates
+    and generate {target_name}.inputs.py files.
     """
     template_dir = pjoin(repo, "tests-gen")
     input_dir = pjoin(repo, "fuzz_inputs")
@@ -324,21 +324,13 @@ def substitute_one_repo(
             f.write("")
 
     for target_name in targets:
-        template_path = pjoin(template_dir, f"{target_name}.py")
         input_path = pjoin(input_dir, target_name)
 
         try:
-            if not os.path.exists(template_path):
-                logging.warning(f"Template file not found: {template_path}")
-                continue
-
             if not os.path.exists(input_path):
                 logging.warning(f"Input file not found: {input_path}")
                 continue
 
-            with open(template_path) as f_template:
-                template = f_template.read()
-
             with open(input_path, "r") as f_input:
                 all_inputs = [line.strip() for line in f_input if line.strip()]
 
@@ -363,62 +355,47 @@ def substitute_one_repo(
             else:
                 inputs = all_inputs[:n_fuzz]
 
-            # Extract structure from template
-            match = re.search(r"(class\s+Test\w+\(unittest\.TestCase\):)", template)
-            if not match:
-                logging.error(f"Class definition not found in template: {template_path}")
-                continue
+            # Header
+            file_header = f"""import sys
+import os
+import unittest
 
-            class_def_index = match.start()
-            before_class = template[:class_def_index]
-            class_and_after = template[class_def_index:]
+# 将项目目录加入 Python 路径，确保能导入上层模块
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
 
-            method_match = re.search(r"def\s+test_generated\(self\):", class_and_after)
-            if not method_match:
-                logging.error(f"test_generated method not found in template: {template_path}")
-                continue
+try:
+    from {target_name} import TestOneInput as TestClass
+except ImportError:
+    from {target_name} import TestInput as TestClass
+
+
+class Test{target_name.capitalize()}(unittest.TestCase):"""
 
-            method_start = method_match.end()
-            class_header = class_and_after[:method_start]
-            method_indent_block = class_and_after[method_start:]
-
-            method_lines = method_indent_block.splitlines()
-            method_body = []
-            footer_lines = []
-            for line in method_lines:
-                if line.strip() == "":
-                    continue
-                if not line.startswith("        "):  # outside method block
-                    footer_lines.append(line)
-                elif not footer_lines:  # still inside method
-                    method_body.append(line)
-
-            method_body_str = "\n".join(method_body)
-            footer_str = "\n".join(footer_lines)
-
-            # Build all test methods
+            # Method body template
+            method_body_template = [
+                '"""Test generated from fuzzing input"""',
+                'input_data = b""',
+                'result = TestClass(input_data)',
+            ]
+
+            # Generate test methods
             test_methods = []
             for i, input_data in enumerate(inputs):
                 escaped_input = escape_special_chars(input_data)
                 test_func = f"    def test_{i}(self):\n"
-                test_func += "\n".join(
-                    "        " + line.lstrip().replace('input_data = b""', f"input_data = {escaped_input}")
-                    for line in method_body if line.strip()
-                )
+                for line in method_body_template:
+                    replaced_line = line.replace('input_data = b""', f"input_data = {escaped_input}")
+                    test_func += f"        {replaced_line}\n"
                 test_methods.append(test_func)
 
             if not test_methods:
-                # Fallback: generate dummy method to avoid syntax error
                 test_methods = ["    def test_placeholder(self):\n        self.assertTrue(True)"]
 
-            final_code = before_class.rstrip() + "\n" + class_header.rstrip() + "\n\n"
-            final_code += "\n\n".join(test_methods).rstrip() + "\n"
-
-            if footer_str.strip():
-                final_code += "\n\n" + footer_str.strip() + "\n"
+            # Combine full file
+            final_code = file_header + "\n\n" + "\n\n".join(test_methods)
+            final_code += "\n\nif __name__ == '__main__':\n    unittest.main()\n"
 
-
-            # Write to output
+            # Write output file
             generated_path = pjoin(template_dir, f"{target_name}.inputs.py")
             with open(generated_path, "w") as f:
                 f.write(final_code)
@@ -432,6 +409,7 @@ def substitute_one_repo(
         except Exception as e:
             logging.error(f"Error processing {target_name}: {e}")
 
+
 def testgen_repos(
     repos: list[str],
     jobs: int,
@@ -471,10 +449,10 @@ def testgen_repos(
         ))
 
 def main(
-    repo_id: str = "data/valid_projects3.txt",
+    repo_id: str = "data/valid_projects.txt",
     repo_root: str = "fuzz/oss-fuzz/projects/",
     timeout: int = 60,
-    jobs: int = 8,
+    jobs: int = 4,
     pipeline: str = "all",
     n_fuzz: int = 100,
     strategy: str = "shuffle",

From 85b6ed907a274df3177c9ac7b20cd64cb8c18f53 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Mon, 11 Aug 2025 16:11:41 +0000
Subject: [PATCH 095/134] testgen file change into copy the original and then
 add input_data =b""

---
 fuzz/collect_fuzz_python.py | 154 +++++++++++++++---------------------
 1 file changed, 64 insertions(+), 90 deletions(-)

diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py
index 16ae813..a509b08 100644
--- a/fuzz/collect_fuzz_python.py
+++ b/fuzz/collect_fuzz_python.py
@@ -137,6 +137,7 @@ def fuzz_one_target(target: tuple[str, str], timeout: int):
     except Exception as e:
         logging.error(f"Error starting fuzzer: {e}")
         return None
+    
 
 def fuzz_repos(repos: list[str], jobs: int, timeout: int = 60):
     """
@@ -300,7 +301,7 @@ def similar(a, b):
         return SequenceMatcher(None, a, b).ratio()
     return any(similar(x, y) > thresh for y in selected)
 
-import re
+
 
 def substitute_one_repo(
     repo: str,
@@ -311,103 +312,76 @@ def substitute_one_repo(
     sim_thresh: float,
 ):
     """
-    Process a single repository, replace fuzzing inputs into test templates
-    and generate {target_name}.inputs.py files.
+    从原 fuzz target 复制文件，按 fuzz input 生成多个 testgen 文件。
     """
-    template_dir = pjoin(repo, "tests-gen")
     input_dir = pjoin(repo, "fuzz_inputs")
-
-    # Ensure __init__.py exists
-    init_path = pjoin(template_dir, "__init__.py")
-    if not os.path.exists(init_path):
-        with open(init_path, "w") as f:
-            f.write("")
+    template_dir = pjoin(repo, "tests-gen")
+    os.makedirs(template_dir, exist_ok=True)
 
     for target_name in targets:
-        input_path = pjoin(input_dir, target_name)
-
-        try:
-            if not os.path.exists(input_path):
-                logging.warning(f"Input file not found: {input_path}")
-                continue
-
-            with open(input_path, "r") as f_input:
-                all_inputs = [line.strip() for line in f_input if line.strip()]
-
-            if not all_inputs:
-                logging.warning(f"No valid inputs found for {target_name}")
-                continue
-
-            logging.info(f"Loaded {len(all_inputs)} inputs for {target_name}")
-
-            # Input selection
-            if strategy == "shuffle":
-                random.shuffle(all_inputs)
-                inputs = list(islice((x for x in all_inputs if len(x) < max_len), n_fuzz))
-            elif strategy == "reverse":
-                inputs = []
-                for x in reversed(all_inputs):
-                    if len(inputs) >= n_fuzz:
-                        break
-                    if len(x) > max_len or has_similar(inputs, x, sim_thresh):
-                        continue
-                    inputs.append(x)
-            else:
-                inputs = all_inputs[:n_fuzz]
-
-            # Header
-            file_header = f"""import sys
-import os
-import unittest
-
-# 将项目目录加入 Python 路径，确保能导入上层模块
-sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
-
-try:
-    from {target_name} import TestOneInput as TestClass
-except ImportError:
-    from {target_name} import TestInput as TestClass
-
+        source_file = pjoin(repo, f"{target_name}.py")
+        if not os.path.exists(source_file):
+            logging.warning(f"Source file not found: {source_file}")
+            continue
 
-class Test{target_name.capitalize()}(unittest.TestCase):"""
-
-            # Method body template
-            method_body_template = [
-                '"""Test generated from fuzzing input"""',
-                'input_data = b""',
-                'result = TestClass(input_data)',
-            ]
-
-            # Generate test methods
-            test_methods = []
-            for i, input_data in enumerate(inputs):
-                escaped_input = escape_special_chars(input_data)
-                test_func = f"    def test_{i}(self):\n"
-                for line in method_body_template:
-                    replaced_line = line.replace('input_data = b""', f"input_data = {escaped_input}")
-                    test_func += f"        {replaced_line}\n"
-                test_methods.append(test_func)
-
-            if not test_methods:
-                test_methods = ["    def test_placeholder(self):\n        self.assertTrue(True)"]
-
-            # Combine full file
-            final_code = file_header + "\n\n" + "\n\n".join(test_methods)
-            final_code += "\n\nif __name__ == '__main__':\n    unittest.main()\n"
-
-            # Write output file
-            generated_path = pjoin(template_dir, f"{target_name}.inputs.py")
-            with open(generated_path, "w") as f:
-                f.write(final_code)
+        input_path = pjoin(input_dir, target_name)
+        if not os.path.exists(input_path):
+            logging.warning(f"Input file not found: {input_path}")
+            continue
+
+        with open(input_path, "r") as f_input:
+            all_inputs = [line.strip() for line in f_input if line.strip()]
+
+        if not all_inputs:
+            logging.warning(f"No valid inputs found for {target_name}")
+            continue
+
+        logging.info(f"Loaded {len(all_inputs)} inputs for {target_name}")
+
+        # 策略选择输入
+        if strategy == "shuffle":
+            random.shuffle(all_inputs)
+            inputs = list(islice((x for x in all_inputs if len(x) < max_len), n_fuzz))
+        elif strategy == "reverse":
+            inputs = []
+            for x in reversed(all_inputs):
+                if len(inputs) >= n_fuzz:
+                    break
+                if len(x) > max_len or has_similar(inputs, x, sim_thresh):
+                    continue
+                inputs.append(x)
+        else:
+            inputs = all_inputs[:n_fuzz]
+
+        # 每个 fuzz input 生成一个单独的文件
+        for idx, fuzz_input in enumerate(inputs, start=1):
+            with open(source_file, "r") as f_src:
+                code = f_src.read()
+
+            # 删除 main 和 __main__ 块
+            code = re.sub(r"\nif __name__ == ['\"]__main__['\"]:.*", "", code, flags=re.S)
+            code = re.sub(r"\n\s*main\s*\(.*?\)", "", code)
+
+            # 找到 TestInput / TestOneInput 并改成 test_{idx}
+            code = re.sub(r"\bTestOneInput\b", f"test_{idx}", code)
+            code = re.sub(r"\bTestInput\b", f"test_{idx}", code)
+
+            # 在 test_{idx} 函数定义后插入 data 赋值
+           
+            code = re.sub(
+                rf"(def\s+test_{idx}\s*\([^)]*\)\s*:\s*\n)(\s*)",  # 新增捕获组匹配函数体首行缩进
+                rf"\1\2data = {escape_special_chars(fuzz_input)}\n\2",  # 复用缩进
+                code,
+            )
+            out_path = pjoin(template_dir, f"{target_name}.testgen_{idx}.py")
+            with open(out_path, "w") as f_out:
+                f_out.write(code)
 
-            # Format with black
             try:
-                subprocess.run(["black", generated_path], check=False)
+                subprocess.run(["black", out_path], check=False)
             except FileNotFoundError:
                 logging.warning("Black formatter not found, skipping formatting")
 
-        except Exception as e:
-            logging.error(f"Error processing {target_name}: {e}")
 
 
 def testgen_repos(
@@ -449,9 +423,9 @@ def testgen_repos(
         ))
 
 def main(
-    repo_id: str = "data/valid_projects.txt",
+    repo_id: str = "data/valid_projects3.txt",
     repo_root: str = "fuzz/oss-fuzz/projects/",
-    timeout: int = 60,
+    timeout: int = 10,
     jobs: int = 4,
     pipeline: str = "all",
     n_fuzz: int = 100,

From 618d156e3029170a5141f6b7d9b926e09592d045 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Tue, 12 Aug 2025 05:21:06 +0000
Subject: [PATCH 096/134] only read b' ' inputs

---
 fuzz/collect_fuzz_python.py | 60 ++++++++++++++++++++++++++++++++-----
 1 file changed, 53 insertions(+), 7 deletions(-)

diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py
index a509b08..3084053 100644
--- a/fuzz/collect_fuzz_python.py
+++ b/fuzz/collect_fuzz_python.py
@@ -329,8 +329,19 @@ def substitute_one_repo(
             logging.warning(f"Input file not found: {input_path}")
             continue
 
-        with open(input_path, "r") as f_input:
-            all_inputs = [line.strip() for line in f_input if line.strip()]
+        # 修改1：过滤警告行，只提取有效的字节字符串
+        all_inputs = []
+        with open(input_path, "rb") as f_input:  # 二进制模式读取
+            for line in f_input:
+                try:
+                    decoded_line = line.decode('utf-8', errors='replace').strip()
+                    # 只提取以 b' 开头的有效字节字符串
+                    if decoded_line.startswith(('b"', "b'")):
+                        all_inputs.append(decoded_line)
+                except UnicodeDecodeError:
+                    # 处理无法解码的行
+                    logging.warning(f"Skipping invalid input line in {input_path}")
+                    continue
 
         if not all_inputs:
             logging.warning(f"No valid inputs found for {target_name}")
@@ -365,14 +376,17 @@ def substitute_one_repo(
             # 找到 TestInput / TestOneInput 并改成 test_{idx}
             code = re.sub(r"\bTestOneInput\b", f"test_{idx}", code)
             code = re.sub(r"\bTestInput\b", f"test_{idx}", code)
-
-            # 在 test_{idx} 函数定义后插入 data 赋值
-           
+            def insert_fuzz_input(match):
+                indent = match.group(2)
+                # 使用原始字符串避免转义解析
+                return f"{match.group(1)}{indent}data = {fuzz_input}\n{indent}"
+            # 修改2：直接使用原始字节字符串，无需额外转义
             code = re.sub(
-                rf"(def\s+test_{idx}\s*\([^)]*\)\s*:\s*\n)(\s*)",  # 新增捕获组匹配函数体首行缩进
-                rf"\1\2data = {escape_special_chars(fuzz_input)}\n\2",  # 复用缩进
+                rf"(def\s+test_{idx}\s*\([^)]*\)\s*:\s*\n)(\s*)",
+                insert_fuzz_input,  # 使用回调函数
                 code,
             )
+            
             out_path = pjoin(template_dir, f"{target_name}.testgen_{idx}.py")
             with open(out_path, "w") as f_out:
                 f_out.write(code)
@@ -381,8 +395,40 @@ def substitute_one_repo(
                 subprocess.run(["black", out_path], check=False)
             except FileNotFoundError:
                 logging.warning("Black formatter not found, skipping formatting")
+       
+        for idx, fuzz_input in enumerate(inputs, start=1):
+            with open(source_file, "r") as f_src:
+                code = f_src.read()
+
+            # 删除 main 和 __main__ 块（保持不变）
+            code = re.sub(r"\nif __name__ == ['\"]__main__['\"]:.*", "", code, flags=re.S)
+            code = re.sub(r"\n\s*main\s*\(.*?\)", "", code)
+
+            # 重命名测试函数（保持不变）
+            code = re.sub(r"\bTestOneInput\b", f"test_{idx}", code)
+            code = re.sub(r"\bTestInput\b", f"test_{idx}", code)
 
+            # ==== 核心修复：使用 lambda 函数绕过转义解析 ====
+            def insert_fuzz_input(match):
+                indent = match.group(2)  # 提取原缩进
+                return f"{match.group(1)}{indent}data = {fuzz_input}\n{indent}"
 
+            code = re.sub(
+                rf"(def\s+test_{idx}\s*\([^)]*\)\s*:\s*\n)(\s*)",
+                insert_fuzz_input,  # 替换为函数引用
+                code,
+            )
+            
+            # 写入文件（保持不变）
+            out_path = pjoin(template_dir, f"{target_name}.testgen_{idx}.py")
+            with open(out_path, "w") as f_out:
+                f_out.write(code)
+
+            # 格式化（保持不变）
+            try:
+                subprocess.run(["black", out_path], check=False)
+            except FileNotFoundError:
+                logging.warning("Black formatter not found, skipping formatting")
 
 def testgen_repos(
     repos: list[str],

From 5a7f51366067061545d38f311b336fe67076bef0 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Tue, 12 Aug 2025 06:12:28 +0000
Subject: [PATCH 097/134] remove transform

---
 fuzz/collect_fuzz_python.py | 138 ++++++++++++++++++------------------
 1 file changed, 69 insertions(+), 69 deletions(-)

diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py
index 3084053..8783cfc 100644
--- a/fuzz/collect_fuzz_python.py
+++ b/fuzz/collect_fuzz_python.py
@@ -173,75 +173,75 @@ def fuzz_repos(repos: list[str], jobs: int, timeout: int = 60):
     # Execute fuzzing in parallel
     parallel_subprocess(all_targets, jobs, lambda p: fuzz_one_target(p, timeout), on_exit=None)
 
-def generate_test_template(target_name: str, repo_path: str):
-    """
-    Generate Python test template for a single target
-
-    Args:
-        target_name (str): Target name
-        repo_path (str): Repository path
-
-    Returns:
-        str: Template file path
-    """
-    template_dir = pjoin(repo_path, "tests-gen")
-    os.makedirs(template_dir, exist_ok=True)
-
-    # Ensure __init__.py exists
-    init_path = pjoin(template_dir, "__init__.py")
-    if not os.path.exists(init_path):
-        with open(init_path, "w") as f:
-            f.write("")
-
-    template_path = pjoin(template_dir, f"{target_name}.py")
-
-    # Python test template with placeholder
-    template = f"""#!/usr/bin/env python3
-import sys
-import os
-import unittest
-
-# Add the parent directory to the Python path
-sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
-
-# Import the function to test
-try:
-    from {target_name} import TestOneInput as TestClass
-except ImportError:
-    from {target_name} import TestInput as TestClass
-
-class Test{target_name.capitalize()}(unittest.TestCase):
-    def test_generated(self):
-        \"\"\"Test generated from fuzzing input\"\"\"
-        input_data = b""  
-        result = TestClass(input_data)
-
-if __name__ == '__main__':
-    unittest.main()
-"""
-    with open(template_path, "w") as f:
-        f.write(template)
-
-    return template_path
-
-def transform_repos(repos: list[str], jobs: int):
-    """
-    Generate test templates for all targets
+# def generate_test_template(target_name: str, repo_path: str):
+#     """
+#     Generate Python test template for a single target
+
+#     Args:
+#         target_name (str): Target name
+#         repo_path (str): Repository path
+
+#     Returns:
+#         str: Template file path
+#     """
+#     template_dir = pjoin(repo_path, "tests-gen")
+#     os.makedirs(template_dir, exist_ok=True)
+
+#     # Ensure __init__.py exists
+#     init_path = pjoin(template_dir, "__init__.py")
+#     if not os.path.exists(init_path):
+#         with open(init_path, "w") as f:
+#             f.write("")
+
+#     template_path = pjoin(template_dir, f"{target_name}.py")
+
+#     # Python test template with placeholder
+#     template = f"""#!/usr/bin/env python3
+# import sys
+# import os
+# import unittest
+
+# # Add the parent directory to the Python path
+# sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+# # Import the function to test
+# try:
+#     from {target_name} import TestOneInput as TestClass
+# except ImportError:
+#     from {target_name} import TestInput as TestClass
+
+# class Test{target_name.capitalize()}(unittest.TestCase):
+#     def test_generated(self):
+#         \"\"\"Test generated from fuzzing input\"\"\"
+#         input_data = b""  
+#         result = TestClass(input_data)
+
+# if __name__ == '__main__':
+#     unittest.main()
+# """
+#     with open(template_path, "w") as f:
+#         f.write(template)
+
+#     return template_path
+
+# def transform_repos(repos: list[str], jobs: int):
+#     """
+#     Generate test templates for all targets
     
-    Args:
-        repos (list[str]): List of repository paths
-        jobs (int): Number of parallel tasks
-    """
-    logging.info("Generating test templates")
+#     Args:
+#         repos (list[str]): List of repository paths
+#         jobs (int): Number of parallel tasks
+#     """
+#     logging.info("Generating test templates")
     
-    def _transform_repo(repo: str):
-        project_name = os.path.basename(repo)
-        oss_fuzz_dir = Path(repo).parent.parent
-        targets = discover_targets(project_name, oss_fuzz_dir)
-        return [generate_test_template(t, repo) for t in targets]
+#     def _transform_repo(repo: str):
+#         project_name = os.path.basename(repo)
+#         oss_fuzz_dir = Path(repo).parent.parent
+#         targets = discover_targets(project_name, oss_fuzz_dir)
+#         return [generate_test_template(t, repo) for t in targets]
     
-    with ProcessingPool(jobs) as p:
-        return list(p.map(_transform_repo, repos))
+#     with ProcessingPool(jobs) as p:
+#         return list(p.map(_transform_repo, repos))
 
 def escape_special_chars(input_data: str) -> str:
     """
@@ -515,12 +515,12 @@ def main(
         fuzz_repos(repos, jobs, timeout)
     elif pipeline == "testgen":
         testgen_repos(repos, jobs, n_fuzz, strategy, max_len, sim_thresh)
-    elif pipeline == "transform":
-        transform_repos(repos, jobs)
+    # elif pipeline == "transform":
+    #     transform_repos(repos, jobs)
     elif pipeline == "all":
         build_image(repos, jobs)
         build_fuzzer(repos, jobs)
-        transform_repos(repos, jobs)  # Generate test templates
+        # transform_repos(repos, jobs)  # Generate test templates
         fuzz_repos(repos, jobs, timeout)
         testgen_repos(repos, jobs, n_fuzz, strategy, max_len, sim_thresh)
     else:

From d5fd84d70ef2111e141d9bb5da352d450cc0af42 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Tue, 12 Aug 2025 06:12:49 +0000
Subject: [PATCH 098/134] clean the inputs and testgen

---
 fuzz/clean_fuzz_dir.py | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)
 create mode 100644 fuzz/clean_fuzz_dir.py

diff --git a/fuzz/clean_fuzz_dir.py b/fuzz/clean_fuzz_dir.py
new file mode 100644
index 0000000..b4e2b0b
--- /dev/null
+++ b/fuzz/clean_fuzz_dir.py
@@ -0,0 +1,42 @@
+#!/usr/bin/env python3
+import os
+import shutil
+
+ROOT_DIR = "/home/jiayiguo/FuzzAug/fuzz/oss-fuzz/projects"
+
+def clean_project_dirs(root_dir):
+    removed_files = 0
+    removed_dirs = 0
+
+    # 遍历一级项目目录
+    for project in os.listdir(root_dir):
+        project_path = os.path.join(root_dir, project)
+        if not os.path.isdir(project_path):
+            continue
+
+        # 删除 fuzz_inputs 文件夹
+        fuzz_inputs_path = os.path.join(project_path, "fuzz_inputs")
+        if os.path.isdir(fuzz_inputs_path):
+            shutil.rmtree(fuzz_inputs_path)
+            print(f"🗑️ Removed dir: {fuzz_inputs_path}")
+            removed_dirs += 1
+
+        # 删除 tests-gen 文件夹
+        tests_gen_path = os.path.join(project_path, "tests-gen")
+        if os.path.isdir(tests_gen_path):
+            shutil.rmtree(tests_gen_path)
+            print(f"🗑️ Removed dir: {tests_gen_path}")
+            removed_dirs += 1
+
+        # 删除 .inputs.py 文件
+        for fname in os.listdir(project_path):
+            if fname.endswith(".inputs.py"):
+                file_path = os.path.join(project_path, fname)
+                os.remove(file_path)
+                print(f"🗑️ Removed file: {file_path}")
+                removed_files += 1
+
+    print(f"\n✅ Done. Removed {removed_files} files and {removed_dirs} directories.")
+
+if __name__ == "__main__":
+    clean_project_dirs(ROOT_DIR)

From bcc22b09aaccb2aaaef2a47a5c76cca5583c12b1 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Tue, 12 Aug 2025 07:11:56 +0000
Subject: [PATCH 099/134] set max_file

---
 fuzz/collect_fuzz_python.py | 218 +++++++++++++++++++++++++++---------
 1 file changed, 166 insertions(+), 52 deletions(-)

diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py
index 8783cfc..fc93297 100644
--- a/fuzz/collect_fuzz_python.py
+++ b/fuzz/collect_fuzz_python.py
@@ -102,6 +102,92 @@ def discover_targets(project_name: str, oss_fuzz_dir: Path) -> list[str]:
         logging.error(f"Error discovering targets: {e}")
     
     return targets
+import threading
+import time
+
+
+
+def monitor_file_size(file_path, process, max_size=MAX_INPUT_FILE_SIZE):
+    """
+    监控 fuzz_input 文件大小，如果超过 max_size 就杀掉进程
+    
+    Args:
+        file_path (str): 监控文件路径
+        process (subprocess.Popen): 关联的进程
+        max_size (int): 最大文件大小 (默认 500MB)
+    """
+    project_name = os.path.basename(os.path.dirname(os.path.dirname(file_path)))
+    target_name = os.path.basename(file_path).split('.')[0]
+    
+    # 关键日志：启动监控
+    logging.info(
+        f"Started file monitor for {project_name}/{target_name} "
+        f"(max size: {max_size//(1024 * 1024)}MB)"
+    )
+    
+    last_size = 0
+    last_log = time.time()
+    
+    while process.poll() is None:  # 进程还在运行
+        try:
+            if not os.path.exists(file_path):
+                # 关键日志：文件丢失警告
+                logging.warning(
+                    f"Output file missing: {file_path}. "
+                    f"Process status: {'running' if process.poll() is None else 'exited'}"
+                )
+                time.sleep(1)
+                continue
+                
+            size = os.path.getsize(file_path)
+            
+            # 记录显著的尺寸变化 (+10%)
+            if size > 0 and abs(size - last_size)/size > 0.1:
+                logging.info(
+                    f"File size changed: {file_path} "
+                    f"{last_size//1024}KB → {size//1024}KB"
+                )
+                last_size = size
+                
+            # 每分钟记录一次当前尺寸
+            if time.time() - last_log > 60:
+                logging.debug(
+                    f"File size update: {file_path} = {size//1024}KB"
+                )
+                last_log = time.time()
+            
+            if size > max_size:
+                # 关键警告：文件超限
+                logging.warning(
+                    f"Terminating {project_name}/{target_name}: "
+                    f"File size {size//(1024 * 1024)}MB > {max_size//(1024 * 1024)}MB"
+                )
+                process.kill()
+                # 记录终止后的最终文件大小
+                final_size = os.path.getsize(file_path)
+                logging.info(
+                    f"After termination: {file_path} = {final_size//1024}KB"
+                )
+                break
+                
+        except Exception as e:
+            logging.error(
+                f"File monitor error for {project_name}/{target_name}: "
+                f"{type(e).__name__} - {str(e)}"
+            )
+            # 防止错误导致高频重试
+            time.sleep(5)
+            
+        time.sleep(1)  # 每秒检查一次
+    
+    # 进程结束时记录
+    exit_code = process.poll()
+    if exit_code is not None:
+        logging.info(
+            f"Process ended: {project_name}/{target_name} "
+            f"Exit code: {exit_code} "
+            f"Output file: {os.path.exists(file_path)}"
+        )
 
 def fuzz_one_target(target: tuple[str, str], timeout: int):
     """
@@ -119,59 +205,91 @@ def fuzz_one_target(target: tuple[str, str], timeout: int):
     oss_fuzz_root = os.path.dirname(os.path.dirname(repo_path))
     
     # Create input file path
-    input_file_path = pjoin(repo_path, "fuzz_inputs", target_name)
+    input_file_path = pjoin(repo_path, "fuzz_inputs", f"{target_name}.txt")
     os.makedirs(os.path.dirname(input_file_path), exist_ok=True)
     
     try:
-        with open(input_file_path, "w") as input_file:
-            return subprocess.Popen(
-                [
-                    "bash",
-                    "-c",
-                    f"timeout {timeout} python3 infra/helper.py run_fuzzer {project_name} {target_name}"
-                ],
-                cwd=oss_fuzz_root,
-                stdout=input_file,
-                stderr=subprocess.DEVNULL,
-            )
+        # 创建空文件确保路径存在
+        open(input_file_path, "w").close()
+        
+        # 关键日志：开始执行前记录所有参数
+        logging.info(
+            f"Starting fuzzer for {project_name}/{target_name}: "
+            f"timeout={timeout}s, output={input_file_path}"
+        )
+        
+        cmd = f"timeout {timeout} python3 infra/helper.py run_fuzzer {project_name} {target_name}"
+        
+        # 关键日志：记录完整命令
+        logging.debug(f"Executing command: {cmd}")
+        
+        # 记录进程开始时间
+        start_time = datetime.now()
+        process = subprocess.Popen(
+            ["bash", "-c", cmd],
+            cwd=oss_fuzz_root,
+            stdout=open(input_file_path, "w"),
+            stderr=subprocess.DEVNULL,
+        )
+        
+        # 关键日志：记录进程ID和启动时间
+        logging.info(
+            f"Started PID {process.pid} at {start_time.strftime('%H:%M:%S')} "
+            f"for {project_name}/{target_name}"
+        )
+        
+        # 启动文件监控线程
+        threading.Thread(
+            target=monitor_file_size, 
+            args=(input_file_path, process),
+            daemon=True
+        ).start()
+        
+        return process
     except Exception as e:
-        logging.error(f"Error starting fuzzer: {e}")
+        # 详细错误日志
+        logging.error(
+            f"Failed to start fuzzer for {project_name}/{target_name}: "
+            f"{type(e).__name__} - {str(e)}"
+        )
+        # 记录堆栈跟踪
+        logging.debug("Exception details:", exc_info=True)
         return None
     
 
-def fuzz_repos(repos: list[str], jobs: int, timeout: int = 60):
-    """
-    Perform fuzzing on a set of repositories
+# def fuzz_one_target(target: tuple[str, str], timeout: int):   
+#     """
+#     Perform fuzzing on a set of repositories
     
-    Args:
-        repos (list[str]): List of repository paths
-        jobs (int): Number of parallel tasks
-        timeout (int): Timeout duration (seconds)
-    """
-    logging.info("Discovering fuzz targets")
+#     Args:
+#         repos (list[str]): List of repository paths
+#         jobs (int): Number of parallel tasks
+#         timeout (int): Timeout duration (seconds)
+#     """
+#     logging.info("Discovering fuzz targets")
     
-    # Get all targets
-    targets_list = []
-    for repo in repos:
-        project_name = os.path.basename(repo)
-        oss_fuzz_dir = Path(repo).parent.parent
-        targets = discover_targets(project_name, oss_fuzz_dir)
-        targets_list.append(targets)
+#     # Get all targets
+#     targets_list = []
+#     for repo in repos:
+#         project_name = os.path.basename(repo)
+#         oss_fuzz_dir = Path(repo).parent.parent
+#         targets = discover_targets(project_name, oss_fuzz_dir)
+#         targets_list.append(targets)
     
-    # Create target mapping
-    target_map = {repo: targets for repo, targets in zip(repos, targets_list)}
-    all_targets: list[tuple[str, str]] = [
-        (k, v) for k, vs in target_map.items() for v in vs
-    ]
+#     # Create target mapping
+#     target_map = {repo: targets for repo, targets in zip(repos, targets_list)}
+#     all_targets: list[tuple[str, str]] = [
+#         (k, v) for k, vs in target_map.items() for v in vs
+#     ]
     
-    logging.info(f"Running fuzzing on {len(all_targets)} targets")
+#     logging.info(f"Running fuzzing on {len(all_targets)} targets")
     
-    # Create input directory
-    for repo in repos:
-        os.makedirs(pjoin(repo, "fuzz_inputs"), exist_ok=True)
+#     # Create input directory
+#     for repo in repos:
+#         os.makedirs(pjoin(repo, "fuzz_inputs"), exist_ok=True)
     
-    # Execute fuzzing in parallel
-    parallel_subprocess(all_targets, jobs, lambda p: fuzz_one_target(p, timeout), on_exit=None)
+#     # Execute fuzzing in parallel
+#     parallel_subprocess(all_targets, jobs, lambda p: fuzz_one_target(p, timeout), on_exit=None)
 
 # def generate_test_template(target_name: str, repo_path: str):
 #     """
@@ -332,16 +450,11 @@ def substitute_one_repo(
         # 修改1：过滤警告行，只提取有效的字节字符串
         all_inputs = []
         with open(input_path, "rb") as f_input:  # 二进制模式读取
-            for line in f_input:
-                try:
-                    decoded_line = line.decode('utf-8', errors='replace').strip()
-                    # 只提取以 b' 开头的有效字节字符串
-                    if decoded_line.startswith(('b"', "b'")):
-                        all_inputs.append(decoded_line)
-                except UnicodeDecodeError:
-                    # 处理无法解码的行
-                    logging.warning(f"Skipping invalid input line in {input_path}")
-                    continue
+            raw_inputs = [line for line in f_input]
+        valid_inputs = [
+            data for data in raw_inputs 
+         if len(data) <= max_len
+        ][:n_fuzz]
 
         if not all_inputs:
             logging.warning(f"No valid inputs found for {target_name}")
@@ -478,6 +591,7 @@ def main(
     strategy: str = "shuffle",
     max_len: int = 100,
     sim_thresh: float = 0.8,
+    max_file_size=500 * 1024 * 1024  # 500MB限制
 ):
     """
     Main function, controlling the entire fuzzing process
@@ -512,7 +626,7 @@ def main(
     elif pipeline == "build_fuzzer":
         build_fuzzer(repos, jobs)
     elif pipeline == "fuzz":
-        fuzz_repos(repos, jobs, timeout)
+        fuzz_repos(repos, jobs, timeout,max_file_size)
     elif pipeline == "testgen":
         testgen_repos(repos, jobs, n_fuzz, strategy, max_len, sim_thresh)
     # elif pipeline == "transform":
@@ -521,7 +635,7 @@ def main(
         build_image(repos, jobs)
         build_fuzzer(repos, jobs)
         # transform_repos(repos, jobs)  # Generate test templates
-        fuzz_repos(repos, jobs, timeout)
+        fuzz_repos(repos, jobs, timeout,max_file_size)
         testgen_repos(repos, jobs, n_fuzz, strategy, max_len, sim_thresh)
     else:
         logging.error(f"Unknown pipeline: {pipeline}")

From 020c970eebf8625541c642b6205e75d323ea081b Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Tue, 12 Aug 2025 07:56:38 +0000
Subject: [PATCH 100/134] max input file

---
 fuzz/collect_fuzz_python.py | 43 ++++++++++++++++++++++++++++++++-----
 1 file changed, 38 insertions(+), 5 deletions(-)

diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py
index fc93297..2318082 100644
--- a/fuzz/collect_fuzz_python.py
+++ b/fuzz/collect_fuzz_python.py
@@ -107,7 +107,7 @@ def discover_targets(project_name: str, oss_fuzz_dir: Path) -> list[str]:
 
 
 
-def monitor_file_size(file_path, process, max_size=MAX_INPUT_FILE_SIZE):
+def monitor_file_size(file_path, process, max_size=500 * 1024 * 1024):
     """
     监控 fuzz_input 文件大小，如果超过 max_size 就杀掉进程
     
@@ -255,6 +255,39 @@ def fuzz_one_target(target: tuple[str, str], timeout: int):
         # 记录堆栈跟踪
         logging.debug("Exception details:", exc_info=True)
         return None
+def fuzz_repos(repos: list[str], jobs: int, timeout: int = 60):
+    """
+    Perform fuzzing on a set of repositories
+    
+    Args:
+        repos (list[str]): List of repository paths
+        jobs (int): Number of parallel tasks
+        timeout (int): Timeout duration (seconds)
+    """
+    logging.info("Discovering fuzz targets")
+    
+    # Get all targets
+    targets_list = []
+    for repo in repos:
+        project_name = os.path.basename(repo)
+        oss_fuzz_dir = Path(repo).parent.parent
+        targets = discover_targets(project_name, oss_fuzz_dir)
+        targets_list.append(targets)
+    
+    # Create target mapping
+    target_map = {repo: targets for repo, targets in zip(repos, targets_list)}
+    all_targets: list[tuple[str, str]] = [
+        (k, v) for k, vs in target_map.items() for v in vs
+    ]
+    
+    logging.info(f"Running fuzzing on {len(all_targets)} targets")
+    
+    # Create input directory
+    for repo in repos:
+        os.makedirs(pjoin(repo, "fuzz_inputs"), exist_ok=True)
+    
+    # Execute fuzzing in parallel
+    parallel_subprocess(all_targets, jobs, lambda p: fuzz_one_target(p, timeout), on_exit=None)
     
 
 # def fuzz_one_target(target: tuple[str, str], timeout: int):   
@@ -442,7 +475,7 @@ def substitute_one_repo(
             logging.warning(f"Source file not found: {source_file}")
             continue
 
-        input_path = pjoin(input_dir, target_name)
+        input_path = pjoin(input_dir, f"{target_name}.txt")
         if not os.path.exists(input_path):
             logging.warning(f"Input file not found: {input_path}")
             continue
@@ -591,7 +624,7 @@ def main(
     strategy: str = "shuffle",
     max_len: int = 100,
     sim_thresh: float = 0.8,
-    max_file_size=500 * 1024 * 1024  # 500MB限制
+   
 ):
     """
     Main function, controlling the entire fuzzing process
@@ -626,7 +659,7 @@ def main(
     elif pipeline == "build_fuzzer":
         build_fuzzer(repos, jobs)
     elif pipeline == "fuzz":
-        fuzz_repos(repos, jobs, timeout,max_file_size)
+        fuzz_repos(repos, jobs, timeout)
     elif pipeline == "testgen":
         testgen_repos(repos, jobs, n_fuzz, strategy, max_len, sim_thresh)
     # elif pipeline == "transform":
@@ -635,7 +668,7 @@ def main(
         build_image(repos, jobs)
         build_fuzzer(repos, jobs)
         # transform_repos(repos, jobs)  # Generate test templates
-        fuzz_repos(repos, jobs, timeout,max_file_size)
+        fuzz_repos(repos, jobs, timeout)
         testgen_repos(repos, jobs, n_fuzz, strategy, max_len, sim_thresh)
     else:
         logging.error(f"Unknown pipeline: {pipeline}")

From 8787982e86bd5437c88d3f9a61c82f3872c796df Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Tue, 12 Aug 2025 08:59:41 +0000
Subject: [PATCH 101/134] input b""

---
 fuzz/collect_fuzz_python.py | 103 +++++++++++++++---------------------
 1 file changed, 42 insertions(+), 61 deletions(-)

diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py
index 2318082..8f5473f 100644
--- a/fuzz/collect_fuzz_python.py
+++ b/fuzz/collect_fuzz_python.py
@@ -453,7 +453,6 @@ def similar(a, b):
     return any(similar(x, y) > thresh for y in selected)
 
 
-
 def substitute_one_repo(
     repo: str,
     targets: list[str],
@@ -475,40 +474,52 @@ def substitute_one_repo(
             logging.warning(f"Source file not found: {source_file}")
             continue
 
+        # 修复：添加 .txt 后缀
         input_path = pjoin(input_dir, f"{target_name}.txt")
         if not os.path.exists(input_path):
             logging.warning(f"Input file not found: {input_path}")
             continue
 
-        # 修改1：过滤警告行，只提取有效的字节字符串
-        all_inputs = []
-        with open(input_path, "rb") as f_input:  # 二进制模式读取
-            raw_inputs = [line for line in f_input]
-        valid_inputs = [
-            data for data in raw_inputs 
-         if len(data) <= max_len
-        ][:n_fuzz]
-
-        if not all_inputs:
+        # 读取所有有效的输入数据
+        valid_inputs = []
+        with open(input_path, "rb") as f_input:
+            for line in f_input:
+                try:
+                    # 尝试解码行以检查内容
+                    decoded = line.decode('utf-8', errors='replace')
+                    
+                    # 只处理以 b' 或 b" 开头的行（这些是实际的测试输入）
+                    if decoded.startswith(("b'", 'b"')):
+                        # 提取字节数据部分
+                        if decoded.startswith("b'") and decoded.endswith("'\n"):
+                            byte_data = line[2:-2]  # 移除 b' 和末尾的 '\n
+                        elif decoded.startswith('b"') and decoded.endswith('"\n'):
+                            byte_data = line[2:-2]  # 移除 b" 和末尾的 "\n
+                        else:
+                            continue
+                            
+                        # 只保留有效长度的输入
+                        if 0 < len(byte_data) <= max_len:
+                            valid_inputs.append(byte_data)
+                except UnicodeDecodeError:
+                    # 如果无法解码，可能是二进制数据，直接使用
+                    if 0 < len(line) <= max_len:
+                        valid_inputs.append(line)
+
+        if not valid_inputs:
             logging.warning(f"No valid inputs found for {target_name}")
             continue
 
-        logging.info(f"Loaded {len(all_inputs)} inputs for {target_name}")
+        logging.info(f"Loaded {len(valid_inputs)} inputs for {target_name}")
 
-        # 策略选择输入
+        # 策略选择输入 - 最多选择 n_fuzz 个输入
         if strategy == "shuffle":
-            random.shuffle(all_inputs)
-            inputs = list(islice((x for x in all_inputs if len(x) < max_len), n_fuzz))
+            random.shuffle(valid_inputs)
+            inputs = valid_inputs[:n_fuzz]
         elif strategy == "reverse":
-            inputs = []
-            for x in reversed(all_inputs):
-                if len(inputs) >= n_fuzz:
-                    break
-                if len(x) > max_len or has_similar(inputs, x, sim_thresh):
-                    continue
-                inputs.append(x)
+            inputs = list(reversed(valid_inputs))[:n_fuzz]
         else:
-            inputs = all_inputs[:n_fuzz]
+            inputs = valid_inputs[:n_fuzz]
 
         # 每个 fuzz input 生成一个单独的文件
         for idx, fuzz_input in enumerate(inputs, start=1):
@@ -522,60 +533,30 @@ def substitute_one_repo(
             # 找到 TestInput / TestOneInput 并改成 test_{idx}
             code = re.sub(r"\bTestOneInput\b", f"test_{idx}", code)
             code = re.sub(r"\bTestInput\b", f"test_{idx}", code)
+            
+            # 插入测试数据 - 确保使用二进制表示
             def insert_fuzz_input(match):
                 indent = match.group(2)
-                # 使用原始字符串避免转义解析
-                return f"{match.group(1)}{indent}data = {fuzz_input}\n{indent}"
-            # 修改2：直接使用原始字节字符串，无需额外转义
-            code = re.sub(
-                rf"(def\s+test_{idx}\s*\([^)]*\)\s*:\s*\n)(\s*)",
-                insert_fuzz_input,  # 使用回调函数
-                code,
-            )
+                # 使用 repr() 安全表示二进制数据
+                byte_repr = repr(fuzz_input)
+                return f"{match.group(1)}{indent}data = {byte_repr}\n{indent}"
             
-            out_path = pjoin(template_dir, f"{target_name}.testgen_{idx}.py")
-            with open(out_path, "w") as f_out:
-                f_out.write(code)
-
-            try:
-                subprocess.run(["black", out_path], check=False)
-            except FileNotFoundError:
-                logging.warning("Black formatter not found, skipping formatting")
-       
-        for idx, fuzz_input in enumerate(inputs, start=1):
-            with open(source_file, "r") as f_src:
-                code = f_src.read()
-
-            # 删除 main 和 __main__ 块（保持不变）
-            code = re.sub(r"\nif __name__ == ['\"]__main__['\"]:.*", "", code, flags=re.S)
-            code = re.sub(r"\n\s*main\s*\(.*?\)", "", code)
-
-            # 重命名测试函数（保持不变）
-            code = re.sub(r"\bTestOneInput\b", f"test_{idx}", code)
-            code = re.sub(r"\bTestInput\b", f"test_{idx}", code)
-
-            # ==== 核心修复：使用 lambda 函数绕过转义解析 ====
-            def insert_fuzz_input(match):
-                indent = match.group(2)  # 提取原缩进
-                return f"{match.group(1)}{indent}data = {fuzz_input}\n{indent}"
-
+            # 在测试函数中插入数据
             code = re.sub(
                 rf"(def\s+test_{idx}\s*\([^)]*\)\s*:\s*\n)(\s*)",
-                insert_fuzz_input,  # 替换为函数引用
+                insert_fuzz_input,
                 code,
             )
             
-            # 写入文件（保持不变）
             out_path = pjoin(template_dir, f"{target_name}.testgen_{idx}.py")
             with open(out_path, "w") as f_out:
                 f_out.write(code)
 
-            # 格式化（保持不变）
+            # 格式化代码
             try:
                 subprocess.run(["black", out_path], check=False)
             except FileNotFoundError:
                 logging.warning("Black formatter not found, skipping formatting")
-
 def testgen_repos(
     repos: list[str],
     jobs: int,

From 606c34f76652bf9e5ec8faf13962445ee68d8d9c Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Tue, 12 Aug 2025 23:56:58 +0000
Subject: [PATCH 102/134] modify  the method of writing files  into PIPE

---
 fuzz/collect_fuzz_python.py | 92 ++++++++++++++++++-------------------
 1 file changed, 44 insertions(+), 48 deletions(-)

diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py
index 8f5473f..4d4cda4 100644
--- a/fuzz/collect_fuzz_python.py
+++ b/fuzz/collect_fuzz_python.py
@@ -190,71 +190,67 @@ def monitor_file_size(file_path, process, max_size=500 * 1024 * 1024):
         )
 
 def fuzz_one_target(target: tuple[str, str], timeout: int):
-    """
-    Perform fuzzing on a single fuzzing target
-    
-    Args:
-        target (tuple[str, str]): (Repository path, target name)
-        timeout (int): Timeout duration (seconds)
-        
-    Returns:
-        subprocess.Popen: Subprocess object
-    """
     repo_path, target_name = target
     project_name = os.path.basename(repo_path)
     oss_fuzz_root = os.path.dirname(os.path.dirname(repo_path))
-    
-    # Create input file path
+
     input_file_path = pjoin(repo_path, "fuzz_inputs", f"{target_name}.txt")
     os.makedirs(os.path.dirname(input_file_path), exist_ok=True)
-    
+
     try:
-        # 创建空文件确保路径存在
-        open(input_file_path, "w").close()
-        
-        # 关键日志：开始执行前记录所有参数
-        logging.info(
-            f"Starting fuzzer for {project_name}/{target_name}: "
-            f"timeout={timeout}s, output={input_file_path}"
-        )
-        
+        # 清空输出文件
+        open(input_file_path, "wb").close()
+
+        logging.info(f"Starting fuzzer for {project_name}/{target_name}: timeout={timeout}s, output={input_file_path}")
+
         cmd = f"timeout {timeout} python3 infra/helper.py run_fuzzer {project_name} {target_name}"
-        
-        # 关键日志：记录完整命令
         logging.debug(f"Executing command: {cmd}")
-        
-        # 记录进程开始时间
+
         start_time = datetime.now()
         process = subprocess.Popen(
             ["bash", "-c", cmd],
             cwd=oss_fuzz_root,
-            stdout=open(input_file_path, "w"),
-            stderr=subprocess.DEVNULL,
-        )
-        
-        # 关键日志：记录进程ID和启动时间
-        logging.info(
-            f"Started PID {process.pid} at {start_time.strftime('%H:%M:%S')} "
-            f"for {project_name}/{target_name}"
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT,
+            bufsize=1
         )
-        
-        # 启动文件监控线程
-        threading.Thread(
-            target=monitor_file_size, 
+
+        logging.info(f"Started PID {process.pid} at {start_time.strftime('%H:%M:%S')} for {project_name}/{target_name}")
+
+        # 启动监控线程
+        monitor_thread = threading.Thread(
+            target=monitor_file_size,
             args=(input_file_path, process),
             daemon=True
-        ).start()
-        
+        )
+        monitor_thread.start()
+
+        # 从 PIPE 读取并写入文件
+        with open(input_file_path, "ab") as output_file:
+            for chunk in iter(lambda: process.stdout.read(4096), b""):
+                output_file.write(chunk)
+
+        process.wait()
+
+        if os.path.getsize(input_file_path) == 0:
+            logging.warning(f"Output file is empty: {input_file_path}")
+            error_path = pjoin(repo_path, "fuzz_inputs", f"{target_name}_error.log")
+            with open(error_path, "wb") as error_file:
+                subprocess.run(
+                    ["bash", "-c", cmd],
+                    cwd=oss_fuzz_root,
+                    stdout=error_file,
+                    stderr=subprocess.STDOUT,
+                )
+                logging.info(f"Error output saved to {error_path}")
+
         return process
+
     except Exception as e:
-        # 详细错误日志
-        logging.error(
-            f"Failed to start fuzzer for {project_name}/{target_name}: "
-            f"{type(e).__name__} - {str(e)}"
-        )
-        # 记录堆栈跟踪
+        logging.error(f"Failed to start fuzzer for {project_name}/{target_name}: {type(e).__name__} - {str(e)}")
         logging.debug("Exception details:", exc_info=True)
         return None
+
 def fuzz_repos(repos: list[str], jobs: int, timeout: int = 60):
     """
     Perform fuzzing on a set of repositories
@@ -598,8 +594,8 @@ def testgen_repos(
 def main(
     repo_id: str = "data/valid_projects3.txt",
     repo_root: str = "fuzz/oss-fuzz/projects/",
-    timeout: int = 10,
-    jobs: int = 4,
+    timeout: int = 30,
+    jobs: int = 8,
     pipeline: str = "all",
     n_fuzz: int = 100,
     strategy: str = "shuffle",

From 503063c7cc33e5a27129ff77fd3ce62ed0606784 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Wed, 13 Aug 2025 00:33:20 +0000
Subject: [PATCH 103/134] use max total time; remove size monitor

---
 fuzz/collect_fuzz_python.py | 183 +++++++++++++++++++-----------------
 1 file changed, 98 insertions(+), 85 deletions(-)

diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py
index 4d4cda4..e9ecc22 100644
--- a/fuzz/collect_fuzz_python.py
+++ b/fuzz/collect_fuzz_python.py
@@ -102,92 +102,92 @@ def discover_targets(project_name: str, oss_fuzz_dir: Path) -> list[str]:
         logging.error(f"Error discovering targets: {e}")
     
     return targets
-import threading
-import time
+# import threading
+# import time
 
 
 
-def monitor_file_size(file_path, process, max_size=500 * 1024 * 1024):
-    """
-    监控 fuzz_input 文件大小，如果超过 max_size 就杀掉进程
-    
-    Args:
-        file_path (str): 监控文件路径
-        process (subprocess.Popen): 关联的进程
-        max_size (int): 最大文件大小 (默认 500MB)
-    """
-    project_name = os.path.basename(os.path.dirname(os.path.dirname(file_path)))
-    target_name = os.path.basename(file_path).split('.')[0]
-    
-    # 关键日志：启动监控
-    logging.info(
-        f"Started file monitor for {project_name}/{target_name} "
-        f"(max size: {max_size//(1024 * 1024)}MB)"
-    )
-    
-    last_size = 0
-    last_log = time.time()
+# def monitor_file_size(file_path, process, max_size=500 * 1024 * 1024):
+#     """
+#     监控 fuzz_input 文件大小，如果超过 max_size 就杀掉进程
     
-    while process.poll() is None:  # 进程还在运行
-        try:
-            if not os.path.exists(file_path):
-                # 关键日志：文件丢失警告
-                logging.warning(
-                    f"Output file missing: {file_path}. "
-                    f"Process status: {'running' if process.poll() is None else 'exited'}"
-                )
-                time.sleep(1)
-                continue
+#     Args:
+#         file_path (str): 监控文件路径
+#         process (subprocess.Popen): 关联的进程
+#         max_size (int): 最大文件大小 (默认 500MB)
+#     """
+#     project_name = os.path.basename(os.path.dirname(os.path.dirname(file_path)))
+#     target_name = os.path.basename(file_path).split('.')[0]
+    
+#     # 关键日志：启动监控
+#     logging.info(
+#         f"Started file monitor for {project_name}/{target_name} "
+#         f"(max size: {max_size//(1024 * 1024)}MB)"
+#     )
+    
+#     last_size = 0
+#     last_log = time.time()
+    
+#     while process.poll() is None:  # 进程还在运行
+#         try:
+#             if not os.path.exists(file_path):
+#                 # 关键日志：文件丢失警告
+#                 logging.warning(
+#                     f"Output file missing: {file_path}. "
+#                     f"Process status: {'running' if process.poll() is None else 'exited'}"
+#                 )
+#                 time.sleep(1)
+#                 continue
                 
-            size = os.path.getsize(file_path)
+#             size = os.path.getsize(file_path)
             
-            # 记录显著的尺寸变化 (+10%)
-            if size > 0 and abs(size - last_size)/size > 0.1:
-                logging.info(
-                    f"File size changed: {file_path} "
-                    f"{last_size//1024}KB → {size//1024}KB"
-                )
-                last_size = size
+#             # 记录显著的尺寸变化 (+10%)
+#             if size > 0 and abs(size - last_size)/size > 0.1:
+#                 logging.info(
+#                     f"File size changed: {file_path} "
+#                     f"{last_size//1024}KB → {size//1024}KB"
+#                 )
+#                 last_size = size
                 
-            # 每分钟记录一次当前尺寸
-            if time.time() - last_log > 60:
-                logging.debug(
-                    f"File size update: {file_path} = {size//1024}KB"
-                )
-                last_log = time.time()
+#             # 每分钟记录一次当前尺寸
+#             if time.time() - last_log > 60:
+#                 logging.debug(
+#                     f"File size update: {file_path} = {size//1024}KB"
+#                 )
+#                 last_log = time.time()
             
-            if size > max_size:
-                # 关键警告：文件超限
-                logging.warning(
-                    f"Terminating {project_name}/{target_name}: "
-                    f"File size {size//(1024 * 1024)}MB > {max_size//(1024 * 1024)}MB"
-                )
-                process.kill()
-                # 记录终止后的最终文件大小
-                final_size = os.path.getsize(file_path)
-                logging.info(
-                    f"After termination: {file_path} = {final_size//1024}KB"
-                )
-                break
+#             if size > max_size:
+#                 # 关键警告：文件超限
+#                 logging.warning(
+#                     f"Terminating {project_name}/{target_name}: "
+#                     f"File size {size//(1024 * 1024)}MB > {max_size//(1024 * 1024)}MB"
+#                 )
+#                 process.kill()
+#                 # 记录终止后的最终文件大小
+#                 final_size = os.path.getsize(file_path)
+#                 logging.info(
+#                     f"After termination: {file_path} = {final_size//1024}KB"
+#                 )
+#                 break
                 
-        except Exception as e:
-            logging.error(
-                f"File monitor error for {project_name}/{target_name}: "
-                f"{type(e).__name__} - {str(e)}"
-            )
-            # 防止错误导致高频重试
-            time.sleep(5)
+#         except Exception as e:
+#             logging.error(
+#                 f"File monitor error for {project_name}/{target_name}: "
+#                 f"{type(e).__name__} - {str(e)}"
+#             )
+#             # 防止错误导致高频重试
+#             time.sleep(5)
             
-        time.sleep(1)  # 每秒检查一次
-    
-    # 进程结束时记录
-    exit_code = process.poll()
-    if exit_code is not None:
-        logging.info(
-            f"Process ended: {project_name}/{target_name} "
-            f"Exit code: {exit_code} "
-            f"Output file: {os.path.exists(file_path)}"
-        )
+#         time.sleep(1)  # 每秒检查一次
+    
+#     # 进程结束时记录
+#     exit_code = process.poll()
+#     if exit_code is not None:
+#         logging.info(
+#             f"Process ended: {project_name}/{target_name} "
+#             f"Exit code: {exit_code} "
+#             f"Output file: {os.path.exists(file_path)}"
+#         )
 
 def fuzz_one_target(target: tuple[str, str], timeout: int):
     repo_path, target_name = target
@@ -203,7 +203,7 @@ def fuzz_one_target(target: tuple[str, str], timeout: int):
 
         logging.info(f"Starting fuzzer for {project_name}/{target_name}: timeout={timeout}s, output={input_file_path}")
 
-        cmd = f"timeout {timeout} python3 infra/helper.py run_fuzzer {project_name} {target_name}"
+        cmd = f"python3 infra/helper.py run_fuzzer {project_name} {target_name} -- -max_total_time={timeout}"
         logging.debug(f"Executing command: {cmd}")
 
         start_time = datetime.now()
@@ -217,13 +217,14 @@ def fuzz_one_target(target: tuple[str, str], timeout: int):
 
         logging.info(f"Started PID {process.pid} at {start_time.strftime('%H:%M:%S')} for {project_name}/{target_name}")
 
-        # 启动监控线程
-        monitor_thread = threading.Thread(
-            target=monitor_file_size,
-            args=(input_file_path, process),
-            daemon=True
-        )
-        monitor_thread.start()
+        # --- 按要求：去掉文件大小监控线程 ---
+        # monitor_thread = threading.Thread(
+        #     target=monitor_file_size,
+        #     args=(input_file_path, process),
+        #     daemon=True
+        # )
+        # monitor_thread.start()
+        # --- 结束 ---
 
         # 从 PIPE 读取并写入文件
         with open(input_file_path, "ab") as output_file:
@@ -251,6 +252,7 @@ def fuzz_one_target(target: tuple[str, str], timeout: int):
         logging.debug("Exception details:", exc_info=True)
         return None
 
+
 def fuzz_repos(repos: list[str], jobs: int, timeout: int = 60):
     """
     Perform fuzzing on a set of repositories
@@ -286,6 +288,17 @@ def fuzz_repos(repos: list[str], jobs: int, timeout: int = 60):
     parallel_subprocess(all_targets, jobs, lambda p: fuzz_one_target(p, timeout), on_exit=None)
     
 
+# def fuzz_one_target(target: tuple[str, str], timeout: int):   
+#     """
+#     Perform fuzzing on a set of repositories
+#     """
+#     pass
+
+# def generate_test_template(target_name: str, repo_path: str):
+#     ...
+# def transform_repos(repos: list[str], jobs: int):
+#     ...
+
 # def fuzz_one_target(target: tuple[str, str], timeout: int):   
 #     """
 #     Perform fuzzing on a set of repositories
@@ -595,7 +608,7 @@ def main(
     repo_id: str = "data/valid_projects3.txt",
     repo_root: str = "fuzz/oss-fuzz/projects/",
     timeout: int = 30,
-    jobs: int = 8,
+    jobs: int = 2,
     pipeline: str = "all",
     n_fuzz: int = 100,
     strategy: str = "shuffle",

From 708c8a9e4109e57d2a91c7c155a02c1ba669a5e2 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Thu, 14 Aug 2025 01:38:50 +0000
Subject: [PATCH 104/134] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E5=B9=B6=E8=A1=8C?=
 =?UTF-8?q?=E9=94=99=E8=AF=AF,=20=E5=86=99=E5=85=A5=E6=96=B9=E6=B3=95?=
 =?UTF-8?q?=E8=BF=98=E6=98=AF=E7=9B=B4=E6=8E=A5=E5=86=99=E5=85=A5=E6=96=87?=
 =?UTF-8?q?=E4=BB=B6=20=E5=BB=B6=E6=97=B6=E6=8E=A7=E5=88=B6=E4=B8=BAmax=20?=
 =?UTF-8?q?total=20time?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 fuzz/collect_fuzz_python.py | 254 +++++++++++++++---------------------
 1 file changed, 104 insertions(+), 150 deletions(-)

diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py
index e9ecc22..3282bca 100644
--- a/fuzz/collect_fuzz_python.py
+++ b/fuzz/collect_fuzz_python.py
@@ -102,158 +102,42 @@ def discover_targets(project_name: str, oss_fuzz_dir: Path) -> list[str]:
         logging.error(f"Error discovering targets: {e}")
     
     return targets
-# import threading
-# import time
-
-
-
-# def monitor_file_size(file_path, process, max_size=500 * 1024 * 1024):
-#     """
-#     监控 fuzz_input 文件大小，如果超过 max_size 就杀掉进程
-    
-#     Args:
-#         file_path (str): 监控文件路径
-#         process (subprocess.Popen): 关联的进程
-#         max_size (int): 最大文件大小 (默认 500MB)
-#     """
-#     project_name = os.path.basename(os.path.dirname(os.path.dirname(file_path)))
-#     target_name = os.path.basename(file_path).split('.')[0]
-    
-#     # 关键日志：启动监控
-#     logging.info(
-#         f"Started file monitor for {project_name}/{target_name} "
-#         f"(max size: {max_size//(1024 * 1024)}MB)"
-#     )
-    
-#     last_size = 0
-#     last_log = time.time()
-    
-#     while process.poll() is None:  # 进程还在运行
-#         try:
-#             if not os.path.exists(file_path):
-#                 # 关键日志：文件丢失警告
-#                 logging.warning(
-#                     f"Output file missing: {file_path}. "
-#                     f"Process status: {'running' if process.poll() is None else 'exited'}"
-#                 )
-#                 time.sleep(1)
-#                 continue
-                
-#             size = os.path.getsize(file_path)
-            
-#             # 记录显著的尺寸变化 (+10%)
-#             if size > 0 and abs(size - last_size)/size > 0.1:
-#                 logging.info(
-#                     f"File size changed: {file_path} "
-#                     f"{last_size//1024}KB → {size//1024}KB"
-#                 )
-#                 last_size = size
-                
-#             # 每分钟记录一次当前尺寸
-#             if time.time() - last_log > 60:
-#                 logging.debug(
-#                     f"File size update: {file_path} = {size//1024}KB"
-#                 )
-#                 last_log = time.time()
-            
-#             if size > max_size:
-#                 # 关键警告：文件超限
-#                 logging.warning(
-#                     f"Terminating {project_name}/{target_name}: "
-#                     f"File size {size//(1024 * 1024)}MB > {max_size//(1024 * 1024)}MB"
-#                 )
-#                 process.kill()
-#                 # 记录终止后的最终文件大小
-#                 final_size = os.path.getsize(file_path)
-#                 logging.info(
-#                     f"After termination: {file_path} = {final_size//1024}KB"
-#                 )
-#                 break
-                
-#         except Exception as e:
-#             logging.error(
-#                 f"File monitor error for {project_name}/{target_name}: "
-#                 f"{type(e).__name__} - {str(e)}"
-#             )
-#             # 防止错误导致高频重试
-#             time.sleep(5)
-            
-#         time.sleep(1)  # 每秒检查一次
-    
-#     # 进程结束时记录
-#     exit_code = process.poll()
-#     if exit_code is not None:
-#         logging.info(
-#             f"Process ended: {project_name}/{target_name} "
-#             f"Exit code: {exit_code} "
-#             f"Output file: {os.path.exists(file_path)}"
-#         )
-
 def fuzz_one_target(target: tuple[str, str], timeout: int):
+    """
+    Perform fuzzing on a single fuzzing target
+    
+    Args:
+        target (tuple[str, str]): (Repository path, target name)
+        timeout (int): Timeout duration (seconds)
+        
+    Returns:
+        subprocess.Popen: Subprocess object
+    """
     repo_path, target_name = target
     project_name = os.path.basename(repo_path)
     oss_fuzz_root = os.path.dirname(os.path.dirname(repo_path))
-
-    input_file_path = pjoin(repo_path, "fuzz_inputs", f"{target_name}.txt")
+    
+    # Create input file path
+    input_file_path = pjoin(repo_path, "fuzz_inputs", target_name)
     os.makedirs(os.path.dirname(input_file_path), exist_ok=True)
-
+    
     try:
-        # 清空输出文件
-        open(input_file_path, "wb").close()
-
-        logging.info(f"Starting fuzzer for {project_name}/{target_name}: timeout={timeout}s, output={input_file_path}")
-
-        cmd = f"python3 infra/helper.py run_fuzzer {project_name} {target_name} -- -max_total_time={timeout}"
-        logging.debug(f"Executing command: {cmd}")
-
-        start_time = datetime.now()
-        process = subprocess.Popen(
-            ["bash", "-c", cmd],
-            cwd=oss_fuzz_root,
-            stdout=subprocess.PIPE,
-            stderr=subprocess.STDOUT,
-            bufsize=1
-        )
-
-        logging.info(f"Started PID {process.pid} at {start_time.strftime('%H:%M:%S')} for {project_name}/{target_name}")
-
-        # --- 按要求：去掉文件大小监控线程 ---
-        # monitor_thread = threading.Thread(
-        #     target=monitor_file_size,
-        #     args=(input_file_path, process),
-        #     daemon=True
-        # )
-        # monitor_thread.start()
-        # --- 结束 ---
-
-        # 从 PIPE 读取并写入文件
-        with open(input_file_path, "ab") as output_file:
-            for chunk in iter(lambda: process.stdout.read(4096), b""):
-                output_file.write(chunk)
-
-        process.wait()
-
-        if os.path.getsize(input_file_path) == 0:
-            logging.warning(f"Output file is empty: {input_file_path}")
-            error_path = pjoin(repo_path, "fuzz_inputs", f"{target_name}_error.log")
-            with open(error_path, "wb") as error_file:
-                subprocess.run(
-                    ["bash", "-c", cmd],
-                    cwd=oss_fuzz_root,
-                    stdout=error_file,
-                    stderr=subprocess.STDOUT,
-                )
-                logging.info(f"Error output saved to {error_path}")
-
-        return process
-
+        with open(input_file_path, "w") as input_file:
+            return subprocess.Popen(
+                [
+                    "bash",
+                    "-c",
+                    f"python3 infra/helper.py run_fuzzer {project_name} {target_name} -- -max_total_time={timeout}"
+                ],
+                cwd=oss_fuzz_root,
+                stdout=input_file,
+                stderr=subprocess.DEVNULL,
+            )
     except Exception as e:
-        logging.error(f"Failed to start fuzzer for {project_name}/{target_name}: {type(e).__name__} - {str(e)}")
-        logging.debug("Exception details:", exc_info=True)
+        logging.error(f"Error starting fuzzer: {e}")
         return None
 
-
-def fuzz_repos(repos: list[str], jobs: int, timeout: int = 60):
+def fuzz_repos(repos: list[str], jobs: int, timeout: int ):
     """
     Perform fuzzing on a set of repositories
     
@@ -286,18 +170,88 @@ def fuzz_repos(repos: list[str], jobs: int, timeout: int = 60):
     
     # Execute fuzzing in parallel
     parallel_subprocess(all_targets, jobs, lambda p: fuzz_one_target(p, timeout), on_exit=None)
-    
 
-# def fuzz_one_target(target: tuple[str, str], timeout: int):   
+
+# def fuzz_one_target(target: tuple[str, str], timeout: int):
+#     repo_path, target_name = target
+#     project_name = os.path.basename(repo_path)
+#     oss_fuzz_root = os.path.dirname(os.path.dirname(repo_path))
+
+#     input_file_path = pjoin(repo_path, "fuzz_inputs", f"{target_name}.txt")
+#     os.makedirs(os.path.dirname(input_file_path), exist_ok=True)
+
+#     try:
+#         # 清空输出文件
+#         open(input_file_path, "wb").close()
+
+#         logging.info(f"Starting fuzzer for {project_name}/{target_name}: timeout={timeout}s, output={input_file_path}")
+
+#         cmd = f"python3 infra/helper.py run_fuzzer {project_name} {target_name} -- -max_total_time={timeout}"
+#         logging.debug(f"Executing command: {cmd}")
+
+#         start_time = datetime.now()
+#         process = subprocess.Popen(
+#             ["bash", "-c", cmd],
+#             cwd=oss_fuzz_root,
+#             stdout=subprocess.PIPE,
+#             stderr=subprocess.DEVNULL,
+#             # bufsize=1
+#         )
+
+#         logging.info(f"Started PID {process.pid} at {start_time.strftime('%H:%M:%S')} for {project_name}/{target_name}")
+
+        
+
+#         # 从 PIPE 读取并写入文件
+#         with open(input_file_path, "ab") as output_file:
+#             for chunk in iter(lambda: process.stdout.read(4096), b""):
+#                 output_file.write(chunk)
+
+#         process.wait()
+
+        
+
+#     except Exception as e:
+#         logging.error(f"Failed to start fuzzer for {project_name}/{target_name}: {type(e).__name__} - {str(e)}")
+#         logging.debug("Exception details:", exc_info=True)
+#         return None
+
+
+# def fuzz_repos(repos: list[str], jobs: int, timeout: int = 30):
 #     """
 #     Perform fuzzing on a set of repositories
+    
+#     Args:
+#         repos (list[str]): List of repository paths
+#         jobs (int): Number of parallel tasks
+#         timeout (int): Timeout duration (seconds)
 #     """
-#     pass
+#     logging.info("Discovering fuzz targets")
+    
+#     # Get all targets
+#     targets_list = []
+#     for repo in repos:
+#         project_name = os.path.basename(repo)
+#         oss_fuzz_dir = Path(repo).parent.parent
+#         targets = discover_targets(project_name, oss_fuzz_dir)
+#         targets_list.append(targets)
+    
+#     # Create target mapping
+#     target_map = {repo: targets for repo, targets in zip(repos, targets_list)}
+#     all_targets: list[tuple[str, str]] = [
+#         (k, v) for k, vs in target_map.items() for v in vs
+#     ]
+    
+#     logging.info(f"Running fuzzing on {len(all_targets)} targets")
+    
+#     # Create input directory
+#     for repo in repos:
+#         os.makedirs(pjoin(repo, "fuzz_inputs"), exist_ok=True)
+    
+#     # Execute fuzzing in parallel
+#     parallel_subprocess(all_targets, jobs, lambda p: fuzz_one_target(p, timeout), on_exit=None)
+    
 
-# def generate_test_template(target_name: str, repo_path: str):
-#     ...
-# def transform_repos(repos: list[str], jobs: int):
-#     ...
 
 # def fuzz_one_target(target: tuple[str, str], timeout: int):   
 #     """

From f417e19a3ee9c598f119c94979825f71c2ae8400 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Thu, 14 Aug 2025 01:40:56 +0000
Subject: [PATCH 105/134] =?UTF-8?q?=E8=A1=A5=E5=85=85=E6=97=A5=E5=BF=97?=
 =?UTF-8?q?=E8=BE=93=E5=87=BA?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 fuzz/collect_fuzz_python.py | 54 ++++++++++++++-----------------------
 1 file changed, 20 insertions(+), 34 deletions(-)

diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py
index 3282bca..74733d6 100644
--- a/fuzz/collect_fuzz_python.py
+++ b/fuzz/collect_fuzz_python.py
@@ -77,17 +77,12 @@ def _build_cmd(path: str):
 def discover_targets(project_name: str, oss_fuzz_dir: Path) -> list[str]:
     """
     Discover fuzzing targets
-    
-    Args:
-        project_name (str): Project name
-        oss_fuzz_dir (Path): OSS-Fuzz root directory
-        
-    Returns:
-        list[str]: List of target names
     """
     out_dir = oss_fuzz_dir / "build" / "out" / project_name
     targets: list[str] = []
     
+    logging.debug(f"Searching fuzz targets in: {out_dir}")
+    
     if not out_dir.is_dir():
         logging.warning(f"Build output directory for {project_name} does not exist")
         return targets
@@ -98,29 +93,27 @@ def discover_targets(project_name: str, oss_fuzz_dir: Path) -> list[str]:
                 '.' not in f.name and f.name.endswith("print1") and 
                 os.access(f, os.X_OK)):
                 targets.append(f.name)
+        logging.info(f"Discovered {len(targets)} fuzz targets in {project_name}: {targets}")
     except Exception as e:
-        logging.error(f"Error discovering targets: {e}")
+        logging.error(f"Error discovering targets for {project_name}: {e}", exc_info=True)
     
     return targets
+
+
 def fuzz_one_target(target: tuple[str, str], timeout: int):
     """
     Perform fuzzing on a single fuzzing target
-    
-    Args:
-        target (tuple[str, str]): (Repository path, target name)
-        timeout (int): Timeout duration (seconds)
-        
-    Returns:
-        subprocess.Popen: Subprocess object
     """
     repo_path, target_name = target
     project_name = os.path.basename(repo_path)
     oss_fuzz_root = os.path.dirname(os.path.dirname(repo_path))
     
-    # Create input file path
     input_file_path = pjoin(repo_path, "fuzz_inputs", target_name)
     os.makedirs(os.path.dirname(input_file_path), exist_ok=True)
-    
+
+    logging.info(f"Starting fuzzing: project={project_name}, target={target_name}, timeout={timeout}s")
+    logging.debug(f"Fuzz output will be saved to: {input_file_path}")
+
     try:
         with open(input_file_path, "w") as input_file:
             return subprocess.Popen(
@@ -134,21 +127,16 @@ def fuzz_one_target(target: tuple[str, str], timeout: int):
                 stderr=subprocess.DEVNULL,
             )
     except Exception as e:
-        logging.error(f"Error starting fuzzer: {e}")
+        logging.error(f"Error starting fuzzer for {project_name}/{target_name}: {e}", exc_info=True)
         return None
 
-def fuzz_repos(repos: list[str], jobs: int, timeout: int ):
+
+def fuzz_repos(repos: list[str], jobs: int, timeout: int):
     """
     Perform fuzzing on a set of repositories
-    
-    Args:
-        repos (list[str]): List of repository paths
-        jobs (int): Number of parallel tasks
-        timeout (int): Timeout duration (seconds)
     """
-    logging.info("Discovering fuzz targets")
-    
-    # Get all targets
+    logging.info(f"Discovering fuzz targets for {len(repos)} repositories...")
+
     targets_list = []
     for repo in repos:
         project_name = os.path.basename(repo)
@@ -156,22 +144,20 @@ def fuzz_repos(repos: list[str], jobs: int, timeout: int ):
         targets = discover_targets(project_name, oss_fuzz_dir)
         targets_list.append(targets)
     
-    # Create target mapping
     target_map = {repo: targets for repo, targets in zip(repos, targets_list)}
     all_targets: list[tuple[str, str]] = [
         (k, v) for k, vs in target_map.items() for v in vs
     ]
     
-    logging.info(f"Running fuzzing on {len(all_targets)} targets")
-    
-    # Create input directory
+    logging.info(f"Total fuzz targets discovered: {len(all_targets)}")
+    for repo, targets in target_map.items():
+        logging.info(f"{os.path.basename(repo)}: {len(targets)} targets")
+
     for repo in repos:
         os.makedirs(pjoin(repo, "fuzz_inputs"), exist_ok=True)
     
-    # Execute fuzzing in parallel
+    logging.info(f"Starting parallel fuzzing with {jobs} jobs, timeout={timeout}s per target")
     parallel_subprocess(all_targets, jobs, lambda p: fuzz_one_target(p, timeout), on_exit=None)
-
-
 # def fuzz_one_target(target: tuple[str, str], timeout: int):
 #     repo_path, target_name = target
 #     project_name = os.path.basename(repo_path)

From 2a27db9f6cc7fb6a5391158ad026bc5a55e08e6a Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Thu, 14 Aug 2025 08:23:55 +0000
Subject: [PATCH 106/134] =?UTF-8?q?=E6=A8=A1=E6=9D=BF=E7=94=9F=E6=88=90?=
 =?UTF-8?q?=E6=88=90=E5=8A=9F?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 fuzz/collect_fuzz_python.py | 258 ++++++++++++------------------------
 1 file changed, 83 insertions(+), 175 deletions(-)

diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py
index 74733d6..b3896e1 100644
--- a/fuzz/collect_fuzz_python.py
+++ b/fuzz/collect_fuzz_python.py
@@ -158,190 +158,98 @@ def fuzz_repos(repos: list[str], jobs: int, timeout: int):
     
     logging.info(f"Starting parallel fuzzing with {jobs} jobs, timeout={timeout}s per target")
     parallel_subprocess(all_targets, jobs, lambda p: fuzz_one_target(p, timeout), on_exit=None)
-# def fuzz_one_target(target: tuple[str, str], timeout: int):
-#     repo_path, target_name = target
-#     project_name = os.path.basename(repo_path)
-#     oss_fuzz_root = os.path.dirname(os.path.dirname(repo_path))
-
-#     input_file_path = pjoin(repo_path, "fuzz_inputs", f"{target_name}.txt")
-#     os.makedirs(os.path.dirname(input_file_path), exist_ok=True)
-
-#     try:
-#         # 清空输出文件
-#         open(input_file_path, "wb").close()
-
-#         logging.info(f"Starting fuzzer for {project_name}/{target_name}: timeout={timeout}s, output={input_file_path}")
-
-#         cmd = f"python3 infra/helper.py run_fuzzer {project_name} {target_name} -- -max_total_time={timeout}"
-#         logging.debug(f"Executing command: {cmd}")
-
-#         start_time = datetime.now()
-#         process = subprocess.Popen(
-#             ["bash", "-c", cmd],
-#             cwd=oss_fuzz_root,
-#             stdout=subprocess.PIPE,
-#             stderr=subprocess.DEVNULL,
-#             # bufsize=1
-#         )
+import os
+import re
+import logging
+from os.path import join as pjoin
 
-#         logging.info(f"Started PID {process.pid} at {start_time.strftime('%H:%M:%S')} for {project_name}/{target_name}")
+def generate_test_template(target_name: str, repo_path: str):
+    """
+    Generate Python test template for a single target by stripping license header,
+    main() block, and print(data) inside TestInput/TestOneInput.
+    """
+    src_file = pjoin(repo_path, target_name + ".py")
+    if not os.path.exists(src_file):
+        logging.error(f"Source target file not found: {src_file}")
+        return None
 
-        
+    with open(src_file, "r", encoding="utf-8") as f:
+        original_code = f.read()
 
-#         # 从 PIPE 读取并写入文件
-#         with open(input_file_path, "ab") as output_file:
-#             for chunk in iter(lambda: process.stdout.read(4096), b""):
-#                 output_file.write(chunk)
+    # --- 1. 保留 shebang，但删除许可证注释 ---
+    shebang = ""
+    if original_code.startswith("#!"):
+        shebang, original_code = original_code.split("\n", 1)
+        shebang += "\n"
 
-#         process.wait()
+    # 匹配从文件开头到包含 "limitations under the license" 那一行的注释块
+    license_pattern = re.compile(
+        r"^(?:\s*#.*\n)*\s*#.*limitations\s+under\s+the\s+license.*\n",
+        re.IGNORECASE | re.MULTILINE
+    )
+    code_no_license = re.sub(license_pattern, "", original_code, count=1)
+
+    # --- 2. 删除 main 函数和 if __name__ == '__main__' ---
+    code_no_main = re.sub(
+        r"\n?def\s+main\([\s\S]*?(?=^if\s+__name__\s*==\s*['\"]__main__['\"]:)",
+        "",
+        code_no_license,
+        flags=re.MULTILINE
+    )
+    code_no_main = re.sub(
+        r"\n?if\s+__name__\s*==\s*['\"]__main__['\"]:\s*main\(\s*\)\s*",
+        "",
+        code_no_main,
+        flags=re.MULTILINE
+    )
 
-        
+    # --- 3. 删除 TestInput/TestOneInput 内的 print(data) ---
+    def remove_print_in_func(match):
+        func_body = match.group(0)
+        func_body = re.sub(r"^\s*print\(data\)\s*$", "", func_body, flags=re.MULTILINE)
+        return func_body
+
+    cleaned_code = re.sub(
+        r"(def\s+Test(?:Input|OneInput)\s*\([\s\S]*?)(?=\n\w|\Z)",
+        lambda m: remove_print_in_func(m),
+        code_no_main,
+        flags=re.MULTILINE
+    )
 
-#     except Exception as e:
-#         logging.error(f"Failed to start fuzzer for {project_name}/{target_name}: {type(e).__name__} - {str(e)}")
-#         logging.debug("Exception details:", exc_info=True)
-#         return None
+    # --- 4. 输出到 tests-gen ---
+    template_dir = pjoin(repo_path, "tests-gen")
+    os.makedirs(template_dir, exist_ok=True)
 
+    init_path = pjoin(template_dir, "__init__.py")
+    if not os.path.exists(init_path):
+        with open(init_path, "w", encoding="utf-8") as f:
+            f.write("")
 
-# def fuzz_repos(repos: list[str], jobs: int, timeout: int = 30):
-#     """
-#     Perform fuzzing on a set of repositories
-    
-#     Args:
-#         repos (list[str]): List of repository paths
-#         jobs (int): Number of parallel tasks
-#         timeout (int): Timeout duration (seconds)
-#     """
-#     logging.info("Discovering fuzz targets")
-    
-#     # Get all targets
-#     targets_list = []
-#     for repo in repos:
-#         project_name = os.path.basename(repo)
-#         oss_fuzz_dir = Path(repo).parent.parent
-#         targets = discover_targets(project_name, oss_fuzz_dir)
-#         targets_list.append(targets)
-    
-#     # Create target mapping
-#     target_map = {repo: targets for repo, targets in zip(repos, targets_list)}
-#     all_targets: list[tuple[str, str]] = [
-#         (k, v) for k, vs in target_map.items() for v in vs
-#     ]
-    
-#     logging.info(f"Running fuzzing on {len(all_targets)} targets")
-    
-#     # Create input directory
-#     for repo in repos:
-#         os.makedirs(pjoin(repo, "fuzz_inputs"), exist_ok=True)
-    
-#     # Execute fuzzing in parallel
-#     parallel_subprocess(all_targets, jobs, lambda p: fuzz_one_target(p, timeout), on_exit=None)
-    
+    template_path = pjoin(template_dir, f"{target_name}.py")
+    with open(template_path, "w", encoding="utf-8") as f:
+        f.write(shebang + cleaned_code.strip() + "\n")
 
+    logging.info(f"Generated cleaned template: {template_path}")
+    return template_path
 
-# def fuzz_one_target(target: tuple[str, str], timeout: int):   
-#     """
-#     Perform fuzzing on a set of repositories
-    
-#     Args:
-#         repos (list[str]): List of repository paths
-#         jobs (int): Number of parallel tasks
-#         timeout (int): Timeout duration (seconds)
-#     """
-#     logging.info("Discovering fuzz targets")
-    
-#     # Get all targets
-#     targets_list = []
-#     for repo in repos:
-#         project_name = os.path.basename(repo)
-#         oss_fuzz_dir = Path(repo).parent.parent
-#         targets = discover_targets(project_name, oss_fuzz_dir)
-#         targets_list.append(targets)
-    
-#     # Create target mapping
-#     target_map = {repo: targets for repo, targets in zip(repos, targets_list)}
-#     all_targets: list[tuple[str, str]] = [
-#         (k, v) for k, vs in target_map.items() for v in vs
-#     ]
-    
-#     logging.info(f"Running fuzzing on {len(all_targets)} targets")
-    
-#     # Create input directory
-#     for repo in repos:
-#         os.makedirs(pjoin(repo, "fuzz_inputs"), exist_ok=True)
-    
-#     # Execute fuzzing in parallel
-#     parallel_subprocess(all_targets, jobs, lambda p: fuzz_one_target(p, timeout), on_exit=None)
-
-# def generate_test_template(target_name: str, repo_path: str):
-#     """
-#     Generate Python test template for a single target
-
-#     Args:
-#         target_name (str): Target name
-#         repo_path (str): Repository path
-
-#     Returns:
-#         str: Template file path
-#     """
-#     template_dir = pjoin(repo_path, "tests-gen")
-#     os.makedirs(template_dir, exist_ok=True)
-
-#     # Ensure __init__.py exists
-#     init_path = pjoin(template_dir, "__init__.py")
-#     if not os.path.exists(init_path):
-#         with open(init_path, "w") as f:
-#             f.write("")
-
-#     template_path = pjoin(template_dir, f"{target_name}.py")
-
-#     # Python test template with placeholder
-#     template = f"""#!/usr/bin/env python3
-# import sys
-# import os
-# import unittest
-
-# # Add the parent directory to the Python path
-# sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
-
-# # Import the function to test
-# try:
-#     from {target_name} import TestOneInput as TestClass
-# except ImportError:
-#     from {target_name} import TestInput as TestClass
-
-# class Test{target_name.capitalize()}(unittest.TestCase):
-#     def test_generated(self):
-#         \"\"\"Test generated from fuzzing input\"\"\"
-#         input_data = b""  
-#         result = TestClass(input_data)
-
-# if __name__ == '__main__':
-#     unittest.main()
-# """
-#     with open(template_path, "w") as f:
-#         f.write(template)
-
-#     return template_path
-
-# def transform_repos(repos: list[str], jobs: int):
-#     """
-#     Generate test templates for all targets
+def transform_repos(repos: list[str], jobs: int):
+    """
+    Generate test templates for all targets
     
-#     Args:
-#         repos (list[str]): List of repository paths
-#         jobs (int): Number of parallel tasks
-#     """
-#     logging.info("Generating test templates")
+    Args:
+        repos (list[str]): List of repository paths
+        jobs (int): Number of parallel tasks
+    """
+    logging.info("Generating test templates")
     
-#     def _transform_repo(repo: str):
-#         project_name = os.path.basename(repo)
-#         oss_fuzz_dir = Path(repo).parent.parent
-#         targets = discover_targets(project_name, oss_fuzz_dir)
-#         return [generate_test_template(t, repo) for t in targets]
+    def _transform_repo(repo: str):
+        project_name = os.path.basename(repo)
+        oss_fuzz_dir = Path(repo).parent.parent
+        targets = discover_targets(project_name, oss_fuzz_dir)
+        return [generate_test_template(t, repo) for t in targets]
     
-#     with ProcessingPool(jobs) as p:
-#         return list(p.map(_transform_repo, repos))
+    with ProcessingPool(jobs) as p:
+        return list(p.map(_transform_repo, repos))
 
 def escape_special_chars(input_data: str) -> str:
     """
@@ -424,7 +332,7 @@ def substitute_one_repo(
             continue
 
         # 修复：添加 .txt 后缀
-        input_path = pjoin(input_dir, f"{target_name}.txt")
+        input_path = pjoin(input_dir, f"{target_name}")
         if not os.path.exists(input_path):
             logging.warning(f"Input file not found: {input_path}")
             continue
@@ -592,12 +500,12 @@ def main(
         fuzz_repos(repos, jobs, timeout)
     elif pipeline == "testgen":
         testgen_repos(repos, jobs, n_fuzz, strategy, max_len, sim_thresh)
-    # elif pipeline == "transform":
-    #     transform_repos(repos, jobs)
+    elif pipeline == "transform":
+        transform_repos(repos, jobs)
     elif pipeline == "all":
         build_image(repos, jobs)
         build_fuzzer(repos, jobs)
-        # transform_repos(repos, jobs)  # Generate test templates
+        transform_repos(repos, jobs)  # Generate test templates
         fuzz_repos(repos, jobs, timeout)
         testgen_repos(repos, jobs, n_fuzz, strategy, max_len, sim_thresh)
     else:

From f58370272d3ef2dccfc00ccf19242e206011d730 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Thu, 14 Aug 2025 08:35:57 +0000
Subject: [PATCH 107/134] =?UTF-8?q?testgen=E5=AE=8C=E6=88=90?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 fuzz/collect_fuzz_python.py | 83 ++++++++++++++++++-------------------
 1 file changed, 40 insertions(+), 43 deletions(-)

diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py
index b3896e1..3529f19 100644
--- a/fuzz/collect_fuzz_python.py
+++ b/fuzz/collect_fuzz_python.py
@@ -265,49 +265,49 @@ def escape_special_chars(input_data: str) -> str:
     # This will handle all special characters and non-ASCII bytes
     return repr(input_data.encode('latin-1', 'replace'))
 
-def substitute_input(template: str, input_data: str, idx: int, target_name: str) -> str:
-    """
-    Replace fuzzing input into Python test template
+# def substitute_input(template: str, input_data: str, idx: int, target_name: str) -> str:
+#     """
+#     Replace fuzzing input into Python test template
     
-    Args:
-        template (str): Template content
-        input_data (str): Input data
-        idx (int): Test index
-        target_name (str): Target name
+#     Args:
+#         template (str): Template content
+#         input_data (str): Input data
+#         idx (int): Test index
+#         target_name (str): Target name
         
-    Returns:
-        str: Test code after substitution
-    """
-    # Escape special characters for Python
-    escaped_input = escape_special_chars(input_data)
+#     Returns:
+#         str: Test code after substitution
+#     """
+#     # Escape special characters for Python
+#     escaped_input = escape_special_chars(input_data)
     
-    # Replace input placeholder
-    new_template = template.replace(
-        'input_data = b""',
-        f'input_data = {escaped_input}'
-    )
+#     # Replace input placeholder
+#     new_template = template.replace(
+#         'input_data = b""',
+#         f'input_data = {escaped_input}'
+#     )
     
-    # Replace test method name to avoid duplication
-    return new_template.replace(
-        f"def test_generated(self):",
-        f"def test_{idx}(self):"
-    )
-
-def has_similar(selected: list[str], x: str, thresh: float = 0.8) -> bool:
-    """
-    Check if a string is sufficiently similar to any string in the selected list
+#     # Replace test method name to avoid duplication
+#     return new_template.replace(
+#         f"def test_generated(self):",
+#         f"def test_{idx}(self):"
+#     )
+
+# def has_similar(selected: list[str], x: str, thresh: float = 0.8) -> bool:
+#     """
+#     Check if a string is sufficiently similar to any string in the selected list
     
-    Args:
-        selected (list[str]): List of selected strings
-        x (str): String to check
-        thresh (float): Similarity threshold
+#     Args:
+#         selected (list[str]): List of selected strings
+#         x (str): String to check
+#         thresh (float): Similarity threshold
         
-    Returns:
-        bool: Whether they are similar
-    """
-    def similar(a, b):
-        return SequenceMatcher(None, a, b).ratio()
-    return any(similar(x, y) > thresh for y in selected)
+#     Returns:
+#         bool: Whether they are similar
+#     """
+#     def similar(a, b):
+#         return SequenceMatcher(None, a, b).ratio()
+#     return any(similar(x, y) > thresh for y in selected)
 
 
 def substitute_one_repo(
@@ -326,12 +326,12 @@ def substitute_one_repo(
     os.makedirs(template_dir, exist_ok=True)
 
     for target_name in targets:
-        source_file = pjoin(repo, f"{target_name}.py")
+        source_file = pjoin(template_dir, f"{target_name}.py")
         if not os.path.exists(source_file):
             logging.warning(f"Source file not found: {source_file}")
             continue
 
-        # 修复：添加 .txt 后缀
+        
         input_path = pjoin(input_dir, f"{target_name}")
         if not os.path.exists(input_path):
             logging.warning(f"Input file not found: {input_path}")
@@ -383,10 +383,7 @@ def substitute_one_repo(
             with open(source_file, "r") as f_src:
                 code = f_src.read()
 
-            # 删除 main 和 __main__ 块
-            code = re.sub(r"\nif __name__ == ['\"]__main__['\"]:.*", "", code, flags=re.S)
-            code = re.sub(r"\n\s*main\s*\(.*?\)", "", code)
-
+            
             # 找到 TestInput / TestOneInput 并改成 test_{idx}
             code = re.sub(r"\bTestOneInput\b", f"test_{idx}", code)
             code = re.sub(r"\bTestInput\b", f"test_{idx}", code)

From 68c656a4676cb9068590e89a8b3dcd1c62eeae2a Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Thu, 14 Aug 2025 18:20:33 +0000
Subject: [PATCH 108/134] =?UTF-8?q?=E5=88=A0=E9=99=A4=E5=86=97=E4=BD=99,?=
 =?UTF-8?q?=20=E4=BF=AE=E6=94=B9=E4=BB=A3=E7=A0=81?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 fuzz/collect_fuzz_python.py | 518 ++++++++++++++++++------------------
 1 file changed, 266 insertions(+), 252 deletions(-)

diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py
index 3529f19..a42d0ba 100644
--- a/fuzz/collect_fuzz_python.py
+++ b/fuzz/collect_fuzz_python.py
@@ -2,9 +2,11 @@
 Script for Python project fuzzing and test template conversion
 usage: PYTHONPATH=. python3 fuzz/collect_fuzz_python.py --pipeline all
 """
+from __future__ import annotations
+
 from pathlib import Path
 import logging
-from typing import Optional, List, Tuple
+from typing import Optional
 import fire
 import os
 from UniTSyn.frontend.util import wrap_repo, parallel_subprocess
@@ -13,157 +15,178 @@
 from tqdm import tqdm
 from pathos.multiprocessing import ProcessingPool
 import random
-from difflib import SequenceMatcher
 from itertools import islice
 from datetime import datetime
 import re
+from functools import partial
 
-def build_image(repos: list[str], jobs: int):
-    """
-    Build Docker images for OSS-Fuzz projects corresponding to each repository
-    
-    Args:
-        repos (list[str]): List of repository paths
-        jobs (int): Number of parallel tasks
-    """
-    logging.info(f"Building Docker images for {len(repos)} OSS-Fuzz projects")
-    log_dir = os.path.abspath("fuzz_pipeline_log")
-    os.makedirs(log_dir, exist_ok=True)
+############################################################
+# Top-level helpers (picklable) to avoid pool pickling woes
+############################################################
 
-    def _build_cmd(path: str):
-        project_name = os.path.basename(path.rstrip("/"))
-        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-        log_file = os.path.join(log_dir, f"{project_name}_{timestamp}.log")
 
-        logging.info(f"Start building {project_name}, logging to {log_file}")
+def _run_build_image(path: str, log_dir: str) -> Optional[subprocess.Popen]:
+    """Helper for build_image: must be top-level for pickling."""
+    project_name = os.path.basename(path.rstrip("/"))
+    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+    log_file = os.path.join(log_dir, f"{project_name}_{timestamp}.log")
+    logging.info(f"Start building {project_name}, logging to {log_file}")
+    # Note: child process keeps the fd open even if parent closes it after spawn
+    f = open(log_file, "w")
+    try:
         return subprocess.Popen(
-            f"yes | python3 infra/helper.py build_image {project_name}",
+            "yes | python3 infra/helper.py build_image {project}".format(project=project_name),
             cwd=os.path.abspath(os.path.join(path, "../../")),
-            stdout=open(log_file, "w"),
+            stdout=f,
             stderr=subprocess.STDOUT,
             shell=True,
         )
-
-    _ = parallel_subprocess(repos, jobs, _build_cmd, on_exit=None)
-
-def build_fuzzer(repos: list[str], jobs: int):
-    """
-    Build fuzzers in parallel for successfully built projects
-    
-    Args:
-        repos (list[str]): List of repository paths
-        jobs (int): Number of parallel tasks
-    """
-    logging.info(f"Building fuzzers for {len(repos)} OSS-Fuzz projects")
-    log_dir = os.path.abspath("fuzz_pipeline_log")
-    os.makedirs(log_dir, exist_ok=True)
-
-    def _build_cmd(path: str):
-        project_name = os.path.basename(path.rstrip("/"))
-        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-        log_file = os.path.join(log_dir, f"{project_name}_fuzzer_{timestamp}.log")
-
-        logging.info(f"Start building fuzzers for {project_name}, logging to {log_file}")
+    except Exception:
+        f.close()
+        raise
+
+
+def _run_build_fuzzer(path: str, log_dir: str) -> Optional[subprocess.Popen]:
+    """Helper for build_fuzzer: must be top-level for pickling."""
+    project_name = os.path.basename(path.rstrip("/"))
+    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+    log_file = os.path.join(log_dir, f"{project_name}_fuzzer_{timestamp}.log")
+    logging.info(f"Start building fuzzers for {project_name}, logging to {log_file}")
+    f = open(log_file, "w")
+    try:
         return subprocess.Popen(
-            f"python3 infra/helper.py build_fuzzers --sanitizer address {project_name}",
+            "python3 infra/helper.py build_fuzzers --sanitizer address {project}".format(
+                project=project_name
+            ),
             cwd=os.path.abspath(os.path.join(path, "../../")),
-            stdout=open(log_file, "w"),
+            stdout=f,
             stderr=subprocess.STDOUT,
             shell=True,
         )
+    except Exception:
+        f.close()
+        raise
+
+
+############################################################
+# Discover & fuzz
+############################################################
 
-    _ = parallel_subprocess(repos, jobs, _build_cmd, on_exit=None)
 
 def discover_targets(project_name: str, oss_fuzz_dir: Path) -> list[str]:
-    """
-    Discover fuzzing targets
+    """Discover fuzzing targets in out/<project> directory.
+
+    Rules:
+      - file name startswith "fuzz_"
+      - no dot in filename (exclude corpora, dictionaries)
+      - executable bit set
+      - (optional) if you want only print1 variants, pass a filter later
     """
     out_dir = oss_fuzz_dir / "build" / "out" / project_name
     targets: list[str] = []
-    
+
     logging.debug(f"Searching fuzz targets in: {out_dir}")
-    
+
     if not out_dir.is_dir():
         logging.warning(f"Build output directory for {project_name} does not exist")
         return targets
 
     try:
         for f in out_dir.iterdir():
-            if (f.is_file() and f.name.startswith("fuzz_") and 
-                '.' not in f.name and f.name.endswith("print1") and 
-                os.access(f, os.X_OK)):
+            if (
+                f.is_file()
+                and f.name.startswith("fuzz_")
+                and f.name.endswith("print1")
+                and "." not in f.name
+                and os.access(f, os.X_OK)
+            ):
                 targets.append(f.name)
-        logging.info(f"Discovered {len(targets)} fuzz targets in {project_name}: {targets}")
+        logging.info(
+            f"Discovered {len(targets)} fuzz targets in {project_name}: {targets}"
+        )
     except Exception as e:
-        logging.error(f"Error discovering targets for {project_name}: {e}", exc_info=True)
-    
+        logging.error(
+            f"Error discovering targets for {project_name}: {e}", exc_info=True
+        )
+
     return targets
 
 
-def fuzz_one_target(target: tuple[str, str], timeout: int):
-    """
-    Perform fuzzing on a single fuzzing target
-    """
+
+def fuzz_one_target(target: tuple[str, str], timeout: int) -> Optional[subprocess.Popen]:
+    """Perform fuzzing on a single fuzzing target."""
     repo_path, target_name = target
     project_name = os.path.basename(repo_path)
     oss_fuzz_root = os.path.dirname(os.path.dirname(repo_path))
-    
+
     input_file_path = pjoin(repo_path, "fuzz_inputs", target_name)
     os.makedirs(os.path.dirname(input_file_path), exist_ok=True)
 
-    logging.info(f"Starting fuzzing: project={project_name}, target={target_name}, timeout={timeout}s")
+    logging.info(
+        f"Starting fuzzing: project={project_name}, target={target_name}, timeout={timeout}s"
+    )
     logging.debug(f"Fuzz output will be saved to: {input_file_path}")
 
     try:
-        with open(input_file_path, "w") as input_file:
+        f = open(input_file_path, "wb")
+        try:
             return subprocess.Popen(
                 [
                     "bash",
                     "-c",
-                    f"python3 infra/helper.py run_fuzzer {project_name} {target_name} -- -max_total_time={timeout}"
+                    f"python3 infra/helper.py run_fuzzer {project_name} {target_name} -- -max_total_time={timeout}",
                 ],
                 cwd=oss_fuzz_root,
-                stdout=input_file,
+                stdout=f,
                 stderr=subprocess.DEVNULL,
             )
+        except Exception:
+            f.close()
+            raise
     except Exception as e:
-        logging.error(f"Error starting fuzzer for {project_name}/{target_name}: {e}", exc_info=True)
+        logging.error(
+            f"Error starting fuzzer for {project_name}/{target_name}: {e}",
+            exc_info=True,
+        )
         return None
 
 
-def fuzz_repos(repos: list[str], jobs: int, timeout: int):
-    """
-    Perform fuzzing on a set of repositories
-    """
+
+def fuzz_repos(repos: list[str], jobs: int, timeout: int) -> None:
+    """Perform fuzzing on a set of repositories."""
     logging.info(f"Discovering fuzz targets for {len(repos)} repositories...")
 
-    targets_list = []
+    targets_list: list[list[str]] = []
     for repo in repos:
         project_name = os.path.basename(repo)
         oss_fuzz_dir = Path(repo).parent.parent
         targets = discover_targets(project_name, oss_fuzz_dir)
         targets_list.append(targets)
-    
-    target_map = {repo: targets for repo, targets in zip(repos, targets_list)}
+
+    target_map: dict[str, list[str]] = {repo: targets for repo, targets in zip(repos, targets_list)}
     all_targets: list[tuple[str, str]] = [
         (k, v) for k, vs in target_map.items() for v in vs
     ]
-    
+
     logging.info(f"Total fuzz targets discovered: {len(all_targets)}")
     for repo, targets in target_map.items():
         logging.info(f"{os.path.basename(repo)}: {len(targets)} targets")
 
     for repo in repos:
         os.makedirs(pjoin(repo, "fuzz_inputs"), exist_ok=True)
-    
-    logging.info(f"Starting parallel fuzzing with {jobs} jobs, timeout={timeout}s per target")
-    parallel_subprocess(all_targets, jobs, lambda p: fuzz_one_target(p, timeout), on_exit=None)
-import os
-import re
-import logging
-from os.path import join as pjoin
 
-def generate_test_template(target_name: str, repo_path: str):
+    logging.info(
+        f"Starting parallel fuzzing with {jobs} jobs, timeout={timeout}s per target"
+    )
+    parallel_subprocess(all_targets, jobs, partial(fuzz_one_target, timeout=timeout), on_exit=None)
+
+
+############################################################
+# Transform: generate cleaned templates from fuzz target py
+############################################################
+
+
+def generate_test_template(target_name: str, repo_path: str) -> Optional[str]:
     """
     Generate Python test template for a single target by stripping license header,
     main() block, and print(data) inside TestInput/TestOneInput.
@@ -176,47 +199,49 @@ def generate_test_template(target_name: str, repo_path: str):
     with open(src_file, "r", encoding="utf-8") as f:
         original_code = f.read()
 
-    # --- 1. 保留 shebang，但删除许可证注释 ---
+    # 1) keep shebang
     shebang = ""
     if original_code.startswith("#!"):
         shebang, original_code = original_code.split("\n", 1)
         shebang += "\n"
 
-    # 匹配从文件开头到包含 "limitations under the license" 那一行的注释块
+    # 2) drop license block (best-effort)
     license_pattern = re.compile(
         r"^(?:\s*#.*\n)*\s*#.*limitations\s+under\s+the\s+license.*\n",
-        re.IGNORECASE | re.MULTILINE
+        re.IGNORECASE | re.MULTILINE,
     )
     code_no_license = re.sub(license_pattern, "", original_code, count=1)
 
-    # --- 2. 删除 main 函数和 if __name__ == '__main__' ---
+    # 3) remove main() and if __main__ guards (best-effort)
     code_no_main = re.sub(
-        r"\n?def\s+main\([\s\S]*?(?=^if\s+__name__\s*==\s*['\"]__main__['\"]:)",
+        r"\n?def\s+main\([^)]*\):[\s\S]*?(?=^\S|\Z)",
         "",
         code_no_license,
-        flags=re.MULTILINE
+        flags=re.MULTILINE,
     )
     code_no_main = re.sub(
-        r"\n?if\s+__name__\s*==\s*['\"]__main__['\"]:\s*main\(\s*\)\s*",
+        r"\n?if\s+__name__\s*==\s*['\"]__main__['\"]:[\s\S]*?(?=^\S|\Z)",
         "",
         code_no_main,
-        flags=re.MULTILINE
+        flags=re.MULTILINE,
     )
 
-    # --- 3. 删除 TestInput/TestOneInput 内的 print(data) ---
-    def remove_print_in_func(match):
-        func_body = match.group(0)
-        func_body = re.sub(r"^\s*print\(data\)\s*$", "", func_body, flags=re.MULTILINE)
-        return func_body
+    # 4) remove print(data) inside TestInput/TestOneInput
+    def _strip_print_in_func(src: str) -> str:
+        def _repl(m: re.Match) -> str:
+            body = m.group(0)
+            return re.sub(r"^\s*print\(data\)\s*$", "", body, flags=re.MULTILINE)
+
+        return re.sub(
+            r"(def\s+Test(?:Input|OneInput)\s*\([\s\S]*?\):[\s\S]*?)(?=^def\s|^@|\Z)",
+            _repl,
+            src,
+            flags=re.MULTILINE,
+        )
 
-    cleaned_code = re.sub(
-        r"(def\s+Test(?:Input|OneInput)\s*\([\s\S]*?)(?=\n\w|\Z)",
-        lambda m: remove_print_in_func(m),
-        code_no_main,
-        flags=re.MULTILINE
-    )
+    cleaned_code = _strip_print_in_func(code_no_main)
 
-    # --- 4. 输出到 tests-gen ---
+    # 5) write into tests-gen
     template_dir = pjoin(repo_path, "tests-gen")
     os.makedirs(template_dir, exist_ok=True)
 
@@ -232,82 +257,29 @@ def remove_print_in_func(match):
     logging.info(f"Generated cleaned template: {template_path}")
     return template_path
 
-def transform_repos(repos: list[str], jobs: int):
+
+# top-level for pickling
+
+def _transform_repo_fn(repo: str) -> list[Optional[str]]:
+    project_name = os.path.basename(repo)
+    oss_fuzz_dir = Path(repo).parent.parent
+    targets = discover_targets(project_name, oss_fuzz_dir)
+    return [generate_test_template(t, repo) for t in targets]
+
+
+
+def transform_repos(repos: list[str], jobs: int) -> list[list[Optional[str]]]:
     """
-    Generate test templates for all targets
-    
-    Args:
-        repos (list[str]): List of repository paths
-        jobs (int): Number of parallel tasks
+    Generate test templates for all targets (parallel, picklable).
     """
     logging.info("Generating test templates")
-    
-    def _transform_repo(repo: str):
-        project_name = os.path.basename(repo)
-        oss_fuzz_dir = Path(repo).parent.parent
-        targets = discover_targets(project_name, oss_fuzz_dir)
-        return [generate_test_template(t, repo) for t in targets]
-    
     with ProcessingPool(jobs) as p:
-        return list(p.map(_transform_repo, repos))
+        return list(p.map(_transform_repo_fn, repos))
 
-def escape_special_chars(input_data: str) -> str:
-    """
-    Escape special characters in input data for Python byte strings
-    
-    Args:
-        input_data (str): Raw input data
-        
-    Returns:
-        str: Input data with escaped characters
-    """
-    # For Python, we can use repr() to safely represent byte strings
-    # This will handle all special characters and non-ASCII bytes
-    return repr(input_data.encode('latin-1', 'replace'))
-
-# def substitute_input(template: str, input_data: str, idx: int, target_name: str) -> str:
-#     """
-#     Replace fuzzing input into Python test template
-    
-#     Args:
-#         template (str): Template content
-#         input_data (str): Input data
-#         idx (int): Test index
-#         target_name (str): Target name
-        
-#     Returns:
-#         str: Test code after substitution
-#     """
-#     # Escape special characters for Python
-#     escaped_input = escape_special_chars(input_data)
-    
-#     # Replace input placeholder
-#     new_template = template.replace(
-#         'input_data = b""',
-#         f'input_data = {escaped_input}'
-#     )
-    
-#     # Replace test method name to avoid duplication
-#     return new_template.replace(
-#         f"def test_generated(self):",
-#         f"def test_{idx}(self):"
-#     )
-
-# def has_similar(selected: list[str], x: str, thresh: float = 0.8) -> bool:
-#     """
-#     Check if a string is sufficiently similar to any string in the selected list
-    
-#     Args:
-#         selected (list[str]): List of selected strings
-#         x (str): String to check
-#         thresh (float): Similarity threshold
-        
-#     Returns:
-#         bool: Whether they are similar
-#     """
-#     def similar(a, b):
-#         return SequenceMatcher(None, a, b).ratio()
-#     return any(similar(x, y) > thresh for y in selected)
+
+############################################################
+# Testgen: substitute fuzz inputs into test templates
+############################################################
 
 
 def substitute_one_repo(
@@ -317,7 +289,7 @@ def substitute_one_repo(
     strategy: str,
     max_len: int,
     sim_thresh: float,
-):
+) -> None:
     """
     从原 fuzz target 复制文件，按 fuzz input 生成多个 testgen 文件。
     """
@@ -331,36 +303,32 @@ def substitute_one_repo(
             logging.warning(f"Source file not found: {source_file}")
             continue
 
-        
         input_path = pjoin(input_dir, f"{target_name}")
         if not os.path.exists(input_path):
             logging.warning(f"Input file not found: {input_path}")
             continue
 
-        # 读取所有有效的输入数据
-        valid_inputs = []
+        # 读取所有有效的输入数据（逐行，允许原始二进制）
+        valid_inputs: list[bytes] = []
         with open(input_path, "rb") as f_input:
             for line in f_input:
-                try:
-                    # 尝试解码行以检查内容
-                    decoded = line.decode('utf-8', errors='replace')
-                    
-                    # 只处理以 b' 或 b" 开头的行（这些是实际的测试输入）
-                    if decoded.startswith(("b'", 'b"')):
-                        # 提取字节数据部分
-                        if decoded.startswith("b'") and decoded.endswith("'\n"):
-                            byte_data = line[2:-2]  # 移除 b' 和末尾的 '\n
-                        elif decoded.startswith('b"') and decoded.endswith('"\n'):
-                            byte_data = line[2:-2]  # 移除 b" 和末尾的 "\n
-                        else:
-                            continue
-                            
-                        # 只保留有效长度的输入
-                        if 0 < len(byte_data) <= max_len:
-                            valid_inputs.append(byte_data)
-                except UnicodeDecodeError:
-                    # 如果无法解码，可能是二进制数据，直接使用
-                    if 0 < len(line) <= max_len:
+                # If the line looks like a Python bytes literal b'...'
+                if line.startswith(b"b'") or line.startswith(b'b"'):
+                    # Try to strip leading b' or b" and trailing quote+newline
+                    stripped = None
+                    if line.startswith(b"b'") and line.endswith(b"'\n"):
+                        stripped = line[2:-2]
+                    elif line.startswith(b'b"') and line.endswith(b'"\n'):
+                        stripped = line[2:-2]
+                    if stripped is not None and 0 < len(stripped) <= max_len:
+                        valid_inputs.append(stripped)
+                        continue
+                # Otherwise treat as raw bytes line
+                if 0 < len(line) <= max_len:
+                    # drop final newline if present to keep tests stable
+                    if line.endswith(b"\n"):
+                        line = line[:-1]
+                    if line:
                         valid_inputs.append(line)
 
         if not valid_inputs:
@@ -369,7 +337,8 @@ def substitute_one_repo(
 
         logging.info(f"Loaded {len(valid_inputs)} inputs for {target_name}")
 
-        # 策略选择输入 - 最多选择 n_fuzz 个输入
+        # select inputs
+        inputs: list[bytes]
         if strategy == "shuffle":
             random.shuffle(valid_inputs)
             inputs = valid_inputs[:n_fuzz]
@@ -378,39 +347,46 @@ def substitute_one_repo(
         else:
             inputs = valid_inputs[:n_fuzz]
 
-        # 每个 fuzz input 生成一个单独的文件
+        # emit tests
         for idx, fuzz_input in enumerate(inputs, start=1):
-            with open(source_file, "r") as f_src:
+            with open(source_file, "r", encoding="utf-8") as f_src:
                 code = f_src.read()
 
-            
-            # 找到 TestInput / TestOneInput 并改成 test_{idx}
+            # rename entry to test_{idx}
             code = re.sub(r"\bTestOneInput\b", f"test_{idx}", code)
             code = re.sub(r"\bTestInput\b", f"test_{idx}", code)
-            
-            # 插入测试数据 - 确保使用二进制表示
-            def insert_fuzz_input(match):
+
+            # inject bytes into the top of the function body
+            def _insert(match: re.Match) -> str:
+                header = match.group(1)
                 indent = match.group(2)
-                # 使用 repr() 安全表示二进制数据
                 byte_repr = repr(fuzz_input)
-                return f"{match.group(1)}{indent}data = {byte_repr}\n{indent}"
-            
-            # 在测试函数中插入数据
+                return f"{header}{indent}data = {byte_repr}\n{indent}"
+
             code = re.sub(
                 rf"(def\s+test_{idx}\s*\([^)]*\)\s*:\s*\n)(\s*)",
-                insert_fuzz_input,
+                _insert,
                 code,
             )
-            
+
             out_path = pjoin(template_dir, f"{target_name}.testgen_{idx}.py")
-            with open(out_path, "w") as f_out:
+            with open(out_path, "w", encoding="utf-8") as f_out:
                 f_out.write(code)
 
-            # 格式化代码
+            # format if black exists
             try:
                 subprocess.run(["black", out_path], check=False)
             except FileNotFoundError:
                 logging.warning("Black formatter not found, skipping formatting")
+
+
+# top-level wrapper to map (repo, targets) tuples without lambdas
+
+def _substitute_wrapper(args: tuple[str, list[str], int, str, int, float]) -> None:
+    return substitute_one_repo(*args)
+
+
+
 def testgen_repos(
     repos: list[str],
     jobs: int,
@@ -418,39 +394,67 @@ def testgen_repos(
     strategy: str = "shuffle",
     max_len: int = 100,
     sim_thresh: float = 0.8,
-):
-    """
-    Generate test cases from fuzzing inputs
-    
-    Args:
-        repos (list[str]): List of repository paths
-        jobs (int): Number of parallel tasks
-        n_fuzz (int): Number of inputs to use
-        strategy (str): Selection strategy
-        max_len (int): Maximum length
-        sim_thresh (float): Similarity threshold
-    """
+) -> None:
+    """Generate test cases from fuzzing inputs."""
     # First get all targets
-    targets_list = []
+    targets_list: list[list[str]] = []
     for repo in repos:
         project_name = os.path.basename(repo)
         oss_fuzz_dir = Path(repo).parent.parent
         targets = discover_targets(project_name, oss_fuzz_dir)
         targets_list.append(targets)
-    
-    target_map = {repo: targets for repo, targets in zip(repos, targets_list)}
-    
-    # Process each repository in parallel
+
+    target_map: dict[str, list[str]] = {repo: targets for repo, targets in zip(repos, targets_list)}
+
+    work: list[tuple[str, list[str], int, str, int, float]] = [
+        (repo, targets, n_fuzz, strategy, max_len, sim_thresh)
+        for repo, targets in target_map.items()
+    ]
+
     with ProcessingPool(jobs) as p:
-        list(p.map(
-            lambda item: substitute_one_repo(
-                item[0], item[1], n_fuzz, strategy, max_len, sim_thresh
-            ),
-            target_map.items()
-        ))
+        list(p.map(_substitute_wrapper, work))
+
+
+############################################################
+# Build steps (parallel via parallel_subprocess)
+############################################################
+
+
+def build_image(repos: list[str], jobs: int) -> None:
+    """Build Docker images for all repos."""
+    logging.info(f"Building Docker images for {len(repos)} OSS-Fuzz projects")
+    log_dir = os.path.abspath("fuzz_pipeline_log")
+    os.makedirs(log_dir, exist_ok=True)
+
+    parallel_subprocess(
+        repos,
+        jobs,
+        partial(_run_build_image, log_dir=log_dir),
+        on_exit=None,
+    )
+
+
+def build_fuzzer(repos: list[str], jobs: int) -> None:
+    """Build fuzzers in parallel for successfully built projects."""
+    logging.info(f"Building fuzzers for {len(repos)} OSS-Fuzz projects")
+    log_dir = os.path.abspath("fuzz_pipeline_log")
+    os.makedirs(log_dir, exist_ok=True)
+
+    parallel_subprocess(
+        repos,
+        jobs,
+        partial(_run_build_fuzzer, log_dir=log_dir),
+        on_exit=None,
+    )
+
+
+############################################################
+# CLI
+############################################################
+
 
 def main(
-    repo_id: str = "data/valid_projects3.txt",
+    repo_id: str = "data/valid_projects2.txt",
     repo_root: str = "fuzz/oss-fuzz/projects/",
     timeout: int = 30,
     jobs: int = 2,
@@ -459,36 +463,42 @@ def main(
     strategy: str = "shuffle",
     max_len: int = 100,
     sim_thresh: float = 0.8,
-   
 ):
     """
     Main function, controlling the entire fuzzing process
-    
+
     Args:
-        repo_id (str): Project ID file path
+        repo_id (str): Project ID file path or a single project name
         repo_root (str): Project root directory
-        timeout (int): Timeout duration
+        timeout (int): Timeout duration per fuzz target (seconds)
         jobs (int): Number of parallel tasks
-        pipeline (str): Pipeline type
-        n_fuzz (int): Number of inputs to use
-        strategy (str): Selection strategy
-        max_len (int): Maximum length
-        sim_thresh (float): Similarity threshold
+        pipeline (str): One of [build_image, build_fuzzer, fuzz, testgen, transform, all]
+        n_fuzz (int): Number of inputs to use (testgen)
+        strategy (str): Selection strategy [head|shuffle|reverse]
+        max_len (int): Maximum fuzz input length (bytes)
+        sim_thresh (float): Reserved for similarity dedup (not used currently)
     """
     try:
-        with open(repo_id, "r") as f:
+        with open(repo_id, "r", encoding="utf-8") as f:
             repo_id_list = [line.strip() for line in f if line.strip()]
     except FileNotFoundError:
         repo_id_list = [repo_id]
 
     # Collect repository paths
-    repos = []
-    for repo_id in repo_id_list:
-        repo_path = abspath(os.path.join(repo_root, repo_id))
+    repos: list[str] = []
+    for rid in repo_id_list:
+        repo_path = abspath(os.path.join(repo_root, rid))
         if os.path.isdir(repo_path):
             repos.append(repo_path)
+        else:
+            logging.warning(f"Repo not found or not a directory: {repo_path}")
+
+    if not repos:
+        logging.error("No valid repositories found.")
+        return
+
+    pipeline = pipeline.lower().strip()
 
-    # Execute specified pipeline
     if pipeline == "build_image":
         build_image(repos, jobs)
     elif pipeline == "build_fuzzer":
@@ -502,12 +512,16 @@ def main(
     elif pipeline == "all":
         build_image(repos, jobs)
         build_fuzzer(repos, jobs)
-        transform_repos(repos, jobs)  # Generate test templates
+        transform_repos(repos, jobs)
         fuzz_repos(repos, jobs, timeout)
         testgen_repos(repos, jobs, n_fuzz, strategy, max_len, sim_thresh)
     else:
         logging.error(f"Unknown pipeline: {pipeline}")
 
+
 if __name__ == "__main__":
-    logging.basicConfig(level=logging.INFO)
-    fire.Fire(main)
\ No newline at end of file
+    logging.basicConfig(
+        level=logging.INFO,
+        format="%(asctime)s %(levelname)s %(message)s",
+    )
+    fire.Fire(main)

From 60fbb7ab251e6b124c1a63d6b059010a46df989b Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Thu, 14 Aug 2025 22:02:59 +0000
Subject: [PATCH 109/134] =?UTF-8?q?=E6=9B=B4=E6=8D=A2=E4=B8=BA=E6=9C=AA?=
 =?UTF-8?q?=E5=88=A0=E9=99=A4=E5=86=97=E4=BD=99=E7=89=88=E6=9C=AC?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 fuzz/collect_fuzz_python.py | 520 ++++++++++++++++++------------------
 1 file changed, 253 insertions(+), 267 deletions(-)

diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py
index a42d0ba..2d950a0 100644
--- a/fuzz/collect_fuzz_python.py
+++ b/fuzz/collect_fuzz_python.py
@@ -2,11 +2,9 @@
 Script for Python project fuzzing and test template conversion
 usage: PYTHONPATH=. python3 fuzz/collect_fuzz_python.py --pipeline all
 """
-from __future__ import annotations
-
 from pathlib import Path
 import logging
-from typing import Optional
+from typing import Optional, List, Tuple
 import fire
 import os
 from UniTSyn.frontend.util import wrap_repo, parallel_subprocess
@@ -15,178 +13,157 @@
 from tqdm import tqdm
 from pathos.multiprocessing import ProcessingPool
 import random
+from difflib import SequenceMatcher
 from itertools import islice
 from datetime import datetime
 import re
-from functools import partial
 
-############################################################
-# Top-level helpers (picklable) to avoid pool pickling woes
-############################################################
+def build_image(repos: list[str], jobs: int):
+    """
+    Build Docker images for OSS-Fuzz projects corresponding to each repository
+    
+    Args:
+        repos (list[str]): List of repository paths
+        jobs (int): Number of parallel tasks
+    """
+    logging.info(f"Building Docker images for {len(repos)} OSS-Fuzz projects")
+    log_dir = os.path.abspath("fuzz_pipeline_log")
+    os.makedirs(log_dir, exist_ok=True)
 
+    def _build_cmd(path: str):
+        project_name = os.path.basename(path.rstrip("/"))
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        log_file = os.path.join(log_dir, f"{project_name}_{timestamp}.log")
 
-def _run_build_image(path: str, log_dir: str) -> Optional[subprocess.Popen]:
-    """Helper for build_image: must be top-level for pickling."""
-    project_name = os.path.basename(path.rstrip("/"))
-    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-    log_file = os.path.join(log_dir, f"{project_name}_{timestamp}.log")
-    logging.info(f"Start building {project_name}, logging to {log_file}")
-    # Note: child process keeps the fd open even if parent closes it after spawn
-    f = open(log_file, "w")
-    try:
+        logging.info(f"Start building {project_name}, logging to {log_file}")
         return subprocess.Popen(
-            "yes | python3 infra/helper.py build_image {project}".format(project=project_name),
+            f"yes | python3 infra/helper.py build_image {project_name}",
             cwd=os.path.abspath(os.path.join(path, "../../")),
-            stdout=f,
+            stdout=open(log_file, "w"),
             stderr=subprocess.STDOUT,
             shell=True,
         )
-    except Exception:
-        f.close()
-        raise
-
-
-def _run_build_fuzzer(path: str, log_dir: str) -> Optional[subprocess.Popen]:
-    """Helper for build_fuzzer: must be top-level for pickling."""
-    project_name = os.path.basename(path.rstrip("/"))
-    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-    log_file = os.path.join(log_dir, f"{project_name}_fuzzer_{timestamp}.log")
-    logging.info(f"Start building fuzzers for {project_name}, logging to {log_file}")
-    f = open(log_file, "w")
-    try:
+
+    _ = parallel_subprocess(repos, jobs, _build_cmd, on_exit=None)
+
+def build_fuzzer(repos: list[str], jobs: int):
+    """
+    Build fuzzers in parallel for successfully built projects
+    
+    Args:
+        repos (list[str]): List of repository paths
+        jobs (int): Number of parallel tasks
+    """
+    logging.info(f"Building fuzzers for {len(repos)} OSS-Fuzz projects")
+    log_dir = os.path.abspath("fuzz_pipeline_log")
+    os.makedirs(log_dir, exist_ok=True)
+
+    def _build_cmd(path: str):
+        project_name = os.path.basename(path.rstrip("/"))
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        log_file = os.path.join(log_dir, f"{project_name}_fuzzer_{timestamp}.log")
+
+        logging.info(f"Start building fuzzers for {project_name}, logging to {log_file}")
         return subprocess.Popen(
-            "python3 infra/helper.py build_fuzzers --sanitizer address {project}".format(
-                project=project_name
-            ),
+            f"python3 infra/helper.py build_fuzzers --sanitizer address {project_name}",
             cwd=os.path.abspath(os.path.join(path, "../../")),
-            stdout=f,
+            stdout=open(log_file, "w"),
             stderr=subprocess.STDOUT,
             shell=True,
         )
-    except Exception:
-        f.close()
-        raise
-
-
-############################################################
-# Discover & fuzz
-############################################################
 
+    _ = parallel_subprocess(repos, jobs, _build_cmd, on_exit=None)
 
 def discover_targets(project_name: str, oss_fuzz_dir: Path) -> list[str]:
-    """Discover fuzzing targets in out/<project> directory.
-
-    Rules:
-      - file name startswith "fuzz_"
-      - no dot in filename (exclude corpora, dictionaries)
-      - executable bit set
-      - (optional) if you want only print1 variants, pass a filter later
+    """
+    Discover fuzzing targets
     """
     out_dir = oss_fuzz_dir / "build" / "out" / project_name
     targets: list[str] = []
-
+    
     logging.debug(f"Searching fuzz targets in: {out_dir}")
-
+    
     if not out_dir.is_dir():
         logging.warning(f"Build output directory for {project_name} does not exist")
         return targets
 
     try:
         for f in out_dir.iterdir():
-            if (
-                f.is_file()
-                and f.name.startswith("fuzz_")
-                and f.name.endswith("print1")
-                and "." not in f.name
-                and os.access(f, os.X_OK)
-            ):
+            if (f.is_file() and f.name.startswith("fuzz_") and 
+                '.' not in f.name and f.name.endswith("print1") and 
+                os.access(f, os.X_OK)):
                 targets.append(f.name)
-        logging.info(
-            f"Discovered {len(targets)} fuzz targets in {project_name}: {targets}"
-        )
+        logging.info(f"Discovered {len(targets)} fuzz targets in {project_name}: {targets}")
     except Exception as e:
-        logging.error(
-            f"Error discovering targets for {project_name}: {e}", exc_info=True
-        )
-
+        logging.error(f"Error discovering targets for {project_name}: {e}", exc_info=True)
+    
     return targets
 
 
-
-def fuzz_one_target(target: tuple[str, str], timeout: int) -> Optional[subprocess.Popen]:
-    """Perform fuzzing on a single fuzzing target."""
+def fuzz_one_target(target: tuple[str, str], timeout: int):
+    """
+    Perform fuzzing on a single fuzzing target
+    """
     repo_path, target_name = target
     project_name = os.path.basename(repo_path)
     oss_fuzz_root = os.path.dirname(os.path.dirname(repo_path))
-
+    
     input_file_path = pjoin(repo_path, "fuzz_inputs", target_name)
     os.makedirs(os.path.dirname(input_file_path), exist_ok=True)
 
-    logging.info(
-        f"Starting fuzzing: project={project_name}, target={target_name}, timeout={timeout}s"
-    )
+    logging.info(f"Starting fuzzing: project={project_name}, target={target_name}, timeout={timeout}s")
     logging.debug(f"Fuzz output will be saved to: {input_file_path}")
 
     try:
-        f = open(input_file_path, "wb")
-        try:
+        with open(input_file_path, "w") as input_file:
             return subprocess.Popen(
                 [
                     "bash",
                     "-c",
-                    f"python3 infra/helper.py run_fuzzer {project_name} {target_name} -- -max_total_time={timeout}",
+                    f"python3 infra/helper.py run_fuzzer {project_name} {target_name} -- -max_total_time={timeout}"
                 ],
                 cwd=oss_fuzz_root,
-                stdout=f,
+                stdout=input_file,
                 stderr=subprocess.DEVNULL,
             )
-        except Exception:
-            f.close()
-            raise
     except Exception as e:
-        logging.error(
-            f"Error starting fuzzer for {project_name}/{target_name}: {e}",
-            exc_info=True,
-        )
+        logging.error(f"Error starting fuzzer for {project_name}/{target_name}: {e}", exc_info=True)
         return None
 
 
-
-def fuzz_repos(repos: list[str], jobs: int, timeout: int) -> None:
-    """Perform fuzzing on a set of repositories."""
+def fuzz_repos(repos: list[str], jobs: int, timeout: int):
+    """
+    Perform fuzzing on a set of repositories
+    """
     logging.info(f"Discovering fuzz targets for {len(repos)} repositories...")
 
-    targets_list: list[list[str]] = []
+    targets_list = []
     for repo in repos:
         project_name = os.path.basename(repo)
         oss_fuzz_dir = Path(repo).parent.parent
         targets = discover_targets(project_name, oss_fuzz_dir)
         targets_list.append(targets)
-
-    target_map: dict[str, list[str]] = {repo: targets for repo, targets in zip(repos, targets_list)}
+    
+    target_map = {repo: targets for repo, targets in zip(repos, targets_list)}
     all_targets: list[tuple[str, str]] = [
         (k, v) for k, vs in target_map.items() for v in vs
     ]
-
+    
     logging.info(f"Total fuzz targets discovered: {len(all_targets)}")
     for repo, targets in target_map.items():
         logging.info(f"{os.path.basename(repo)}: {len(targets)} targets")
 
     for repo in repos:
         os.makedirs(pjoin(repo, "fuzz_inputs"), exist_ok=True)
+    
+    logging.info(f"Starting parallel fuzzing with {jobs} jobs, timeout={timeout}s per target")
+    parallel_subprocess(all_targets, jobs, lambda p: fuzz_one_target(p, timeout), on_exit=None)
+import os
+import re
+import logging
+from os.path import join as pjoin
 
-    logging.info(
-        f"Starting parallel fuzzing with {jobs} jobs, timeout={timeout}s per target"
-    )
-    parallel_subprocess(all_targets, jobs, partial(fuzz_one_target, timeout=timeout), on_exit=None)
-
-
-############################################################
-# Transform: generate cleaned templates from fuzz target py
-############################################################
-
-
-def generate_test_template(target_name: str, repo_path: str) -> Optional[str]:
+def generate_test_template(target_name: str, repo_path: str):
     """
     Generate Python test template for a single target by stripping license header,
     main() block, and print(data) inside TestInput/TestOneInput.
@@ -199,49 +176,47 @@ def generate_test_template(target_name: str, repo_path: str) -> Optional[str]:
     with open(src_file, "r", encoding="utf-8") as f:
         original_code = f.read()
 
-    # 1) keep shebang
+    # --- 1. 保留 shebang，但删除许可证注释 ---
     shebang = ""
     if original_code.startswith("#!"):
         shebang, original_code = original_code.split("\n", 1)
         shebang += "\n"
 
-    # 2) drop license block (best-effort)
+    # 匹配从文件开头到包含 "limitations under the license" 那一行的注释块
     license_pattern = re.compile(
         r"^(?:\s*#.*\n)*\s*#.*limitations\s+under\s+the\s+license.*\n",
-        re.IGNORECASE | re.MULTILINE,
+        re.IGNORECASE | re.MULTILINE
     )
     code_no_license = re.sub(license_pattern, "", original_code, count=1)
 
-    # 3) remove main() and if __main__ guards (best-effort)
+    # --- 2. 删除 main 函数和 if __name__ == '__main__' ---
     code_no_main = re.sub(
-        r"\n?def\s+main\([^)]*\):[\s\S]*?(?=^\S|\Z)",
+        r"\n?def\s+main\([\s\S]*?(?=^if\s+__name__\s*==\s*['\"]__main__['\"]:)",
         "",
         code_no_license,
-        flags=re.MULTILINE,
+        flags=re.MULTILINE
     )
     code_no_main = re.sub(
-        r"\n?if\s+__name__\s*==\s*['\"]__main__['\"]:[\s\S]*?(?=^\S|\Z)",
+        r"\n?if\s+__name__\s*==\s*['\"]__main__['\"]:\s*main\(\s*\)\s*",
         "",
         code_no_main,
-        flags=re.MULTILINE,
+        flags=re.MULTILINE
     )
 
-    # 4) remove print(data) inside TestInput/TestOneInput
-    def _strip_print_in_func(src: str) -> str:
-        def _repl(m: re.Match) -> str:
-            body = m.group(0)
-            return re.sub(r"^\s*print\(data\)\s*$", "", body, flags=re.MULTILINE)
-
-        return re.sub(
-            r"(def\s+Test(?:Input|OneInput)\s*\([\s\S]*?\):[\s\S]*?)(?=^def\s|^@|\Z)",
-            _repl,
-            src,
-            flags=re.MULTILINE,
-        )
+    # --- 3. 删除 TestInput/TestOneInput 内的 print(data) ---
+    def remove_print_in_func(match):
+        func_body = match.group(0)
+        func_body = re.sub(r"^\s*print\(data\)\s*$", "", func_body, flags=re.MULTILINE)
+        return func_body
 
-    cleaned_code = _strip_print_in_func(code_no_main)
+    cleaned_code = re.sub(
+        r"(def\s+Test(?:Input|OneInput)\s*\([\s\S]*?)(?=\n\w|\Z)",
+        lambda m: remove_print_in_func(m),
+        code_no_main,
+        flags=re.MULTILINE
+    )
 
-    # 5) write into tests-gen
+    # --- 4. 输出到 tests-gen ---
     template_dir = pjoin(repo_path, "tests-gen")
     os.makedirs(template_dir, exist_ok=True)
 
@@ -257,29 +232,82 @@ def _repl(m: re.Match) -> str:
     logging.info(f"Generated cleaned template: {template_path}")
     return template_path
 
-
-# top-level for pickling
-
-def _transform_repo_fn(repo: str) -> list[Optional[str]]:
-    project_name = os.path.basename(repo)
-    oss_fuzz_dir = Path(repo).parent.parent
-    targets = discover_targets(project_name, oss_fuzz_dir)
-    return [generate_test_template(t, repo) for t in targets]
-
-
-
-def transform_repos(repos: list[str], jobs: int) -> list[list[Optional[str]]]:
+def transform_repos(repos: list[str], jobs: int):
     """
-    Generate test templates for all targets (parallel, picklable).
+    Generate test templates for all targets
+    
+    Args:
+        repos (list[str]): List of repository paths
+        jobs (int): Number of parallel tasks
     """
     logging.info("Generating test templates")
+    
+    def _transform_repo(repo: str):
+        project_name = os.path.basename(repo)
+        oss_fuzz_dir = Path(repo).parent.parent
+        targets = discover_targets(project_name, oss_fuzz_dir)
+        return [generate_test_template(t, repo) for t in targets]
+    
     with ProcessingPool(jobs) as p:
-        return list(p.map(_transform_repo_fn, repos))
-
-
-############################################################
-# Testgen: substitute fuzz inputs into test templates
-############################################################
+        return list(p.map(_transform_repo, repos))
+
+# def escape_special_chars(input_data: str) -> str:
+#     """
+#     Escape special characters in input data for Python byte strings
+    
+#     Args:
+#         input_data (str): Raw input data
+        
+#     Returns:
+#         str: Input data with escaped characters
+#     """
+#     # For Python, we can use repr() to safely represent byte strings
+#     # This will handle all special characters and non-ASCII bytes
+#     return repr(input_data.encode('latin-1', 'replace'))
+
+# def substitute_input(template: str, input_data: str, idx: int, target_name: str) -> str:
+#     """
+#     Replace fuzzing input into Python test template
+    
+#     Args:
+#         template (str): Template content
+#         input_data (str): Input data
+#         idx (int): Test index
+#         target_name (str): Target name
+        
+#     Returns:
+#         str: Test code after substitution
+#     """
+#     # Escape special characters for Python
+#     escaped_input = escape_special_chars(input_data)
+    
+#     # Replace input placeholder
+#     new_template = template.replace(
+#         'input_data = b""',
+#         f'input_data = {escaped_input}'
+#     )
+    
+#     # Replace test method name to avoid duplication
+#     return new_template.replace(
+#         f"def test_generated(self):",
+#         f"def test_{idx}(self):"
+#     )
+
+# def has_similar(selected: list[str], x: str, thresh: float = 0.8) -> bool:
+#     """
+#     Check if a string is sufficiently similar to any string in the selected list
+    
+#     Args:
+#         selected (list[str]): List of selected strings
+#         x (str): String to check
+#         thresh (float): Similarity threshold
+        
+#     Returns:
+#         bool: Whether they are similar
+#     """
+#     def similar(a, b):
+#         return SequenceMatcher(None, a, b).ratio()
+#     return any(similar(x, y) > thresh for y in selected)
 
 
 def substitute_one_repo(
@@ -289,7 +317,7 @@ def substitute_one_repo(
     strategy: str,
     max_len: int,
     sim_thresh: float,
-) -> None:
+):
     """
     从原 fuzz target 复制文件，按 fuzz input 生成多个 testgen 文件。
     """
@@ -303,32 +331,36 @@ def substitute_one_repo(
             logging.warning(f"Source file not found: {source_file}")
             continue
 
+        
         input_path = pjoin(input_dir, f"{target_name}")
         if not os.path.exists(input_path):
             logging.warning(f"Input file not found: {input_path}")
             continue
 
-        # 读取所有有效的输入数据（逐行，允许原始二进制）
-        valid_inputs: list[bytes] = []
+        # 读取所有有效的输入数据
+        valid_inputs = []
         with open(input_path, "rb") as f_input:
             for line in f_input:
-                # If the line looks like a Python bytes literal b'...'
-                if line.startswith(b"b'") or line.startswith(b'b"'):
-                    # Try to strip leading b' or b" and trailing quote+newline
-                    stripped = None
-                    if line.startswith(b"b'") and line.endswith(b"'\n"):
-                        stripped = line[2:-2]
-                    elif line.startswith(b'b"') and line.endswith(b'"\n'):
-                        stripped = line[2:-2]
-                    if stripped is not None and 0 < len(stripped) <= max_len:
-                        valid_inputs.append(stripped)
-                        continue
-                # Otherwise treat as raw bytes line
-                if 0 < len(line) <= max_len:
-                    # drop final newline if present to keep tests stable
-                    if line.endswith(b"\n"):
-                        line = line[:-1]
-                    if line:
+                try:
+                    # 尝试解码行以检查内容
+                    decoded = line.decode('utf-8', errors='replace')
+                    
+                    # 只处理以 b' 或 b" 开头的行（这些是实际的测试输入）
+                    if decoded.startswith(("b'", 'b"')):
+                        # 提取字节数据部分
+                        if decoded.startswith("b'") and decoded.endswith("'\n"):
+                            byte_data = line[2:-2]  # 移除 b' 和末尾的 '\n
+                        elif decoded.startswith('b"') and decoded.endswith('"\n'):
+                            byte_data = line[2:-2]  # 移除 b" 和末尾的 "\n
+                        else:
+                            continue
+                            
+                        # 只保留有效长度的输入
+                        if 0 < len(byte_data) <= max_len:
+                            valid_inputs.append(byte_data)
+                except UnicodeDecodeError:
+                    # 如果无法解码，可能是二进制数据，直接使用
+                    if 0 < len(line) <= max_len:
                         valid_inputs.append(line)
 
         if not valid_inputs:
@@ -337,8 +369,7 @@ def substitute_one_repo(
 
         logging.info(f"Loaded {len(valid_inputs)} inputs for {target_name}")
 
-        # select inputs
-        inputs: list[bytes]
+        # 策略选择输入 - 最多选择 n_fuzz 个输入
         if strategy == "shuffle":
             random.shuffle(valid_inputs)
             inputs = valid_inputs[:n_fuzz]
@@ -347,46 +378,39 @@ def substitute_one_repo(
         else:
             inputs = valid_inputs[:n_fuzz]
 
-        # emit tests
+        # 每个 fuzz input 生成一个单独的文件
         for idx, fuzz_input in enumerate(inputs, start=1):
-            with open(source_file, "r", encoding="utf-8") as f_src:
+            with open(source_file, "r") as f_src:
                 code = f_src.read()
 
-            # rename entry to test_{idx}
+            
+            # 找到 TestInput / TestOneInput 并改成 test_{idx}
             code = re.sub(r"\bTestOneInput\b", f"test_{idx}", code)
             code = re.sub(r"\bTestInput\b", f"test_{idx}", code)
-
-            # inject bytes into the top of the function body
-            def _insert(match: re.Match) -> str:
-                header = match.group(1)
+            
+            # 插入测试数据 - 确保使用二进制表示
+            def insert_fuzz_input(match):
                 indent = match.group(2)
+                # 使用 repr() 安全表示二进制数据
                 byte_repr = repr(fuzz_input)
-                return f"{header}{indent}data = {byte_repr}\n{indent}"
-
+                return f"{match.group(1)}{indent}data = {byte_repr}\n{indent}"
+            
+            # 在测试函数中插入数据
             code = re.sub(
                 rf"(def\s+test_{idx}\s*\([^)]*\)\s*:\s*\n)(\s*)",
-                _insert,
+                insert_fuzz_input,
                 code,
             )
-
+            
             out_path = pjoin(template_dir, f"{target_name}.testgen_{idx}.py")
-            with open(out_path, "w", encoding="utf-8") as f_out:
+            with open(out_path, "w") as f_out:
                 f_out.write(code)
 
-            # format if black exists
+            # 格式化代码
             try:
                 subprocess.run(["black", out_path], check=False)
             except FileNotFoundError:
                 logging.warning("Black formatter not found, skipping formatting")
-
-
-# top-level wrapper to map (repo, targets) tuples without lambdas
-
-def _substitute_wrapper(args: tuple[str, list[str], int, str, int, float]) -> None:
-    return substitute_one_repo(*args)
-
-
-
 def testgen_repos(
     repos: list[str],
     jobs: int,
@@ -394,67 +418,39 @@ def testgen_repos(
     strategy: str = "shuffle",
     max_len: int = 100,
     sim_thresh: float = 0.8,
-) -> None:
-    """Generate test cases from fuzzing inputs."""
+):
+    """
+    Generate test cases from fuzzing inputs
+    
+    Args:
+        repos (list[str]): List of repository paths
+        jobs (int): Number of parallel tasks
+        n_fuzz (int): Number of inputs to use
+        strategy (str): Selection strategy
+        max_len (int): Maximum length
+        sim_thresh (float): Similarity threshold
+    """
     # First get all targets
-    targets_list: list[list[str]] = []
+    targets_list = []
     for repo in repos:
         project_name = os.path.basename(repo)
         oss_fuzz_dir = Path(repo).parent.parent
         targets = discover_targets(project_name, oss_fuzz_dir)
         targets_list.append(targets)
-
-    target_map: dict[str, list[str]] = {repo: targets for repo, targets in zip(repos, targets_list)}
-
-    work: list[tuple[str, list[str], int, str, int, float]] = [
-        (repo, targets, n_fuzz, strategy, max_len, sim_thresh)
-        for repo, targets in target_map.items()
-    ]
-
+    
+    target_map = {repo: targets for repo, targets in zip(repos, targets_list)}
+    
+    # Process each repository in parallel
     with ProcessingPool(jobs) as p:
-        list(p.map(_substitute_wrapper, work))
-
-
-############################################################
-# Build steps (parallel via parallel_subprocess)
-############################################################
-
-
-def build_image(repos: list[str], jobs: int) -> None:
-    """Build Docker images for all repos."""
-    logging.info(f"Building Docker images for {len(repos)} OSS-Fuzz projects")
-    log_dir = os.path.abspath("fuzz_pipeline_log")
-    os.makedirs(log_dir, exist_ok=True)
-
-    parallel_subprocess(
-        repos,
-        jobs,
-        partial(_run_build_image, log_dir=log_dir),
-        on_exit=None,
-    )
-
-
-def build_fuzzer(repos: list[str], jobs: int) -> None:
-    """Build fuzzers in parallel for successfully built projects."""
-    logging.info(f"Building fuzzers for {len(repos)} OSS-Fuzz projects")
-    log_dir = os.path.abspath("fuzz_pipeline_log")
-    os.makedirs(log_dir, exist_ok=True)
-
-    parallel_subprocess(
-        repos,
-        jobs,
-        partial(_run_build_fuzzer, log_dir=log_dir),
-        on_exit=None,
-    )
-
-
-############################################################
-# CLI
-############################################################
-
+        list(p.map(
+            lambda item: substitute_one_repo(
+                item[0], item[1], n_fuzz, strategy, max_len, sim_thresh
+            ),
+            target_map.items()
+        ))
 
 def main(
-    repo_id: str = "data/valid_projects2.txt",
+    repo_id: str = "data/valid_projects3.txt",
     repo_root: str = "fuzz/oss-fuzz/projects/",
     timeout: int = 30,
     jobs: int = 2,
@@ -463,42 +459,36 @@ def main(
     strategy: str = "shuffle",
     max_len: int = 100,
     sim_thresh: float = 0.8,
+   
 ):
     """
     Main function, controlling the entire fuzzing process
-
+    
     Args:
-        repo_id (str): Project ID file path or a single project name
+        repo_id (str): Project ID file path
         repo_root (str): Project root directory
-        timeout (int): Timeout duration per fuzz target (seconds)
+        timeout (int): Timeout duration
         jobs (int): Number of parallel tasks
-        pipeline (str): One of [build_image, build_fuzzer, fuzz, testgen, transform, all]
-        n_fuzz (int): Number of inputs to use (testgen)
-        strategy (str): Selection strategy [head|shuffle|reverse]
-        max_len (int): Maximum fuzz input length (bytes)
-        sim_thresh (float): Reserved for similarity dedup (not used currently)
+        pipeline (str): Pipeline type
+        n_fuzz (int): Number of inputs to use
+        strategy (str): Selection strategy
+        max_len (int): Maximum length
+        sim_thresh (float): Similarity threshold
     """
     try:
-        with open(repo_id, "r", encoding="utf-8") as f:
+        with open(repo_id, "r") as f:
             repo_id_list = [line.strip() for line in f if line.strip()]
     except FileNotFoundError:
         repo_id_list = [repo_id]
 
     # Collect repository paths
-    repos: list[str] = []
-    for rid in repo_id_list:
-        repo_path = abspath(os.path.join(repo_root, rid))
+    repos = []
+    for repo_id in repo_id_list:
+        repo_path = abspath(os.path.join(repo_root, repo_id))
         if os.path.isdir(repo_path):
             repos.append(repo_path)
-        else:
-            logging.warning(f"Repo not found or not a directory: {repo_path}")
-
-    if not repos:
-        logging.error("No valid repositories found.")
-        return
-
-    pipeline = pipeline.lower().strip()
 
+    # Execute specified pipeline
     if pipeline == "build_image":
         build_image(repos, jobs)
     elif pipeline == "build_fuzzer":
@@ -512,16 +502,12 @@ def main(
     elif pipeline == "all":
         build_image(repos, jobs)
         build_fuzzer(repos, jobs)
-        transform_repos(repos, jobs)
+        transform_repos(repos, jobs)  # Generate test templates
         fuzz_repos(repos, jobs, timeout)
         testgen_repos(repos, jobs, n_fuzz, strategy, max_len, sim_thresh)
     else:
         logging.error(f"Unknown pipeline: {pipeline}")
 
-
 if __name__ == "__main__":
-    logging.basicConfig(
-        level=logging.INFO,
-        format="%(asctime)s %(levelname)s %(message)s",
-    )
-    fire.Fire(main)
+    logging.basicConfig(level=logging.INFO)
+    fire.Fire(main)
\ No newline at end of file

From 641998d6eb40ccfea35a694802028413ae25bb70 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Thu, 14 Aug 2025 22:39:22 +0000
Subject: [PATCH 110/134] =?UTF-8?q?template=E6=8F=92=E5=85=A5data=3Db""=20?=
 =?UTF-8?q?=E5=87=BD=E6=95=B0header=E6=94=B9=E4=B8=BAtest=5F()?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 fuzz/collect_fuzz_python.py | 87 ++++++++++++++++++++++---------------
 1 file changed, 52 insertions(+), 35 deletions(-)

diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py
index 2d950a0..9cf01cb 100644
--- a/fuzz/collect_fuzz_python.py
+++ b/fuzz/collect_fuzz_python.py
@@ -166,7 +166,7 @@ def fuzz_repos(repos: list[str], jobs: int, timeout: int):
 def generate_test_template(target_name: str, repo_path: str):
     """
     Generate Python test template for a single target by stripping license header,
-    main() block, and print(data) inside TestInput/TestOneInput.
+    main() block, and converting TestInput/TestOneInput to test_ with data=b"".
     """
     src_file = pjoin(repo_path, target_name + ".py")
     if not os.path.exists(src_file):
@@ -182,7 +182,6 @@ def generate_test_template(target_name: str, repo_path: str):
         shebang, original_code = original_code.split("\n", 1)
         shebang += "\n"
 
-    # 匹配从文件开头到包含 "limitations under the license" 那一行的注释块
     license_pattern = re.compile(
         r"^(?:\s*#.*\n)*\s*#.*limitations\s+under\s+the\s+license.*\n",
         re.IGNORECASE | re.MULTILINE
@@ -203,15 +202,49 @@ def generate_test_template(target_name: str, repo_path: str):
         flags=re.MULTILINE
     )
 
-    # --- 3. 删除 TestInput/TestOneInput 内的 print(data) ---
-    def remove_print_in_func(match):
-        func_body = match.group(0)
-        func_body = re.sub(r"^\s*print\(data\)\s*$", "", func_body, flags=re.MULTILINE)
-        return func_body
-
+    # --- 3. 转换测试函数 ---
+    def process_test_function(match):
+        # 提取完整的函数定义和函数体
+        func_str = match.group(0)
+        
+        # 1. 删除print(data)语句
+        func_str = re.sub(r'print\s*\(\s*data\s*\)\s*', '', func_str)
+        
+        # 2. 将TestInput/TestOneInput改为test_()
+        func_str = re.sub(r'def\s+(TestInput|TestOneInput)\s*\(data\)', 'def test_()', func_str)
+        
+        # 3. 在函数体第一行可执行代码前插入data = b""
+        # 查找第一个非空行（忽略空行和注释）
+        lines = func_str.splitlines()
+        if len(lines) < 2:
+            return func_str
+            
+        # 找到函数定义行后的第一个非空、非注释行
+        insert_idx = None
+        for i in range(1, len(lines)):
+            line = lines[i].strip()
+            if line and not line.startswith('#'):
+                insert_idx = i
+                break
+        
+        if insert_idx is None:
+            return func_str
+            
+        # 获取该行的缩进量
+        indent_match = re.match(r'^(\s*)', lines[insert_idx])
+        if not indent_match:
+            return func_str
+            
+        indent = indent_match.group(1)
+        
+        # 插入 data = b""
+        lines.insert(insert_idx, f"{indent}data = b\"\"")
+        
+        return "\n".join(lines)
+    
     cleaned_code = re.sub(
-        r"(def\s+Test(?:Input|OneInput)\s*\([\s\S]*?)(?=\n\w|\Z)",
-        lambda m: remove_print_in_func(m),
+        r"def\s+(TestInput|TestOneInput)\s*\(data\):[\s\S]*?(?=\n\w|\Z)",
+        process_test_function,
         code_no_main,
         flags=re.MULTILINE
     )
@@ -330,7 +363,6 @@ def substitute_one_repo(
         if not os.path.exists(source_file):
             logging.warning(f"Source file not found: {source_file}")
             continue
-
         
         input_path = pjoin(input_dir, f"{target_name}")
         if not os.path.exists(input_path):
@@ -345,21 +377,18 @@ def substitute_one_repo(
                     # 尝试解码行以检查内容
                     decoded = line.decode('utf-8', errors='replace')
                     
-                    # 只处理以 b' 或 b" 开头的行（这些是实际的测试输入）
+                    # 只处理以 b' 或 b" 开头的行
                     if decoded.startswith(("b'", 'b"')):
-                        # 提取字节数据部分
                         if decoded.startswith("b'") and decoded.endswith("'\n"):
-                            byte_data = line[2:-2]  # 移除 b' 和末尾的 '\n
+                            byte_data = line[2:-2]
                         elif decoded.startswith('b"') and decoded.endswith('"\n'):
-                            byte_data = line[2:-2]  # 移除 b" 和末尾的 "\n
+                            byte_data = line[2:-2]
                         else:
                             continue
                             
-                        # 只保留有效长度的输入
                         if 0 < len(byte_data) <= max_len:
                             valid_inputs.append(byte_data)
                 except UnicodeDecodeError:
-                    # 如果无法解码，可能是二进制数据，直接使用
                     if 0 < len(line) <= max_len:
                         valid_inputs.append(line)
 
@@ -369,7 +398,7 @@ def substitute_one_repo(
 
         logging.info(f"Loaded {len(valid_inputs)} inputs for {target_name}")
 
-        # 策略选择输入 - 最多选择 n_fuzz 个输入
+        # 策略选择输入
         if strategy == "shuffle":
             random.shuffle(valid_inputs)
             inputs = valid_inputs[:n_fuzz]
@@ -382,25 +411,13 @@ def substitute_one_repo(
         for idx, fuzz_input in enumerate(inputs, start=1):
             with open(source_file, "r") as f_src:
                 code = f_src.read()
-
-            
-            # 找到 TestInput / TestOneInput 并改成 test_{idx}
-            code = re.sub(r"\bTestOneInput\b", f"test_{idx}", code)
-            code = re.sub(r"\bTestInput\b", f"test_{idx}", code)
             
-            # 插入测试数据 - 确保使用二进制表示
-            def insert_fuzz_input(match):
-                indent = match.group(2)
-                # 使用 repr() 安全表示二进制数据
-                byte_repr = repr(fuzz_input)
-                return f"{match.group(1)}{indent}data = {byte_repr}\n{indent}"
+            # 1. 把函数名 test_ 改成 test_{idx}
+            code = re.sub(r'def\s+test_', f'def test_{idx}', code)
             
-            # 在测试函数中插入数据
-            code = re.sub(
-                rf"(def\s+test_{idx}\s*\([^)]*\)\s*:\s*\n)(\s*)",
-                insert_fuzz_input,
-                code,
-            )
+            # 2. 替换 data = b"" 为输入数据
+            input_repr = repr(fuzz_input)
+            code = code.replace('data = b""', f'data = {input_repr}')
             
             out_path = pjoin(template_dir, f"{target_name}.testgen_{idx}.py")
             with open(out_path, "w") as f_out:

From 73aac90fdffbbb1857bcf4a49fe2df2bb4e11ae6 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Thu, 14 Aug 2025 22:44:21 +0000
Subject: [PATCH 111/134] translation

---
 fuzz/collect_fuzz_python.py | 100 ++++++++----------------------------
 1 file changed, 21 insertions(+), 79 deletions(-)

diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py
index 9cf01cb..893e55e 100644
--- a/fuzz/collect_fuzz_python.py
+++ b/fuzz/collect_fuzz_python.py
@@ -176,7 +176,7 @@ def generate_test_template(target_name: str, repo_path: str):
     with open(src_file, "r", encoding="utf-8") as f:
         original_code = f.read()
 
-    # --- 1. 保留 shebang，但删除许可证注释 ---
+    # --- 1. Keep shebang but remove license comments ---
     shebang = ""
     if original_code.startswith("#!"):
         shebang, original_code = original_code.split("\n", 1)
@@ -188,7 +188,7 @@ def generate_test_template(target_name: str, repo_path: str):
     )
     code_no_license = re.sub(license_pattern, "", original_code, count=1)
 
-    # --- 2. 删除 main 函数和 if __name__ == '__main__' ---
+    # --- 2. Remove main function and if __name__ == '__main__' ---
     code_no_main = re.sub(
         r"\n?def\s+main\([\s\S]*?(?=^if\s+__name__\s*==\s*['\"]__main__['\"]:)",
         "",
@@ -202,24 +202,24 @@ def generate_test_template(target_name: str, repo_path: str):
         flags=re.MULTILINE
     )
 
-    # --- 3. 转换测试函数 ---
+    # --- 3. Convert test functions ---
     def process_test_function(match):
-        # 提取完整的函数定义和函数体
+        # Extract the complete function definition and body
         func_str = match.group(0)
         
-        # 1. 删除print(data)语句
+        # 1. Remove print(data) statements
         func_str = re.sub(r'print\s*\(\s*data\s*\)\s*', '', func_str)
         
-        # 2. 将TestInput/TestOneInput改为test_()
+        # 2. Change TestInput/TestOneInput to test_()
         func_str = re.sub(r'def\s+(TestInput|TestOneInput)\s*\(data\)', 'def test_()', func_str)
         
-        # 3. 在函数体第一行可执行代码前插入data = b""
-        # 查找第一个非空行（忽略空行和注释）
+        # 3. Insert data = b"" before the first executable line in the function body
+        # Find the first non-empty line (ignoring empty lines and comments)
         lines = func_str.splitlines()
         if len(lines) < 2:
             return func_str
             
-        # 找到函数定义行后的第一个非空、非注释行
+        # Find the first non-empty, non-comment line after the function definition
         insert_idx = None
         for i in range(1, len(lines)):
             line = lines[i].strip()
@@ -230,14 +230,14 @@ def process_test_function(match):
         if insert_idx is None:
             return func_str
             
-        # 获取该行的缩进量
+        # Get the indentation level of that line
         indent_match = re.match(r'^(\s*)', lines[insert_idx])
         if not indent_match:
             return func_str
             
         indent = indent_match.group(1)
         
-        # 插入 data = b""
+        # Insert data = b""
         lines.insert(insert_idx, f"{indent}data = b\"\"")
         
         return "\n".join(lines)
@@ -249,7 +249,7 @@ def process_test_function(match):
         flags=re.MULTILINE
     )
 
-    # --- 4. 输出到 tests-gen ---
+    # --- 4. Output to tests-gen directory ---
     template_dir = pjoin(repo_path, "tests-gen")
     os.makedirs(template_dir, exist_ok=True)
 
@@ -284,64 +284,6 @@ def _transform_repo(repo: str):
     with ProcessingPool(jobs) as p:
         return list(p.map(_transform_repo, repos))
 
-# def escape_special_chars(input_data: str) -> str:
-#     """
-#     Escape special characters in input data for Python byte strings
-    
-#     Args:
-#         input_data (str): Raw input data
-        
-#     Returns:
-#         str: Input data with escaped characters
-#     """
-#     # For Python, we can use repr() to safely represent byte strings
-#     # This will handle all special characters and non-ASCII bytes
-#     return repr(input_data.encode('latin-1', 'replace'))
-
-# def substitute_input(template: str, input_data: str, idx: int, target_name: str) -> str:
-#     """
-#     Replace fuzzing input into Python test template
-    
-#     Args:
-#         template (str): Template content
-#         input_data (str): Input data
-#         idx (int): Test index
-#         target_name (str): Target name
-        
-#     Returns:
-#         str: Test code after substitution
-#     """
-#     # Escape special characters for Python
-#     escaped_input = escape_special_chars(input_data)
-    
-#     # Replace input placeholder
-#     new_template = template.replace(
-#         'input_data = b""',
-#         f'input_data = {escaped_input}'
-#     )
-    
-#     # Replace test method name to avoid duplication
-#     return new_template.replace(
-#         f"def test_generated(self):",
-#         f"def test_{idx}(self):"
-#     )
-
-# def has_similar(selected: list[str], x: str, thresh: float = 0.8) -> bool:
-#     """
-#     Check if a string is sufficiently similar to any string in the selected list
-    
-#     Args:
-#         selected (list[str]): List of selected strings
-#         x (str): String to check
-#         thresh (float): Similarity threshold
-        
-#     Returns:
-#         bool: Whether they are similar
-#     """
-#     def similar(a, b):
-#         return SequenceMatcher(None, a, b).ratio()
-#     return any(similar(x, y) > thresh for y in selected)
-
 
 def substitute_one_repo(
     repo: str,
@@ -352,7 +294,7 @@ def substitute_one_repo(
     sim_thresh: float,
 ):
     """
-    从原 fuzz target 复制文件，按 fuzz input 生成多个 testgen 文件。
+    Copy files from fuzz target template and generate multiple testgen files based on fuzz inputs
     """
     input_dir = pjoin(repo, "fuzz_inputs")
     template_dir = pjoin(repo, "tests-gen")
@@ -369,15 +311,15 @@ def substitute_one_repo(
             logging.warning(f"Input file not found: {input_path}")
             continue
 
-        # 读取所有有效的输入数据
+        # Read all valid input data
         valid_inputs = []
         with open(input_path, "rb") as f_input:
             for line in f_input:
                 try:
-                    # 尝试解码行以检查内容
+                    # Attempt to decode the line to check content
                     decoded = line.decode('utf-8', errors='replace')
                     
-                    # 只处理以 b' 或 b" 开头的行
+                    # Only process lines starting with b' or b"
                     if decoded.startswith(("b'", 'b"')):
                         if decoded.startswith("b'") and decoded.endswith("'\n"):
                             byte_data = line[2:-2]
@@ -398,7 +340,7 @@ def substitute_one_repo(
 
         logging.info(f"Loaded {len(valid_inputs)} inputs for {target_name}")
 
-        # 策略选择输入
+        # Strategy for selecting inputs
         if strategy == "shuffle":
             random.shuffle(valid_inputs)
             inputs = valid_inputs[:n_fuzz]
@@ -407,15 +349,15 @@ def substitute_one_repo(
         else:
             inputs = valid_inputs[:n_fuzz]
 
-        # 每个 fuzz input 生成一个单独的文件
+        # Generate a separate file for each fuzz input
         for idx, fuzz_input in enumerate(inputs, start=1):
             with open(source_file, "r") as f_src:
                 code = f_src.read()
             
-            # 1. 把函数名 test_ 改成 test_{idx}
+            # 1. Change function name from test_ to test_{idx}
             code = re.sub(r'def\s+test_', f'def test_{idx}', code)
             
-            # 2. 替换 data = b"" 为输入数据
+            # 2. Replace data = b"" with input data
             input_repr = repr(fuzz_input)
             code = code.replace('data = b""', f'data = {input_repr}')
             
@@ -423,7 +365,7 @@ def substitute_one_repo(
             with open(out_path, "w") as f_out:
                 f_out.write(code)
 
-            # 格式化代码
+            # Format code
             try:
                 subprocess.run(["black", out_path], check=False)
             except FileNotFoundError:

From 4c94beb8656a8ee859eeba3bf056acac2b012cf3 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Thu, 14 Aug 2025 22:51:47 +0000
Subject: [PATCH 112/134] A complete script for building the processes of
 build_image, build_fuzzer, fuzz, transform, and testgen, suitable for Python
 projects.

---
 fuzz/clean_fuzz_dir.py      | 14 +++++++-------
 fuzz/collect_fuzz_python.py |  2 +-
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/fuzz/clean_fuzz_dir.py b/fuzz/clean_fuzz_dir.py
index b4e2b0b..b3bed0a 100644
--- a/fuzz/clean_fuzz_dir.py
+++ b/fuzz/clean_fuzz_dir.py
@@ -28,13 +28,13 @@ def clean_project_dirs(root_dir):
             print(f"🗑️ Removed dir: {tests_gen_path}")
             removed_dirs += 1
 
-        # 删除 .inputs.py 文件
-        for fname in os.listdir(project_path):
-            if fname.endswith(".inputs.py"):
-                file_path = os.path.join(project_path, fname)
-                os.remove(file_path)
-                print(f"🗑️ Removed file: {file_path}")
-                removed_files += 1
+        # # 删除 .inputs.py 文件
+        # for fname in os.listdir(project_path):
+        #     if fname.endswith(".inputs.py"):
+        #         file_path = os.path.join(project_path, fname)
+        #         os.remove(file_path)
+        #         print(f"🗑️ Removed file: {file_path}")
+        #         removed_files += 1
 
     print(f"\n✅ Done. Removed {removed_files} files and {removed_dirs} directories.")
 
diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py
index 893e55e..c8f1678 100644
--- a/fuzz/collect_fuzz_python.py
+++ b/fuzz/collect_fuzz_python.py
@@ -412,7 +412,7 @@ def main(
     repo_id: str = "data/valid_projects3.txt",
     repo_root: str = "fuzz/oss-fuzz/projects/",
     timeout: int = 30,
-    jobs: int = 2,
+    jobs: int = 8,
     pipeline: str = "all",
     n_fuzz: int = 100,
     strategy: str = "shuffle",

From 5a470885064bbf96dd966a0394df3e702b8a1346 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Thu, 14 Aug 2025 23:04:39 +0000
Subject: [PATCH 113/134] delete some imports

---
 fuzz/collect_fuzz_python.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py
index c8f1678..737c5de 100644
--- a/fuzz/collect_fuzz_python.py
+++ b/fuzz/collect_fuzz_python.py
@@ -158,10 +158,6 @@ def fuzz_repos(repos: list[str], jobs: int, timeout: int):
     
     logging.info(f"Starting parallel fuzzing with {jobs} jobs, timeout={timeout}s per target")
     parallel_subprocess(all_targets, jobs, lambda p: fuzz_one_target(p, timeout), on_exit=None)
-import os
-import re
-import logging
-from os.path import join as pjoin
 
 def generate_test_template(target_name: str, repo_path: str):
     """

From a16d664b3981dd74825d18f589cdefe41489d72b Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Fri, 15 Aug 2025 23:46:08 +0000
Subject: [PATCH 114/134] use AST for transform and testgen

---
 fuzz/collect_fuzz_python.py | 310 ++++++++++++++++++++++++------------
 1 file changed, 209 insertions(+), 101 deletions(-)

diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py
index 737c5de..4d9913f 100644
--- a/fuzz/collect_fuzz_python.py
+++ b/fuzz/collect_fuzz_python.py
@@ -3,8 +3,10 @@
 usage: PYTHONPATH=. python3 fuzz/collect_fuzz_python.py --pipeline all
 """
 from pathlib import Path
+import ast
+import astunparse 
 import logging
-from typing import Optional, List, Tuple
+from typing import Optional
 import fire
 import os
 from UniTSyn.frontend.util import wrap_repo, parallel_subprocess
@@ -159,10 +161,28 @@ def fuzz_repos(repos: list[str], jobs: int, timeout: int):
     logging.info(f"Starting parallel fuzzing with {jobs} jobs, timeout={timeout}s per target")
     parallel_subprocess(all_targets, jobs, lambda p: fuzz_one_target(p, timeout), on_exit=None)
 
+def transform_repos(repos: list[str], jobs: int):
+    """
+    Generate test templates for all targets
+    
+    Args:
+        repos (list[str]): List of repository paths
+        jobs (int): Number of parallel tasks
+    """
+    logging.info("Generating test templates")
+    
+    def _transform_repo(repo: str):
+        project_name = os.path.basename(repo)
+        oss_fuzz_dir = Path(repo).parent.parent
+        targets = discover_targets(project_name, oss_fuzz_dir)
+        return [generate_test_template(t, repo) for t in targets]
+    
+    with ProcessingPool(jobs) as p:
+        return list(p.map(_transform_repo, repos))
+
 def generate_test_template(target_name: str, repo_path: str):
     """
-    Generate Python test template for a single target by stripping license header,
-    main() block, and converting TestInput/TestOneInput to test_ with data=b"".
+    Generate Python test template using AST for more precise code transformations
     """
     src_file = pjoin(repo_path, target_name + ".py")
     if not os.path.exists(src_file):
@@ -184,68 +204,22 @@ def generate_test_template(target_name: str, repo_path: str):
     )
     code_no_license = re.sub(license_pattern, "", original_code, count=1)
 
-    # --- 2. Remove main function and if __name__ == '__main__' ---
-    code_no_main = re.sub(
-        r"\n?def\s+main\([\s\S]*?(?=^if\s+__name__\s*==\s*['\"]__main__['\"]:)",
-        "",
-        code_no_license,
-        flags=re.MULTILINE
-    )
-    code_no_main = re.sub(
-        r"\n?if\s+__name__\s*==\s*['\"]__main__['\"]:\s*main\(\s*\)\s*",
-        "",
-        code_no_main,
-        flags=re.MULTILINE
-    )
+    # --- 2. Parse code to AST ---
+    try:
+        tree = ast.parse(code_no_license)
+    except SyntaxError as e:
+        logging.error(f"Syntax error in {src_file}: {e}")
+        return None
 
-    # --- 3. Convert test functions ---
-    def process_test_function(match):
-        # Extract the complete function definition and body
-        func_str = match.group(0)
-        
-        # 1. Remove print(data) statements
-        func_str = re.sub(r'print\s*\(\s*data\s*\)\s*', '', func_str)
-        
-        # 2. Change TestInput/TestOneInput to test_()
-        func_str = re.sub(r'def\s+(TestInput|TestOneInput)\s*\(data\)', 'def test_()', func_str)
-        
-        # 3. Insert data = b"" before the first executable line in the function body
-        # Find the first non-empty line (ignoring empty lines and comments)
-        lines = func_str.splitlines()
-        if len(lines) < 2:
-            return func_str
-            
-        # Find the first non-empty, non-comment line after the function definition
-        insert_idx = None
-        for i in range(1, len(lines)):
-            line = lines[i].strip()
-            if line and not line.startswith('#'):
-                insert_idx = i
-                break
-        
-        if insert_idx is None:
-            return func_str
-            
-        # Get the indentation level of that line
-        indent_match = re.match(r'^(\s*)', lines[insert_idx])
-        if not indent_match:
-            return func_str
-            
-        indent = indent_match.group(1)
-        
-        # Insert data = b""
-        lines.insert(insert_idx, f"{indent}data = b\"\"")
-        
-        return "\n".join(lines)
-    
-    cleaned_code = re.sub(
-        r"def\s+(TestInput|TestOneInput)\s*\(data\):[\s\S]*?(?=\n\w|\Z)",
-        process_test_function,
-        code_no_main,
-        flags=re.MULTILINE
-    )
+    # --- 3. AST transformation ---
+    transformer = TestFunctionTransformer()
+    new_tree = transformer.visit(tree)
+    ast.fix_missing_locations(new_tree)
+
+    # --- 4. Generate cleaned code ---
+    cleaned_code = astunparse.unparse(new_tree)
 
-    # --- 4. Output to tests-gen directory ---
+    # --- 5. Output to tests-gen directory ---
     template_dir = pjoin(repo_path, "tests-gen")
     os.makedirs(template_dir, exist_ok=True)
 
@@ -260,26 +234,144 @@ def process_test_function(match):
 
     logging.info(f"Generated cleaned template: {template_path}")
     return template_path
-
-def transform_repos(repos: list[str], jobs: int):
-    """
-    Generate test templates for all targets
     
-    Args:
-        repos (list[str]): List of repository paths
-        jobs (int): Number of parallel tasks
-    """
-    logging.info("Generating test templates")
+class TestFunctionTransformer(ast.NodeTransformer):
+    """AST transformer for test function conversion"""
     
-    def _transform_repo(repo: str):
-        project_name = os.path.basename(repo)
-        oss_fuzz_dir = Path(repo).parent.parent
-        targets = discover_targets(project_name, oss_fuzz_dir)
-        return [generate_test_template(t, repo) for t in targets]
+    def visit_FunctionDef(self, node):
+        # 首先处理 main 函数（移除）
+        if node.name == "main":
+            return None
+        
+        # 处理 TestInput/TestOneInput 函数
+        if node.name in ["TestInput", "TestOneInput"]:
+            # a. 记录参数名称（假设只有一个参数）
+            param_name = None
+            if node.args.args:
+                param_name = node.args.args[0].arg
+            
+            # b. 将函数名改为 test_
+            node.name = "test_"
+            
+            # c. 移除参数（将参数列表设为空）
+            node.args = ast.arguments(
+                posonlyargs=[],
+                args=[],
+                vararg=None,
+                kwonlyargs=[],
+                kw_defaults=[],
+                kwarg=None,
+                defaults=[]
+            )
+            
+            # d. 在函数体开头插入 原参数名 = b""
+            if param_name:
+                self.add_param_assignment(node, param_name)
+            
+            # f. 删除所有 print(原参数名) 的语句
+            if param_name:
+                self.remove_print_param(node, param_name)
+        
+        # 确保继续遍历子节点
+        self.generic_visit(node)
+        return node
     
-    with ProcessingPool(jobs) as p:
-        return list(p.map(_transform_repo, repos))
-
+    def add_param_assignment(self, node, param_name):
+        """Add param_name = b"" at the beginning of the function body"""
+        # 创建赋值节点
+        assign_node = ast.Assign(
+            targets=[ast.Name(id=param_name, ctx=ast.Store())],
+            value=ast.Constant(value=b"")
+        )
+        
+        # 如果有文档字符串，插入在文档字符串之后
+        if node.body and isinstance(node.body[0], ast.Expr) and isinstance(node.body[0].value, ast.Str):
+            node.body.insert(1, assign_node)
+        else:
+            node.body.insert(0, assign_node)
+    
+    def remove_print_param(self, node, param_name):
+        """Remove print statements for the specific parameter"""
+        new_body = []
+        for stmt in node.body:
+            # 跳过 print(param_name) 调用
+            if (isinstance(stmt, ast.Expr) and 
+                isinstance(stmt.value, ast.Call) and
+                isinstance(stmt.value.func, ast.Name) and
+                stmt.value.func.id == "print" and
+                any(isinstance(arg, ast.Name) and arg.id == param_name 
+                    for arg in stmt.value.args)):
+                continue
+            new_body.append(stmt)
+        node.body = new_body
+    
+    def visit_If(self, node):
+        """Remove if __name__ == '__main__' blocks"""
+        # 检查是否是主函数保护
+        if (isinstance(node.test, ast.Compare) and
+            isinstance(node.test.left, ast.Name) and
+            node.test.left.id == "__name__" and
+            isinstance(node.test.ops[0], ast.Eq) and
+            isinstance(node.test.comparators[0], ast.Constant) and
+            node.test.comparators[0].value == "__main__"):
+            
+            # 移除整个 if 块
+            return None
+        
+        # 确保继续遍历子节点
+        self.generic_visit(node)
+        return node
+class TestGenTransformer(ast.NodeTransformer):
+    """AST transformer for generating test cases from fuzzing inputs"""
+    
+    def __init__(self, idx: int, fuzz_input: bytes):
+        self.idx = idx
+        self.fuzz_input = fuzz_input
+        self.found_test_function = False
+    
+    def visit_FunctionDef(self, node):
+        # 只处理名为 test_ 的函数
+        if node.name == "test_":
+            self.found_test_function = True
+            
+            # 1. 将函数名改为 test_{idx}
+            node.name = f"test_{self.idx}"
+            
+            # 2. 找到并替换 data = b"" 赋值语句
+            self.replace_data_assignment(node)
+        
+        return node
+    
+    def replace_data_assignment(self, node):
+        """Replace data assignment with fuzz input"""
+        for i, stmt in enumerate(node.body):
+            # 查找赋值语句
+            if isinstance(stmt, ast.Assign):
+                # 检查是否是 data = b"" 格式的赋值
+                if (len(stmt.targets) == 1 and 
+                    isinstance(stmt.targets[0], ast.Name) and 
+                    isinstance(stmt.value, ast.Constant) and 
+                    stmt.value.value == b""):
+                    
+                    # 替换为新的输入数据
+                    node.body[i] = ast.Assign(
+                        targets=[stmt.targets[0]],
+                        value=ast.Constant(value=self.fuzz_input)
+                    )
+                    return
+                
+                # 检查是否是 data = b'' 格式的赋值
+                if (len(stmt.targets) == 1 and 
+                    isinstance(stmt.targets[0], ast.Name) and 
+                    isinstance(stmt.value, ast.Constant) and 
+                    stmt.value.value == b''):
+                    
+                    # 替换为新的输入数据
+                    node.body[i] = ast.Assign(
+                        targets=[stmt.targets[0]],
+                        value=ast.Constant(value=self.fuzz_input)
+                    )
+                    return
 
 def substitute_one_repo(
     repo: str,
@@ -291,6 +383,7 @@ def substitute_one_repo(
 ):
     """
     Copy files from fuzz target template and generate multiple testgen files based on fuzz inputs
+    using AST transformations
     """
     input_dir = pjoin(repo, "fuzz_inputs")
     template_dir = pjoin(repo, "tests-gen")
@@ -307,15 +400,15 @@ def substitute_one_repo(
             logging.warning(f"Input file not found: {input_path}")
             continue
 
-        # Read all valid input data
+        # 读取所有有效的输入数据
         valid_inputs = []
         with open(input_path, "rb") as f_input:
             for line in f_input:
                 try:
-                    # Attempt to decode the line to check content
+                    # 尝试解码行以检查内容
                     decoded = line.decode('utf-8', errors='replace')
                     
-                    # Only process lines starting with b' or b"
+                    # 只处理以 b' 或 b" 开头的行
                     if decoded.startswith(("b'", 'b"')):
                         if decoded.startswith("b'") and decoded.endswith("'\n"):
                             byte_data = line[2:-2]
@@ -336,7 +429,7 @@ def substitute_one_repo(
 
         logging.info(f"Loaded {len(valid_inputs)} inputs for {target_name}")
 
-        # Strategy for selecting inputs
+        # 策略选择输入
         if strategy == "shuffle":
             random.shuffle(valid_inputs)
             inputs = valid_inputs[:n_fuzz]
@@ -345,27 +438,42 @@ def substitute_one_repo(
         else:
             inputs = valid_inputs[:n_fuzz]
 
-        # Generate a separate file for each fuzz input
+        # 每个 fuzz input 生成一个单独的文件（使用 AST）
         for idx, fuzz_input in enumerate(inputs, start=1):
             with open(source_file, "r") as f_src:
                 code = f_src.read()
             
-            # 1. Change function name from test_ to test_{idx}
-            code = re.sub(r'def\s+test_', f'def test_{idx}', code)
-            
-            # 2. Replace data = b"" with input data
-            input_repr = repr(fuzz_input)
-            code = code.replace('data = b""', f'data = {input_repr}')
-            
-            out_path = pjoin(template_dir, f"{target_name}.testgen_{idx}.py")
-            with open(out_path, "w") as f_out:
-                f_out.write(code)
-
-            # Format code
             try:
-                subprocess.run(["black", out_path], check=False)
-            except FileNotFoundError:
-                logging.warning("Black formatter not found, skipping formatting")
+                # 解析为 AST
+                tree = ast.parse(code)
+                
+                # 应用转换器
+                transformer = TestGenTransformer(idx, fuzz_input)
+                new_tree = transformer.visit(tree)
+                ast.fix_missing_locations(new_tree)
+                
+                # 确保找到并处理了测试函数
+                if not transformer.found_test_function:
+                    logging.warning(f"No test_ function found in {source_file}")
+                    continue
+                
+                # 生成新代码
+                new_code = astunparse.unparse(new_tree)
+                
+                out_path = pjoin(template_dir, f"{target_name}.testgen_{idx}.py")
+                with open(out_path, "w") as f_out:
+                    f_out.write(new_code)
+                
+                # 格式化代码
+                try:
+                    subprocess.run(["black", out_path], check=False)
+                except FileNotFoundError:
+                    logging.warning("Black formatter not found, skipping formatting")
+            
+            except SyntaxError as e:
+                logging.error(f"Syntax error when processing {source_file}: {e}")
+            except Exception as e:
+                logging.error(f"Error generating test case for {target_name}: {e}")
 def testgen_repos(
     repos: list[str],
     jobs: int,
@@ -405,7 +513,7 @@ def testgen_repos(
         ))
 
 def main(
-    repo_id: str = "data/valid_projects3.txt",
+    repo_id: str = "data/valid_projects.txt",
     repo_root: str = "fuzz/oss-fuzz/projects/",
     timeout: int = 30,
     jobs: int = 8,

From 730e45889425a45efc35f5b1785515bfb0c463f2 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Fri, 15 Aug 2025 23:46:19 +0000
Subject: [PATCH 115/134] use AST

---
 fuzz/modify_fuzz_files.py | 67 +++++++++++++++++++++------------------
 1 file changed, 37 insertions(+), 30 deletions(-)

diff --git a/fuzz/modify_fuzz_files.py b/fuzz/modify_fuzz_files.py
index de8333f..8e4eca4 100644
--- a/fuzz/modify_fuzz_files.py
+++ b/fuzz/modify_fuzz_files.py
@@ -1,34 +1,41 @@
 import os
-import re
+import ast
 
 def add_print_to_testoneinput(file_path):
     with open(file_path, 'r') as f:
         content = f.read()
 
-    # 正则表达式匹配TestOneInput或TestInput函数定义及其函数体
-    pattern = r'(\bdef\s+(TestOneInput|TestInput)\(data\):\s*\n)((?:[ \t]+.*\n|\s*\n)*)'
-    matches = re.finditer(pattern, content, re.MULTILINE)
+    # 解析 AST
+    tree = ast.parse(content)
 
-    new_content = content
-    for match in reversed(list(matches)):
-        function_def = match.group(1)
-        function_body = match.group(3)
-        
-        # 在函数体开头添加print(data)语句
-        new_function_body = re.sub(
-            r'^([ \t]*)(.*\n)', 
-            r'\g<1>\2\g<1>print(data)\n', 
-            function_body, 
-            count=1
-        )
-        
-        # 只有在函数体非空且未添加过print时才替换
-        if new_function_body != function_body:
-            new_content = (
-                new_content[:match.start(3)] + 
-                new_function_body + 
-                new_content[match.end(3):]
-            )
+    class InsertPrintTransformer(ast.NodeTransformer):
+        def visit_FunctionDef(self, node):
+            if node.name in ("TestOneInput", "TestInput") and node.args.args:
+                first_arg_name = node.args.args[0].arg
+                # 创建 print(参数名) 语句
+                print_stmt = ast.Expr(
+                    value=ast.Call(
+                        func=ast.Name(id='print', ctx=ast.Load()),
+                        args=[ast.Name(id=first_arg_name, ctx=ast.Load())],
+                        keywords=[]
+                    )
+                )
+                # 确保没有重复插入
+                if not (
+                    isinstance(node.body[0], ast.Expr)
+                    and isinstance(node.body[0].value, ast.Call)
+                    and getattr(node.body[0].value.func, "id", None) == "print"
+                ):
+                    node.body.insert(0, print_stmt)
+            return node
+
+    transformer = InsertPrintTransformer()
+    new_tree = transformer.visit(tree)
+    ast.fix_missing_locations(new_tree)
+
+    # 转回代码
+    import astor
+    new_content = astor.to_source(new_tree)
 
     return new_content
 
@@ -41,7 +48,7 @@ def main():
 
     for project in projects:
         project_dir = os.path.join(projects_path, project)
-        
+
         if not os.path.isdir(project_dir):
             continue
 
@@ -49,18 +56,18 @@ def main():
             for file in files:
                 if file.startswith('fuzz_') and file.endswith('.py'):
                     file_path = os.path.join(root, file)
-                    
+
                     try:
                         new_content = add_print_to_testoneinput(file_path)
-                        
-                        # 保存修改后的文件（添加_print后缀）
+
+                        # 保存修改后的文件
                         new_file_path = file_path.rsplit('.', 1)[0] + '_print1.py'
                         with open(new_file_path, 'w') as f:
                             f.write(new_content)
                         print(f"Processed: {file_path} -> {new_file_path}")
-                            
+
                     except Exception as e:
                         print(f"Error processing {file_path}: {str(e)}")
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()

From 40e380718ac2378f186b1010ab2cd8926255b24a Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Sat, 16 Aug 2025 00:07:48 +0000
Subject: [PATCH 116/134] Set up command line arguments

---
 fuzz/clean_fuzz_dir.py | 37 +++++++++++++++++++++++--------------
 1 file changed, 23 insertions(+), 14 deletions(-)

diff --git a/fuzz/clean_fuzz_dir.py b/fuzz/clean_fuzz_dir.py
index b3bed0a..f8d69fe 100644
--- a/fuzz/clean_fuzz_dir.py
+++ b/fuzz/clean_fuzz_dir.py
@@ -1,42 +1,51 @@
 #!/usr/bin/env python3
 import os
 import shutil
+import argparse
 
-ROOT_DIR = "/home/jiayiguo/FuzzAug/fuzz/oss-fuzz/projects"
+# Default root directory
+DEFAULT_ROOT_DIR = "/home/jiayiguo/FuzzAug/fuzz/oss-fuzz/projects"
 
 def clean_project_dirs(root_dir):
     removed_files = 0
     removed_dirs = 0
 
-    # 遍历一级项目目录
+    # Walk through the root directory
     for project in os.listdir(root_dir):
         project_path = os.path.join(root_dir, project)
         if not os.path.isdir(project_path):
             continue
 
-        # 删除 fuzz_inputs 文件夹
+        # Delete fuzz_inputs directories
         fuzz_inputs_path = os.path.join(project_path, "fuzz_inputs")
         if os.path.isdir(fuzz_inputs_path):
             shutil.rmtree(fuzz_inputs_path)
             print(f"🗑️ Removed dir: {fuzz_inputs_path}")
             removed_dirs += 1
 
-        # 删除 tests-gen 文件夹
+        # Delete tests-gen directories
         tests_gen_path = os.path.join(project_path, "tests-gen")
         if os.path.isdir(tests_gen_path):
             shutil.rmtree(tests_gen_path)
             print(f"🗑️ Removed dir: {tests_gen_path}")
             removed_dirs += 1
 
-        # # 删除 .inputs.py 文件
-        # for fname in os.listdir(project_path):
-        #     if fname.endswith(".inputs.py"):
-        #         file_path = os.path.join(project_path, fname)
-        #         os.remove(file_path)
-        #         print(f"🗑️ Removed file: {file_path}")
-        #         removed_files += 1
-
-    print(f"\n✅ Done. Removed {removed_files} files and {removed_dirs} directories.")
+    print(f"\n✅ Done. Removed {removed_dirs} directories in total.")
 
 if __name__ == "__main__":
-    clean_project_dirs(ROOT_DIR)
+    # Set up command line arguments
+    parser = argparse.ArgumentParser(
+        description='Clean project directories by removing fuzz_inputs and tests-gen folders',
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter
+    )
+    parser.add_argument('--root_dir', default=DEFAULT_ROOT_DIR,
+                        help='Root directory containing project folders')
+    args = parser.parse_args()
+
+    # Validate the root directory exists
+    if not os.path.isdir(args.root_dir):
+        print(f"❌ Error: Specified root directory does not exist: {args.root_dir}")
+        exit(1)
+        
+    print(f"Cleaning projects in: {args.root_dir}")
+    clean_project_dirs(args.root_dir)
\ No newline at end of file

From e3e5546582ee064c947eaba50f0455fbf8481c21 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Sat, 16 Aug 2025 00:29:13 +0000
Subject: [PATCH 117/134] Use fire for the clean_fuzz_dir.py CLI

---
 fuzz/clean_fuzz_dir.py | 43 +++++++++++++++++++-----------------------
 1 file changed, 19 insertions(+), 24 deletions(-)

diff --git a/fuzz/clean_fuzz_dir.py b/fuzz/clean_fuzz_dir.py
index f8d69fe..9a9bdf8 100644
--- a/fuzz/clean_fuzz_dir.py
+++ b/fuzz/clean_fuzz_dir.py
@@ -1,51 +1,46 @@
 #!/usr/bin/env python3
 import os
 import shutil
-import argparse
+import fire
 
-# Default root directory
-DEFAULT_ROOT_DIR = "/home/jiayiguo/FuzzAug/fuzz/oss-fuzz/projects"
+def clean_project_dirs(root_dir="/home/jiayiguo/FuzzAug/fuzz/oss-fuzz/projects"):
+    """
+    清理 oss-fuzz 项目目录下的 fuzz_inputs 和 tests-gen 文件夹
 
-def clean_project_dirs(root_dir):
+    Args:
+        root_dir (str): 项目的根目录路径
+    """
     removed_files = 0
     removed_dirs = 0
 
-    # Walk through the root directory
     for project in os.listdir(root_dir):
         project_path = os.path.join(root_dir, project)
         if not os.path.isdir(project_path):
             continue
 
-        # Delete fuzz_inputs directories
+        # 删除 fuzz_inputs 文件夹
         fuzz_inputs_path = os.path.join(project_path, "fuzz_inputs")
         if os.path.isdir(fuzz_inputs_path):
             shutil.rmtree(fuzz_inputs_path)
             print(f"🗑️ Removed dir: {fuzz_inputs_path}")
             removed_dirs += 1
 
-        # Delete tests-gen directories
+        # 删除 tests-gen 文件夹
         tests_gen_path = os.path.join(project_path, "tests-gen")
         if os.path.isdir(tests_gen_path):
             shutil.rmtree(tests_gen_path)
             print(f"🗑️ Removed dir: {tests_gen_path}")
             removed_dirs += 1
 
-    print(f"\n✅ Done. Removed {removed_dirs} directories in total.")
+        # 如果需要删除 .inputs.py 文件，取消注释以下代码
+        # for fname in os.listdir(project_path):
+        #     if fname.endswith(".inputs.py"):
+        #         file_path = os.path.join(project_path, fname)
+        #         os.remove(file_path)
+        #         print(f"🗑️ Removed file: {file_path}")
+        #         removed_files += 1
+
+    print(f"\n✅ Done. Removed {removed_files} files and {removed_dirs} directories.")
 
 if __name__ == "__main__":
-    # Set up command line arguments
-    parser = argparse.ArgumentParser(
-        description='Clean project directories by removing fuzz_inputs and tests-gen folders',
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter
-    )
-    parser.add_argument('--root_dir', default=DEFAULT_ROOT_DIR,
-                        help='Root directory containing project folders')
-    args = parser.parse_args()
-
-    # Validate the root directory exists
-    if not os.path.isdir(args.root_dir):
-        print(f"❌ Error: Specified root directory does not exist: {args.root_dir}")
-        exit(1)
-        
-    print(f"Cleaning projects in: {args.root_dir}")
-    clean_project_dirs(args.root_dir)
\ No newline at end of file
+    fire.Fire(clean_project_dirs)

From ab6813d080cda75e7bccbdb7df5b2cc4bd829d43 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Sat, 16 Aug 2025 00:30:36 +0000
Subject: [PATCH 118/134] Use fire for the modify_fuzz_files.py CLI

---
 fuzz/modify_fuzz_files.py | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/fuzz/modify_fuzz_files.py b/fuzz/modify_fuzz_files.py
index 8e4eca4..1ed8b48 100644
--- a/fuzz/modify_fuzz_files.py
+++ b/fuzz/modify_fuzz_files.py
@@ -1,5 +1,7 @@
+#!/usr/bin/env python3
 import os
 import ast
+import fire
 
 def add_print_to_testoneinput(file_path):
     with open(file_path, 'r') as f:
@@ -36,13 +38,19 @@ def visit_FunctionDef(self, node):
     # 转回代码
     import astor
     new_content = astor.to_source(new_tree)
-
     return new_content
 
-def main():
-    projects_path = "/home/jiayiguo/FuzzAug/fuzz/oss-fuzz/projects"
-    valid_projects_file = "data/valid_projects.txt"
+def main(
+    projects_path="/home/jiayiguo/FuzzAug/fuzz/oss-fuzz/projects",
+    valid_projects_file="data/valid_projects.txt"
+):
+    """
+    给 fuzz target 的 TestOneInput / TestInput 函数开头插入 print(参数名)
 
+    Args:
+        projects_path (str): OSS-Fuzz 项目的根目录
+        valid_projects_file (str): 包含有效项目名的文件路径
+    """
     with open(valid_projects_file, 'r') as f:
         projects = [line.strip() for line in f if line.strip()]
 
@@ -70,4 +78,4 @@ def main():
                         print(f"Error processing {file_path}: {str(e)}")
 
 if __name__ == "__main__":
-    main()
+    fire.Fire(main)

From f2c7485d241d81e8e8c2b669ec6abe93ded5a3e0 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Sun, 17 Aug 2025 02:16:05 +0000
Subject: [PATCH 119/134] Format collect_fuzz_python.py with black

---
 fuzz/collect_fuzz_python.py | 240 +++++++++++++++++++++---------------
 1 file changed, 144 insertions(+), 96 deletions(-)

diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py
index 4d9913f..bb958c9 100644
--- a/fuzz/collect_fuzz_python.py
+++ b/fuzz/collect_fuzz_python.py
@@ -2,9 +2,10 @@
 Script for Python project fuzzing and test template conversion
 usage: PYTHONPATH=. python3 fuzz/collect_fuzz_python.py --pipeline all
 """
+
 from pathlib import Path
 import ast
-import astunparse 
+import astunparse
 import logging
 from typing import Optional
 import fire
@@ -20,10 +21,11 @@
 from datetime import datetime
 import re
 
+
 def build_image(repos: list[str], jobs: int):
     """
     Build Docker images for OSS-Fuzz projects corresponding to each repository
-    
+
     Args:
         repos (list[str]): List of repository paths
         jobs (int): Number of parallel tasks
@@ -48,10 +50,11 @@ def _build_cmd(path: str):
 
     _ = parallel_subprocess(repos, jobs, _build_cmd, on_exit=None)
 
+
 def build_fuzzer(repos: list[str], jobs: int):
     """
     Build fuzzers in parallel for successfully built projects
-    
+
     Args:
         repos (list[str]): List of repository paths
         jobs (int): Number of parallel tasks
@@ -65,7 +68,9 @@ def _build_cmd(path: str):
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
         log_file = os.path.join(log_dir, f"{project_name}_fuzzer_{timestamp}.log")
 
-        logging.info(f"Start building fuzzers for {project_name}, logging to {log_file}")
+        logging.info(
+            f"Start building fuzzers for {project_name}, logging to {log_file}"
+        )
         return subprocess.Popen(
             f"python3 infra/helper.py build_fuzzers --sanitizer address {project_name}",
             cwd=os.path.abspath(os.path.join(path, "../../")),
@@ -76,29 +81,38 @@ def _build_cmd(path: str):
 
     _ = parallel_subprocess(repos, jobs, _build_cmd, on_exit=None)
 
+
 def discover_targets(project_name: str, oss_fuzz_dir: Path) -> list[str]:
     """
     Discover fuzzing targets
     """
     out_dir = oss_fuzz_dir / "build" / "out" / project_name
     targets: list[str] = []
-    
+
     logging.debug(f"Searching fuzz targets in: {out_dir}")
-    
+
     if not out_dir.is_dir():
         logging.warning(f"Build output directory for {project_name} does not exist")
         return targets
 
     try:
         for f in out_dir.iterdir():
-            if (f.is_file() and f.name.startswith("fuzz_") and 
-                '.' not in f.name and f.name.endswith("print1") and 
-                os.access(f, os.X_OK)):
+            if (
+                f.is_file()
+                and f.name.startswith("fuzz_")
+                and "." not in f.name
+                and f.name.endswith("print1")
+                and os.access(f, os.X_OK)
+            ):
                 targets.append(f.name)
-        logging.info(f"Discovered {len(targets)} fuzz targets in {project_name}: {targets}")
+        logging.info(
+            f"Discovered {len(targets)} fuzz targets in {project_name}: {targets}"
+        )
     except Exception as e:
-        logging.error(f"Error discovering targets for {project_name}: {e}", exc_info=True)
-    
+        logging.error(
+            f"Error discovering targets for {project_name}: {e}", exc_info=True
+        )
+
     return targets
 
 
@@ -109,11 +123,13 @@ def fuzz_one_target(target: tuple[str, str], timeout: int):
     repo_path, target_name = target
     project_name = os.path.basename(repo_path)
     oss_fuzz_root = os.path.dirname(os.path.dirname(repo_path))
-    
+
     input_file_path = pjoin(repo_path, "fuzz_inputs", target_name)
     os.makedirs(os.path.dirname(input_file_path), exist_ok=True)
 
-    logging.info(f"Starting fuzzing: project={project_name}, target={target_name}, timeout={timeout}s")
+    logging.info(
+        f"Starting fuzzing: project={project_name}, target={target_name}, timeout={timeout}s"
+    )
     logging.debug(f"Fuzz output will be saved to: {input_file_path}")
 
     try:
@@ -122,14 +138,17 @@ def fuzz_one_target(target: tuple[str, str], timeout: int):
                 [
                     "bash",
                     "-c",
-                    f"python3 infra/helper.py run_fuzzer {project_name} {target_name} -- -max_total_time={timeout}"
+                    f"python3 infra/helper.py run_fuzzer {project_name} {target_name} -- -max_total_time={timeout}",
                 ],
                 cwd=oss_fuzz_root,
                 stdout=input_file,
                 stderr=subprocess.DEVNULL,
             )
     except Exception as e:
-        logging.error(f"Error starting fuzzer for {project_name}/{target_name}: {e}", exc_info=True)
+        logging.error(
+            f"Error starting fuzzer for {project_name}/{target_name}: {e}",
+            exc_info=True,
+        )
         return None
 
 
@@ -145,41 +164,47 @@ def fuzz_repos(repos: list[str], jobs: int, timeout: int):
         oss_fuzz_dir = Path(repo).parent.parent
         targets = discover_targets(project_name, oss_fuzz_dir)
         targets_list.append(targets)
-    
+
     target_map = {repo: targets for repo, targets in zip(repos, targets_list)}
     all_targets: list[tuple[str, str]] = [
         (k, v) for k, vs in target_map.items() for v in vs
     ]
-    
+
     logging.info(f"Total fuzz targets discovered: {len(all_targets)}")
     for repo, targets in target_map.items():
         logging.info(f"{os.path.basename(repo)}: {len(targets)} targets")
 
     for repo in repos:
         os.makedirs(pjoin(repo, "fuzz_inputs"), exist_ok=True)
-    
-    logging.info(f"Starting parallel fuzzing with {jobs} jobs, timeout={timeout}s per target")
-    parallel_subprocess(all_targets, jobs, lambda p: fuzz_one_target(p, timeout), on_exit=None)
+
+    logging.info(
+        f"Starting parallel fuzzing with {jobs} jobs, timeout={timeout}s per target"
+    )
+    parallel_subprocess(
+        all_targets, jobs, lambda p: fuzz_one_target(p, timeout), on_exit=None
+    )
+
 
 def transform_repos(repos: list[str], jobs: int):
     """
     Generate test templates for all targets
-    
+
     Args:
         repos (list[str]): List of repository paths
         jobs (int): Number of parallel tasks
     """
     logging.info("Generating test templates")
-    
+
     def _transform_repo(repo: str):
         project_name = os.path.basename(repo)
         oss_fuzz_dir = Path(repo).parent.parent
         targets = discover_targets(project_name, oss_fuzz_dir)
         return [generate_test_template(t, repo) for t in targets]
-    
+
     with ProcessingPool(jobs) as p:
         return list(p.map(_transform_repo, repos))
 
+
 def generate_test_template(target_name: str, repo_path: str):
     """
     Generate Python test template using AST for more precise code transformations
@@ -200,7 +225,7 @@ def generate_test_template(target_name: str, repo_path: str):
 
     license_pattern = re.compile(
         r"^(?:\s*#.*\n)*\s*#.*limitations\s+under\s+the\s+license.*\n",
-        re.IGNORECASE | re.MULTILINE
+        re.IGNORECASE | re.MULTILINE,
     )
     code_no_license = re.sub(license_pattern, "", original_code, count=1)
 
@@ -234,25 +259,26 @@ def generate_test_template(target_name: str, repo_path: str):
 
     logging.info(f"Generated cleaned template: {template_path}")
     return template_path
-    
+
+
 class TestFunctionTransformer(ast.NodeTransformer):
     """AST transformer for test function conversion"""
-    
+
     def visit_FunctionDef(self, node):
         # 首先处理 main 函数（移除）
         if node.name == "main":
             return None
-        
+
         # 处理 TestInput/TestOneInput 函数
         if node.name in ["TestInput", "TestOneInput"]:
             # a. 记录参数名称（假设只有一个参数）
             param_name = None
             if node.args.args:
                 param_name = node.args.args[0].arg
-            
+
             # b. 将函数名改为 test_
             node.name = "test_"
-            
+
             # c. 移除参数（将参数列表设为空）
             node.args = ast.arguments(
                 posonlyargs=[],
@@ -261,118 +287,135 @@ def visit_FunctionDef(self, node):
                 kwonlyargs=[],
                 kw_defaults=[],
                 kwarg=None,
-                defaults=[]
+                defaults=[],
             )
-            
+
             # d. 在函数体开头插入 原参数名 = b""
             if param_name:
                 self.add_param_assignment(node, param_name)
-            
+
             # f. 删除所有 print(原参数名) 的语句
             if param_name:
                 self.remove_print_param(node, param_name)
-        
+
         # 确保继续遍历子节点
         self.generic_visit(node)
         return node
-    
+
     def add_param_assignment(self, node, param_name):
         """Add param_name = b"" at the beginning of the function body"""
         # 创建赋值节点
         assign_node = ast.Assign(
             targets=[ast.Name(id=param_name, ctx=ast.Store())],
-            value=ast.Constant(value=b"")
+            value=ast.Constant(value=b""),
         )
-        
+
         # 如果有文档字符串，插入在文档字符串之后
-        if node.body and isinstance(node.body[0], ast.Expr) and isinstance(node.body[0].value, ast.Str):
+        if (
+            node.body
+            and isinstance(node.body[0], ast.Expr)
+            and isinstance(node.body[0].value, ast.Str)
+        ):
             node.body.insert(1, assign_node)
         else:
             node.body.insert(0, assign_node)
-    
+
     def remove_print_param(self, node, param_name):
         """Remove print statements for the specific parameter"""
         new_body = []
         for stmt in node.body:
             # 跳过 print(param_name) 调用
-            if (isinstance(stmt, ast.Expr) and 
-                isinstance(stmt.value, ast.Call) and
-                isinstance(stmt.value.func, ast.Name) and
-                stmt.value.func.id == "print" and
-                any(isinstance(arg, ast.Name) and arg.id == param_name 
-                    for arg in stmt.value.args)):
+            if (
+                isinstance(stmt, ast.Expr)
+                and isinstance(stmt.value, ast.Call)
+                and isinstance(stmt.value.func, ast.Name)
+                and stmt.value.func.id == "print"
+                and any(
+                    isinstance(arg, ast.Name) and arg.id == param_name
+                    for arg in stmt.value.args
+                )
+            ):
                 continue
             new_body.append(stmt)
         node.body = new_body
-    
+
     def visit_If(self, node):
         """Remove if __name__ == '__main__' blocks"""
         # 检查是否是主函数保护
-        if (isinstance(node.test, ast.Compare) and
-            isinstance(node.test.left, ast.Name) and
-            node.test.left.id == "__name__" and
-            isinstance(node.test.ops[0], ast.Eq) and
-            isinstance(node.test.comparators[0], ast.Constant) and
-            node.test.comparators[0].value == "__main__"):
-            
+        if (
+            isinstance(node.test, ast.Compare)
+            and isinstance(node.test.left, ast.Name)
+            and node.test.left.id == "__name__"
+            and isinstance(node.test.ops[0], ast.Eq)
+            and isinstance(node.test.comparators[0], ast.Constant)
+            and node.test.comparators[0].value == "__main__"
+        ):
+
             # 移除整个 if 块
             return None
-        
+
         # 确保继续遍历子节点
         self.generic_visit(node)
         return node
+
+
 class TestGenTransformer(ast.NodeTransformer):
     """AST transformer for generating test cases from fuzzing inputs"""
-    
+
     def __init__(self, idx: int, fuzz_input: bytes):
         self.idx = idx
         self.fuzz_input = fuzz_input
         self.found_test_function = False
-    
+
     def visit_FunctionDef(self, node):
         # 只处理名为 test_ 的函数
         if node.name == "test_":
             self.found_test_function = True
-            
+
             # 1. 将函数名改为 test_{idx}
             node.name = f"test_{self.idx}"
-            
+
             # 2. 找到并替换 data = b"" 赋值语句
             self.replace_data_assignment(node)
-        
+
         return node
-    
+
     def replace_data_assignment(self, node):
         """Replace data assignment with fuzz input"""
         for i, stmt in enumerate(node.body):
             # 查找赋值语句
             if isinstance(stmt, ast.Assign):
                 # 检查是否是 data = b"" 格式的赋值
-                if (len(stmt.targets) == 1 and 
-                    isinstance(stmt.targets[0], ast.Name) and 
-                    isinstance(stmt.value, ast.Constant) and 
-                    stmt.value.value == b""):
-                    
+                if (
+                    len(stmt.targets) == 1
+                    and isinstance(stmt.targets[0], ast.Name)
+                    and isinstance(stmt.value, ast.Constant)
+                    and stmt.value.value == b""
+                ):
+
                     # 替换为新的输入数据
                     node.body[i] = ast.Assign(
                         targets=[stmt.targets[0]],
-                        value=ast.Constant(value=self.fuzz_input)
+                        value=ast.Constant(value=self.fuzz_input),
                     )
                     return
-                
+
                 # 检查是否是 data = b'' 格式的赋值
-                if (len(stmt.targets) == 1 and 
-                    isinstance(stmt.targets[0], ast.Name) and 
-                    isinstance(stmt.value, ast.Constant) and 
-                    stmt.value.value == b''):
-                    
+                if (
+                    len(stmt.targets) == 1
+                    and isinstance(stmt.targets[0], ast.Name)
+                    and isinstance(stmt.value, ast.Constant)
+                    and stmt.value.value == b""
+                ):
+
                     # 替换为新的输入数据
                     node.body[i] = ast.Assign(
                         targets=[stmt.targets[0]],
-                        value=ast.Constant(value=self.fuzz_input)
+                        value=ast.Constant(value=self.fuzz_input),
                     )
                     return
 
+
 def substitute_one_repo(
     repo: str,
     targets: list[str],
@@ -394,7 +437,7 @@ def substitute_one_repo(
         if not os.path.exists(source_file):
             logging.warning(f"Source file not found: {source_file}")
             continue
-        
+
         input_path = pjoin(input_dir, f"{target_name}")
         if not os.path.exists(input_path):
             logging.warning(f"Input file not found: {input_path}")
@@ -406,8 +449,8 @@ def substitute_one_repo(
             for line in f_input:
                 try:
                     # 尝试解码行以检查内容
-                    decoded = line.decode('utf-8', errors='replace')
-                    
+                    decoded = line.decode("utf-8", errors="replace")
+
                     # 只处理以 b' 或 b" 开头的行
                     if decoded.startswith(("b'", 'b"')):
                         if decoded.startswith("b'") and decoded.endswith("'\n"):
@@ -416,7 +459,7 @@ def substitute_one_repo(
                             byte_data = line[2:-2]
                         else:
                             continue
-                            
+
                         if 0 < len(byte_data) <= max_len:
                             valid_inputs.append(byte_data)
                 except UnicodeDecodeError:
@@ -442,38 +485,40 @@ def substitute_one_repo(
         for idx, fuzz_input in enumerate(inputs, start=1):
             with open(source_file, "r") as f_src:
                 code = f_src.read()
-            
+
             try:
                 # 解析为 AST
                 tree = ast.parse(code)
-                
+
                 # 应用转换器
                 transformer = TestGenTransformer(idx, fuzz_input)
                 new_tree = transformer.visit(tree)
                 ast.fix_missing_locations(new_tree)
-                
+
                 # 确保找到并处理了测试函数
                 if not transformer.found_test_function:
                     logging.warning(f"No test_ function found in {source_file}")
                     continue
-                
+
                 # 生成新代码
                 new_code = astunparse.unparse(new_tree)
-                
+
                 out_path = pjoin(template_dir, f"{target_name}.testgen_{idx}.py")
                 with open(out_path, "w") as f_out:
                     f_out.write(new_code)
-                
+
                 # 格式化代码
                 try:
                     subprocess.run(["black", out_path], check=False)
                 except FileNotFoundError:
                     logging.warning("Black formatter not found, skipping formatting")
-            
+
             except SyntaxError as e:
                 logging.error(f"Syntax error when processing {source_file}: {e}")
             except Exception as e:
                 logging.error(f"Error generating test case for {target_name}: {e}")
+
+
 def testgen_repos(
     repos: list[str],
     jobs: int,
@@ -484,7 +529,7 @@ def testgen_repos(
 ):
     """
     Generate test cases from fuzzing inputs
-    
+
     Args:
         repos (list[str]): List of repository paths
         jobs (int): Number of parallel tasks
@@ -500,17 +545,20 @@ def testgen_repos(
         oss_fuzz_dir = Path(repo).parent.parent
         targets = discover_targets(project_name, oss_fuzz_dir)
         targets_list.append(targets)
-    
+
     target_map = {repo: targets for repo, targets in zip(repos, targets_list)}
-    
+
     # Process each repository in parallel
     with ProcessingPool(jobs) as p:
-        list(p.map(
-            lambda item: substitute_one_repo(
-                item[0], item[1], n_fuzz, strategy, max_len, sim_thresh
-            ),
-            target_map.items()
-        ))
+        list(
+            p.map(
+                lambda item: substitute_one_repo(
+                    item[0], item[1], n_fuzz, strategy, max_len, sim_thresh
+                ),
+                target_map.items(),
+            )
+        )
+
 
 def main(
     repo_id: str = "data/valid_projects.txt",
@@ -522,11 +570,10 @@ def main(
     strategy: str = "shuffle",
     max_len: int = 100,
     sim_thresh: float = 0.8,
-   
 ):
     """
     Main function, controlling the entire fuzzing process
-    
+
     Args:
         repo_id (str): Project ID file path
         repo_root (str): Project root directory
@@ -571,6 +618,7 @@ def main(
     else:
         logging.error(f"Unknown pipeline: {pipeline}")
 
+
 if __name__ == "__main__":
     logging.basicConfig(level=logging.INFO)
-    fire.Fire(main)
\ No newline at end of file
+    fire.Fire(main)

From 2ebcee3a6b90dd42ce3b8d4afe77089e656a6fc9 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Sun, 17 Aug 2025 20:09:19 +0000
Subject: [PATCH 120/134] Process the input data after closing the file

---
 fuzz/collect_fuzz_python.py | 43 ++++++++++++++++++++-----------------
 1 file changed, 23 insertions(+), 20 deletions(-)

diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py
index bb958c9..e1c4dd4 100644
--- a/fuzz/collect_fuzz_python.py
+++ b/fuzz/collect_fuzz_python.py
@@ -445,33 +445,36 @@ def substitute_one_repo(
 
         # 读取所有有效的输入数据
         valid_inputs = []
+        # 首先读取文件内容，然后关闭文件
         with open(input_path, "rb") as f_input:
-            for line in f_input:
-                try:
-                    # 尝试解码行以检查内容
-                    decoded = line.decode("utf-8", errors="replace")
-
-                    # 只处理以 b' 或 b" 开头的行
-                    if decoded.startswith(("b'", 'b"')):
-                        if decoded.startswith("b'") and decoded.endswith("'\n"):
-                            byte_data = line[2:-2]
-                        elif decoded.startswith('b"') and decoded.endswith('"\n'):
-                            byte_data = line[2:-2]
-                        else:
-                            continue
-
-                        if 0 < len(byte_data) <= max_len:
-                            valid_inputs.append(byte_data)
-                except UnicodeDecodeError:
-                    if 0 < len(line) <= max_len:
-                        valid_inputs.append(line)
+            lines = f_input.readlines()
+
+        # 文件已关闭，现在处理数据
+        for line in lines:
+            try:
+                # 尝试解码行以检查内容
+                decoded = line.decode("utf-8", errors="replace")
+
+                # 只处理以 b' 或 b" 开头的行
+                if decoded.startswith(("b'", 'b"')):
+                    if decoded.startswith("b'") and decoded.endswith("'\n"):
+                        byte_data = line[2:-2]
+                    elif decoded.startswith('b"') and decoded.endswith('"\n'):
+                        byte_data = line[2:-2]
+                    else:
+                        continue
+
+                    if 0 < len(byte_data) <= max_len:
+                        valid_inputs.append(byte_data)
+            except UnicodeDecodeError:
+                if 0 < len(line) <= max_len:
+                    valid_inputs.append(line)
 
         if not valid_inputs:
             logging.warning(f"No valid inputs found for {target_name}")
             continue
 
         logging.info(f"Loaded {len(valid_inputs)} inputs for {target_name}")
-
         # 策略选择输入
         if strategy == "shuffle":
             random.shuffle(valid_inputs)

From 7aa3f037b90ec533e6386e2530dc348fdbc31f2f Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Sun, 17 Aug 2025 20:40:30 +0000
Subject: [PATCH 121/134] when doing line-matching, check for # This is a test
 template in the line

---
 fuzz/collect_fuzz_python.py | 82 ++++++++++++++-----------------------
 1 file changed, 30 insertions(+), 52 deletions(-)

diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py
index e1c4dd4..5b4d089 100644
--- a/fuzz/collect_fuzz_python.py
+++ b/fuzz/collect_fuzz_python.py
@@ -303,23 +303,33 @@ def visit_FunctionDef(self, node):
         return node
 
     def add_param_assignment(self, node, param_name):
-        """Add param_name = b"" at the beginning of the function body"""
+        """Add param_name = b"..." at the beginning of the function body with an inline comment"""
+        # 创建包含赋值和注释的复合值
+        value_with_comment = ast.JoinedStr(
+            values=[
+                ast.FormattedValue(value=ast.Constant(value=b""), conversion=-1),
+                ast.Constant(value="  # This is a test template")
+            ]
+        )
+        
         # 创建赋值节点
         assign_node = ast.Assign(
             targets=[ast.Name(id=param_name, ctx=ast.Store())],
-            value=ast.Constant(value=b""),
+            value=value_with_comment
         )
-
+        
         # 如果有文档字符串，插入在文档字符串之后
         if (
             node.body
             and isinstance(node.body[0], ast.Expr)
-            and isinstance(node.body[0].value, ast.Str)
+            and isinstance(node.body[0].value, ast.Constant)
+            and isinstance(node.body[0].value.value, str)
         ):
+            # 插入在文档字符串后面
             node.body.insert(1, assign_node)
         else:
+            # 插入在函数开头
             node.body.insert(0, assign_node)
-
     def remove_print_param(self, node, param_name):
         """Remove print statements for the specific parameter"""
         new_body = []
@@ -360,62 +370,30 @@ def visit_If(self, node):
 
 
 class TestGenTransformer(ast.NodeTransformer):
-    """AST transformer for generating test cases from fuzzing inputs"""
-
-    def __init__(self, idx: int, fuzz_input: bytes):
+    def __init__(self, idx, fuzz_input):
         self.idx = idx
         self.fuzz_input = fuzz_input
         self.found_test_function = False
 
     def visit_FunctionDef(self, node):
-        # 只处理名为 test_ 的函数
         if node.name == "test_":
             self.found_test_function = True
-
-            # 1. 将函数名改为 test_{idx}
-            node.name = f"test_{self.idx}"
-
-            # 2. 找到并替换 data = b"" 赋值语句
-            self.replace_data_assignment(node)
-
+            # 遍历函数体，寻找包含注释的赋值语句
+            for i, stmt in enumerate(node.body):
+                # 检查是否是赋值语句
+                if isinstance(stmt, ast.Assign):
+                    # 检查赋值语句的值是否是带有注释的复合值
+                    if (
+                        isinstance(stmt.value, ast.JoinedStr)
+                        and len(stmt.value.values) >= 2
+                        and isinstance(stmt.value.values[1], ast.Constant)
+                        and stmt.value.values[1].value == "  # This is a test template"
+                    ):
+                        # 替换为新的输入值
+                        stmt.value = ast.Constant(value=self.fuzz_input)
+                        break
         return node
 
-    def replace_data_assignment(self, node):
-        """Replace data assignment with fuzz input"""
-        for i, stmt in enumerate(node.body):
-            # 查找赋值语句
-            if isinstance(stmt, ast.Assign):
-                # 检查是否是 data = b"" 格式的赋值
-                if (
-                    len(stmt.targets) == 1
-                    and isinstance(stmt.targets[0], ast.Name)
-                    and isinstance(stmt.value, ast.Constant)
-                    and stmt.value.value == b""
-                ):
-
-                    # 替换为新的输入数据
-                    node.body[i] = ast.Assign(
-                        targets=[stmt.targets[0]],
-                        value=ast.Constant(value=self.fuzz_input),
-                    )
-                    return
-
-                # 检查是否是 data = b'' 格式的赋值
-                if (
-                    len(stmt.targets) == 1
-                    and isinstance(stmt.targets[0], ast.Name)
-                    and isinstance(stmt.value, ast.Constant)
-                    and stmt.value.value == b""
-                ):
-
-                    # 替换为新的输入数据
-                    node.body[i] = ast.Assign(
-                        targets=[stmt.targets[0]],
-                        value=ast.Constant(value=self.fuzz_input),
-                    )
-                    return
-
-
 def substitute_one_repo(
     repo: str,
     targets: list[str],

From 11ca42ff4774baea495d506d65b895f29fd3fcae Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Sun, 17 Aug 2025 20:59:04 +0000
Subject: [PATCH 122/134] Restore the test_{idx} function rename in
 TestGenTransformer

---
 fuzz/collect_fuzz_python.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py
index 5b4d089..4f0147f 100644
--- a/fuzz/collect_fuzz_python.py
+++ b/fuzz/collect_fuzz_python.py
@@ -378,7 +378,11 @@ def __init__(self, idx, fuzz_input):
     def visit_FunctionDef(self, node):
         if node.name == "test_":
             self.found_test_function = True
-            # 遍历函数体，寻找包含注释的赋值语句
+            
+            # 1. 修改函数名
+            node.name = f"test_{self.idx}"
+            
+            # 2. 查找并替换包含特定注释的赋值语句
             for i, stmt in enumerate(node.body):
                 # 检查是否是赋值语句
                 if isinstance(stmt, ast.Assign):

From a0bbe5606679c86e8d94831cdb8c7e26f0e55d7e Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Mon, 18 Aug 2025 00:21:19 +0000
Subject: [PATCH 123/134] Remove unreachable UnicodeDecodeError handler

---
 fuzz/collect_fuzz_python.py | 34 ++++++++++++++++++++--------------
 1 file changed, 20 insertions(+), 14 deletions(-)

diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py
index 4f0147f..42228e5 100644
--- a/fuzz/collect_fuzz_python.py
+++ b/fuzz/collect_fuzz_python.py
@@ -431,12 +431,11 @@ def substitute_one_repo(
         with open(input_path, "rb") as f_input:
             lines = f_input.readlines()
 
-        # 文件已关闭，现在处理数据
-        for line in lines:
-            try:
-                # 尝试解码行以检查内容
+            # 文件已关闭，现在处理数据
+            for line in lines:
+                # 使用 errors='replace' 确保解码不会失败
                 decoded = line.decode("utf-8", errors="replace")
-
+                
                 # 只处理以 b' 或 b" 开头的行
                 if decoded.startswith(("b'", 'b"')):
                     if decoded.startswith("b'") and decoded.endswith("'\n"):
@@ -445,11 +444,11 @@ def substitute_one_repo(
                         byte_data = line[2:-2]
                     else:
                         continue
-
+                    
                     if 0 < len(byte_data) <= max_len:
                         valid_inputs.append(byte_data)
-            except UnicodeDecodeError:
-                if 0 < len(line) <= max_len:
+                # 对于其他行，如果长度在范围内且不是以 b' 或 b" 开头，也考虑加入
+                elif 0 < len(line) <= max_len:
                     valid_inputs.append(line)
 
         if not valid_inputs:
@@ -492,12 +491,19 @@ def substitute_one_repo(
                 with open(out_path, "w") as f_out:
                     f_out.write(new_code)
 
-                # 格式化代码
-                try:
-                    subprocess.run(["black", out_path], check=False)
-                except FileNotFoundError:
-                    logging.warning("Black formatter not found, skipping formatting")
-
+            # 格式化代码
+            formatter_installed = True
+            try:
+                subprocess.run(["black", out_path], 
+                            check=False, 
+                            stdout=subprocess.DEVNULL,  # 隐藏输出
+                            stderr=subprocess.DEVNULL)  # 隐藏错误
+            except FileNotFoundError:
+                if formatter_installed:  # 避免多次记录
+                    logging.warning("Black code formatter not found. For better formatting, install with:")
+                    logging.warning("pip install black")
+                    formatter_installed = False
+                
             except SyntaxError as e:
                 logging.error(f"Syntax error when processing {source_file}: {e}")
             except Exception as e:

From 0067af3e7bdf376a27cc34f047389b84fd5d1ff0 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Mon, 18 Aug 2025 02:13:04 +0000
Subject: [PATCH 124/134] Apply transformations to the original unmodified
 fuzz targets

---
 fuzz/collect_fuzz_python.py | 90 +++++++++++++++++++++----------------
 1 file changed, 51 insertions(+), 39 deletions(-)

diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py
index 42228e5..4ca7f3b 100644
--- a/fuzz/collect_fuzz_python.py
+++ b/fuzz/collect_fuzz_python.py
@@ -198,7 +198,15 @@ def transform_repos(repos: list[str], jobs: int):
     def _transform_repo(repo: str):
         project_name = os.path.basename(repo)
         oss_fuzz_dir = Path(repo).parent.parent
-        targets = discover_targets(project_name, oss_fuzz_dir)
+        raw_targets = discover_targets(project_name, oss_fuzz_dir)
+        
+        # 只需移除目标名称中的 "_print1"，不要添加任何新后缀
+        transformed_targets = [t.replace("_print1", "") for t in raw_targets]
+        
+        # 去重
+        targets = list(set(transformed_targets))
+        
+        # 传递给 generate_test_template 的是简单目标名称
         return [generate_test_template(t, repo) for t in targets]
 
     with ProcessingPool(jobs) as p:
@@ -209,7 +217,10 @@ def generate_test_template(target_name: str, repo_path: str):
     """
     Generate Python test template using AST for more precise code transformations
     """
-    src_file = pjoin(repo_path, target_name + ".py")
+    src_file = pjoin(repo_path, target_name)
+    logging.info(f"Generating test template for {src_file}")    
+    if not src_file.endswith(".py"):
+        src_file += ".py"    
     if not os.path.exists(src_file):
         logging.error(f"Source target file not found: {src_file}")
         return None
@@ -253,7 +264,7 @@ def generate_test_template(target_name: str, repo_path: str):
         with open(init_path, "w", encoding="utf-8") as f:
             f.write("")
 
-    template_path = pjoin(template_dir, f"{target_name}.py")
+    template_path = pjoin(template_dir, f"{os.path.splitext(target_name)[0]}.py")
     with open(template_path, "w", encoding="utf-8") as f:
         f.write(shebang + cleaned_code.strip() + "\n")
 
@@ -294,9 +305,6 @@ def visit_FunctionDef(self, node):
             if param_name:
                 self.add_param_assignment(node, param_name)
 
-            # f. 删除所有 print(原参数名) 的语句
-            if param_name:
-                self.remove_print_param(node, param_name)
 
         # 确保继续遍历子节点
         self.generic_visit(node)
@@ -400,7 +408,7 @@ def visit_FunctionDef(self, node):
 
 def substitute_one_repo(
     repo: str,
-    targets: list[str],
+    targets: list[tuple],  # 每个元素是 (transformed_target, raw_target)
     n_fuzz: int,
     strategy: str,
     max_len: int,
@@ -414,23 +422,25 @@ def substitute_one_repo(
     template_dir = pjoin(repo, "tests-gen")
     os.makedirs(template_dir, exist_ok=True)
 
-    for target_name in targets:
-        source_file = pjoin(template_dir, f"{target_name}.py")
+    for transformed_target, raw_target in targets:
+        # 使用转换后的目标名称构建模板文件路径
+        source_file = pjoin(template_dir, transformed_target + ".py")
+        
+        # 使用原始目标名称构建输入文件路径
+        input_path = pjoin(input_dir, raw_target)
+        
+        # 确保源文件存在
         if not os.path.exists(source_file):
             logging.warning(f"Source file not found: {source_file}")
             continue
-
-        input_path = pjoin(input_dir, f"{target_name}")
         if not os.path.exists(input_path):
             logging.warning(f"Input file not found: {input_path}")
             continue
-
+        
         # 读取所有有效的输入数据
         valid_inputs = []
-        # 首先读取文件内容，然后关闭文件
         with open(input_path, "rb") as f_input:
             lines = f_input.readlines()
-
             # 文件已关闭，现在处理数据
             for line in lines:
                 # 使用 errors='replace' 确保解码不会失败
@@ -452,10 +462,12 @@ def substitute_one_repo(
                     valid_inputs.append(line)
 
         if not valid_inputs:
-            logging.warning(f"No valid inputs found for {target_name}")
+            # 使用 transformed_target 而不是 target_name
+            logging.warning(f"No valid inputs found for {transformed_target}")
             continue
 
-        logging.info(f"Loaded {len(valid_inputs)} inputs for {target_name}")
+        # 使用 transformed_target 而不是 target_name
+        logging.info(f"Loaded {len(valid_inputs)} inputs for {transformed_target}")
         # 策略选择输入
         if strategy == "shuffle":
             random.shuffle(valid_inputs)
@@ -487,28 +499,22 @@ def substitute_one_repo(
                 # 生成新代码
                 new_code = astunparse.unparse(new_tree)
 
-                out_path = pjoin(template_dir, f"{target_name}.testgen_{idx}.py")
+                # 使用 transformed_target 而不是 target_name
+                out_path = pjoin(template_dir, f"{transformed_target}.testgen_{idx}.py")
                 with open(out_path, "w") as f_out:
                     f_out.write(new_code)
 
-            # 格式化代码
-            formatter_installed = True
-            try:
-                subprocess.run(["black", out_path], 
-                            check=False, 
-                            stdout=subprocess.DEVNULL,  # 隐藏输出
-                            stderr=subprocess.DEVNULL)  # 隐藏错误
-            except FileNotFoundError:
-                if formatter_installed:  # 避免多次记录
-                    logging.warning("Black code formatter not found. For better formatting, install with:")
-                    logging.warning("pip install black")
-                    formatter_installed = False
+                # 格式化代码
+                try:
+                    subprocess.run(["black", out_path], check=False)
+                except FileNotFoundError:
+                    logging.warning("Black formatter not found, skipping formatting")
                 
             except SyntaxError as e:
                 logging.error(f"Syntax error when processing {source_file}: {e}")
             except Exception as e:
-                logging.error(f"Error generating test case for {target_name}: {e}")
-
+                # 使用 transformed_target 而不是 target_name
+                logging.error(f"Error generating test case for {transformed_target}: {e}")
 
 def testgen_repos(
     repos: list[str],
@@ -529,28 +535,34 @@ def testgen_repos(
         max_len (int): Maximum length
         sim_thresh (float): Similarity threshold
     """
-    # First get all targets
-    targets_list = []
+    # First get all targets and apply transformation
+    target_map = {}
     for repo in repos:
         project_name = os.path.basename(repo)
         oss_fuzz_dir = Path(repo).parent.parent
-        targets = discover_targets(project_name, oss_fuzz_dir)
-        targets_list.append(targets)
-
-    target_map = {repo: targets for repo, targets in zip(repos, targets_list)}
+        raw_targets = discover_targets(project_name, oss_fuzz_dir)
+        
+        # 保存原始目标名称和转换后的目标名称
+        transformed_targets = [t.replace("_print1", "") for t in raw_targets]
+        targets = list(zip(transformed_targets, raw_targets))  # (转换后, 原始)
+        target_map[repo] = targets
 
     # Process each repository in parallel
     with ProcessingPool(jobs) as p:
         list(
             p.map(
                 lambda item: substitute_one_repo(
-                    item[0], item[1], n_fuzz, strategy, max_len, sim_thresh
+                    item[0],         # repo path
+                    item[1],         # list of (transformed, raw) targets
+                    n_fuzz, 
+                    strategy, 
+                    max_len, 
+                    sim_thresh
                 ),
                 target_map.items(),
             )
         )
 
-
 def main(
     repo_id: str = "data/valid_projects.txt",
     repo_root: str = "fuzz/oss-fuzz/projects/",

From f456adde40a5de28fc047110d0c9d58f7c51c53a Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Mon, 18 Aug 2025 02:18:31 +0000
Subject: [PATCH 125/134] Move all AST-related classes and functions into
 ast_utils.py and import them from there

---
 fuzz/ast_utils.py           | 201 ++++++++++++++++++++++++++++++++++++
 fuzz/collect_fuzz_python.py | 201 +-----------------------------------
 2 files changed, 204 insertions(+), 198 deletions(-)
 create mode 100644 fuzz/ast_utils.py

diff --git a/fuzz/ast_utils.py b/fuzz/ast_utils.py
new file mode 100644
index 0000000..60bfce3
--- /dev/null
+++ b/fuzz/ast_utils.py
@@ -0,0 +1,201 @@
+# ast_utils.py
+import ast
+import astunparse
+import logging
+import os
+import re
+
+class TestFunctionTransformer(ast.NodeTransformer):
+    """AST transformer for test function conversion"""
+
+    def visit_FunctionDef(self, node):
+        # 首先处理 main 函数（移除）
+        if node.name == "main":
+            return None
+
+        # 处理 TestInput/TestOneInput 函数
+        if node.name in ["TestInput", "TestOneInput"]:
+            # a. 记录参数名称（假设只有一个参数）
+            param_name = None
+            if node.args.args:
+                param_name = node.args.args[0].arg
+
+            # b. 将函数名改为 test_
+            node.name = "test_"
+
+            # c. 移除参数（将参数列表设为空）
+            node.args = ast.arguments(
+                posonlyargs=[],
+                args=[],
+                vararg=None,
+                kwonlyargs=[],
+                kw_defaults=[],
+                kwarg=None,
+                defaults=[],
+            )
+
+            # d. 在函数体开头插入 原参数名 = b""
+            if param_name:
+                self.add_param_assignment(node, param_name)
+
+        # 确保继续遍历子节点
+        self.generic_visit(node)
+        return node
+
+    def add_param_assignment(self, node, param_name):
+        """Add param_name = b"..." at the beginning of the function body with an inline comment"""
+        # 创建包含赋值和注释的复合值
+        value_with_comment = ast.JoinedStr(
+            values=[
+                ast.FormattedValue(value=ast.Constant(value=b""), conversion=-1),
+                ast.Constant(value="  # This is a test template")
+            ]
+        )
+        
+        # 创建赋值节点
+        assign_node = ast.Assign(
+            targets=[ast.Name(id=param_name, ctx=ast.Store())],
+            value=value_with_comment
+        )
+        
+        # 如果有文档字符串，插入在文档字符串之后
+        if (
+            node.body
+            and isinstance(node.body[0], ast.Expr)
+            and isinstance(node.body[0].value, ast.Constant)
+            and isinstance(node.body[0].value.value, str)
+        ):
+            # 插入在文档字符串后面
+            node.body.insert(1, assign_node)
+        else:
+            # 插入在函数开头
+            node.body.insert(0, assign_node)
+
+    def remove_print_param(self, node, param_name):
+        """Remove print statements for the specific parameter"""
+        new_body = []
+        for stmt in node.body:
+            # 跳过 print(param_name) 调用
+            if (
+                isinstance(stmt, ast.Expr)
+                and isinstance(stmt.value, ast.Call)
+                and isinstance(stmt.value.func, ast.Name)
+                and stmt.value.func.id == "print"
+                and any(
+                    isinstance(arg, ast.Name) and arg.id == param_name
+                    for arg in stmt.value.args
+                )
+            ):
+                continue
+            new_body.append(stmt)
+        node.body = new_body
+
+    def visit_If(self, node):
+        """Remove if __name__ == '__main__' blocks"""
+        # 检查是否是主函数保护
+        if (
+            isinstance(node.test, ast.Compare)
+            and isinstance(node.test.left, ast.Name)
+            and node.test.left.id == "__name__"
+            and isinstance(node.test.ops[0], ast.Eq)
+            and isinstance(node.test.comparators[0], ast.Constant)
+            and node.test.comparators[0].value == "__main__"
+        ):
+
+            # 移除整个 if 块
+            return None
+
+        # 确保继续遍历子节点
+        self.generic_visit(node)
+        return node
+
+
+class TestGenTransformer(ast.NodeTransformer):
+    def __init__(self, idx, fuzz_input):
+        self.idx = idx
+        self.fuzz_input = fuzz_input
+        self.found_test_function = False
+
+    def visit_FunctionDef(self, node):
+        if node.name == "test_":
+            self.found_test_function = True
+            
+            # 1. 修改函数名
+            node.name = f"test_{self.idx}"
+            
+            # 2. 查找并替换包含特定注释的赋值语句
+            for i, stmt in enumerate(node.body):
+                # 检查是否是赋值语句
+                if isinstance(stmt, ast.Assign):
+                    # 检查赋值语句的值是否是带有注释的复合值
+                    if (
+                        isinstance(stmt.value, ast.JoinedStr)
+                        and len(stmt.value.values) >= 2
+                        and isinstance(stmt.value.values[1], ast.Constant)
+                        and stmt.value.values[1].value == "  # This is a test template"
+                    ):
+                        # 替换为新的输入值
+                        stmt.value = ast.Constant(value=self.fuzz_input)
+                        break
+        return node
+
+
+def generate_test_template(target_name: str, repo_path: str):
+    """
+    Generate Python test template using AST for more precise code transformations
+    """
+    src_file = os.path.join(repo_path, target_name)
+    logging.info(f"Generating test template for {src_file}")    
+    if not src_file.endswith(".py"):
+        src_file += ".py"    
+    if not os.path.exists(src_file):
+        logging.error(f"Source target file not found: {src_file}")
+        return None
+
+    with open(src_file, "r", encoding="utf-8") as f:
+        original_code = f.read()
+
+    # --- 1. Keep shebang but remove license comments ---
+    shebang = ""
+    if original_code.startswith("#!"):
+        shebang, original_code = original_code.split("\n", 1)
+        shebang += "\n"
+
+    license_pattern = re.compile(
+        r"^(?:\s*#.*\n)*\s*#.*limitations\s+under\s+the\s+license.*\n",
+        re.IGNORECASE | re.MULTILINE,
+    )
+    code_no_license = re.sub(license_pattern, "", original_code, count=1)
+
+    # --- 2. Parse code to AST ---
+    try:
+        tree = ast.parse(code_no_license)
+    except SyntaxError as e:
+        logging.error(f"Syntax error in {src_file}: {e}")
+        return None
+
+    # --- 3. AST transformation ---
+    transformer = TestFunctionTransformer()
+    new_tree = transformer.visit(tree)
+    ast.fix_missing_locations(new_tree)
+
+    # --- 4. Generate cleaned code ---
+    cleaned_code = astunparse.unparse(new_tree)
+
+    # --- 5. Output to tests-gen directory ---
+    template_dir = os.path.join(repo_path, "tests-gen")
+    os.makedirs(template_dir, exist_ok=True)
+
+    init_path = os.path.join(template_dir, "__init__.py")
+    if not os.path.exists(init_path):
+        with open(init_path, "w", encoding="utf-8") as f:
+            f.write("")
+
+    # 使用目标名称的基础部分（移除扩展名）作为输出文件名
+    base_target_name = os.path.splitext(target_name)[0]
+    template_path = os.path.join(template_dir, f"{base_target_name}.py")
+    with open(template_path, "w", encoding="utf-8") as f:
+        f.write(shebang + cleaned_code.strip() + "\n")
+
+    logging.info(f"Generated cleaned template: {template_path}")
+    return template_path
\ No newline at end of file
diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py
index 4ca7f3b..f752512 100644
--- a/fuzz/collect_fuzz_python.py
+++ b/fuzz/collect_fuzz_python.py
@@ -4,8 +4,6 @@
 """
 
 from pathlib import Path
-import ast
-import astunparse
 import logging
 from typing import Optional
 import fire
@@ -19,8 +17,9 @@
 from difflib import SequenceMatcher
 from itertools import islice
 from datetime import datetime
-import re
 
+# 导入AST相关的功能
+from ast_utils import TestFunctionTransformer, TestGenTransformer, generate_test_template
 
 def build_image(repos: list[str], jobs: int):
     """
@@ -212,200 +211,6 @@ def _transform_repo(repo: str):
     with ProcessingPool(jobs) as p:
         return list(p.map(_transform_repo, repos))
 
-
-def generate_test_template(target_name: str, repo_path: str):
-    """
-    Generate Python test template using AST for more precise code transformations
-    """
-    src_file = pjoin(repo_path, target_name)
-    logging.info(f"Generating test template for {src_file}")    
-    if not src_file.endswith(".py"):
-        src_file += ".py"    
-    if not os.path.exists(src_file):
-        logging.error(f"Source target file not found: {src_file}")
-        return None
-
-    with open(src_file, "r", encoding="utf-8") as f:
-        original_code = f.read()
-
-    # --- 1. Keep shebang but remove license comments ---
-    shebang = ""
-    if original_code.startswith("#!"):
-        shebang, original_code = original_code.split("\n", 1)
-        shebang += "\n"
-
-    license_pattern = re.compile(
-        r"^(?:\s*#.*\n)*\s*#.*limitations\s+under\s+the\s+license.*\n",
-        re.IGNORECASE | re.MULTILINE,
-    )
-    code_no_license = re.sub(license_pattern, "", original_code, count=1)
-
-    # --- 2. Parse code to AST ---
-    try:
-        tree = ast.parse(code_no_license)
-    except SyntaxError as e:
-        logging.error(f"Syntax error in {src_file}: {e}")
-        return None
-
-    # --- 3. AST transformation ---
-    transformer = TestFunctionTransformer()
-    new_tree = transformer.visit(tree)
-    ast.fix_missing_locations(new_tree)
-
-    # --- 4. Generate cleaned code ---
-    cleaned_code = astunparse.unparse(new_tree)
-
-    # --- 5. Output to tests-gen directory ---
-    template_dir = pjoin(repo_path, "tests-gen")
-    os.makedirs(template_dir, exist_ok=True)
-
-    init_path = pjoin(template_dir, "__init__.py")
-    if not os.path.exists(init_path):
-        with open(init_path, "w", encoding="utf-8") as f:
-            f.write("")
-
-    template_path = pjoin(template_dir, f"{os.path.splitext(target_name)[0]}.py")
-    with open(template_path, "w", encoding="utf-8") as f:
-        f.write(shebang + cleaned_code.strip() + "\n")
-
-    logging.info(f"Generated cleaned template: {template_path}")
-    return template_path
-
-
-class TestFunctionTransformer(ast.NodeTransformer):
-    """AST transformer for test function conversion"""
-
-    def visit_FunctionDef(self, node):
-        # 首先处理 main 函数（移除）
-        if node.name == "main":
-            return None
-
-        # 处理 TestInput/TestOneInput 函数
-        if node.name in ["TestInput", "TestOneInput"]:
-            # a. 记录参数名称（假设只有一个参数）
-            param_name = None
-            if node.args.args:
-                param_name = node.args.args[0].arg
-
-            # b. 将函数名改为 test_
-            node.name = "test_"
-
-            # c. 移除参数（将参数列表设为空）
-            node.args = ast.arguments(
-                posonlyargs=[],
-                args=[],
-                vararg=None,
-                kwonlyargs=[],
-                kw_defaults=[],
-                kwarg=None,
-                defaults=[],
-            )
-
-            # d. 在函数体开头插入 原参数名 = b""
-            if param_name:
-                self.add_param_assignment(node, param_name)
-
-
-        # 确保继续遍历子节点
-        self.generic_visit(node)
-        return node
-
-    def add_param_assignment(self, node, param_name):
-        """Add param_name = b"..." at the beginning of the function body with an inline comment"""
-        # 创建包含赋值和注释的复合值
-        value_with_comment = ast.JoinedStr(
-            values=[
-                ast.FormattedValue(value=ast.Constant(value=b""), conversion=-1),
-                ast.Constant(value="  # This is a test template")
-            ]
-        )
-        
-        # 创建赋值节点
-        assign_node = ast.Assign(
-            targets=[ast.Name(id=param_name, ctx=ast.Store())],
-            value=value_with_comment
-        )
-        
-        # 如果有文档字符串，插入在文档字符串之后
-        if (
-            node.body
-            and isinstance(node.body[0], ast.Expr)
-            and isinstance(node.body[0].value, ast.Constant)
-            and isinstance(node.body[0].value.value, str)
-        ):
-            # 插入在文档字符串后面
-            node.body.insert(1, assign_node)
-        else:
-            # 插入在函数开头
-            node.body.insert(0, assign_node)
-    def remove_print_param(self, node, param_name):
-        """Remove print statements for the specific parameter"""
-        new_body = []
-        for stmt in node.body:
-            # 跳过 print(param_name) 调用
-            if (
-                isinstance(stmt, ast.Expr)
-                and isinstance(stmt.value, ast.Call)
-                and isinstance(stmt.value.func, ast.Name)
-                and stmt.value.func.id == "print"
-                and any(
-                    isinstance(arg, ast.Name) and arg.id == param_name
-                    for arg in stmt.value.args
-                )
-            ):
-                continue
-            new_body.append(stmt)
-        node.body = new_body
-
-    def visit_If(self, node):
-        """Remove if __name__ == '__main__' blocks"""
-        # 检查是否是主函数保护
-        if (
-            isinstance(node.test, ast.Compare)
-            and isinstance(node.test.left, ast.Name)
-            and node.test.left.id == "__name__"
-            and isinstance(node.test.ops[0], ast.Eq)
-            and isinstance(node.test.comparators[0], ast.Constant)
-            and node.test.comparators[0].value == "__main__"
-        ):
-
-            # 移除整个 if 块
-            return None
-
-        # 确保继续遍历子节点
-        self.generic_visit(node)
-        return node
-
-
-class TestGenTransformer(ast.NodeTransformer):
-    def __init__(self, idx, fuzz_input):
-        self.idx = idx
-        self.fuzz_input = fuzz_input
-        self.found_test_function = False
-
-    def visit_FunctionDef(self, node):
-        if node.name == "test_":
-            self.found_test_function = True
-            
-            # 1. 修改函数名
-            node.name = f"test_{self.idx}"
-            
-            # 2. 查找并替换包含特定注释的赋值语句
-            for i, stmt in enumerate(node.body):
-                # 检查是否是赋值语句
-                if isinstance(stmt, ast.Assign):
-                    # 检查赋值语句的值是否是带有注释的复合值
-                    if (
-                        isinstance(stmt.value, ast.JoinedStr)
-                        and len(stmt.value.values) >= 2
-                        and isinstance(stmt.value.values[1], ast.Constant)
-                        and stmt.value.values[1].value == "  # This is a test template"
-                    ):
-                        # 替换为新的输入值
-                        stmt.value = ast.Constant(value=self.fuzz_input)
-                        break
-        return node
-
 def substitute_one_repo(
     repo: str,
     targets: list[tuple],  # 每个元素是 (transformed_target, raw_target)
@@ -624,4 +429,4 @@ def main(
 
 if __name__ == "__main__":
     logging.basicConfig(level=logging.INFO)
-    fire.Fire(main)
+    fire.Fire(main)
\ No newline at end of file

From 509a4c473fadbe6c55f95a74c06d0d1e77d28f46 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Mon, 18 Aug 2025 02:23:59 +0000
Subject: [PATCH 126/134] Restore the ast and astunparse imports in
 collect_fuzz_python.py

---
 fuzz/collect_fuzz_python.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py
index f752512..ccf14fa 100644
--- a/fuzz/collect_fuzz_python.py
+++ b/fuzz/collect_fuzz_python.py
@@ -4,6 +4,8 @@
 """
 
 from pathlib import Path
+import ast
+import astunparse
 import logging
 from typing import Optional
 import fire

From 1686058f9a44c8fe6b5f0a04908ff959de48e662 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Mon, 18 Aug 2025 02:33:14 +0000
Subject: [PATCH 127/134] Translate Chinese code comments to English

---
 fuzz/ast_utils.py           |  64 +++++++++++-----------
 fuzz/collect_fuzz_python.py | 102 ++++++++++++++++++++----------------
 2 files changed, 89 insertions(+), 77 deletions(-)

diff --git a/fuzz/ast_utils.py b/fuzz/ast_utils.py
index 60bfce3..4bca4a7 100644
--- a/fuzz/ast_utils.py
+++ b/fuzz/ast_utils.py
@@ -5,25 +5,26 @@
 import os
 import re
 
+
 class TestFunctionTransformer(ast.NodeTransformer):
     """AST transformer for test function conversion"""
 
     def visit_FunctionDef(self, node):
-        # 首先处理 main 函数（移除）
+        # First, process main function (remove it)
         if node.name == "main":
             return None
 
-        # 处理 TestInput/TestOneInput 函数
+        # Process TestInput/TestOneInput functions
         if node.name in ["TestInput", "TestOneInput"]:
-            # a. 记录参数名称（假设只有一个参数）
+            # a. Record parameter name (assume only one parameter)
             param_name = None
             if node.args.args:
                 param_name = node.args.args[0].arg
 
-            # b. 将函数名改为 test_
+            # b. Rename function to test_
             node.name = "test_"
 
-            # c. 移除参数（将参数列表设为空）
+            # c. Remove parameters (set argument list to empty)
             node.args = ast.arguments(
                 posonlyargs=[],
                 args=[],
@@ -34,48 +35,47 @@ def visit_FunctionDef(self, node):
                 defaults=[],
             )
 
-            # d. 在函数体开头插入 原参数名 = b""
+            # d. Insert param_name = b"" at the beginning of the function body
             if param_name:
                 self.add_param_assignment(node, param_name)
 
-        # 确保继续遍历子节点
+        # Ensure traversing child nodes continues
         self.generic_visit(node)
         return node
 
     def add_param_assignment(self, node, param_name):
         """Add param_name = b"..." at the beginning of the function body with an inline comment"""
-        # 创建包含赋值和注释的复合值
+        # Create a compound value containing assignment and comment
         value_with_comment = ast.JoinedStr(
             values=[
                 ast.FormattedValue(value=ast.Constant(value=b""), conversion=-1),
-                ast.Constant(value="  # This is a test template")
+                ast.Constant(value="  # This is a test template"),
             ]
         )
-        
-        # 创建赋值节点
+
+        # Create an assignment node
         assign_node = ast.Assign(
-            targets=[ast.Name(id=param_name, ctx=ast.Store())],
-            value=value_with_comment
+            targets=[ast.Name(id=param_name, ctx=ast.Store())], value=value_with_comment
         )
-        
-        # 如果有文档字符串，插入在文档字符串之后
+
+        # If there is a docstring, insert after the docstring
         if (
             node.body
             and isinstance(node.body[0], ast.Expr)
             and isinstance(node.body[0].value, ast.Constant)
             and isinstance(node.body[0].value.value, str)
         ):
-            # 插入在文档字符串后面
+            # Insert right after the docstring
             node.body.insert(1, assign_node)
         else:
-            # 插入在函数开头
+            # Insert at the beginning of the function
             node.body.insert(0, assign_node)
 
     def remove_print_param(self, node, param_name):
         """Remove print statements for the specific parameter"""
         new_body = []
         for stmt in node.body:
-            # 跳过 print(param_name) 调用
+            # Skip print(param_name) calls
             if (
                 isinstance(stmt, ast.Expr)
                 and isinstance(stmt.value, ast.Call)
@@ -92,7 +92,7 @@ def remove_print_param(self, node, param_name):
 
     def visit_If(self, node):
         """Remove if __name__ == '__main__' blocks"""
-        # 检查是否是主函数保护
+        # Check if this is the main function guard
         if (
             isinstance(node.test, ast.Compare)
             and isinstance(node.test.left, ast.Name)
@@ -102,10 +102,10 @@ def visit_If(self, node):
             and node.test.comparators[0].value == "__main__"
         ):
 
-            # 移除整个 if 块
+            # Remove the entire if block
             return None
 
-        # 确保继续遍历子节点
+        # Ensure traversing child nodes continues
         self.generic_visit(node)
         return node
 
@@ -119,22 +119,22 @@ def __init__(self, idx, fuzz_input):
     def visit_FunctionDef(self, node):
         if node.name == "test_":
             self.found_test_function = True
-            
-            # 1. 修改函数名
+
+            # 1. Modify function name
             node.name = f"test_{self.idx}"
-            
-            # 2. 查找并替换包含特定注释的赋值语句
+
+            # 2. Find and replace assignment statements with the special comment
             for i, stmt in enumerate(node.body):
-                # 检查是否是赋值语句
+                # Check if it's an assignment statement
                 if isinstance(stmt, ast.Assign):
-                    # 检查赋值语句的值是否是带有注释的复合值
+                    # Check if the value is a compound value with a comment
                     if (
                         isinstance(stmt.value, ast.JoinedStr)
                         and len(stmt.value.values) >= 2
                         and isinstance(stmt.value.values[1], ast.Constant)
                         and stmt.value.values[1].value == "  # This is a test template"
                     ):
-                        # 替换为新的输入值
+                        # Replace with new fuzz input
                         stmt.value = ast.Constant(value=self.fuzz_input)
                         break
         return node
@@ -145,9 +145,9 @@ def generate_test_template(target_name: str, repo_path: str):
     Generate Python test template using AST for more precise code transformations
     """
     src_file = os.path.join(repo_path, target_name)
-    logging.info(f"Generating test template for {src_file}")    
+    logging.info(f"Generating test template for {src_file}")
     if not src_file.endswith(".py"):
-        src_file += ".py"    
+        src_file += ".py"
     if not os.path.exists(src_file):
         logging.error(f"Source target file not found: {src_file}")
         return None
@@ -191,11 +191,11 @@ def generate_test_template(target_name: str, repo_path: str):
         with open(init_path, "w", encoding="utf-8") as f:
             f.write("")
 
-    # 使用目标名称的基础部分（移除扩展名）作为输出文件名
+    # Use the base part of target_name (remove extension) as the output file name
     base_target_name = os.path.splitext(target_name)[0]
     template_path = os.path.join(template_dir, f"{base_target_name}.py")
     with open(template_path, "w", encoding="utf-8") as f:
         f.write(shebang + cleaned_code.strip() + "\n")
 
     logging.info(f"Generated cleaned template: {template_path}")
-    return template_path
\ No newline at end of file
+    return template_path
diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py
index ccf14fa..d4592e0 100644
--- a/fuzz/collect_fuzz_python.py
+++ b/fuzz/collect_fuzz_python.py
@@ -19,9 +19,15 @@
 from difflib import SequenceMatcher
 from itertools import islice
 from datetime import datetime
+import re
+
+# Import AST-related functionality
+from ast_utils import (
+    TestFunctionTransformer,
+    TestGenTransformer,
+    generate_test_template,
+)
 
-# 导入AST相关的功能
-from ast_utils import TestFunctionTransformer, TestGenTransformer, generate_test_template
 
 def build_image(repos: list[str], jobs: int):
     """
@@ -200,22 +206,23 @@ def _transform_repo(repo: str):
         project_name = os.path.basename(repo)
         oss_fuzz_dir = Path(repo).parent.parent
         raw_targets = discover_targets(project_name, oss_fuzz_dir)
-        
-        # 只需移除目标名称中的 "_print1"，不要添加任何新后缀
+
+        # Simply remove "_print1" from target names, don't add any new suffix
         transformed_targets = [t.replace("_print1", "") for t in raw_targets]
-        
-        # 去重
+
+        # Remove duplicates
         targets = list(set(transformed_targets))
-        
-        # 传递给 generate_test_template 的是简单目标名称
+
+        # Pass simple target names to generate_test_template
         return [generate_test_template(t, repo) for t in targets]
 
     with ProcessingPool(jobs) as p:
         return list(p.map(_transform_repo, repos))
 
+
 def substitute_one_repo(
     repo: str,
-    targets: list[tuple],  # 每个元素是 (transformed_target, raw_target)
+    targets: list[tuple],  # Each element is (transformed_target, raw_target)
     n_fuzz: int,
     strategy: str,
     max_len: int,
@@ -225,35 +232,36 @@ def substitute_one_repo(
     Copy files from fuzz target template and generate multiple testgen files based on fuzz inputs
     using AST transformations
     """
+
     input_dir = pjoin(repo, "fuzz_inputs")
     template_dir = pjoin(repo, "tests-gen")
     os.makedirs(template_dir, exist_ok=True)
 
     for transformed_target, raw_target in targets:
-        # 使用转换后的目标名称构建模板文件路径
+        # Build template file path using transformed target name
         source_file = pjoin(template_dir, transformed_target + ".py")
-        
-        # 使用原始目标名称构建输入文件路径
+
+        # Build input file path using raw target name
         input_path = pjoin(input_dir, raw_target)
-        
-        # 确保源文件存在
+
+        # Ensure source file exists
         if not os.path.exists(source_file):
             logging.warning(f"Source file not found: {source_file}")
             continue
         if not os.path.exists(input_path):
             logging.warning(f"Input file not found: {input_path}")
             continue
-        
-        # 读取所有有效的输入数据
+
+        # Read all valid input data
         valid_inputs = []
         with open(input_path, "rb") as f_input:
             lines = f_input.readlines()
-            # 文件已关闭，现在处理数据
+            # File is closed, now process data
             for line in lines:
-                # 使用 errors='replace' 确保解码不会失败
+                # Use errors='replace' to ensure decoding doesn't fail
                 decoded = line.decode("utf-8", errors="replace")
-                
-                # 只处理以 b' 或 b" 开头的行
+
+                # Only process lines starting with b' or b"
                 if decoded.startswith(("b'", 'b"')):
                     if decoded.startswith("b'") and decoded.endswith("'\n"):
                         byte_data = line[2:-2]
@@ -261,21 +269,21 @@ def substitute_one_repo(
                         byte_data = line[2:-2]
                     else:
                         continue
-                    
+
                     if 0 < len(byte_data) <= max_len:
                         valid_inputs.append(byte_data)
-                # 对于其他行，如果长度在范围内且不是以 b' 或 b" 开头，也考虑加入
+                # For other lines, if length is within range and doesn't start with b' or b", also consider adding
                 elif 0 < len(line) <= max_len:
                     valid_inputs.append(line)
 
         if not valid_inputs:
-            # 使用 transformed_target 而不是 target_name
+            # Use transformed_target instead of target_name
             logging.warning(f"No valid inputs found for {transformed_target}")
             continue
 
-        # 使用 transformed_target 而不是 target_name
+        # Use transformed_target instead of target_name
         logging.info(f"Loaded {len(valid_inputs)} inputs for {transformed_target}")
-        # 策略选择输入
+        # Strategy for selecting inputs
         if strategy == "shuffle":
             random.shuffle(valid_inputs)
             inputs = valid_inputs[:n_fuzz]
@@ -284,44 +292,47 @@ def substitute_one_repo(
         else:
             inputs = valid_inputs[:n_fuzz]
 
-        # 每个 fuzz input 生成一个单独的文件（使用 AST）
+        # Generate a separate file for each fuzz input (using AST)
         for idx, fuzz_input in enumerate(inputs, start=1):
             with open(source_file, "r") as f_src:
                 code = f_src.read()
 
             try:
-                # 解析为 AST
+                # Parse into AST
                 tree = ast.parse(code)
 
-                # 应用转换器
+                # Apply transformer
                 transformer = TestGenTransformer(idx, fuzz_input)
                 new_tree = transformer.visit(tree)
                 ast.fix_missing_locations(new_tree)
 
-                # 确保找到并处理了测试函数
+                # Ensure test function was found and processed
                 if not transformer.found_test_function:
                     logging.warning(f"No test_ function found in {source_file}")
                     continue
 
-                # 生成新代码
+                # Generate new code
                 new_code = astunparse.unparse(new_tree)
 
-                # 使用 transformed_target 而不是 target_name
+                # Use transformed_target instead of target_name
                 out_path = pjoin(template_dir, f"{transformed_target}.testgen_{idx}.py")
                 with open(out_path, "w") as f_out:
                     f_out.write(new_code)
 
-                # 格式化代码
+                # Format code
                 try:
                     subprocess.run(["black", out_path], check=False)
                 except FileNotFoundError:
                     logging.warning("Black formatter not found, skipping formatting")
-                
+
             except SyntaxError as e:
                 logging.error(f"Syntax error when processing {source_file}: {e}")
             except Exception as e:
-                # 使用 transformed_target 而不是 target_name
-                logging.error(f"Error generating test case for {transformed_target}: {e}")
+                # Use transformed_target instead of target_name
+                logging.error(
+                    f"Error generating test case for {transformed_target}: {e}"
+                )
+
 
 def testgen_repos(
     repos: list[str],
@@ -348,10 +359,10 @@ def testgen_repos(
         project_name = os.path.basename(repo)
         oss_fuzz_dir = Path(repo).parent.parent
         raw_targets = discover_targets(project_name, oss_fuzz_dir)
-        
-        # 保存原始目标名称和转换后的目标名称
+
+        # Save original target names and transformed target names
         transformed_targets = [t.replace("_print1", "") for t in raw_targets]
-        targets = list(zip(transformed_targets, raw_targets))  # (转换后, 原始)
+        targets = list(zip(transformed_targets, raw_targets))  # (transformed, raw)
         target_map[repo] = targets
 
     # Process each repository in parallel
@@ -359,17 +370,18 @@ def testgen_repos(
         list(
             p.map(
                 lambda item: substitute_one_repo(
-                    item[0],         # repo path
-                    item[1],         # list of (transformed, raw) targets
-                    n_fuzz, 
-                    strategy, 
-                    max_len, 
-                    sim_thresh
+                    item[0],  # repo path
+                    item[1],  # list of (transformed, raw) targets
+                    n_fuzz,
+                    strategy,
+                    max_len,
+                    sim_thresh,
                 ),
                 target_map.items(),
             )
         )
 
+
 def main(
     repo_id: str = "data/valid_projects.txt",
     repo_root: str = "fuzz/oss-fuzz/projects/",
@@ -431,4 +443,4 @@ def main(
 
 if __name__ == "__main__":
     logging.basicConfig(level=logging.INFO)
-    fire.Fire(main)
\ No newline at end of file
+    fire.Fire(main)

From dd0a8ab3ebe8520fd14885d6a6d3f870439ce903 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Mon, 18 Aug 2025 22:00:55 +0000
Subject: [PATCH 128/134] use relative path

---
 fuzz/modify_fuzz_files.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fuzz/modify_fuzz_files.py b/fuzz/modify_fuzz_files.py
index 1ed8b48..1f5d2a4 100644
--- a/fuzz/modify_fuzz_files.py
+++ b/fuzz/modify_fuzz_files.py
@@ -41,7 +41,7 @@ def visit_FunctionDef(self, node):
     return new_content
 
 def main(
-    projects_path="/home/jiayiguo/FuzzAug/fuzz/oss-fuzz/projects",
+    projects_path="fuzz/oss-fuzz/projects",
     valid_projects_file="data/valid_projects.txt"
 ):
     """

From b9956f30a6827500075d6b703092c71e4ee3e61c Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Mon, 18 Aug 2025 22:01:48 +0000
Subject: [PATCH 129/134] use relative path

---
 fuzz/clean_fuzz_dir.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fuzz/clean_fuzz_dir.py b/fuzz/clean_fuzz_dir.py
index 9a9bdf8..af383e7 100644
--- a/fuzz/clean_fuzz_dir.py
+++ b/fuzz/clean_fuzz_dir.py
@@ -3,7 +3,7 @@
 import shutil
 import fire
 
-def clean_project_dirs(root_dir="/home/jiayiguo/FuzzAug/fuzz/oss-fuzz/projects"):
+def clean_project_dirs(root_dir="/fuzz/oss-fuzz/projects"):
     """
     清理 oss-fuzz 项目目录下的 fuzz_inputs 和 tests-gen 文件夹
 

From e771b99d8a6a46d04676a3f8384adb5ef6b6dddd Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Mon, 18 Aug 2025 23:32:02 +0000
Subject: [PATCH 130/134] move the class outside of the function

---
 fuzz/modify_fuzz_files.py | 63 +++++++++++++++++----------------------
 1 file changed, 27 insertions(+), 36 deletions(-)

diff --git a/fuzz/modify_fuzz_files.py b/fuzz/modify_fuzz_files.py
index 1f5d2a4..c57f0c9 100644
--- a/fuzz/modify_fuzz_files.py
+++ b/fuzz/modify_fuzz_files.py
@@ -3,39 +3,40 @@
 import ast
 import fire
 
+
+class InsertPrintTransformer(ast.NodeTransformer):
+    def visit_FunctionDef(self, node):
+        if node.name in ("TestOneInput", "TestInput") and node.args.args:
+            first_arg_name = node.args.args[0].arg
+            print_stmt = ast.Expr(
+                value=ast.Call(
+                    func=ast.Name(id='print', ctx=ast.Load()),
+                    args=[ast.Name(id=first_arg_name, ctx=ast.Load())],
+                    keywords=[]
+                )
+            )
+            # 添加空body检查
+            if not node.body:
+                node.body.append(print_stmt)
+            else:
+                # 增强重复检查逻辑
+                first_stmt = node.body[0]
+                if not (isinstance(first_stmt, ast.Expr) 
+                        and isinstance(first_stmt.value, ast.Call)
+                        and hasattr(first_stmt.value.func, 'id')
+                        and first_stmt.value.func.id == 'print'):
+                    node.body.insert(0, print_stmt)
+        return node
+
 def add_print_to_testoneinput(file_path):
     with open(file_path, 'r') as f:
         content = f.read()
 
-    # 解析 AST
     tree = ast.parse(content)
-
-    class InsertPrintTransformer(ast.NodeTransformer):
-        def visit_FunctionDef(self, node):
-            if node.name in ("TestOneInput", "TestInput") and node.args.args:
-                first_arg_name = node.args.args[0].arg
-                # 创建 print(参数名) 语句
-                print_stmt = ast.Expr(
-                    value=ast.Call(
-                        func=ast.Name(id='print', ctx=ast.Load()),
-                        args=[ast.Name(id=first_arg_name, ctx=ast.Load())],
-                        keywords=[]
-                    )
-                )
-                # 确保没有重复插入
-                if not (
-                    isinstance(node.body[0], ast.Expr)
-                    and isinstance(node.body[0].value, ast.Call)
-                    and getattr(node.body[0].value.func, "id", None) == "print"
-                ):
-                    node.body.insert(0, print_stmt)
-            return node
-
     transformer = InsertPrintTransformer()
     new_tree = transformer.visit(tree)
     ast.fix_missing_locations(new_tree)
 
-    # 转回代码
     import astor
     new_content = astor.to_source(new_tree)
     return new_content
@@ -44,19 +45,12 @@ def main(
     projects_path="fuzz/oss-fuzz/projects",
     valid_projects_file="data/valid_projects.txt"
 ):
-    """
-    给 fuzz target 的 TestOneInput / TestInput 函数开头插入 print(参数名)
-
-    Args:
-        projects_path (str): OSS-Fuzz 项目的根目录
-        valid_projects_file (str): 包含有效项目名的文件路径
-    """
+    """为fuzz target添加打印语句"""
     with open(valid_projects_file, 'r') as f:
         projects = [line.strip() for line in f if line.strip()]
 
     for project in projects:
         project_dir = os.path.join(projects_path, project)
-
         if not os.path.isdir(project_dir):
             continue
 
@@ -64,11 +58,8 @@ def main(
             for file in files:
                 if file.startswith('fuzz_') and file.endswith('.py'):
                     file_path = os.path.join(root, file)
-
                     try:
                         new_content = add_print_to_testoneinput(file_path)
-
-                        # 保存修改后的文件
                         new_file_path = file_path.rsplit('.', 1)[0] + '_print1.py'
                         with open(new_file_path, 'w') as f:
                             f.write(new_content)
@@ -78,4 +69,4 @@ def main(
                         print(f"Error processing {file_path}: {str(e)}")
 
 if __name__ == "__main__":
-    fire.Fire(main)
+    fire.Fire(main)
\ No newline at end of file

From 4bc7c941743b7009c9812bd858982ac9881c4cc4 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Mon, 18 Aug 2025 23:34:37 +0000
Subject: [PATCH 131/134] add tuple's type

---
 fuzz/collect_fuzz_python.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py
index d4592e0..c058fe7 100644
--- a/fuzz/collect_fuzz_python.py
+++ b/fuzz/collect_fuzz_python.py
@@ -222,7 +222,7 @@ def _transform_repo(repo: str):
 
 def substitute_one_repo(
     repo: str,
-    targets: list[tuple],  # Each element is (transformed_target, raw_target)
+    targets: list[tuple[str,str]],  # Each element is (transformed_target, raw_target)
     n_fuzz: int,
     strategy: str,
     max_len: int,

From e527188315e9419b60d7be9aec30cf33f886cf94 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Mon, 18 Aug 2025 23:58:00 +0000
Subject: [PATCH 132/134] Properly handle indentation and process data after
 the file is closed.

---
 fuzz/collect_fuzz_python.py | 38 ++++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py
index c058fe7..1d45df1 100644
--- a/fuzz/collect_fuzz_python.py
+++ b/fuzz/collect_fuzz_python.py
@@ -256,25 +256,25 @@ def substitute_one_repo(
         valid_inputs = []
         with open(input_path, "rb") as f_input:
             lines = f_input.readlines()
-            # File is closed, now process data
-            for line in lines:
-                # Use errors='replace' to ensure decoding doesn't fail
-                decoded = line.decode("utf-8", errors="replace")
-
-                # Only process lines starting with b' or b"
-                if decoded.startswith(("b'", 'b"')):
-                    if decoded.startswith("b'") and decoded.endswith("'\n"):
-                        byte_data = line[2:-2]
-                    elif decoded.startswith('b"') and decoded.endswith('"\n'):
-                        byte_data = line[2:-2]
-                    else:
-                        continue
-
-                    if 0 < len(byte_data) <= max_len:
-                        valid_inputs.append(byte_data)
-                # For other lines, if length is within range and doesn't start with b' or b", also consider adding
-                elif 0 < len(line) <= max_len:
-                    valid_inputs.append(line)
+        # File is closed, now process data
+        for line in lines:
+            # Use errors='replace' to ensure decoding doesn't fail
+            decoded = line.decode("utf-8", errors="replace")
+
+            # Only process lines starting with b' or b"
+            if decoded.startswith(("b'", 'b"')):
+                if decoded.startswith("b'") and decoded.endswith("'\n"):
+                    byte_data = line[2:-2]
+                elif decoded.startswith('b"') and decoded.endswith('"\n'):
+                    byte_data = line[2:-2]
+                else:
+                    continue
+
+                if 0 < len(byte_data) <= max_len:
+                    valid_inputs.append(byte_data)
+            # For other lines, if length is within range and doesn't start with b' or b", also consider adding
+            elif 0 < len(line) <= max_len:
+                valid_inputs.append(line)
 
         if not valid_inputs:
             # Use transformed_target instead of target_name

From bb2d9e8cbc16e8b571d2f6e68989498c9c54533d Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Tue, 19 Aug 2025 07:03:50 +0000
Subject: [PATCH 133/134] correct the relative path

---
 fuzz/clean_fuzz_dir.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fuzz/clean_fuzz_dir.py b/fuzz/clean_fuzz_dir.py
index af383e7..d420915 100644
--- a/fuzz/clean_fuzz_dir.py
+++ b/fuzz/clean_fuzz_dir.py
@@ -3,7 +3,7 @@
 import shutil
 import fire
 
-def clean_project_dirs(root_dir="/fuzz/oss-fuzz/projects"):
+def clean_project_dirs(root_dir="fuzz/oss-fuzz/projects"):
     """
     清理 oss-fuzz 项目目录下的 fuzz_inputs 和 tests-gen 文件夹
 

From 8eb4971fcd457efe3d3b9a24964de72eb7f61602 Mon Sep 17 00:00:00 2001
From: joyguoguo <599325370@qq.com>
Date: Tue, 19 Aug 2025 07:27:56 +0000
Subject: [PATCH 134/134] add black to requirements.txt

---
 requirements.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index e06d264..25dfb8c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -18,4 +18,5 @@ dacite==1.8.1
 pathos==0.3.2
 bitsandbytes==0.43.2
 pandas==2.2.2
-matplotlib==3.9.2
\ No newline at end of file
+matplotlib==3.9.2
+black==25.1.0
\ No newline at end of file