From 9b10799550a40539628c16da31e0a9fd430f9f60 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 7 Apr 2026 02:59:52 +0000 Subject: [PATCH 1/3] Initial plan From b8bfe50ae252f73a1570c7eaf58daff3ce38573b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 7 Apr 2026 03:14:25 +0000 Subject: [PATCH 2/3] Fix broken GitHub Actions CI: handle 429 rate limiting, remove empty workflow, bump version Agent-Logs-Url: https://github.com/mwang87/MassQueryLanguage/sessions/30ce6a73-73e7-42c1-a6bf-c781294326de Co-authored-by: mwang87 <96528+mwang87@users.noreply.github.com> --- .github/workflows/test-workflow.yml | 37 ----------------------------- setup.py | 2 +- tests/get_data.sh | 11 +++++---- 3 files changed, 8 insertions(+), 42 deletions(-) delete mode 100644 .github/workflows/test-workflow.yml diff --git a/.github/workflows/test-workflow.yml b/.github/workflows/test-workflow.yml deleted file mode 100644 index fb34249..0000000 --- a/.github/workflows/test-workflow.yml +++ /dev/null @@ -1,37 +0,0 @@ -# # This workflow will install Python dependencies, run tests and lint with a single version of Python -# # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions - -# name: NF Workflow Testing - -# on: -# push: -# branches: [ master ] -# pull_request: -# branches: [ master ] - -# jobs: -# nextflow-test: - -# runs-on: ubuntu-latest - -# steps: -# - uses: actions/checkout@v2 -# #- name: Set up Python 3.9 -# # uses: actions/setup-python@v2 -# # with: -# # python-version: 3.9 -# - name: Install dependencies -# run: | -# #python -m pip install --upgrade pip -# #pip install flake8 pytest -# #if [ -f requirements.txt ]; then pip install -r requirements.txt; fi -# wget -qO- get.nextflow.io | bash -# sudo mv nextflow /usr/local/bin/ -# - name: Download Data -# run: | -# cd tests && sh ./get_data.sh && cd .. -# - name: Workflow Testing -# run: | -# cd workflow -# make run_test -# make run_test_no_extract diff --git a/setup.py b/setup.py index a735f09..dcdfbab 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ setuptools.setup( name="massql", - version="2026.03.14", + version="2026.04.07", author="Mingxun Wang", author_email="mwang87@gmail.com", description="Mass spectrometry query language python implementation", diff --git a/tests/get_data.sh b/tests/get_data.sh index cde6886..f01d0ab 100644 --- a/tests/get_data.sh +++ b/tests/get_data.sh @@ -1,14 +1,17 @@ mkdir -p data cd data -# Rate limit: at most 10 files/minute => sleep 6s between downloads. +# massiveproxy.gnps2.org enforces per-IP rate limiting (~1 request per 70 seconds). +# Use --retry-on-http-error=429 with exponential backoff (--waitretry=120, --tries=10) +# so wget retries automatically after waiting long enough for the rate limit to reset. download() { - wget --no-verbose --tries=3 --waitretry=5 --output-document="$1" "$2" - sleep 6 + wget --no-verbose --tries=10 --waitretry=120 --retry-on-http-error=429 --output-document="$1" "$2" + sleep 5 } download GNPS00002_A3_p.mzML "https://massiveproxy.gnps2.org/massiveproxy/MSV000084494/ccms_peak/raw/GNPS00002_A3_p.mzML" -download GNPS00002_A3_p.mzml "https://massiveproxy.gnps2.org/massiveproxy/MSV000084494/ccms_peak/raw/GNPS00002_A3_p.mzML" +# mzml (lowercase) is the same file as mzML - copy to avoid an extra rate-limited request +cp GNPS00002_A3_p.mzML GNPS00002_A3_p.mzml download GNPS00002_A10_n.mzML "https://massiveproxy.gnps2.org/massiveproxy/MSV000084494/ccms_peak/raw/GNPS00002_A10_n.mzML" download QC_0.mzML "https://massiveproxy.gnps2.org/massiveproxy/MSV000085852/ccms_peak/QC_raw/QC_0.mzML" download bld_plt1_07_120_1.mzML "https://massiveproxy.gnps2.org/massiveproxy/MSV000085944/ccms_peak/raw_data/bld_plt1_07_120_1.mzML" From 81074372abd1b10ffc9c85ad3a0b68407a823c71 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 7 Apr 2026 06:43:16 +0000 Subject: [PATCH 3/3] Add diagnostic file-size and head printout at end of get_data.sh Agent-Logs-Url: https://github.com/mwang87/MassQueryLanguage/sessions/26ceb5d4-b70c-4989-9b30-6b2efac04d4c Co-authored-by: mwang87 <96528+mwang87@users.noreply.github.com> --- tests/get_data.sh | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/get_data.sh b/tests/get_data.sh index f01d0ab..75b78d7 100644 --- a/tests/get_data.sh +++ b/tests/get_data.sh @@ -31,3 +31,17 @@ download MMSRG_027.mzML "https://massiveproxy.gnps2.org/massiveproxy/MSV00008826 download featurelist_pos.mgf "https://massiveproxy.gnps2.org/massiveproxy/MSV000086995/updates/2022-01-18_mwang87_e619431a/peak/bahbobeh/featurelist_pos.mgf" download GT15A.mzML "https://massiveproxy.gnps2.org/massiveproxy/MSV000087048/ccms_peak/Green_Tea_manuscript_data/GT15A.mzML" download PLT2_B1.mzML "https://massiveproxy.gnps2.org/massiveproxy/MSV000088800/ccms_peak/NRRL_PLT2_czapek_solid_raw/PLT2_B1.mzML" + +# --- Diagnostic: print file sizes and first 5 lines of each downloaded file --- +echo "" +echo "=== Downloaded file sizes ===" +ls -lh . + +echo "" +echo "=== File heads for debugging ===" +for f in *.mzML *.mzml *.mzXML *.mgf *.json; do + [ -f "$f" ] || continue + echo "" + echo "--- $f ($(wc -c < "$f") bytes) ---" + head -5 "$f" +done