diff --git a/adni/10_archive_connectome.sh b/adni/10_archive_connectome.sh new file mode 100644 index 0000000..e1dc0f3 --- /dev/null +++ b/adni/10_archive_connectome.sh @@ -0,0 +1,20 @@ +#!/bin/bash +#SBATCH --account=def-pbellec +#SBATCH --job-name=conn_archive +#SBATCH --output=/home/nclarke/scratch/logs/adni_conn_archive.out +#SBATCH --error=/home/nclarke/scratch/logs/adni_conn_archive.err +#SBATCH --time=06:00:00 +#SBATCH --cpus-per-task=1 +#SBATCH --mem-per-cpu=8G + + +RAW_PATH="/lustre04/scratch/${USER}/adni_connectomes-0.4.1" +DATASET_NAME=`basename $RAW_PATH` + +ARCHIVE_PATH="/lustre03/nearline/6035398/giga_preprocessing_2/adni_fmriprep-20.2.7lts_1682352545/${DATASET_NAME}" + +mkdir -p $ARCHIVE_PATH + +cd ${RAW_PATH} +echo $PWD +tar -vcf ${ARCHIVE_PATH}/${DATASET_NAME}.tar.gz . diff --git a/adni/create_dataset_description.py b/adni/1_create_dataset_description.py similarity index 100% rename from adni/create_dataset_description.py rename to adni/1_create_dataset_description.py diff --git a/adni/create_task-rest_bold.py b/adni/2_create_task-rest_bold.py similarity index 100% rename from adni/create_task-rest_bold.py rename to adni/2_create_task-rest_bold.py diff --git a/adni/create_bidsignore.sh b/adni/3_create_bidsignore.sh similarity index 100% rename from adni/create_bidsignore.sh rename to adni/3_create_bidsignore.sh diff --git a/adni/correct_slice_timing.py b/adni/4_correct_slice_timing.py similarity index 100% rename from adni/correct_slice_timing.py rename to adni/4_correct_slice_timing.py diff --git a/adni/generate_slurm_script.sh b/adni/5_generate_slurm_script.sh similarity index 100% rename from adni/generate_slurm_script.sh rename to adni/5_generate_slurm_script.sh diff --git a/adni/archive_fmriprep.sh b/adni/6_archive_fmriprep.sh similarity index 90% rename from adni/archive_fmriprep.sh rename to adni/6_archive_fmriprep.sh index 2e5e4df..db7e5c6 100644 --- a/adni/archive_fmriprep.sh +++ b/adni/6_archive_fmriprep.sh @@ -1,9 +1,9 @@ #!/bin/bash 
-#SBATCH --account=rrg-pbellec +#SBATCH --account=def-pbellec #SBATCH --job-name=fmriprep_archive #SBATCH --output=/home/nclarke/scratch/logs/adni_fmriprep_archive.out #SBATCH --error=/home/nclarke/scratch//logs/adni_fmriprep_archive.out -#SBATCH --time=48:00:00 +#SBATCH --time=42:00:00 #SBATCH --cpus-per-task=1 #SBATCH --mem-per-cpu=8G diff --git a/adni/7_submit_qc_array.sh b/adni/7_submit_qc_array.sh new file mode 100644 index 0000000..551f9e7 --- /dev/null +++ b/adni/7_submit_qc_array.sh @@ -0,0 +1,28 @@ +#!/bin/bash +#SBATCH --account=def-pbellec +#SBATCH --job-name=adni_qc +#SBATCH --output=/lustre04/scratch/nclarke/logs/adni_qc/%x_%A.%a.out +#SBATCH --error=/lustre04/scratch/nclarke/logs/adni_qc/%x_%A.%a.out +#SBATCH --time=00:10:00 +#SBATCH --cpus-per-task=1 +#SBATCH --mem=8G +#SBATCH --array=1-747 + +module load apptainer + +FMRIPREP_DIR=/lustre04/scratch/nclarke/adni_fmriprep-20.2.7lts_1682352545/adni_bids_output_func/fmriprep-20.2.7lts +GIGA_AUTO_QC_CONTAINER=/home/nclarke/projects/rrg-pbellec/nclarke/giga_preprocess2/giga_auto_qc-0.3.3.simg +QC_OUTPUT=/lustre04/scratch/nclarke/adni_giga_auto_qc-0.3.3_participant +participant_labels=/home/nclarke/projects/rrg-pbellec/nclarke/giga_preprocess2/adni/participant_labels.txt # One subject number per line + +mkdir -p $QC_OUTPUT + +PARTICIPANT_LABEL=$(sed -n "${SLURM_ARRAY_TASK_ID}p" ${participant_labels}) + +# Create a directory for participant +PARTICIPANT_OUTPUT="${QC_OUTPUT}/${PARTICIPANT_LABEL}" +mkdir -p $PARTICIPANT_OUTPUT + +echo "Running ${PARTICIPANT_LABEL} QC" + +apptainer run --cleanenv -B ${FMRIPREP_DIR}:/inputs -B ${PARTICIPANT_OUTPUT}:/outputs ${GIGA_AUTO_QC_CONTAINER} /inputs /outputs participant --participant_label ${PARTICIPANT_LABEL} diff --git a/adni/8_archive_qc.sh b/adni/8_archive_qc.sh new file mode 100644 index 0000000..9e15425 --- /dev/null +++ b/adni/8_archive_qc.sh @@ -0,0 +1,24 @@ +#!/bin/bash +#SBATCH --account=def-pbellec +#SBATCH --job-name=qc_archive +#SBATCH 
--output=/home/nclarke/scratch/logs/adni_qc_archive.out +#SBATCH --error=/home/nclarke/scratch/logs/adni_qc_archive.err +#SBATCH --time=01:00:00 +#SBATCH --cpus-per-task=1 + +# Directory to be archived +RAW_PATH="/lustre04/scratch/${USER}/adni_giga_auto_qc-0.3.3_participant" +DATASET_NAME=$(basename $RAW_PATH) + +# Destination path for the archive +ARCHIVE_PATH="/lustre03/nearline/6035398/giga_preprocessing_2/adni_fmriprep-20.2.7lts_1682352545" + +# Ensure the destination directory exists +mkdir -p $ARCHIVE_PATH + +# Navigate to the parent directory of RAW_PATH +cd $(dirname $RAW_PATH) + +# Tar the directory +tar -vcf ${ARCHIVE_PATH}/${DATASET_NAME}.tar.gz ${DATASET_NAME} + diff --git a/adni/9_connectome_slurm_array.bash b/adni/9_connectome_slurm_array.bash new file mode 100644 index 0000000..8fbc3da --- /dev/null +++ b/adni/9_connectome_slurm_array.bash @@ -0,0 +1,47 @@ +#!/bin/bash +#SBATCH --account=rrg-pbellec +#SBATCH --output=/lustre04/scratch/nclarke/logs/adni_conn/%x_%A.out +#SBATCH --error=/lustre04/scratch/nclarke/logs/adni_conn/%x_%A.out +#SBATCH --cpus-per-task=1 +#SBATCH --array=1-747 + +GIGA_CONNECTOME_VERSION=0.4.1 +GIGA_CONNECTOME="/home/${USER}/projects/rrg-pbellec/containers/giga_connectome-${GIGA_CONNECTOME_VERSION}.simg" +FMRIPREP_DIR="/lustre04/scratch/nclarke/adni_fmriprep-20.2.7lts_1682352545/adni_bids_output_func/fmriprep-20.2.7lts" +CONNECTOME_OUTPUT="/home/nclarke/scratch/adni_connectomes-0.4.1" +participant_labels="/home/nclarke/projects/rrg-pbellec/nclarke/giga_preprocess2/adni/participant_labels.txt" # One subject number per line + +WORKINGDIR="/home/nclarke/scratch/adni_connectomes-0.4.1/working_directory" + +module load apptainer + +mkdir -p $WORKINGDIR + +PARTICIPANT_LABEL=$(sed -n "${SLURM_ARRAY_TASK_ID}p" ${participant_labels}) +PARTICIPANT_OUTPUT="${CONNECTOME_OUTPUT}/${PARTICIPANT_LABEL}" + +echo "${FMRIPREP_DIR}" +if [ -d "${FMRIPREP_DIR}" ]; then + mkdir -p ${WORKINGDIR} + mkdir -p ${SLURM_TMPDIR} + mkdir -p 
${CONNECTOME_OUTPUT} + mkdir -p ${PARTICIPANT_OUTPUT} + echo "Running ${PARTICIPANT_LABEL} connectomes" + echo "=========${STRATEGY}=========" + echo "${ATLAS}" + apptainer run \ + --bind ${FMRIPREP_DIR}:/data/input \ + --bind ${SLURM_TMPDIR}:/data/output \ + ${GIGA_CONNECTOME} \ + --atlas ${ATLAS} \ + --denoise-strategy ${STRATEGY} \ + ${INTRANETWORK_FLAG} \ + /data/input \ + /data/output \ + participant \ + --participant_label ${PARTICIPANT_LABEL} + exitcode=$? # catch exit code + if [ $exitcode -eq 0 ] ; then rsync -rltv --info=progress2 ${SLURM_TMPDIR}/*.h5 ${PARTICIPANT_OUTPUT} ; fi +else + echo "no preprocessed data for ${DATASET}" +fi diff --git a/adni/9_submit_connectome_array.sh b/adni/9_submit_connectome_array.sh new file mode 100644 index 0000000..644a6bf --- /dev/null +++ b/adni/9_submit_connectome_array.sh @@ -0,0 +1,28 @@ +#!/bin/bash +#SBATCH --account=rrg-pbellec + +DATASET="adni" + +STRATEGIES=("acompcor50" "simple" "simple+gsr" "scrubbing.2" "scrubbing.2+gsr" "scrubbing.5" "scrubbing.5+gsr") +ATLASES=("Schaefer20187Networks" "MIST" "DiFuMo") + +for strategy in "${STRATEGIES[@]}"; do + for atlas in "${ATLASES[@]}"; do + mem=12G + time="00:10:00" + INTRANETWORK_FLAG="--calculate-intranetwork-average-correlation" + + if [ "${atlas}" == "DiFuMo" ]; then + time="00:12:00" + mem=14G + INTRANETWORK_FLAG="" + fi + + echo "Submitting ${atlas} ${strategy}" + sbatch \ + --time=${time} --mem-per-cpu=${mem} \ + --job-name=${DATASET}_${atlas}_${strategy} \ + --export=DATASET="${DATASET}",ATLAS="${atlas}",STRATEGY="${strategy}" \ + ./9_connectome_slurm_array.bash + done +done diff --git a/adni/README.md b/adni/README.md index ffac638..e04442e 100644 --- a/adni/README.md +++ b/adni/README.md @@ -5,25 +5,40 @@ Scripts for preprocessing ADNI data with fMRIPrep2.2.7lts. - Python ## Retrieving data Data not openly available. Used BIDS-ified dataset on Elm. -## BIDS errors +## BIDS validation fixes Ran the following scripts to fix errors: -1. 
`create_dataset_description.py` +- `1_create_dataset_description.py` - creates `dataset_description.json` at the root directory with minimal information - BIDS version used is unknown, so specified latest version (see [discussion]( https://neurostars.org/t/what-bids-version-to-use-for-legacy-dataset/25619)) - fixes BIDS validation error `code: 57 DATASET_DESCRIPTION_JSON_MISSING` -2. `create_task-rest_bold.py` +- `2_create_task-rest_bold.py` - creates `task-rest_bold.json` at the root directory, detailing bold task - fixes BIDS validation error `code: 50 TASK_NAME_MUST_DEFINE` -3. `create_bidsignore.sh` +- `3_create_bidsignore.sh` - creates `.bidsignore` at the root and adds `*T1w_cropped.nii` to ignore these files, which are not needed - fixes BIDS validation error `code: 1 - NOT_INCLUDED` - TODO: check this also fixes error `code: 63 - SESSION_VALUE_CONTAINS_ILLEGAL_CHARACTER`, it should do -3. `correct_slice_timing.py` +- `4_correct_slice_timing.py` - halves the slice timing for sub-109S4594 (ses-20160502), which appears to be doubled (see [discussion](https://neurostars.org/t/help-with-bids-errors-66-and-75-in-legacy-dataset/25625)) - fixes BIDS validation error `code: 66 SLICETIMING_VALUES_GREATOR_THAN_REPETITION_TIME` +## Run fMRIPrep +- `5_generate_slurm_script.sh` +- `6_archive_fmriprep.sh` + +## Run QC +- `7_submit_qc_array.sh` + - run once with `--reindex-bids` flag on one participant to build the database layout (increase wall time) + - remove flag and run again with all participants. +- `8_archive_qc.sh` + +## Generate connectomes +- `9_connectome_slurm_array.bash` and `9_submit_connectome_array.sh`. As above run once with `--reindex-bids` flag and one atlas/strategy. I submitted each atlas/strategy pair individually because I hadn't yet figured out the optimal way to submit them (see e.g. HCP-EP) +- `10_archive_connectome.sh` + n.b. error `code: 75 - NIFTI_PIXDIM4` affected two subjects. 
Suggested fix to edit nifti header fields (see [discussion](https://neurostars.org/t/help-with-bids-errors-66-and-75-in-legacy-dataset/25625/2)). One subject would fail anyway, since `Time` field is `0`, so in interests of saving time I have left this for now. +n.b. around 100 subjects were processed without the --use-aroma flag; used the detect_timeout.py script to remove them. diff --git a/adni/archive_raw.sh b/adni/archive_raw.sh new file mode 100644 index 0000000..0897c00 --- /dev/null +++ b/adni/archive_raw.sh @@ -0,0 +1,20 @@ +#!/bin/bash +#SBATCH --account=def-pbellec +#SBATCH --job-name=raw_archive +#SBATCH --output=/home/nclarke/scratch/logs/adni_raw_archive.out +#SBATCH --error=/home/nclarke/scratch/logs/adni_raw_archive.err +#SBATCH --time=12:00:00 +#SBATCH --cpus-per-task=1 +#SBATCH --mem-per-cpu=8G + + +RAW_PATH="/lustre04/scratch/nclarke/adni_bids_output_func" +DATASET_NAME=`basename $RAW_PATH` + +ARCHIVE_PATH="/lustre03/nearline/6035398/giga_preprocessing_2/raw/${DATASET_NAME}" + +mkdir -p $ARCHIVE_PATH + +cd ${RAW_PATH} +echo $PWD +tar -vcf ${ARCHIVE_PATH}/${DATASET_NAME}.tar.gz . 
diff --git a/adni/submit_qc_scrub5.sh b/adni/submit_qc_scrub5.sh new file mode 100644 index 0000000..d87cb46 --- /dev/null +++ b/adni/submit_qc_scrub5.sh @@ -0,0 +1,29 @@ +#!/bin/bash +#SBATCH --account=def-pbellec +#SBATCH --job-name=qc +#SBATCH --output=/lustre07/scratch/nclarke/logs/%x_%A.out +#SBATCH --error=/lustre07/scratch/nclarke/logs/%x_%A.out +#SBATCH --time=01:00:00 +#SBATCH --cpus-per-task=1 +#SBATCH --mem=12G +#SBATCH --array=1-747 + +module load apptainer + +FMRIPREP_DIR=/lustre07/scratch/${USER}/adni_bids_output_func/fmriprep-20.2.7lts +GIGA_AUTO_QC_CONTAINER=/home/${USER}/giga_auto_qc-0.3.3.simg +QC_OUTPUT=/lustre07/scratch/${USER}/adni_giga_auto_qc-0.3.3_scrub.5 +QC_PARAMS=/home/${USER}/qc_params_scrub5.json +participant_labels=/home/${USER}/participant_labels.txt # One subject number per line + +mkdir -p $QC_OUTPUT + +PARTICIPANT_LABEL=$(sed -n "${SLURM_ARRAY_TASK_ID}p" ${participant_labels}) + +# Create a directory for participant +PARTICIPANT_OUTPUT="${QC_OUTPUT}/${PARTICIPANT_LABEL}" +mkdir -p $PARTICIPANT_OUTPUT + +echo "Running ${PARTICIPANT_LABEL} QC" + +apptainer run --cleanenv -B ${QC_PARAMS} -B ${FMRIPREP_DIR}:/inputs -B ${PARTICIPANT_OUTPUT}:/outputs ${GIGA_AUTO_QC_CONTAINER} /inputs /outputs --quality_control_parameters ${QC_PARAMS} participant --participant_label ${PARTICIPANT_LABEL}