-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathreverse_complement.sh
More file actions
executable file
·82 lines (69 loc) · 3.01 KB
/
reverse_complement.sh
File metadata and controls
executable file
·82 lines (69 loc) · 3.01 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#!/bin/bash -l
#SBATCH -D /group/jrigrp3/Velasco/Prunus/fasta/
#SBATCH -o /home/dmvelasc/Projects/Prunus/slurm-log/%A_%a-revcomp-stdout.txt
#SBATCH -e /home/dmvelasc/Projects/Prunus/slurm-log/%A_%a-revcomp-stderr.txt
#SBATCH -J revcomp
#SBATCH -p bigmemh
#SBATCH -t 8-00:00:00
#SBATCH -n 1
#SBATCH -c 1
#SBATCH -a 9,10,13-28,30-34,36-37,39,41-47,49-57,62-67%5
#SBATCH --mem=8G
#SBATCH --exclude=bigmem1
#SBATCH --mail-user=dmvelasco@ucdavis.edu
#SBATCH --mail-type=ALL
# Strict mode: abort on any failing command (-e), on use of an unset
# variable (-u), and when ANY stage of a pipeline fails (pipefail).
# Without pipefail only the last stage's status counts, so a failing `tail`
# in a `tail | fold` pipeline would go unnoticed and a header-only FASTA
# would then be moved over the real one.
set -euo pipefail
### Array indices covering all samples: 1-5,7-10,13-28,30-34,36-37,39,41-47,49-57,62-67%2
########## Reverse complement minus-strand GENE/CDS FASTAs ##########

### Environment modules ###
module load zlib

### Directories ###
script='/home/dmvelasc/Projects/Prunus/Script'     # helper scripts and the sample list
ref='/home/dmvelasc/Data/references/persica-SCF'   # reference directory (holds the gene position list)
scratch='/scratch/dmvelasc'                        # temporary working space

### Input files ###
# Sample ID file; column 1: ID, column 2: other ID/information
list="${script}/sample.txt"
# Gene/CDS interval files (gene_list is not used further down in this script)
gene_pos_list='Prunus_persica_v1.0_gene_position_list.txt'
gene_list='Prunus_persica_v1.0_genes_list.gff3'

### Task index ###
# x is the SLURM array task ID; i = x - 1 is the number of sample-list lines
# to skip so that task N processes line N of the list.
x="${SLURM_ARRAY_TASK_ID}"
i=$((x - 1))
##### Look up the sample ID for this array task #####
# mapfile options: -s skips the first $i lines, -n 1 reads exactly one line,
# -t strips the trailing newline. The line lands in element 0 of array "id"
# (the last word on the command line is the array name).
mapfile -s "$i" -n 1 -t id < "${list}"

# Fail with a clear message when the task index points past the end of the
# sample list, instead of relying on a cryptic "unbound variable" abort.
if (( ${#id[@]} == 0 )); then
  printf 'ERROR: sample list %s has no line %d\n' "$list" "$(( i + 1 ))" >&2
  exit 1
fi

# Split the line on whitespace into its columns (column 1: ID, column 2:
# other ID/information). read -a splits without glob expansion, so a * or ?
# in the line cannot be expanded into file names.
read -r -a arr <<< "${id[0]}"

# Sample ID is the first column; a blank line yields no fields at all.
acc="${arr[0]:-}"
if [[ -z "$acc" ]]; then
  printf 'ERROR: line %d of sample list %s is blank\n' "$(( i + 1 ))" "$list" >&2
  exit 1
fi
# printf prints the ID verbatim (echo -e would interpret backslash escapes).
printf '%s\n' "$acc"

# create scratch directory for temporary file placement
mkdir -p "${scratch}/${acc}"
###############################################################################
# see possible solution at http://seqanswers.com/forums/showthread.php?t=50008
###############################################################################

#######################################
# Reverse complement one per-sample FASTA file in place.
# The result is built in the scratch directory and then moved over the
# original file in the sample directory (relative to the working directory).
# Globals:   script, scratch, acc (read)
# Arguments: $1 - gene ID
#            $2 - file type suffix used in the file name ("gene" or "cds")
# Returns:   non-zero if any step fails (which aborts the job under set -e)
#######################################
revcomp_fasta() {
  local gene_id=$1
  local kind=$2
  local fasta="${gene_id}_${acc}_${kind}.fa"
  local tmp="${scratch}/${acc}/${gene_id}_${acc}_temp.fa"
  local out="${scratch}/${acc}/${fasta}"

  "${script}/DNA_reverse_complement.pl" "${acc}/${fasta}" > "$tmp"
  # Write a fresh header carrying the sample ID, then append everything after
  # the first line of the Perl script's output (presumably its own FASTA
  # header - confirm against DNA_reverse_complement.pl), wrapped at 60 columns.
  echo ">${acc}" > "$out"
  tail -n +2 "$tmp" | fold -w 60 - >> "$out"
  mv "$out" "${acc}/"
  rm "$tmp"
}

# NOTE(review): the FASTAs are rewritten in place, so re-running a task looks
# like it would reverse complement the same files a second time - confirm.
echo -e "Reverse complement opposite strand FASTAs"
locus=()
# read -r -a splits each line on whitespace into "locus" without backslash
# processing or glob expansion. The || clause keeps a final line that lacks
# a trailing newline from being silently dropped.
while read -r -a locus || (( ${#locus[@]} > 0 )); do
  # Fields 4 and 5 (0-based 3 and 4) are needed; skip blank lines quietly
  # and report malformed ones rather than aborting on an unbound element.
  if (( ${#locus[@]} < 5 )); then
    (( ${#locus[@]} == 0 )) \
      || printf 'WARNING: skipping malformed line: %s\n' "${locus[*]}" >&2
    continue
  fi
  gene_id="${locus[4]}"
  strand="${locus[3]}"
  # reverse complement reverse strand gene and CDS sequences
  if [[ "$strand" == '-' ]]; then
    revcomp_fasta "$gene_id" gene
    revcomp_fasta "$gene_id" cds
  fi
done < "${ref}/${gene_pos_list}"
echo -e "end FASTA reverse complement process for CDS and genes"
date