-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsetup.py
More file actions
77 lines (69 loc) · 2.9 KB
/
setup.py
File metadata and controls
77 lines (69 loc) · 2.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import sys
import os
import subprocess
import argparse
def copy_gm_key():
    """Ensure a GeneMark license key (``.gm_key``) is in the working directory.

    If ``./.gm_key`` already exists nothing is copied. Otherwise the key is
    looked up at ``~/.gm_key`` and copied into the working directory. A
    message describing the outcome is printed in every case; the function
    always returns ``None`` and never raises on a failed copy.
    """
    # Local import: shutil is only needed here.
    import shutil

    local_key = './.gm_key'
    if os.path.exists(local_key):
        print('GeneMark license found in funQCD directory')
        return

    homedirkey = os.path.join(os.path.expanduser('~'), '.gm_key')
    if not os.path.exists(homedirkey):
        print('GeneMark license not found in home directory. Please download .gm_key from https://genemark.bme.gatech.edu/license_download.cgi')
        return

    # Copy in-process instead of shelling out to `cp`, so a failure is
    # detected and reported rather than being followed by a success message.
    try:
        shutil.copy(homedirkey, local_key)
    except OSError as err:
        print(f'Could not copy GeneMark license from {homedirkey}: {err}')
        return
    print('Copied GeneMark license to working directory')
def make_samples_csv(path):
    """Write ``config/samples.csv`` describing the samples found in *path*.

    Files ending in ``_R1.fastq.gz`` or ``_R2.fastq.gz`` are recorded with
    data type ``run``; files ending in ``.fasta`` are recorded with data type
    ``assembly``. All other files are ignored. The output is tab-separated
    with a ``sample_id``/``data_type`` header and one row per sample ID,
    sorted by sample ID. An existing ``config/samples.csv`` is overwritten.

    Args:
        path: Directory whose entries are scanned (not recursively).
    """
    read_suffixes = ('_R1.fastq.gz', '_R2.fastq.gz')
    assembly_suffix = '.fasta'

    if os.path.exists('config/samples.csv'):
        print('Overwriting config/samples.csv with new version')

    sample_id_dict = {}
    for f in os.listdir(path):
        if f.endswith(read_suffixes):
            # Both read suffixes have the same length, so slicing by that
            # length keeps everything to the left of the suffix even when
            # the sample name itself contains '_R'.
            sample_id = f[:-len(read_suffixes[0])]
            data_type = 'run'
        elif f.endswith(assembly_suffix):
            # Strip only the trailing suffix; splitting on '.fasta' would
            # truncate names that contain '.fasta' earlier in the string.
            sample_id = f[:-len(assembly_suffix)]
            data_type = 'assembly'
        else:
            continue

        if not sample_id:
            # A file named exactly like a suffix has no usable sample ID.
            continue

        if data_type == 'run':
            sample_id_dict[sample_id] = data_type
        else:
            # Reads take precedence when a sample has both reads and an
            # assembly, so the result does not depend on os.listdir() order.
            sample_id_dict.setdefault(sample_id, data_type)

    with open('config/samples.csv', 'w') as fhout:
        fhout.write('sample_id\tdata_type\n')
        fhout.writelines(
            f'{sid}\t{sample_id_dict[sid]}\n' for sid in sorted(sample_id_dict)
        )
def add_path_to_config(path, prefix, config_file='config/config.yaml'):
    """Set the ``short_reads`` and ``prefix`` entries in a YAML config file.

    Every line that starts with ``short_reads:`` or ``prefix:`` is replaced
    with the new value; all other lines are written back unchanged. If a key
    does not appear at the start of any line, it is appended to the end of
    the file so the requested value is never silently dropped.

    Args:
        path: Value written for ``short_reads``.
        prefix: Value written for ``prefix``.
        config_file: Config file to rewrite in place.
    """
    replacements = {
        'short_reads:': f'short_reads: {path}\n',
        'prefix:': f'prefix: {prefix}\n',
    }
    found = set()
    lines = []
    with open(config_file, 'r') as fh:
        for line in fh:
            for key, new_line in replacements.items():
                if line.startswith(key):
                    line = new_line
                    found.add(key)
                    break
            lines.append(line)

    missing = [new_line for key, new_line in replacements.items() if key not in found]
    if missing:
        # Make sure appended keys start on their own line.
        if lines and not lines[-1].endswith('\n'):
            lines[-1] += '\n'
        lines.extend(missing)

    with open(config_file, 'w') as fh_out:
        fh_out.writelines(lines)
def main():
    """Parse command-line arguments and prepare the pipeline configuration.

    Requires ``--path``/``-p`` (directory holding the raw data) and
    ``--prefix``/``-f`` (batch name). After checking that the path is an
    existing directory, the function copies the GeneMark license if needed,
    writes ``config/samples.csv`` from the directory contents, and records
    the path and prefix in ``config/config.yaml``.

    Raises:
        SystemExit: With status 1 if ``--path`` is not an existing directory
            (argparse also exits on invalid or missing arguments).
    """
    # define all args
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--path', '-p', type=str, required=True,
        help=(
            'Provide a path to the raw data to process with the pipeline. '
            'This path will be added to config/config.yaml, and '
            'a config/samples.csv file will be created from the directory '
            'contents. Files to process must end in _R1.fastq.gz or '
            '_R2.fastq.gz'
        ),
    )
    parser.add_argument(
        '--prefix', '-f', type=str, required=True,
        help='Provide a name for this batch. This will be added to config/config.yaml.',
    )
    args = parser.parse_args()

    if not os.path.isdir(args.path):
        print(f'Could not locate directory at {args.path}', file=sys.stderr)
        # sys.exit is always available; quit() is only injected by the site
        # module for interactive use.
        sys.exit(1)

    # Ensure the provided path does not end with '/'. Strip every trailing
    # slash, but keep the filesystem root intact instead of reducing it to ''.
    args.path = args.path.rstrip('/') or '/'

    copy_gm_key()
    make_samples_csv(args.path)
    add_path_to_config(args.path, args.prefix)
# Run the setup steps only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()