-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathselective-sweep-vcftools.py
More file actions
51 lines (39 loc) · 2.67 KB
/
selective-sweep-vcftools.py
File metadata and controls
51 lines (39 loc) · 2.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import argparse
import pandas as pd
# Parse command line arguments
parser = argparse.ArgumentParser()
parser.add_argument('-ipiA', '--input_piA', help='Input population A pi file path')
parser.add_argument('-ipiB', '--input_piB', help='Input population B file path')
parser.add_argument('-ifst', '--input_fst', help='Input Fst file path')
parser.add_argument('-o1', '--output_file1', help='Output file path for merged dataframe')
parser.add_argument('-o2', '--output_file2', help='Output file path for populationA selected regions')
parser.add_argument('-o3', '--output_file3', help='Output file path for populationB selected regions')
args = parser.parse_args()
# Read the piA file generated by vcftools
df_piA = pd.read_csv(args.input_piA, sep='\t', index_col=False, header=1, names=['CHROM', 'BIN_START','BIN_END', 'N_VARIANTS', 'PIA'])
# Read the piB file generated by vcftools
df_piB = pd.read_csv(args.input_piB, sep='\t', index_col=False, header=1, names=['CHROM', 'BIN_START','BIN_END', 'N_VARIANTS', 'PIB'])
# Read the Fst file generated by vcftools
df_fst = pd.read_csv(args.input_fst, sep='\t', index_col=False, header=1, names=['CHROM', 'BIN_START','BIN_END', 'WEIGHTED_FST'])
# Merge the piA, piB, and Fst values based on matching columns
merged_df = pd.merge(df_piA, df_piB, on=['CHROM', 'BIN_START'], how='inner')
merged_df = pd.merge(merged_df, df_fst, on=['CHROM', 'BIN_START'], how='inner')
# Calculate the pi ratio
merged_df['pi_Ratio'] = merged_df['PIA'] / merged_df['PIB']
# Rearrange the columns in the merged dataframe
merged_df = merged_df[['CHROM', 'BIN_START','BIN_END', 'WEIGHTED_FST', 'PIA', 'PIB', 'pi_Ratio']]
# Write the merged dataframe to the output file
merged_df.to_csv(args.output_file1, sep='\t', index=False)
# Filter the dataframe based on top 5% WEIGHTED_FST and top 5% pi_Ratio
top_fst_threshold = merged_df['WEIGHTED_FST'].quantile(0.95)
top_pi_ratio_threshold = merged_df['pi_Ratio'].quantile(0.95)
filteredB_df = merged_df[(merged_df['WEIGHTED_FST'] >= top_fst_threshold) & (merged_df['pi_Ratio'] >= top_pi_ratio_threshold)]
# Filter the dataframe based on top 5% WEIGHTED_FST and low 5% pi_Ratio
low_pi_ratio_threshold = merged_df['pi_Ratio'].quantile(0.05)
filteredA_df = merged_df[(merged_df['WEIGHTED_FST'] >= top_fst_threshold) & (merged_df['pi_Ratio'] <= low_pi_ratio_threshold)]
# Write the filtered dataframe to the output file
filteredA_df.to_csv(args.output_file2, sep='\t', index=False)
filteredB_df.to_csv(args.output_file3, sep='\t', index=False)
print("Top 5% FST threshold value:", top_fst_threshold)
print("Low 5% pi_Ratio threshold value(for populationA):", low_pi_ratio_threshold)
print("Top 5% pi_Ratio threshold value(for populationB):", top_pi_ratio_threshold)