-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathvalidator.py
More file actions
137 lines (113 loc) · 4.1 KB
/
validator.py
File metadata and controls
137 lines (113 loc) · 4.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Tue Jul 9 14:22:44 2019
@author: lu
"""
# This code is used to analyze the connection, difference corelation of two datasets
import scipy.stats as stats
import scipy.stats as ss
import matplotlib.pyplot as plt
import numpy as np
import random
import math
from numbers import Number
#testing functions
#random.seed(0)
#list1=random.sample(range(100), 100)
#list2=random.sample(range(100), 100)
def testCorelation(x,y,corelationFunction):
print ("this changed")
if "pearson" in corelationFunction:
print ("p value %f"%stats.pearsonr(x, y)[1])
return stats.pearsonr(x, y)[0]
elif "spearman" in corelationFunction:
return stats.spearmanr(x,y,nan_policy="omit")[0]
elif "kendall" in corelationFunction:
return stats.kendalltau(x,y,nan_policy="omit")[0]
def plotCorelation(x,y,xLabel="list1",yLabel="list2",logScale="no",showCorelation="yes"):
plt.figure(dpi=300)
if(logScale=="yes"):
x=logify(x)
y=logify(y)
if "yes" in showCorelation:
plt.text(0.1,0.68,"R =: %0.4f"%(testCorelation(x,y,"pearson")))
x=np.array(x)
y=np.array(y)
plt.xlabel(xLabel)
plt.ylabel(yLabel)
b=estimate_coef(x,y)
y_pred=b[0]+b[1]*x
plt.plot(x, y_pred, color = "g",linewidth=1)
plt.scatter(x,y,s=0.5)
plt.show()
def testTopAndBotAgreeMent(x,y):
listSize=len(x)
print ("Total genes samples %d"%listSize)
selectPercentList=[0.1,0.5,1,5,10,15,20,25]
disagreeIndexList=[]
for selectPercentage in selectPercentList:
disagreeIndexList=[]
botSelectCutOff=int(listSize*0.01*selectPercentage)
topSelectCutOff=int(listSize*0.01*(100-selectPercentage))
list1=x
list2=y
rankList1=ss.rankdata(list1,method="min")
rankList2=ss.rankdata(list2,method="min")
# options are: min, max,averaage, dense,ordinal
# print rankList1
# print rankList2
selectRange=int(listSize*0.01*selectPercentage)
if selectRange==0:
print ("provided list is too small")
selectRange=0.000000000001
print ("Selected Samples: %d"%(selectRange))
cnt=0.0
# print "Top cutoff: %d"%topSelectCutOff
for i in range(listSize):
if rankList1[i]>topSelectCutOff and rankList2[i]>topSelectCutOff:
cnt+=1
percentage=cnt/selectRange*100
print ("Top %s%%: Intersect: %d (%0.2f%%)"%(selectPercentage,cnt,percentage))
cnt=0.0
# print "Bot cutoff: %d"%botSelectCutOff
for i in range(listSize):
if rankList1[i]<botSelectCutOff and rankList2[i]<botSelectCutOff:
cnt+=1
else:
disagreeIndexList.append(i)
percentage=cnt/selectRange*100
print ("Bot %s%%: Intersect: %d (%0.2f%%)"%(selectPercentage,cnt,percentage))
print()
print ("Used min as the ranking Method (allows ties), ranking list [0,2,3,2] will give [1,2,4,2]")
def validate(x,y,corelationFunction="pearson",logScale="no",xLabel="x",yLabel="y",showCorelation="yes"):
if(logScale=="yes"):
x=logify(x)
y=logify(y)
# testTopAndBotAgreeMent(x,y)
print ("corelation is: %s "%testCorelation(x,y,corelationFunction))
plotCorelation(x,y,xLabel=xLabel,yLabel=yLabel)
#====================private helper functions
def estimate_coef(x, y):
# number of observations/points
n = np.size(x)
# mean of x and y vector
m_x, m_y = np.mean(x), np.mean(y)
# calculating cross-deviation and deviation about x
SS_xy = np.sum(y*x) - n*m_y*m_x
SS_xx = np.sum(x*x) - n*m_x*m_x
# calculating regression coefficients
b_1 = SS_xy / SS_xx
b_0 = m_y - b_1*m_x
return(b_0, b_1)
def logify(myList):
# print (myList)
logList=[]
for i in myList:
if i<=0:
i=0.0001# treat 0 as a really small number so log of it makes sense
if not isinstance(i, Number):
i=0.0001
logList.append(math.log(float(i)))
# print (logList)
return logList