Skip to content

Commit 39b915a

Browse files
committed
create corr/plots for all metrics, increase log readability, close matplotlib figures
1 parent 684d79d commit 39b915a

File tree

4 files changed

+44
-30
lines changed

4 files changed

+44
-30
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
.idea
33
/venv
44
__pycache__
5+
.pytest_cache
56

67
# Project-specific
78
/analysis/output

analysis/BehavioralSubjective.py

Lines changed: 26 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -54,10 +54,11 @@ def plot_correlation_correctness(df, metric):
5454
plt.xlabel(metric, color=color)
5555

5656
corr = df[metric].corr(df['Correct'], method='kendall')
57-
print('Kendall corr:', corr)
57+
print('Metric: ' + metric + ' ~ Correctness')
58+
print('-> Kendall corr:', corr)
5859

5960
slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(df[metric], df['ResponseTime'])
60-
print('r squared:', r_value**2)
61+
print('-> r squared:', r_value**2)
6162

6263
left, right = plt.xlim()
6364
ax1.text(left+((right-left)/40), 14, 'Kendall τ: ' + format(corr, '.2f'), fontdict=graph_label)
@@ -67,6 +68,7 @@ def plot_correlation_correctness(df, metric):
6768
plt.tight_layout()
6869

6970
plt.savefig(ROOT_DIR + '/analysis/output/' + metric + '_Correctness.pdf', dpi=300, bbox_inches='tight', pad_inches=0)
71+
plt.close(plt.gcf())
7072

7173

7274
def plot_correlation_responsetime(df, metric):
@@ -87,10 +89,11 @@ def plot_correlation_responsetime(df, metric):
8789
plt.xlabel("")
8890

8991
corr = df[metric].corr(df['ResponseTime'], method='kendall')
90-
print('Kendall corr:', corr)
92+
print('Metric: ' + metric + ' ~ ResponseTime')
93+
print('-> Kendall corr:', corr)
9194

9295
slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(df[metric], df['ResponseTime'])
93-
print('r squared:', r_value**2)
96+
print('-> r squared:', r_value**2)
9497

9598
left, right = plt.xlim()
9699
ax1.text(left+((right-left)/40), 8, 'Kendall τ: ' + format(corr, '.2f'), fontdict=graph_label)
@@ -100,6 +103,7 @@ def plot_correlation_responsetime(df, metric):
100103
plt.tight_layout()
101104

102105
plt.savefig(ROOT_DIR + '/analysis/output/' + metric + '_ResponseTime.pdf', dpi=300, bbox_inches='tight', pad_inches=0)
106+
plt.close(plt.gcf())
103107

104108

105109
def plot_correlation_subjcomplexity_metrics(df, metric):
@@ -120,10 +124,11 @@ def plot_correlation_subjcomplexity_metrics(df, metric):
120124
plt.xlabel(metric, color=color)
121125

122126
corr = df[metric].corr(df['subj_complexity'], method='kendall')
123-
print('Kendall corr:', corr)
127+
print('Metric: ' + metric + ' ~ SubjComplexity')
128+
print('-> Kendall corr:', corr)
124129

125130
slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(df[metric], df['subj_complexity'])
126-
print('r squared:', r_value**2)
131+
print('-> r squared:', r_value**2)
127132

128133
left, right = plt.xlim()
129134
ax1.text(left+((right-left)/40), 10, 'Kendall τ: ' + format(corr, '.2f'), fontdict=graph_label)
@@ -133,6 +138,7 @@ def plot_correlation_subjcomplexity_metrics(df, metric):
133138
plt.tight_layout()
134139

135140
plt.savefig(ROOT_DIR + '/analysis/output/SubjComplexity_' + metric + '.pdf', dpi=300, bbox_inches='tight', pad_inches=0)
141+
plt.close(plt.gcf())
136142

137143

138144
def plot_correlation_subjcomplexity_responsetime(df):
@@ -145,10 +151,11 @@ def plot_correlation_subjcomplexity_responsetime(df):
145151
plt.ylim((0, 61))
146152

147153
corr = df['subj_complexity'].corr(df['ResponseTime'], method='kendall')
148-
print('Kendall corr:', corr)
154+
print('SubjComplexity ~ ResponseTime')
155+
print('-> Kendall corr:', corr)
149156

150157
slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(df['subj_complexity'], df['ResponseTime'])
151-
print('r squared:', r_value**2)
158+
print('-> r squared:', r_value**2)
152159

153160
plt.ylabel("Response Time in sec.")
154161
plt.xlabel("Subjective Complexity Rating")
@@ -161,6 +168,7 @@ def plot_correlation_subjcomplexity_responsetime(df):
161168
plt.tight_layout()
162169

163170
plt.savefig(ROOT_DIR + '/analysis/output/SubjComplexity_ResponseTime.pdf', dpi=300, bbox_inches='tight', pad_inches=0)
171+
plt.close(plt.gcf())
164172

165173

166174
def plot_correlation_subjcomplexity_correctness(df):
@@ -173,10 +181,11 @@ def plot_correlation_subjcomplexity_correctness(df):
173181
plt.ylim((0, 100))
174182

175183
corr = df['subj_complexity'].corr(df['Correct'], method='kendall')
176-
print('Kendall corr:', corr)
184+
print('SubjComplexity ~ Correctness')
185+
print('-> Kendall corr:', corr)
177186

178187
slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(df['subj_complexity'], df['Correct'])
179-
print('r squared:', r_value ** 2)
188+
print('-> r squared:', r_value ** 2)
180189

181190
plt.ylabel("Correct Responses in %")
182191
plt.xlabel("Subjective Complexity Rating")
@@ -189,6 +198,7 @@ def plot_correlation_subjcomplexity_correctness(df):
189198
plt.tight_layout()
190199

191200
plt.savefig(ROOT_DIR + '/analysis/output/SubjComplexity_Correctness.pdf', dpi=300, bbox_inches='tight', pad_inches=0)
201+
plt.close(plt.gcf())
192202

193203

194204
def select_color_for_metric(metric):
@@ -231,16 +241,13 @@ def main():
231241
snippet_correctness = snippet_behavioral.groupby('Snippet').mean()
232242
snippet_correctness["Correct"] = snippet_correctness['Correct'].apply(convert_to_percent)
233243

234-
# create plots for metrics ~ response time
235-
plot_correlation_responsetime(snippet_behavioral, "LOC")
236-
plot_correlation_responsetime(snippet_behavioral, "DepDegree")
237-
plot_correlation_responsetime(snippet_behavioral, "McCabe")
238-
plot_correlation_responsetime(snippet_behavioral, "Halstead")
244+
# create plots for metrics ~ response time & correctness
245+
metrics = ["LOC", "DepDegree", "McCabe", "Halstead"] # for a small run with the four main representatives
246+
metrics = list(snippet_metrics)[2:] # for a full run
239247

240-
plot_correlation_correctness(snippet_correctness, "LOC")
241-
plot_correlation_correctness(snippet_correctness, "DepDegree")
242-
plot_correlation_correctness(snippet_correctness, "McCabe")
243-
plot_correlation_correctness(snippet_correctness, "Halstead")
248+
for metric in metrics:
249+
plot_correlation_responsetime(snippet_behavioral, metric)
250+
plot_correlation_correctness(snippet_correctness, metric)
244251

245252
# correlate with behavioral data
246253
print('\n##### \n correlating subjective complexity with behavioral data')

analysis/BrainActivationAnalysis.py

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ def plot_ba_subj_rating(df, ba, activation=True, participant=None):
3434
plt.xlabel("Subjective Complexity Rating")
3535

3636
corr = df['subj_complexity'].corr(df[ba], method='kendall')
37+
print('subj_complexity: ~ BA: ' + ba)
3738
print('Kendall corr:', corr)
3839

3940
slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(df['subj_complexity'], df[ba])
@@ -56,6 +57,7 @@ def plot_ba_subj_rating(df, ba, activation=True, participant=None):
5657
prefix += participant + '_'
5758

5859
plt.savefig(prefix + ba + '.pdf', dpi=300, bbox_inches='tight', pad_inches=0)
60+
plt.clf()
5961

6062

6163
def plot_ba_for_metric(df, metric, ba, activation=True):
@@ -89,10 +91,11 @@ def plot_ba_for_metric(df, metric, ba, activation=True):
8991
plt.xlabel("")
9092

9193
corr = df[metric].corr(df[ba], method='kendall')
92-
print('Kendall corr:', corr)
94+
print('Metric: ' + metric + ' ~ BA: ' + ba)
95+
print('-> Kendall corr:', corr)
9396

9497
slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(df[metric], df[ba])
95-
print('r squared:', r_value ** 2)
98+
print('-> r squared:', r_value ** 2)
9699

97100
axes = plt.gca()
98101
if activation:
@@ -118,6 +121,8 @@ def plot_ba_for_metric(df, metric, ba, activation=True):
118121
prefix = ROOT_DIR + '/analysis/output/deactivation_'
119122

120123
plt.savefig(prefix + metric + '_' + ba + '.pdf', dpi=300, bbox_inches='tight', pad_inches=0)
124+
plt.clf()
125+
plt.close(fig)
121126

122127

123128
def get_bas(activation):
@@ -130,15 +135,16 @@ def get_bas(activation):
130135
return bas
131136

132137

133-
def create_plots(df, activation=True):
138+
def create_plots(df, snippet_metrics, activation=True):
134139
bas = get_bas(activation)
135140

136141
# plot the stats
137142
for ba in bas:
138-
plot_ba_for_metric(df, 'LOC', ba, activation)
139-
plot_ba_for_metric(df, 'DepDegree', ba, activation)
140-
plot_ba_for_metric(df, 'McCabe', ba, activation)
141-
plot_ba_for_metric(df, 'Halstead', ba, activation)
143+
metrics = ["LOC", "DepDegree", "McCabe", "Halstead"] # for a small run with the four main representatives
144+
metrics = list(snippet_metrics)[2:] # for a full run
145+
146+
for metric in metrics:
147+
plot_ba_for_metric(df, metric, ba, activation)
142148

143149

144150
def compute_statistics(df, activation=True):
@@ -183,8 +189,8 @@ def main():
183189
df_ba_cond_act = pd.merge(df_ba_cond_act, snippet_metrics, how='left', left_on=['condition'], right_on=['Snippet'])
184190
df_ba_cond_deact = pd.merge(df_ba_cond_deact, snippet_metrics, how='left', left_on=['condition'], right_on=['Snippet'])
185191

186-
create_plots(df_ba_cond_act, True)
187-
create_plots(df_ba_cond_deact, False)
192+
create_plots(df_ba_cond_act, snippet_metrics, True)
193+
create_plots(df_ba_cond_deact, snippet_metrics, False)
188194

189195
compute_statistics(df_ba_cond_act, True)
190196
compute_statistics(df_ba_cond_deact, False)

tests/test_pipeline.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ def test_behavioral_subjective(self):
2828

2929
BehavioralSubjective.main()
3030
file_count_end = get_number_of_files_in_output()
31-
self.assertEqual(15, file_count_end) # pipeline should create 14 files + .gitkeep
31+
self.assertEqual(75, file_count_end) # pipeline should create 74 files + .gitkeep
3232

3333
def test_behavioral_brain(self):
3434
empty_output_dir()
@@ -48,7 +48,7 @@ def test_brain_activation(self):
4848

4949
BrainActivationAnalysis.main()
5050
file_count_end = get_number_of_files_in_output()
51-
self.assertEqual(31, file_count_end) # pipeline should create 30 files + .gitkeep
51+
self.assertEqual(211, file_count_end) # pipeline should create 210 files + .gitkeep
5252

5353

5454
if __name__ == '__main__':

0 commit comments

Comments
 (0)