-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathmain_EventAnalysis_BarPlot_Group.py
More file actions
executable file
·331 lines (266 loc) · 13.4 KB
/
main_EventAnalysis_BarPlot_Group.py
File metadata and controls
executable file
·331 lines (266 loc) · 13.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import os
import glob
# Set the style for plots
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_context("talk")
# Define base directories
base_timing_dir = "/home/jgopal/NAS/SEEG-Smile-Events/"
base_au_dir = "/home/jgopal/NAS/Analysis/outputs_EventAnalysis/"
output_dir = '/home/jgopal/NAS/Analysis/AudioFacialEEG/Results_May_2025/'
# Create output directory if it doesn't exist
if not os.path.exists(output_dir):
os.makedirs(output_dir)
# Find all patient timing files
timing_files = glob.glob(f"{base_timing_dir}/*_timing.csv")
patient_codes = [os.path.basename(f).split('_timing')[0] for f in timing_files]
print(f"Found {len(patient_codes)} patients: {patient_codes}")
# Initialize dictionaries to store aggregated results across all patients
all_patients_results = {
"binarized_avg": {"smile": {}, "neutral": {}},
"threshold_count": {"smile": {}, "neutral": {}},
"raw_avg": {"smile": {}, "neutral": {}}
}
# Process each patient
for patient_code in patient_codes:
print(f"\n===== Processing patient: {patient_code} =====")
# Define file paths for this patient
timing_file = f"{base_timing_dir}/{patient_code}_timing.csv"
au_file = f"{base_au_dir}/combined_events_{patient_code}.csv"
# Check if both files exist
if not os.path.exists(timing_file) or not os.path.exists(au_file):
print(f"Skipping patient {patient_code} - missing files")
continue
# Read the data files
try:
timing_df = pd.read_csv(timing_file)
au_df = pd.read_csv(au_file)
print(f"Successfully loaded data files")
print(f"Timing data shape: {timing_df.shape}")
print(f"AU data shape: {au_df.shape}")
except Exception as e:
print(f"Error loading data files for {patient_code}: {e}")
continue
# Filter timing data for Happiness and Neutral events only
timing_filtered = timing_df[timing_df['EventType'].isin(['Happiness', 'Neutral'])]
print(f"Filtered timing data shape: {timing_filtered.shape}")
# Get list of AU columns (only include those that start with 'AU' but not 'AUL' or 'AUR')
au_columns = [col for col in au_df.columns if col.startswith('AU') and not col.startswith(('AUL', 'AUR'))]
if not au_columns:
print(f"No AU columns found for patient {patient_code}")
continue
print(f"Detected {len(au_columns)} AU columns")
# Initialize patient-specific results
patient_results = {
"binarized_avg": {"smile": {}, "neutral": {}},
"threshold_count": {"smile": {}, "neutral": {}},
"raw_avg": {"smile": {}, "neutral": {}}
}
# Group the data by clip type (smile vs neutral)
smile_clips = timing_filtered[timing_filtered['EventType'] == 'Happiness']['ClipName'].tolist()
neutral_clips = timing_filtered[timing_filtered['EventType'] == 'Neutral']['ClipName'].tolist()
print(f"Found {len(smile_clips)} smile clips and {len(neutral_clips)} neutral clips")
# Function to process clips of a specific type
def process_clips(clip_list, clip_type):
clip_au_data = []
for clip in clip_list:
# Find corresponding rows in AU dataframe
clip_data = au_df[au_df['Clip Name'] == clip]
if len(clip_data) > 0:
# Extract AU values for this clip
clip_au_values = clip_data[au_columns]
# Calculate different metrics
# 1. Average then binarize
clip_avg = clip_au_values.mean()
clip_binarized = (clip_avg >= 0.4).astype(int)
# 2. Count frames above threshold
above_threshold = (clip_au_values >= 0.4).sum()
# Store in results
for au in au_columns:
# Binarized average
if au not in patient_results["binarized_avg"][clip_type]:
patient_results["binarized_avg"][clip_type][au] = []
patient_results["binarized_avg"][clip_type][au].append(clip_binarized[au])
# Threshold count
if au not in patient_results["threshold_count"][clip_type]:
patient_results["threshold_count"][clip_type][au] = []
patient_results["threshold_count"][clip_type][au].append(above_threshold[au])
# Raw average
if au not in patient_results["raw_avg"][clip_type]:
patient_results["raw_avg"][clip_type][au] = []
patient_results["raw_avg"][clip_type][au].append(clip_avg[au])
# Collect all AU data for this clip type
clip_au_data.append(clip_au_values)
else:
print(f"Warning: No AU data found for clip {clip}")
# Combine all frame-level data for this clip type
if clip_au_data:
all_frames = pd.concat(clip_au_data)
return all_frames
return None
# Process all clips for this patient
smile_frames = process_clips(smile_clips, "smile")
neutral_frames = process_clips(neutral_clips, "neutral")
if smile_frames is not None and neutral_frames is not None:
print(f"Collected {len(smile_frames)} frames for smile clips")
print(f"Collected {len(neutral_frames)} frames for neutral clips")
else:
print(f"Warning: One or both expression types have no data for patient {patient_code}")
continue
# Add patient results to aggregated results
for data_type in ["binarized_avg", "threshold_count", "raw_avg"]:
for expression in ["smile", "neutral"]:
for au in au_columns:
if au not in patient_results[data_type][expression]:
continue
# Initialize if this AU hasn't been seen before
if au not in all_patients_results[data_type][expression]:
all_patients_results[data_type][expression][au] = []
# Add this patient's average for this AU to the aggregated results
au_value = np.mean(patient_results[data_type][expression][au])
all_patients_results[data_type][expression][au].append(au_value)
# Create a list of all unique AUs across all patients
all_aus = set()
for data_type in ["binarized_avg"]: # Only focus on binarized_avg as requested
for expression in ["smile", "neutral"]:
all_aus.update(all_patients_results[data_type][expression].keys())
all_aus = sorted(list(all_aus))
print(f"\nFound {len(all_aus)} unique Action Units across all patients")
# Function to perform t-tests and create aggregated bar plot
def analyze_and_plot_aggregated(data_type="binarized_avg"):
smile_data = all_patients_results[data_type]["smile"]
neutral_data = all_patients_results[data_type]["neutral"]
# Check if we have data to analyze
if not smile_data or not neutral_data:
print("No data to analyze. Exiting.")
return
# Get common AUs that have data for both smile and neutral
common_aus = [au for au in all_aus if au in smile_data and au in neutral_data]
if not common_aus:
print("No common AUs found between smile and neutral conditions.")
return
print(f"Analyzing {len(common_aus)} common AUs")
# Prepare data for plotting
mean_smile = {au: np.mean(smile_data[au]) for au in common_aus}
mean_neutral = {au: np.mean(neutral_data[au]) for au in common_aus}
sem_smile = {au: stats.sem(smile_data[au]) for au in common_aus}
sem_neutral = {au: stats.sem(neutral_data[au]) for au in common_aus}
# Perform t-tests
pvalues = {}
significant_levels = {}
significant_smile = []
significant_neutral = []
for au in common_aus:
# Some patients might not have data for all AUs
if len(smile_data[au]) < 2 or len(neutral_data[au]) < 2:
print(f"Skipping {au} due to insufficient data")
pvalues[au] = 1.0
significant_levels[au] = ''
continue
t_stat, p_val = stats.ttest_ind(smile_data[au], neutral_data[au], equal_var=False)
pvalues[au] = p_val
# Determine significance level
if p_val < 0.05:
significant_levels[au] = '*'
else:
significant_levels[au] = ''
if p_val < 0.05:
if mean_smile[au] > mean_neutral[au]:
significant_smile.append(au)
else:
significant_neutral.append(au)
# Sort AUs by significance and difference magnitude
sorted_aus = sorted(common_aus,
key=lambda au: (pvalues[au] < 0.05, abs(mean_smile[au] - mean_neutral[au])),
reverse=True)
# --- Save all the bar plot data and metadata to CSV for external plotting ---
export_rows = []
for i, au in enumerate(sorted_aus):
export_rows.append({
"AU": au,
"Smile_Mean": mean_smile[au],
"Smile_SEM": sem_smile[au],
"Neutral_Mean": mean_neutral[au],
"Neutral_SEM": sem_neutral[au],
"p_value": pvalues[au],
"Significant": significant_levels[au],
"Significant_Higher": (
"Smile" if au in significant_smile else ("Neutral" if au in significant_neutral else "")
),
"Bar_Index": i
})
# Add plot-level metadata as a separate row (or could be a separate file, but here as extra columns)
plot_metadata = {
"Plot_Title": f"Facial Action Unit Presence: Smile vs. Neutral (Aggregated Across {len(patient_codes)} Patients)",
"X_Label": "Facial Action Units",
"Y_Label": "Proportion of Clips (Averaged Across Patients)" if data_type == "binarized_avg" else "",
"Legend_Smile": "Smile",
"Legend_Neutral": "Neutral",
"Legend_Significance": "p < 0.05",
"Num_Patients": len(patient_codes),
"Data_Type": data_type
}
# Add metadata to every row for convenience
for row in export_rows:
row.update(plot_metadata)
export_df = pd.DataFrame(export_rows)
csv_filename = f"{output_dir}/AU_binarized_presence_ALL_PATIENTS_plotdata.csv"
export_df.to_csv(csv_filename, index=False)
print(f"Bar plot data and metadata saved to: {csv_filename}")
# --- End CSV export ---
# Create the plot
fig, ax = plt.subplots(figsize=(16, 10))
x = np.arange(len(sorted_aus))
width = 0.35
# Plot bars
smile_bars = ax.bar(x - width/2, [mean_smile[au] for au in sorted_aus], width,
label='Smile', color='#FFA500',
yerr=[sem_smile[au] for au in sorted_aus], capsize=5)
neutral_bars = ax.bar(x + width/2, [mean_neutral[au] for au in sorted_aus], width,
label='Neutral', color='#4682B4',
yerr=[sem_neutral[au] for au in sorted_aus], capsize=5)
# Add significance markers
for i, au in enumerate(sorted_aus):
if significant_levels[au]:
height = max(mean_smile[au], mean_neutral[au]) + max(sem_smile[au], sem_neutral[au]) + 0.02
ax.text(i, height, significant_levels[au], ha='center', va='bottom', fontsize=14)
# Customize plot
ax.set_xlabel('Facial Action Units', fontsize=14, fontweight='bold')
if data_type == "binarized_avg":
ax.set_ylabel('Proportion of Clips (Averaged Across Patients)', fontsize=14, fontweight='bold')
ax.set_title(f'Facial Action Unit Presence: Smile vs. Neutral\nAggregated Across {len(patient_codes)} Patients',
fontsize=16, fontweight='bold')
ax.set_xticks(x)
ax.set_xticklabels(sorted_aus, rotation=45, ha='right')
# Create custom legend with significance levels
from matplotlib.lines import Line2D
legend_elements = [
Line2D([0], [0], color='#FFA500', lw=4, label='Smile'),
Line2D([0], [0], color='#4682B4', lw=4, label='Neutral'),
Line2D([0], [0], marker='*', color='w', markerfacecolor='k', markersize=10, label='p < 0.05', linestyle='None')
]
first_legend = ax.legend(handles=legend_elements, loc='upper right', fontsize=12)
# Add the first legend
ax.add_artist(first_legend)
# Add grid lines for readability
ax.yaxis.grid(True, linestyle='--', alpha=0.7)
# Add padding to avoid text overlap
plt.tight_layout(pad=2.0)
# Save the plot
plot_filename = f"{output_dir}/AU_binarized_presence_ALL_PATIENTS.png"
plt.savefig(plot_filename, dpi=300, bbox_inches='tight')
print(f"Plot saved as: {plot_filename}")
# Show the plot
plt.show()
# Print significant AUs
print(f"\n--- Significant differences for {data_type} across all patients ---")
print(f"AUs significantly higher in Smile: {', '.join(significant_smile)}")
print(f"AUs significantly higher in Neutral: {', '.join(significant_neutral)}")
return significant_smile, significant_neutral
# Perform analysis for binarized averages across all patients
print(f"\n=== AGGREGATED ANALYSIS: BINARIZED AVERAGES ACROSS ALL PATIENTS ===")
sig_smile_bin, sig_neutral_bin = analyze_and_plot_aggregated("binarized_avg")