-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdataAnalysis.py
More file actions
135 lines (114 loc) · 4.91 KB
/
dataAnalysis.py
File metadata and controls
135 lines (114 loc) · 4.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import os
import pandas as pd
from collections import defaultdict
# Global registry keyed by project name. Each entry holds the set of task
# names recorded for the project ('tasks') and, per task name, the set of
# outcome categories recorded for it ('errors').
project_status = defaultdict(
    lambda: dict(tasks=set(), errors=defaultdict(set))
)

# Normalised categories that mark a project for the special-runtime report.
SPECIAL_EXCEPTION = "java.lang.RuntimeException: Main-class has no main method!"
SOOT_REASON = "java.lang.RuntimeException: Failed to convert"
NO_REACHABLE = "no reachable"
def parse_exception(line):
    """Parse one exception line and return its normalised category.

    The line is expected to contain a double-quoted segment (presumably the
    thread name, as in ``Exception in thread "main" ...`` -- confirm against
    the summary files). Everything up to and including the second ``"`` is
    skipped; the text after it, cut at the first ``<``, is split at the first
    ``:`` into an exception class and a reason.

    Args:
        line: A single exception entry as text.

    Returns:
        ``"no reachable"`` when ``line`` is exactly that marker; otherwise the
        stripped exception class followed directly by the stripped reason
        (the reason keeps its leading ``:``). Returns ``""`` when the line
        holds fewer than two double quotes or nothing follows the second one.
    """
    if line == "no reachable":
        return "no reachable"

    # Skip everything through the second double quote; without two quotes
    # there is nothing to classify.
    pieces = line.split('"', 2)
    if len(pieces) < 3:
        return ""

    # A '<' terminates the interesting part whether it appears in the class
    # name or in the reason.
    payload = pieces[2].partition('<')[0]

    # The first ':' separates class from reason; the colon stays with the
    # reason so the result reads "pkg.SomeException: message".
    class_part, colon, detail = payload.partition(':')
    exception_class = class_part.strip()
    reason = (colon + detail).strip()
    return f"{exception_class}{reason}"
def update_project_status(task_name, project, exceptions):
    """Record the outcome of one task for one project in ``project_status``.

    Args:
        task_name: Name of the task that was run.
        project: Name of the project the task ran on.
        exceptions: Exception categories observed for the run. When empty
            (or otherwise falsy), the run is recorded as ``"Success"``.
    """
    record = project_status[project]
    record['tasks'].add(task_name)

    # An empty category list means nothing went wrong for this task.
    outcomes = set(exceptions) if exceptions else {"Success"}
    record['errors'][task_name].update(outcomes)
def analyze_tasks(task_list, root_dir="."):
    """Classify per-task exception summaries and write the report CSVs.

    For every task, ``<root_dir>/<task>_summary.csv`` is read (a missing file
    is reported on stdout and skipped). Each row's exception text is split on
    ``;`` and normalised with ``parse_exception``; the resulting categories
    are written to ``<root_dir>/<task>_category.csv`` (one row per category
    with the comma-joined project names) and recorded through
    ``update_project_status``.

    After all tasks, three reports are written to ``root_dir``:
    ``special_runtime.csv`` (projects that hit one of the special marker
    categories), ``all_success.csv`` (projects marked successful in every
    task that was read) and ``lack_deps.csv`` (every other project).

    The candidate projects are the entries of the current working directory
    minus a fixed list of tooling directories.

    Args:
        task_list: Iterable of task names to analyse.
        root_dir: Directory holding the summary CSVs and receiving the
            generated reports.
    """
    exclude_dirs = ['.lib', '.idea', '.task_summary', '.reports', '.git']
    # Set difference rather than set.remove(): a tooling directory that does
    # not exist in the working directory must not raise KeyError.
    all_projects = set(os.listdir(".")) - set(exclude_dirs)
    success_projects = set(all_projects)
    special_runtime = set()

    for task in task_list:
        csv_path = os.path.join(root_dir, f"{task}_summary.csv")
        if not os.path.exists(csv_path):
            print(f"警告:未找到任务文件 {csv_path}")
            continue

        df = pd.read_csv(csv_path)
        # category -> list of project names that fell into it for this task
        category_dict = defaultdict(list)

        for _, row in df.iterrows():
            project = row['项目名称']
            exception_info = row['异常信息']

            if exception_info != '无':
                parsed = [parse_exception(e) for e in exception_info.split(';')]
                # Entries that could not be parsed come back empty; drop them.
                categories = [e for e in parsed if e]
                if (SPECIAL_EXCEPTION in categories
                        or NO_REACHABLE in categories
                        or SOOT_REASON in categories):
                    special_runtime.add(project)
            else:
                categories = ["Success"]

            for cat in set(categories):
                category_dict[cat].append(project)

            update_project_status(task, project, categories)

        output_df = pd.DataFrame([
            {'分类': k, '项目列表': ','.join(v)}
            for k, v in category_dict.items()
        ])
        output_df.to_csv(os.path.join(root_dir, f"{task}_category.csv"), index=False)

        # A project stays "all success" only if this task also succeeded.
        success_projects &= set(df[df['运行状态'] == '成功']['项目名称'])

    # Computed once, as a plain difference: toggling with ^= per task would
    # put all-success projects back into the set on every second task.
    bad_projects = all_projects - success_projects

    pd.DataFrame({'未指定正确入口': list(special_runtime)}).to_csv(os.path.join(root_dir, "special_runtime.csv"), index=False)
    pd.DataFrame({'全成功项目': list(success_projects)}).to_csv(os.path.join(root_dir, "all_success.csv"), index=False)
    pd.DataFrame({'缺失库项目': list(bad_projects)}).to_csv(os.path.join(root_dir, "lack_deps.csv"), index=False)
if __name__ == "__main__":
    # Task names supplied by the user; summaries are read from and reports
    # are written to the ".reports" directory.
    task_list = ["cha"]
    analyze_tasks(task_list=task_list, root_dir=".reports")