Skip to content

Commit 25a9973

Browse files
[Tracing] Emit grouping events (#4925)
Instrument the grouper code to emit grouping events. These events are only emitted for testcases being moved to another group (or to a newly formed one). That is, when a pair of testcases is considered similar: * If neither testcase is grouped yet, an event is emitted for each one. * If only one testcase is already assigned to a group, an event is emitted for the testcase being added to that group. * If both testcases are in groups, an event is emitted for the one being moved to the other group, and then one for each of the remaining testcases that are also moved due to the group merge. There is also the case of a testcase being ungrouped because its group contains only that testcase. This also triggers an event, since the testcase is moved to an ungrouped state. Rationale: b/394051778
1 parent ff37bda commit 25a9973

File tree

4 files changed

+240
-12
lines changed

4 files changed

+240
-12
lines changed

src/clusterfuzz/_internal/cron/grouper.py

Lines changed: 57 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,30 @@ def get_metadata(self, key=None, default=None):
7373
return getattr(self, key, default)
7474

7575

76+
def _emit_grouping_event(moved_testcase: int,
                         new_group_id: int,
                         prev_group_id: int,
                         similar_testcase: int | None,
                         reason: str,
                         group_merge: bool = False):
  """Emit a `TestcaseGroupingEvent` for a testcase that changed groups.

  Args:
    moved_testcase: Id of the testcase whose group changed.
    new_group_id: Group id the testcase now has.
    prev_group_id: Group id the testcase had before the move.
    similar_testcase: Id of the testcase it was matched with, or None.
    reason: Grouping reason reported for the move.
    group_merge: True when the move is a side effect of two groups being
      merged rather than a direct match.
  """
  if group_merge:
    # For a merge-induced move the event's reason is GROUP_MERGE, and the
    # reason that was passed in is reported as the reason for the merge.
    grouping_reason = events.GroupingReason.GROUP_MERGE
    merge_reason = reason
  else:
    grouping_reason = reason
    merge_reason = None

  grouping_event = events.TestcaseGroupingEvent(
      testcase_id=moved_testcase,
      group_id=new_group_id,
      previous_group_id=prev_group_id,
      similar_testcase_id=similar_testcase,
      grouping_reason=grouping_reason,
      group_merge_reason=merge_reason)
  events.emit(grouping_event)
98+
99+
76100
def combine_testcases_into_group(
77101
testcase_1: TestcaseAttributes, testcase_2: TestcaseAttributes,
78102
testcase_map: dict[int, TestcaseAttributes], reason: str) -> None:
@@ -93,26 +117,49 @@ def combine_testcases_into_group(
93117
new_group_id = _get_new_group_id()
94118
testcase_1.group_id = new_group_id
95119
testcase_2.group_id = new_group_id
120+
# Both testcases are moved, so emit an event for each.
121+
_emit_grouping_event(testcase_1.id, new_group_id, 0, testcase_2.id, reason)
122+
_emit_grouping_event(testcase_2.id, new_group_id, 0, testcase_1.id, reason)
96123
return
97124

98125
# If one of the testcase has a group id, then assign the other to reuse that
99126
# group id.
100127
if testcase_1.group_id and not testcase_2.group_id:
101128
testcase_2.group_id = testcase_1.group_id
129+
# Only emit event for moved testcase_2.
130+
_emit_grouping_event(testcase_2.id, testcase_1.group_id, 0, testcase_1.id,
131+
reason)
102132
return
133+
103134
if testcase_2.group_id and not testcase_1.group_id:
104135
testcase_1.group_id = testcase_2.group_id
136+
# Only emit event for moved testcase_1.
137+
_emit_grouping_event(testcase_1.id, testcase_2.group_id, 0, testcase_2.id,
138+
reason)
105139
return
106140

107141
# If both the testcase have their own groups, then just merge the two groups
108142
# together and reuse one of their group ids.
109143
group_id_to_reuse = testcase_1.group_id
110144
group_id_to_move = testcase_2.group_id
145+
# Emit event for testcase from group to be moved.
146+
_emit_grouping_event(testcase_2.id, testcase_1.group_id, testcase_2.group_id,
147+
testcase_1.id, reason)
148+
111149
moved_testcase_ids = []
112150
for testcase in testcase_map.values():
113151
if testcase.group_id == group_id_to_move:
114152
testcase.group_id = group_id_to_reuse
115153
moved_testcase_ids.append(str(testcase.id))
154+
if testcase.id != testcase_2.id:
155+
# Emit event for each testcase moved due to the group merge.
156+
_emit_grouping_event(
157+
testcase.id,
158+
group_id_to_reuse,
159+
group_id_to_move,
160+
testcase_2.id,
161+
reason,
162+
group_merge=True)
116163

117164
logs.info(f'Merged group {group_id_to_move} into {group_id_to_reuse}: ' +
118165
'moved testcases: ' + ', '.join(moved_testcase_ids))
@@ -270,7 +317,7 @@ def _group_testcases_based_on_variants(testcase_map):
270317
continue
271318

272319
combine_testcases_into_group(testcase_1, testcase_2, testcase_map,
273-
'identical variant')
320+
events.GroupingReason.IDENTICAL_VARIANT)
274321

275322

276323
def _group_testcases_with_same_issues(testcase_map):
@@ -301,7 +348,7 @@ def _group_testcases_with_same_issues(testcase_map):
301348
continue
302349

303350
combine_testcases_into_group(testcase_1, testcase_2, testcase_map,
304-
'same issue')
351+
events.GroupingReason.SAME_ISSUE)
305352

306353

307354
def _compare_testcases_crash_states(testcase_1, testcase_2) -> bool:
@@ -362,7 +409,7 @@ def _group_testcases_with_similar_states(testcase_map):
362409
continue
363410

364411
combine_testcases_into_group(testcase_1, testcase_2, testcase_map,
365-
'similar crashes')
412+
events.GroupingReason.SIMILAR_CRASH)
366413

367414

368415
def _has_testcase_with_same_params(testcase, testcase_map):
@@ -603,6 +650,8 @@ def group_testcases():
603650
data_handler.delete_group(updated_group_id, update_testcases=False)
604651
logs.info(
605652
f'Deleted group {updated_group_id} used by only one testcase.')
653+
_emit_grouping_event(testcase_id, 0, updated_group_id, None,
654+
events.GroupingReason.UNGROUPED)
606655
updated_group_id = 0
607656
updated_group_bug_information = 0
608657
updated_is_leader = True
@@ -635,8 +684,11 @@ def group_testcases():
635684

636685
@logs.cron_log_context()
637686
def main():
638-
"""Group testcases (this will be used to run grouper as a standalone cron in
639-
dev/staging environments)."""
687+
"""Group testcases.
688+
689+
This will be useful to run grouper as a standalone cron in dev/staging
690+
environments.
691+
"""
640692
try:
641693
logs.info('Grouping testcases.')
642694
group_testcases()

src/clusterfuzz/_internal/metrics/events.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ class GroupingReason:
106106
SAME_ISSUE = 'same_issue'
107107
IDENTICAL_VARIANT = 'identical_variant'
108108
GROUP_MERGE = 'group_merge'
109+
UNGROUPED = 'ungrouped'
109110

110111

111112
@dataclass(kw_only=True)

src/clusterfuzz/_internal/tests/appengine/handlers/cron/grouper_test.py

Lines changed: 180 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,10 @@ def setUp(self):
3939

4040
helpers.patch(self, [
4141
'clusterfuzz._internal.cron.cleanup.get_top_crashes_for_all_projects_and_platforms',
42+
'clusterfuzz._internal.metrics.events.emit',
43+
'clusterfuzz._internal.metrics.events._get_datetime_now',
4244
])
43-
45+
self.mock._get_datetime_now.return_value = datetime.datetime(2025, 1, 1) # pylint: disable=protected-access
4446
self.mock.get_top_crashes_for_all_projects_and_platforms.return_value = {
4547
'blah': {},
4648
'project1': {
@@ -77,6 +79,7 @@ def test_same_crash_different_security(self):
7779
self.testcases[index] = data_handler.get_testcase_by_id(t.key.id())
7880
self.assertEqual(self.testcases[index].group_id, 0)
7981
self.assertTrue(self.testcases[index].is_leader)
82+
self.mock.emit.assert_not_called()
8083

8184
def test_same_crash_same_security(self):
8285
"""Test that crashes with same crash states and same security flags get
@@ -139,6 +142,7 @@ def test_different_crash_same_security(self):
139142
self.testcases[index] = data_handler.get_testcase_by_id(t.key.id())
140143
self.assertEqual(self.testcases[index].group_id, 0)
141144
self.assertTrue(self.testcases[index].is_leader)
145+
self.mock.emit.assert_not_called()
142146

143147
def test_group_of_one(self):
144148
"""Test that a group id with just one testcase gets removed."""
@@ -152,6 +156,15 @@ def test_group_of_one(self):
152156
self.assertEqual(testcase.group_id, 0)
153157
self.assertTrue(testcase.is_leader)
154158

159+
self.mock.emit.assert_called_once_with(
160+
events.TestcaseGroupingEvent(
161+
testcase_id=testcase.key.id(),
162+
group_id=0,
163+
previous_group_id=1,
164+
similar_testcase_id=None,
165+
grouping_reason=events.GroupingReason.UNGROUPED,
166+
group_merge_reason=None))
167+
155168
def test_same_unique_crash_type_with_same_state(self):
156169
"""Test that the crashes with same unique crash type and same state get
157170
de-duplicated with one of them removed."""
@@ -196,6 +209,7 @@ def test_same_unique_crash_type_with_different_state(self):
196209
self.testcases[index] = data_handler.get_testcase_by_id(t.key.id())
197210
self.assertEqual(self.testcases[index].group_id, 0)
198211
self.assertTrue(self.testcases[index].is_leader)
212+
self.mock.emit.assert_not_called()
199213

200214
def test_different_unique_crash_type_with_same_state(self):
201215
"""Test that the crashes with different unique crash type but same state
@@ -316,6 +330,7 @@ def test_top_crasher_for_variant_analysis(self):
316330
for testcase in self.testcases:
317331
self.assertEqual(testcase.group_id, 0)
318332
self.assertTrue(testcase.is_leader)
333+
self.mock.emit.assert_not_called()
319334

320335
def test_same_job_type_for_variant_analysis(self):
321336
"""Tests that testcases with the same job_type don't get grouped together"""
@@ -404,6 +419,23 @@ def test_similar_variants_for_variant_analysis(self):
404419
self.assertEqual(self.testcases[i].group_id, 0)
405420
self.assertTrue(self.testcases[i].is_leader)
406421

422+
self.mock.emit.assert_any_call(
423+
events.TestcaseGroupingEvent(
424+
testcase_id=self.testcases[0].key.id(),
425+
group_id=self.testcases[0].group_id,
426+
previous_group_id=0,
427+
similar_testcase_id=self.testcases[1].key.id(),
428+
grouping_reason=events.GroupingReason.IDENTICAL_VARIANT,
429+
group_merge_reason=None))
430+
self.mock.emit.assert_any_call(
431+
events.TestcaseGroupingEvent(
432+
testcase_id=self.testcases[1].key.id(),
433+
group_id=self.testcases[1].group_id,
434+
previous_group_id=0,
435+
similar_testcase_id=self.testcases[0].key.id(),
436+
grouping_reason=events.GroupingReason.IDENTICAL_VARIANT,
437+
group_merge_reason=None))
438+
407439
def test_similar_but_anomalous_variants_for_variant_analysis(self):
408440
"""Tests that testcases with similar variants but anomalous do not
409441
get deduplicated. Anomalous variant matches with more than threshold
@@ -476,6 +508,7 @@ def test_similar_but_anomalous_variants_for_variant_analysis(self):
476508
self.testcases[index] = data_handler.get_testcase_by_id(t.key.id())
477509
self.assertEqual(self.testcases[index].group_id, 0)
478510
self.assertTrue(self.testcases[index].is_leader)
511+
self.mock.emit.assert_not_called()
479512

480513
def test_no_reproducible_for_variant_analysis(self):
481514
"""Tests that no-reproducible testcases with similar variants do not
@@ -515,6 +548,7 @@ def test_no_reproducible_for_variant_analysis(self):
515548
self.testcases[index] = data_handler.get_testcase_by_id(t.key.id())
516549
self.assertEqual(self.testcases[index].group_id, 0)
517550
self.assertTrue(self.testcases[index].is_leader)
551+
self.mock.emit.assert_not_called()
518552

519553
def test_ignored_crash_type_for_variant_analysis(self):
520554
"""Tests that testcases of ignored crash type with similar variants
@@ -555,6 +589,143 @@ def test_ignored_crash_type_for_variant_analysis(self):
555589
self.assertEqual(self.testcases[index].group_id, 0)
556590
self.assertTrue(self.testcases[index].is_leader)
557591

592+
def test_grouping_event_new_group(self):
  """Two ungrouped, similar testcases form a group: one event for each."""
  crash_states = ('abcdef', 'abcde')
  for index, crash_state in enumerate(crash_states):
    self.testcases[index].security_flag = True
    self.testcases[index].crash_state = crash_state

  for testcase in self.testcases:
    testcase.put()

  grouper.group_testcases()

  # Reload every testcase to observe what the grouper stored.
  self.testcases[:] = [
      data_handler.get_testcase_by_id(testcase.key.id())
      for testcase in self.testcases
  ]
  first, second = self.testcases[0], self.testcases[1]

  # Both testcases must share the same non-zero group id.
  self.assertNotEqual(first.group_id, 0)
  self.assertNotEqual(second.group_id, 0)
  self.assertEqual(first.group_id, second.group_id)

  # Each testcase was moved, with the other one as its similar testcase.
  for moved, similar in ((first, second), (second, first)):
    self.mock.emit.assert_any_call(
        events.TestcaseGroupingEvent(
            testcase_id=moved.key.id(),
            group_id=moved.group_id,
            previous_group_id=0,
            similar_testcase_id=similar.key.id(),
            grouping_reason=events.GroupingReason.SIMILAR_CRASH,
            group_merge_reason=None))
628+
629+
def test_grouping_event_assign_one_testcase(self):
  """An ungrouped testcase joins an existing group: exactly one event."""
  grouped, ungrouped = self.testcases[0], self.testcases[1]

  grouped.security_flag = True
  grouped.crash_state = 'abcdef'
  grouped.group_id = 1

  ungrouped.security_flag = True
  ungrouped.crash_state = 'abcde'
  ungrouped.group_id = 0

  for testcase in self.testcases:
    testcase.put()

  grouper.group_testcases()

  # Reload every testcase to observe what the grouper stored.
  self.testcases[:] = [
      data_handler.get_testcase_by_id(testcase.key.id())
      for testcase in self.testcases
  ]
  grouped, ungrouped = self.testcases[0], self.testcases[1]

  # Both testcases must now be in group 1.
  self.assertEqual(grouped.group_id, 1)
  self.assertEqual(ungrouped.group_id, 1)

  # Only the testcase that had no group is reported as moved.
  expected_event = events.TestcaseGroupingEvent(
      testcase_id=ungrouped.key.id(),
      group_id=1,
      previous_group_id=0,
      similar_testcase_id=grouped.key.id(),
      grouping_reason=events.GroupingReason.SIMILAR_CRASH,
      group_merge_reason=None)
  self.mock.emit.assert_called_once_with(expected_event)
658+
659+
def test_grouping_event_merge_groups(self):
  """Two groups are merged: check the direct and merge-induced events."""

  def configure(testcase, crash_state, group_id):
    testcase.security_flag = True
    testcase.crash_state = crash_state
    testcase.group_id = group_id

  # Group 1 holds testcases 0 and 1; group 2 holds testcases 2 and 3.
  # Testcases 0 and 2 have similar crash states, which links the groups.
  configure(self.testcases[0], 'abcdef', 1)
  configure(self.testcases[1], 'ghijk', 1)
  for index, crash_state in ((2, 'abcde'), (3, 'lmnopq')):
    self.testcases.append(test_utils.create_generic_testcase())
    configure(self.testcases[index], crash_state, 2)

  for testcase in self.testcases:
    testcase.put()

  grouper.group_testcases()

  # Reload every testcase to observe what the grouper stored.
  self.testcases[:] = [
      data_handler.get_testcase_by_id(testcase.key.id())
      for testcase in self.testcases
  ]

  common_group_id = self.testcases[0].group_id
  self.assertIn(common_group_id, [1, 2])
  # Check testcases are all grouped together.
  for index in range(4):
    self.assertEqual(self.testcases[index].group_id, common_group_id)

  # Avoid a possibly flaky test: derive the expectations from whichever
  # group id was kept during the merge.
  if common_group_id == 1:
    previous_group_id = 2
    matched = self.testcases[0]
    directly_moved = self.testcases[2]
    merge_moved = self.testcases[3]
  else:
    previous_group_id = 1
    matched = self.testcases[2]
    directly_moved = self.testcases[0]
    merge_moved = self.testcases[1]

  # The testcase that matched directly is moved with the similarity reason.
  self.mock.emit.assert_any_call(
      events.TestcaseGroupingEvent(
          testcase_id=directly_moved.key.id(),
          group_id=common_group_id,
          previous_group_id=previous_group_id,
          similar_testcase_id=matched.key.id(),
          grouping_reason=events.GroupingReason.SIMILAR_CRASH,
          group_merge_reason=None))
  # Its former group-mate follows it because of the group merge.
  self.mock.emit.assert_any_call(
      events.TestcaseGroupingEvent(
          testcase_id=merge_moved.key.id(),
          group_id=common_group_id,
          previous_group_id=previous_group_id,
          similar_testcase_id=directly_moved.key.id(),
          grouping_reason=events.GroupingReason.GROUP_MERGE,
          group_merge_reason=events.GroupingReason.SIMILAR_CRASH))
728+
558729

559730
@test_utils.with_cloud_emulators('datastore')
560731
class GroupExceedMaxTestcasesTest(unittest.TestCase):
@@ -644,8 +815,12 @@ def test_group_overflow_rejection_events(self):
644815

645816
grouper.group_testcases()
646817

647-
self.assertEqual(5, len(self.emitted_events))
818+
# Expected: 30 grouping events + 5 rejection events
819+
self.assertEqual(35, len(self.emitted_events))
820+
count_rejection_events = 0
648821
for event in self.emitted_events:
649-
self.assertEqual(event.rejection_reason,
650-
events.RejectionReason.GROUPER_OVERFLOW)
651-
self.assertEqual(5, self.mock.emit.call_count)
822+
if event.event_type == events.EventTypes.TESTCASE_REJECTION:
823+
self.assertEqual(event.rejection_reason,
824+
events.RejectionReason.GROUPER_OVERFLOW)
825+
count_rejection_events += 1
826+
self.assertEqual(5, count_rejection_events)

0 commit comments

Comments
 (0)