From 7e76f7d5aaad87f1a05594d3f707cbee96b85d20 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Mon, 1 Dec 2025 17:18:01 +0800 Subject: [PATCH 1/5] Ignore execution_time regressions when binaries have same hash We can remove a lot of the noise from execution time results by just ignoring any changes when the binary hashes are the same. This implements it by adding a column to the TestSuiteSampleFields table, similar to bigger_is_better, and setting it to true for execution_time. We want to still flag regressions on identical binaries for e.g. compile time. This was my first time poking around the migrations infrastructure. They get automatically applied whenever the server is launched so there shouldn't be any need to manually run any scripts. --- .gitignore | 1 + lnt/server/db/migrations/upgrade_17_to_18.py | 26 ++++++++++++++++++++ lnt/server/db/testsuite.py | 12 ++++++++- lnt/server/reporting/analysis.py | 19 +++++++++++--- schemas/nts.yaml | 1 + tests/server/reporting/analysis.py | 22 +++++++++++++++++ tests/server/ui/test_api.py | 2 ++ 7 files changed, 78 insertions(+), 5 deletions(-) create mode 100644 lnt/server/db/migrations/upgrade_17_to_18.py diff --git a/.gitignore b/.gitignore index efa0c218..ebcc96fb 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,4 @@ lnt/server/ui/static/docs test_run_tmp tests/**/Output venv +*~ diff --git a/lnt/server/db/migrations/upgrade_17_to_18.py b/lnt/server/db/migrations/upgrade_17_to_18.py new file mode 100644 index 00000000..7fb1578e --- /dev/null +++ b/lnt/server/db/migrations/upgrade_17_to_18.py @@ -0,0 +1,26 @@ +"""Adds a ignore_same_hash column to the sample fields table and sets it to +true for execution_time. + +""" + +from sqlalchemy import Column, Integer, update + +from lnt.server.db.migrations.util import introspect_table +from lnt.server.db.util import add_column + + +def upgrade(engine): + ignore_same_hash = Column("ignore_same_hash", Integer, default=0) + add_column(engine, "TestSuiteSampleFields", ignore_same_hash) + + test_suite_sample_fields = introspect_table(engine, "TestSuiteSampleFields") + set_init_value = update(test_suite_sample_fields).values(ignore_same_hash=0) + set_exec_time = ( + update(test_suite_sample_fields) + .where(test_suite_sample_fields.c.Name == "execution_time") + .values(ignore_same_hash=1) + ) + + with engine.begin() as trans: + trans.execute(set_init_value) + trans.execute(set_exec_time) diff --git a/lnt/server/db/testsuite.py b/lnt/server/db/testsuite.py index 158d439a..af88d410 100644 --- a/lnt/server/db/testsuite.py +++ b/lnt/server/db/testsuite.py @@ -173,6 +173,7 @@ def from_json(data): for index, metric_desc in enumerate(data['metrics']): name = metric_desc['name'] bigger_is_better = metric_desc.get('bigger_is_better', False) + ignore_same_hash = metric_desc.get('ignore_same_hash', False) metric_type_name = metric_desc.get('type', 'Real') display_name = metric_desc.get('display_name') unit = metric_desc.get('unit') @@ -182,8 +183,10 @@ def from_json(data): metric_type_name) metric_type = SampleType(metric_type_name) bigger_is_better_int = 1 if bigger_is_better else 0 + ignore_same_hash_int = 1 if ignore_same_hash else 0 field = SampleField(name, metric_type, index, status_field=None, bigger_is_better=bigger_is_better_int, + ignore_same_hash=ignore_same_hash_int, display_name=display_name, unit=unit, unit_abbrev=unit_abbrev) sample_fields.append(field) @@ -196,6 +199,7 @@ def __json__(self): for sample_field in self.sample_fields: metric = { 'bigger_is_better': (sample_field.bigger_is_better != 0), + 'ignore_same_hash': (sample_field.ignore_same_hash != 0), 'display_name': sample_field.display_name, 'name': sample_field.name, 'type': sample_field.type.name, @@ -340,12 +344,18 @@ class SampleField(FieldMixin, Base): # This assumption can be inverted by setting this column to nonzero. bigger_is_better = Column("bigger_is_better", Integer) + # Some fields like execution_time should ignore changes if the binary hash + # is the same. + ignore_same_hash = Column("ignore_same_hash", Integer, default=0) + def __init__(self, name, type, schema_index, status_field=None, bigger_is_better=0, + ignore_same_hash=0, display_name=None, unit=None, unit_abbrev=None): self.name = name self.type = type self.status_field = status_field self.bigger_is_better = bigger_is_better + self.ignore_same_hash = ignore_same_hash self.display_name = name if display_name is None else display_name self.unit = unit self.unit_abbrev = unit_abbrev @@ -367,7 +377,7 @@ def __repr__(self): def __copy__(self): return SampleField(self.name, self.type, self.schema_index, self.status_field, - self.bigger_is_better, self.display_name, self.unit, + self.bigger_is_better, self.ignore_same_hash, self.display_name, self.unit, self.unit_abbrev) def copy_info(self, other): diff --git a/lnt/server/reporting/analysis.py b/lnt/server/reporting/analysis.py index 74c9c2e9..38b2c399 100644 --- a/lnt/server/reporting/analysis.py +++ b/lnt/server/reporting/analysis.py @@ -54,7 +54,8 @@ class ComparisonResult: def __init__(self, aggregation_fn, cur_failed, prev_failed, samples, prev_samples, cur_hash, prev_hash, cur_profile=None, prev_profile=None, - confidence_lv=0.05, bigger_is_better=False): + confidence_lv=0.05, bigger_is_better=False, + ignore_same_hash=False): self.aggregation_fn = aggregation_fn # Special case: if we're using the minimum to aggregate, swap it for @@ -103,6 +104,7 @@ def __init__(self, aggregation_fn, self.confidence_lv = confidence_lv self.bigger_is_better = bigger_is_better + self.ignore_same_hash = ignore_same_hash def __repr__(self): """Print this ComparisonResult's constructor. @@ -118,7 +120,8 @@ def __repr__(self): self.samples, self.prev_samples, self.confidence_lv, - bool(self.bigger_is_better)) + bool(self.bigger_is_better), + bool(self.ignore_same_hash)) def __json__(self): simple_dict = self.__dict__ @@ -176,6 +179,12 @@ def get_value_status(self, confidence_interval=2.576, elif self.prev_failed: return UNCHANGED_PASS + # Ignore changes if the hash of the binary is the same and the field is + # sensitive to the hash, e.g. execution time. + if self.ignore_same_hash: + if self.cur_hash and self.prev_hash and self.cur_hash == self.prev_hash: + return UNCHANGED_PASS + # Always ignore percentage changes below MIN_PERCENTAGE_CHANGE %, for now, we just don't # have enough time to investigate that level of stuff. if ignore_small and abs(self.pct_delta) < MIN_PERCENTAGE_CHANGE: @@ -355,7 +364,8 @@ def get_comparison_result(self, runs, compare_runs, test_id, field, prev_values, cur_hash, prev_hash, cur_profile, prev_profile, self.confidence_lv, - bigger_is_better=field.bigger_is_better) + bigger_is_better=field.bigger_is_better, + ignore_same_hash=field.ignore_same_hash) return r def get_geomean_comparison_result(self, run, compare_to, field, tests): @@ -385,7 +395,8 @@ def get_geomean_comparison_result(self, run, compare_to, field, tests): cur_hash=cur_hash, prev_hash=prev_hash, confidence_lv=0, - bigger_is_better=field.bigger_is_better) + bigger_is_better=field.bigger_is_better, + ignore_same_hash=field.ignore_same_hash) def _load_samples_for_runs(self, session, run_ids, only_tests): # Find the set of new runs to load. diff --git a/schemas/nts.yaml b/schemas/nts.yaml index fe9b7cbe..172316ce 100644 --- a/schemas/nts.yaml +++ b/schemas/nts.yaml @@ -16,6 +16,7 @@ metrics: display_name: Execution Time unit: seconds unit_abbrev: s + ignore_same_hash: true - name: execution_status type: Status - name: score diff --git a/tests/server/reporting/analysis.py b/tests/server/reporting/analysis.py index 98a088a7..3691cb54 100644 --- a/tests/server/reporting/analysis.py +++ b/tests/server/reporting/analysis.py @@ -319,6 +319,28 @@ def test_handle_zero_sample(self): None, None) self.assertEqual(zeroSample.get_value_status(), UNCHANGED_PASS) + def test_ignore_same_hash(self): + """Test ignore_same_hash ignores regressions with the same hash.""" + same_hash = ComparisonResult(min, False, False, [10.], [5.], + 'abc', 'abc', ignore_same_hash=True) + self.assertEqual(same_hash.get_value_status(), UNCHANGED_PASS) + self.assertFalse(same_hash.is_result_interesting()) + + diff_hash = ComparisonResult(min, False, False, [10.], [5.], + 'abc', '123', ignore_same_hash=True) + self.assertEqual(diff_hash.get_value_status(), REGRESSED) + self.assertTrue(diff_hash.is_result_interesting()) + + no_hash = ComparisonResult(min, False, False, [10.], [5.], None, + '123', ignore_same_hash=True) + self.assertEqual(no_hash.get_value_status(), REGRESSED) + self.assertTrue(no_hash.is_result_interesting()) + + disabled = ComparisonResult(min, False, False, [10.], [5.], + 'abc', 'abc', ignore_same_hash=False) + self.assertEqual(disabled.get_value_status(), REGRESSED) + self.assertTrue(disabled.is_result_interesting()) + class AbsMinTester(unittest.TestCase): diff --git a/tests/server/ui/test_api.py b/tests/server/ui/test_api.py index 93e400c4..add80571 100644 --- a/tests/server/ui/test_api.py +++ b/tests/server/ui/test_api.py @@ -290,6 +290,8 @@ def test_schema(self): m['display_name'] = m['name'] if 'bigger_is_better' not in m: m['bigger_is_better'] = False + if 'ignore_same_hash' not in m: + m['ignore_same_hash'] = False yaml_schema['metrics'].sort(key=lambda x: x['name']) yaml_schema['run_fields'].sort(key=lambda x: x['name']) yaml_schema['machine_fields'].sort(key=lambda x: x['name']) From c1f0af9dbe4086da582082cba3ac7c6007913e7d Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Mon, 1 Dec 2025 18:56:00 +0800 Subject: [PATCH 2/5] Undo gitignore change --- .gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/.gitignore b/.gitignore index ebcc96fb..efa0c218 100644 --- a/.gitignore +++ b/.gitignore @@ -10,4 +10,3 @@ lnt/server/ui/static/docs test_run_tmp tests/**/Output venv -*~ From f3de8968c42e6e83e577b0bdb6922a2664b46586 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Mon, 1 Dec 2025 19:20:53 +0800 Subject: [PATCH 3/5] Remove FIXME --- lnt/server/reporting/analysis.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/lnt/server/reporting/analysis.py b/lnt/server/reporting/analysis.py index 38b2c399..8d18ec0b 100644 --- a/lnt/server/reporting/analysis.py +++ b/lnt/server/reporting/analysis.py @@ -158,9 +158,6 @@ def get_test_status(self): else: return UNCHANGED_PASS - # FIXME: take into account hash of binary - if available. If the hash is - # the same, the binary is the same and therefore the difference cannot be - # significant - for execution time. It can be significant for compile time. def get_value_status(self, confidence_interval=2.576, value_precision=MIN_VALUE_PRECISION, ignore_small=True): From c02c6115b2b5c4dfacfc155a41680bd61314c632 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Mon, 1 Dec 2025 19:50:47 +0800 Subject: [PATCH 4/5] Set ignore_same_hash on score field --- lnt/server/db/migrations/upgrade_17_to_18.py | 5 ++++- schemas/nts.yaml | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/lnt/server/db/migrations/upgrade_17_to_18.py b/lnt/server/db/migrations/upgrade_17_to_18.py index 7fb1578e..38bccf5c 100644 --- a/lnt/server/db/migrations/upgrade_17_to_18.py +++ b/lnt/server/db/migrations/upgrade_17_to_18.py @@ -17,7 +17,10 @@ def upgrade(engine): set_init_value = update(test_suite_sample_fields).values(ignore_same_hash=0) set_exec_time = ( update(test_suite_sample_fields) - .where(test_suite_sample_fields.c.Name == "execution_time") + .where( + (test_suite_sample_fields.c.Name == "execution_time") + | (test_suite_sample_fields.c.Name == "score") + ) .values(ignore_same_hash=1) ) diff --git a/schemas/nts.yaml b/schemas/nts.yaml index 172316ce..c6ad7df5 100644 --- a/schemas/nts.yaml +++ b/schemas/nts.yaml @@ -23,6 +23,7 @@ metrics: type: Real bigger_is_better: true display_name: Score + ignore_same_hash: true - name: mem_bytes type: Real display_name: Memory Usage From 03a94f13fc2196ea01a100ba006b1924a90f030a Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Tue, 2 Dec 2025 11:09:44 +0800 Subject: [PATCH 5/5] Fix flake8 warning This undoes black formatting. --- lnt/server/db/migrations/upgrade_17_to_18.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lnt/server/db/migrations/upgrade_17_to_18.py b/lnt/server/db/migrations/upgrade_17_to_18.py index 38bccf5c..ef8e5baf 100644 --- a/lnt/server/db/migrations/upgrade_17_to_18.py +++ b/lnt/server/db/migrations/upgrade_17_to_18.py @@ -18,8 +18,8 @@ def upgrade(engine): set_exec_time = ( update(test_suite_sample_fields) .where( - (test_suite_sample_fields.c.Name == "execution_time") - | (test_suite_sample_fields.c.Name == "score") + (test_suite_sample_fields.c.Name == "execution_time") | + (test_suite_sample_fields.c.Name == "score") ) .values(ignore_same_hash=1) )