|
| 1 | +#!/usr/bin/env python3 |
| 2 | +""" |
| 3 | +This script is intended to be used as part of a GitHub Actions workflow in order to decide if the integration tests should: |
| 4 | +
|
| 5 | +a) be triggered at all |
| 6 | +b) if they should be triggered, should they be triggered for a subset of dialects or all dialects? |
| 7 | +
|
| 8 | +The tests can be triggered manually by using the following directive in either the PR description or a PR comment: |
| 9 | +
|
| 10 | + /integration-tests |
| 11 | +
|
| 12 | +To limit them to a certain dialect or dialects, you can specify: |
| 13 | +
|
| 14 | + /integration-tests dialects=bigquery,duckdb |
| 15 | +
|
| 16 | +If you specify nothing, a `git diff` will be performed between your PR branch and the base branch. |
| 17 | +If any files modified contain one of the SUPPORTED_DIALECTS in the filename, that dialect will be added to the |
| 18 | +list of dialects to test. If no files match, the integration tests will be skipped. |
| 19 | +
|
| 20 | +Note that integration tests in the remote workflow are only implemented for a subset of dialects. |
| 21 | +If new ones are added, update the SUPPORTED_DIALECTS constant below. |
| 22 | +
|
| 23 | +Each dialect is tested against itself (roundtrip) and duckdb (transpilation). |
| 24 | +Supplying a dialect not in this list will cause the tests to get skipped. |
| 25 | +""" |
| 26 | + |
| 27 | +import typing as t |
| 28 | +import os |
| 29 | +import sys |
| 30 | +import json |
| 31 | +import subprocess |
| 32 | +from pathlib import Path |
| 33 | + |
| 34 | +TRIGGER = "/integration-test" |
| 35 | +SUPPORTED_DIALECTS = ["duckdb", "bigquery", "snowflake"] |
| 36 | + |
| 37 | + |
| 38 | +def get_dialects_from_manual_trigger(trigger: str) -> t.Set[str]: |
| 39 | + """ |
| 40 | + Takes a trigger string and parses out the supported dialects |
| 41 | +
|
| 42 | + /integration_test -> [] |
| 43 | + /integration_test dialects=bigquery -> ["bigquery"] |
| 44 | + /integration_test dialects=bigquery,duckdb -> ["bigquery","duckdb"] |
| 45 | + /integration_test dialects=exasol,duckdb -> ["duckdb"] |
| 46 | + """ |
| 47 | + |
| 48 | + if not trigger.startswith(TRIGGER): |
| 49 | + raise ValueError(f"Invalid trigger: {trigger}") |
| 50 | + |
| 51 | + # trim off start at first space (to cover both /integration-test and /integration-tests) |
| 52 | + trigger_parts = trigger.split(" ")[1:] |
| 53 | + |
| 54 | + print(f"Parsing trigger args: {trigger_parts}") |
| 55 | + |
| 56 | + dialects: t.List[str] = [] |
| 57 | + for part in trigger_parts: |
| 58 | + # try to parse key=value pairs |
| 59 | + maybe_kv = part.split("=", maxsplit=1) |
| 60 | + if len(maybe_kv) >= 2: |
| 61 | + k, v = maybe_kv[0], maybe_kv[1] |
| 62 | + if k.lower().startswith("dialect"): |
| 63 | + dialects.extend([d.lower().strip() for d in v.split(",")]) |
| 64 | + |
| 65 | + return {d for d in dialects if d in SUPPORTED_DIALECTS} |
| 66 | + |
| 67 | + |
| 68 | +def get_dialects_from_git(base_ref: str, current_ref: str) -> t.Set[str]: |
| 69 | + """ |
| 70 | + Takes two git refs and runs `git diff --name-only <base_ref> <current_ref>` |
| 71 | +
|
| 72 | + If any of the returned file names contain a dialect from SUPPORTED_DIALECTS as |
| 73 | + a substring, that dialect is included in the returned set |
| 74 | + """ |
| 75 | + print(f"Checking for files changed between '{base_ref}' and '{current_ref}'") |
| 76 | + |
| 77 | + result = subprocess.run( |
| 78 | + ["git", "diff", "--name-only", base_ref, current_ref], |
| 79 | + stdout=subprocess.PIPE, |
| 80 | + stderr=subprocess.STDOUT, |
| 81 | + ) |
| 82 | + output = result.stdout.decode("utf8") |
| 83 | + |
| 84 | + if result.returncode != 0: |
| 85 | + raise ValueError(f"Git process failed with exit code {result.returncode}:\n{output}") |
| 86 | + |
| 87 | + print(f"Git output:\n{output}") |
| 88 | + |
| 89 | + matching_dialects = [] |
| 90 | + |
| 91 | + for l in output.splitlines(): |
| 92 | + l = l.strip().lower() |
| 93 | + |
| 94 | + matching_dialects.extend([d for d in SUPPORTED_DIALECTS if d in l]) |
| 95 | + |
| 96 | + return set(matching_dialects) |
| 97 | + |
| 98 | + |
| 99 | +if __name__ == "__main__": |
| 100 | + github_event_path = os.environ.get("GITHUB_EVENT_PATH") |
| 101 | + github_sha = os.environ.get("GITHUB_SHA") |
| 102 | + github_output = os.environ.get("GITHUB_OUTPUT") |
| 103 | + |
| 104 | + if ( |
| 105 | + not os.environ.get("GITHUB_ACTIONS") |
| 106 | + or not github_event_path |
| 107 | + or not github_sha |
| 108 | + or not github_output |
| 109 | + ): |
| 110 | + print(f"This script needs to run within GitHub Actions") |
| 111 | + sys.exit(1) |
| 112 | + |
| 113 | + github_event_path = Path(github_event_path) |
| 114 | + github_output = Path(github_output) |
| 115 | + |
| 116 | + with github_event_path.open("r") as f: |
| 117 | + event: t.Dict[str, t.Any] = json.load(f) |
| 118 | + |
| 119 | + print(f"Handling event: \n" + json.dumps(event, indent=2)) |
| 120 | + |
| 121 | + # for issue_comment events, the body is located at github.event.comment.body |
| 122 | + # since issues and PR's are the same thing in the GH backend, we also have to check if the issue type is "pull_request" |
| 123 | + comment_body = ( |
| 124 | + event.get("comment", {}).get("body") if event.get("issue", {}).get("pull_request") else None |
| 125 | + ) |
| 126 | + |
| 127 | + # for pull_request events, the body is located at github.event.pull_request.body |
| 128 | + pr_description = event.get("pull_request", {}).get("body") |
| 129 | + |
| 130 | + dialects = [] |
| 131 | + should_run = False |
| 132 | + |
| 133 | + text_blob = f"{comment_body or ''}{pr_description or ''}" |
| 134 | + text_blob_lines = [l.strip().lower() for l in text_blob.splitlines()] |
| 135 | + if trigger_line := [l for l in text_blob_lines if l.startswith(TRIGGER)]: |
| 136 | + # if the user has explicitly requested /integration-tests then use that |
| 137 | + print(f"Handling trigger line: {trigger_line[0]}") |
| 138 | + dialects = get_dialects_from_manual_trigger(trigger_line[0]) |
| 139 | + should_run = True |
| 140 | + else: |
| 141 | + # otherwise, do a git diff and inspect the changed files |
| 142 | + print(f"Explicit trigger line not detected; performing git diff") |
| 143 | + pull_request_base_ref = event.get("pull_request", {}).get("base", {}).get("sha") |
| 144 | + issue_comment_base_ref = event.get("before") |
| 145 | + |
| 146 | + base_ref = pull_request_base_ref or issue_comment_base_ref |
| 147 | + if not base_ref: |
| 148 | + raise ValueError("Unable to determine base ref") |
| 149 | + |
| 150 | + current_ref = github_sha |
| 151 | + print(f"Comparing '{current_ref}' against '{base_ref}'") |
| 152 | + # otherwise, look at git files changed and only trigger if a file relating |
| 153 | + # to a supported dialect has changed |
| 154 | + dialects = get_dialects_from_git(base_ref=base_ref, current_ref=github_sha) |
| 155 | + if dialects: |
| 156 | + should_run = True |
| 157 | + |
| 158 | + if should_run: |
| 159 | + dialects_str = ( |
| 160 | + f"the following dialects: {', '.join(dialects)}" |
| 161 | + if dialects |
| 162 | + else f"all supported dialects" |
| 163 | + ) |
| 164 | + print(f"Conclusion: should run tests for {dialects_str}") |
| 165 | + else: |
| 166 | + print(f"Conclusion: No tests to run") |
| 167 | + |
| 168 | + # write output variables |
| 169 | + lines = [] |
| 170 | + if should_run: |
| 171 | + lines.append("skip=false") |
| 172 | + if dialects: |
| 173 | + lines.append(f"dialects={','.join(dialects)}") |
| 174 | + else: |
| 175 | + lines.append("skip=true") |
| 176 | + |
| 177 | + with github_output.open("a") as f: |
| 178 | + f.writelines(f"{l}\n" for l in lines) |
0 commit comments