|
32 | 32 | SnapshotEvaluator, |
33 | 33 | SnapshotChangeCategory, |
34 | 34 | DeployabilityIndex, |
| 35 | + snapshots_to_dag, |
35 | 36 | ) |
36 | 37 | from sqlmesh.utils.date import to_datetime, to_timestamp, DatetimeRanges, TimeLike |
37 | 38 | from sqlmesh.utils.errors import CircuitBreakerError, NodeAuditsErrors |
@@ -1019,3 +1020,109 @@ def record_execute_environment_statements(*args, **kwargs): |
1019 | 1020 | execute_env_idx = call_order.index("execute_environment_statements") |
1020 | 1021 | snapshots_to_create_idx = call_order.index("get_snapshots_to_create") |
1021 | 1022 | assert env_statements_idx < execute_env_idx < snapshots_to_create_idx |
| 1023 | + |
| 1024 | + |
def test_dag_transitive_deps(mocker: MockerFixture, make_snapshot):
    """Check the DAG built when the middle snapshot of a chain has no intervals.

    The snapshots form the chain A <- B <- C. Only A and C are given intervals,
    and the DAG returned by ``Scheduler._dag`` is expected to contain exactly
    the nodes for A and C, with C depending directly on A.
    """
    snapshot_a = make_snapshot(SqlModel(name="a", query=parse_one("SELECT 1 as id")))
    snapshot_b = make_snapshot(SqlModel(name="b", query=parse_one("SELECT * FROM a")))
    snapshot_c = make_snapshot(SqlModel(name="c", query=parse_one("SELECT * FROM b")))

    # Wire the parent links explicitly: B's parent is A, C's parent is B.
    snapshot_b = snapshot_b.model_copy(update={"parents": (snapshot_a.snapshot_id,)})
    snapshot_c = snapshot_c.model_copy(update={"parents": (snapshot_b.snapshot_id,)})

    chain = (snapshot_a, snapshot_b, snapshot_c)
    for member in chain:
        member.categorize_as(SnapshotChangeCategory.BREAKING)

    scheduler = Scheduler(
        snapshots=list(chain),
        snapshot_evaluator=mocker.Mock(),
        state_sync=mocker.Mock(),
        default_catalog=None,
    )

    # A and C share one interval; B is deliberately left out of the selection.
    day = (to_timestamp("2023-01-01"), to_timestamp("2023-01-02"))
    merged_intervals = {
        snapshot_a: [day],
        snapshot_c: [day],
    }

    # NOTE(review): the returned index is not consulted by anything below; the
    # call is retained so this test performs the same work as before.
    DeployabilityIndex.create(list(chain))

    full_dag = snapshots_to_dag(list(chain))
    dag = scheduler._dag(merged_intervals, snapshot_dag=full_dag)

    node_a = EvaluateNode(snapshot_name='"a"', interval=day, batch_index=0)
    node_c = EvaluateNode(snapshot_name='"c"', interval=day, batch_index=0)

    expected_graph = {
        node_a: set(),
        node_c: {node_a},
    }
    assert dag.graph == expected_graph
| 1074 | + |
| 1075 | + |
def test_dag_multiple_chain_transitive_deps(mocker: MockerFixture, make_snapshot):
    """Check the DAG built when only the two ends of a diamond graph are selected.

    Dependency graph under test:

        A <- B <- D <- E
        A <- C <- D <- E

    Only A and E are given intervals, and the DAG returned by
    ``Scheduler._dag`` is expected to contain exactly the nodes for A and E,
    with E depending directly on A.
    """
    by_name = {}
    for model_name in ("a", "b", "c", "d", "e"):
        created = make_snapshot(SqlModel(name=model_name, query=parse_one("SELECT 1 as id")))
        created.categorize_as(SnapshotChangeCategory.BREAKING)
        by_name[model_name] = created

    def link(child, *parent_names):
        # Replace the child's entry with a copy whose parents are the given snapshots.
        parent_ids = tuple(by_name[parent].snapshot_id for parent in parent_names)
        by_name[child] = by_name[child].model_copy(update={"parents": parent_ids})

    link("b", "a")
    link("c", "a")
    link("d", "b", "c")
    link("e", "d")

    scheduler = Scheduler(
        snapshots=list(by_name.values()),
        snapshot_evaluator=mocker.Mock(),
        state_sync=mocker.Mock(),
        default_catalog=None,
    )

    # Intervals are supplied for A and E only; B, C and D are not selected.
    day = (to_timestamp("2023-01-01"), to_timestamp("2023-01-02"))
    batched_intervals = {
        by_name["a"]: [day],
        by_name["e"]: [day],
    }

    # Build the DAG over all five snapshots and hand it to the scheduler.
    full_dag = snapshots_to_dag(by_name.values())
    dag = scheduler._dag(batched_intervals, snapshot_dag=full_dag)

    node_a = EvaluateNode(snapshot_name='"a"', interval=day, batch_index=0)
    node_e = EvaluateNode(snapshot_name='"e"', interval=day, batch_index=0)

    expected_graph = {
        node_a: set(),
        node_e: {node_a},
    }
    assert dag.graph == expected_graph
0 commit comments