|
11 | 11 | from pydantic_core.core_schema import ValidationInfo |
12 | 12 | from sqlglot import exp |
13 | 13 |
|
14 | | -from sqlmesh.core.console import Console |
15 | | -from sqlmesh.core.dialect import schema_ |
16 | 14 | from sqlmesh.utils.pydantic import PydanticModel, field_validator |
17 | 15 | from sqlmesh.core.environment import Environment, EnvironmentStatements, EnvironmentNamingInfo |
18 | | -from sqlmesh.utils.errors import SQLMeshError |
19 | 16 | from sqlmesh.core.snapshot import ( |
20 | 17 | Snapshot, |
21 | | - SnapshotEvaluator, |
22 | 18 | SnapshotId, |
23 | 19 | SnapshotTableCleanupTask, |
24 | 20 | SnapshotTableInfo, |
25 | 21 | ) |
26 | 22 |
|
27 | 23 | if t.TYPE_CHECKING: |
28 | | - from sqlmesh.core.engine_adapter.base import EngineAdapter |
29 | | - from sqlmesh.core.state_sync.base import Versions, StateReader, StateSync |
| 24 | + from sqlmesh.core.state_sync.base import Versions, StateReader |
30 | 25 |
|
31 | 26 | logger = logging.getLogger(__name__) |
32 | 27 |
|
33 | 28 | EXPIRED_SNAPSHOT_DEFAULT_BATCH_SIZE = 200 |
34 | 29 |
|
35 | 30 |
|
def cleanup_expired_views(
    default_adapter: EngineAdapter,
    engine_adapters: t.Dict[str, EngineAdapter],
    environments: t.List[Environment],
    warn_on_delete_failure: bool = False,
    console: t.Optional[Console] = None,
) -> None:
    """Drop the virtual-layer objects of the given (expired) environments.

    Environments are handled according to their ``suffix_target``:

    * table-suffixed environments have every model view dropped individually;
    * schema-suffixed environments have their schemas dropped with ``cascade=True``;
    * catalog-suffixed environments have their catalogs dropped, but only through
      adapters that report ``SUPPORTS_CREATE_DROP_CATALOG``.

    Only snapshots that are models and are not symbolic are considered. All
    target names are computed before the first drop is issued, and views are
    dropped first, then schemas, then catalogs.

    Args:
        default_adapter: Adapter used whenever no gateway-specific adapter applies.
        engine_adapters: Adapters keyed by gateway name.
        environments: Environments whose views, schemas or catalogs should be removed.
        warn_on_delete_failure: If True, a failed drop is logged as a warning and
            processing continues; otherwise the failure is raised.
        console: Optional console that is notified after each successful drop.

    Raises:
        SQLMeshError: If a drop fails and ``warn_on_delete_failure`` is False.
    """

    def _pick_adapter(env: Environment, gateway: t.Optional[str]) -> EngineAdapter:
        # We have to use the corresponding adapter if the virtual layer is gateway managed
        if env.gateway_managed and gateway:
            return engine_adapters.get(gateway, default_adapter)
        return default_adapter

    def _report_failure(message: str, cause: Exception) -> None:
        # Either downgrade the failure to a warning or surface it to the caller.
        if not warn_on_delete_failure:
            raise SQLMeshError(message) from cause
        logger.warning(message)

    def _notify(dropped):  # type: ignore[no-untyped-def]
        if console:
            console.update_cleanup_progress(dropped)

    catalogs_to_drop: t.Set[t.Tuple[EngineAdapter, str]] = set()
    schemas_to_drop: t.Set[t.Tuple[EngineAdapter, exp.Table]] = set()

    # Collect schemas and catalogs to drop (sets de-duplicate repeated targets)
    for env in environments:
        target = env.suffix_target
        if not (target.is_schema or target.is_catalog):
            continue
        for snapshot in env.snapshots:
            if not snapshot.is_model or snapshot.is_symbolic:
                continue
            adapter = _pick_adapter(env, snapshot.model_gateway)
            view_name = snapshot.qualified_view_name
            catalog_name = view_name.catalog_for_environment(
                env.naming_info, dialect=adapter.dialect
            )
            schema_name = view_name.schema_for_environment(
                env.naming_info, dialect=adapter.dialect
            )
            if not target.is_catalog:
                schemas_to_drop.add((adapter, schema_(schema_name, catalog_name)))
            elif catalog_name:
                catalogs_to_drop.add((adapter, catalog_name))

    # Collect the individual views of table-suffixed environments
    views_to_drop = set()
    for env in environments:
        if not env.suffix_target.is_table:
            continue
        for snapshot in env.snapshots:
            if not snapshot.is_model or snapshot.is_symbolic:
                continue
            adapter = _pick_adapter(env, snapshot.model_gateway)
            views_to_drop.add(
                (
                    adapter,
                    snapshot.qualified_view_name.for_environment(
                        env.naming_info, dialect=adapter.dialect
                    ),
                )
            )

    # Drop the views for the expired environments
    for adapter, view in views_to_drop:
        try:
            adapter.drop_view(view, ignore_if_not_exists=True)
            _notify(view)
        except Exception as error:
            _report_failure(
                f"Failed to drop the expired environment view '{view}': {error}", error
            )

    # Drop the schemas for the expired environments
    for adapter, schema in schemas_to_drop:
        try:
            adapter.drop_schema(schema, ignore_if_not_exists=True, cascade=True)
            _notify(schema.sql(dialect=adapter.dialect))
        except Exception as error:
            _report_failure(
                f"Failed to drop the expired environment schema '{schema}': {error}", error
            )

    # Drop any catalogs that were associated with a snapshot where the engine adapter
    # supports dropping catalogs. catalogs_to_drop is only populated for environments
    # whose suffix target is a catalog.
    for adapter, catalog in catalogs_to_drop:
        if not adapter.SUPPORTS_CREATE_DROP_CATALOG:
            continue
        try:
            adapter.drop_catalog(catalog)
            _notify(catalog)
        except Exception as error:
            _report_failure(
                f"Failed to drop the expired environment catalog '{catalog}': {error}", error
            )
139 | | - |
140 | 31 | def transactional() -> t.Callable[[t.Callable], t.Callable]: |
141 | 32 | def decorator(func: t.Callable) -> t.Callable: |
142 | 33 | @wraps(func) |
@@ -429,61 +320,3 @@ def iter_expired_snapshot_batches( |
429 | 320 | start=batch.batch_range.end, |
430 | 321 | end=LimitBoundary(batch_size=batch_size), |
431 | 322 | ) |
432 | | - |
433 | | - |
def delete_expired_snapshots(
    state_sync: StateSync,
    snapshot_evaluator: SnapshotEvaluator,
    *,
    current_ts: int,
    ignore_ttl: bool = False,
    batch_size: t.Optional[int] = None,
    console: t.Optional[Console] = None,
) -> None:
    """Delete all expired snapshots in batches.

    For each batch yielded by ``iter_expired_snapshot_batches`` the batch's
    cleanup tasks are passed to the snapshot evaluator first, and only then
    are the corresponding snapshot records deleted from the state.

    Args:
        state_sync: StateSync instance to query and delete expired snapshots from.
        snapshot_evaluator: SnapshotEvaluator instance to clean up tables associated with snapshots.
        current_ts: Timestamp used to evaluate expiration.
        ignore_ttl: If True, include snapshots regardless of TTL (only checks if unreferenced).
        batch_size: Maximum number of snapshots to fetch per batch.
        console: Optional console for reporting progress.

    Returns:
        None. The total number of processed expired snapshots is only logged.
    """
    num_expired_snapshots = 0
    for batch in iter_expired_snapshot_batches(
        state_reader=state_sync,
        current_ts=current_ts,
        ignore_ttl=ignore_ttl,
        batch_size=batch_size,
    ):
        batch_end = batch.batch_range.end
        # The end boundary is either a concrete row position or a size limit;
        # describe whichever one applies in the log line.
        end_info = (
            f"updated_ts={batch_end.updated_ts}"
            if isinstance(batch_end, RowBoundary)
            else f"limit={batch_end.batch_size}"
        )
        logger.info(
            "Processing batch of size %s with end %s",
            len(batch.expired_snapshot_ids),
            end_info,
        )
        # Run the batch's cleanup tasks before removing the state records.
        snapshot_evaluator.cleanup(
            target_snapshots=batch.cleanup_tasks,
            on_complete=console.update_cleanup_progress if console else None,
        )
        # The delete range always starts at the lowest boundary and extends to
        # the end of the batch that was just cleaned up.
        state_sync.delete_expired_snapshots(
            batch_range=ExpiredBatchRange(
                start=RowBoundary.lowest_boundary(),
                end=batch_end,
            ),
            ignore_ttl=ignore_ttl,
        )
        logger.info("Cleaned up expired snapshots batch")
        num_expired_snapshots += len(batch.expired_snapshot_ids)
    logger.info("Cleaned up %s expired snapshots", num_expired_snapshots)
0 commit comments