Skip to content

Commit 8b5298a

Browse files
feat(duckdb)!: Transpile BQ's WEEK based DATE_DIFF (#6507)
* feat(duckdb)!: Transpile BQ's WEEK-based DATE_DIFF * Update sqlglot/dialects/bigquery.py Co-authored-by: Jo <46752250+georgesittas@users.noreply.github.com> * Update sqlglot/dialects/bigquery.py Co-authored-by: Jo <46752250+georgesittas@users.noreply.github.com> --------- Co-authored-by: Jo <46752250+georgesittas@users.noreply.github.com>
1 parent 3224235 commit 8b5298a

File tree

5 files changed

+200
-3
lines changed

5 files changed

+200
-3
lines changed

sqlglot/dialects/bigquery.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,24 @@ def _build_datetime(args: t.List) -> exp.Func:
242242
return exp.TimestampFromParts.from_arg_list(args)
243243

244244

245+
def build_date_diff(args: t.List) -> exp.Expression:
    """
    Build an exp.DateDiff from the positional arguments of a DATE_DIFF call.

    The arguments map to `this`, `expression` and `unit` in that order, and the node is
    flagged with `date_part_boundary=True`.

    A plain WEEK unit is rewritten to WEEK(SUNDAY) so that the week-start semantic is
    explicit in the AST, which facilitates transpilation.

    Args:
        args: The parsed function arguments.

    Returns:
        The constructed exp.DateDiff node.
    """
    date_diff = exp.DateDiff(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        unit=seq_get(args, 2),
        date_part_boundary=True,
    )

    # The unit is read back from the node rather than from `args`: the TimeUnit mixin
    # performs its own canonicalizations in its constructor, so normalization has to
    # happen after exp.DateDiff has been built.
    parsed_unit = date_diff.args.get("unit")

    if not (isinstance(parsed_unit, exp.Var) and parsed_unit.name.upper() == "WEEK"):
        return date_diff

    date_diff.set("unit", exp.WeekStart(this=exp.var("SUNDAY")))
    return date_diff
261+
262+
245263
def _build_regexp_extract(
246264
expr_type: t.Type[E], default_group: t.Optional[exp.Expression] = None
247265
) -> t.Callable[[t.List, BigQuery], E]:
@@ -564,6 +582,7 @@ class Parser(parser.Parser):
564582
"CONTAINS_SUBSTR": _build_contains_substring,
565583
"DATE": _build_date,
566584
"DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
585+
"DATE_DIFF": build_date_diff,
567586
"DATE_SUB": build_date_delta_with_interval(exp.DateSub),
568587
"DATE_TRUNC": lambda args: exp.DateTrunc(
569588
unit=seq_get(args, 1),

sqlglot/dialects/duckdb.py

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,18 @@
6969
"\u001f": 31,
7070
}
7171

72+
# ISO 8601 day-of-week numbers keyed by upper-case day name.
# ISO 8601 numbers the week from Monday=1 through Sunday=7.
WEEK_START_DAY_TO_DOW = {
    day_name: iso_dow
    for iso_dow, day_name in enumerate(
        ("MONDAY", "TUESDAY", "WEDNESDAY", "THURSDAY", "FRIDAY", "SATURDAY", "SUNDAY"),
        start=1,
    )
}
83+
7284

7385
# BigQuery -> DuckDB conversion for the DATE function
7486
def _date_sql(self: DuckDB.Generator, expression: exp.Date) -> str:
@@ -250,9 +262,86 @@ def _implicit_datetime_cast(
250262
return arg
251263

252264

265+
def _week_unit_to_dow(unit: t.Optional[exp.Expression]) -> t.Optional[int]:
    """
    Resolve a week-like DATE_DIFF unit to the ISO 8601 number of the day its weeks start on.

    This is used to align DATE_DIFF('WEEK', ...) with week units coming from other
    dialects, e.g. BigQuery's WEEK(<day>) or ISOWEEK date parts.

    Args:
        unit: The unit expression (a Var for ISOWEEK, or a WeekStart such as WEEK(SUNDAY)).

    Returns:
        The ISO 8601 day number (Monday=1, ..., Sunday=7), or None if the unit is not a
        recognized week unit or its start day is not one of the known day names
        (e.g. the day is dynamic rather than a constant).

    Examples:
        "WEEK(SUNDAY)" -> 7
        "WEEK(MONDAY)" -> 1
        "ISOWEEK" -> 1
        "WEEK" (plain Var) -> None
    """
    # Only a plain ISOWEEK Var maps to Monday. This has to be an equality check: a
    # substring test (`in "ISOWEEK"`) would also match "WEEK", "ISO" or an empty name.
    if isinstance(unit, exp.Var) and unit.name.upper() == "ISOWEEK":
        return 1

    # WeekStart carries an explicit day name; names missing from the table yield None
    if isinstance(unit, exp.WeekStart):
        return WEEK_START_DAY_TO_DOW.get(unit.name.upper())

    return None
290+
291+
292+
def _build_week_trunc_expression(date_expr: exp.Expression, start_dow: int) -> exp.Expression:
    """
    Build a DATE_TRUNC('WEEK', ...) expression for weeks that start on a custom day.

    DuckDB's DATE_TRUNC('WEEK', ...) aligns weeks to Monday (ISO standard). To align to a
    different start day, the date is shifted before truncating so that the requested
    start day falls on a Monday.

    The shift is not undone after truncation, so the result is a week bucket marker
    rather than the actual first day of the week. It is meant to be compared against
    another value built with the same `start_dow`.

    Args:
        date_expr: The date expression to truncate.
        start_dow: ISO 8601 day-of-week number of the week start (Monday=1, ..., Sunday=7).

    Returns:
        A DateTrunc over `date_expr` itself for Monday-based weeks, otherwise a DateTrunc
        over `date_expr` plus an INTERVAL '<shift>' DAY.

    Shift formula: Sunday (7) gets +1, others get (1 - start_dow)
    Examples:
        Monday (1): shift = 0 (no shift needed)
        Tuesday (2): shift = -1 (shift back 1 day)
        Saturday (6): shift = -5 (shift back 5 days)
        Sunday (7): shift = +1 (shift forward 1 day, wraps to next Monday-based week)
    """
    shift_days = 1 if start_dow == 7 else 1 - start_dow

    if shift_days == 0:
        # Monday-based weeks already match DuckDB's truncation, so no shift is emitted
        return exp.DateTrunc(unit=exp.var("WEEK"), this=date_expr)

    # Shift the date so that week boundaries line up with the desired start day
    shifted_date = exp.DateAdd(
        this=date_expr,
        expression=exp.Interval(this=exp.Literal.string(str(shift_days)), unit=exp.var("DAY")),
    )

    return exp.DateTrunc(unit=exp.var("WEEK"), this=shifted_date)
323+
324+
253325
def _date_diff_sql(self: DuckDB.Generator, expression: exp.DateDiff) -> str:
    """
    Generate DuckDB's DATE_DIFF(<unit>, <start>, <end>) for an exp.DateDiff node.

    Both operands go through `_implicit_datetime_cast`. When the node is flagged with
    `date_part_boundary` and its unit resolves to a week start day, both operands are
    first truncated to week boundaries so the result counts boundary crossings.

    Args:
        expression: The DateDiff node; `this` is emitted last and `expression` second.

    Returns:
        The generated DATE_DIFF call.
    """
    end = _implicit_datetime_cast(expression.this)
    start = _implicit_datetime_cast(expression.expression)

    # DuckDB's WEEK diff does not respect Monday crossing (week boundaries), it checks
    # (end_day - start_day) / 7:
    #   SELECT DATE_DIFF('WEEK', CAST('2024-12-13' AS DATE), CAST('2024-12-17' AS DATE)) --> 0 (Monday crossed)
    #   SELECT DATE_DIFF('WEEK', CAST('2024-12-13' AS DATE), CAST('2024-12-20' AS DATE)) --> 1 (7 days difference)
    # Whereas for other units such as MONTH it does respect month boundaries:
    #   SELECT DATE_DIFF('MONTH', CAST('2024-11-30' AS DATE), CAST('2024-12-01' AS DATE)) --> 1 (Month crossed)
    respects_boundary = expression.args.get("date_part_boundary")

    # None when the unit is not a week unit or its start day is dynamic (column/placeholder)
    week_start_dow = _week_unit_to_dow(expression.args.get("unit"))

    if respects_boundary and week_start_dow and end and start:
        # The custom week start is encoded in the truncation below, so the unit that
        # reaches DATE_DIFF is a plain 'WEEK'
        expression.set("unit", exp.Literal.string("WEEK"))

        # Truncate both dates to week boundaries to respect input dialect semantics
        end = _build_week_trunc_expression(end, week_start_dow)
        start = _build_week_trunc_expression(start, week_start_dow)

    return self.func("DATE_DIFF", unit_to_str(expression), start, end)
258347

sqlglot/expressions.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6398,7 +6398,14 @@ class DateSub(Func, IntervalOp):
63986398

63996399
class DateDiff(Func, TimeUnit):
    _sql_names = ["DATEDIFF", "DATE_DIFF"]
    arg_types = {
        "this": True,
        "expression": True,
        "unit": False,
        "zone": False,
        "big_int": False,
        # Optional flag. BigQuery's DATE_DIFF builder sets it to True, and the DuckDB
        # generator checks it before truncating week-unit operands to week boundaries.
        "date_part_boundary": False,
    }
64026409

64036410

64046411
class DateTrunc(Func):

sqlglot/generator.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,6 @@ class Generator(metaclass=_Generator):
226226
exp.VarMap: lambda self, e: self.func("MAP", e.args["keys"], e.args["values"]),
227227
exp.ViewAttributeProperty: lambda self, e: f"WITH {self.sql(e, 'this')}",
228228
exp.VolatileProperty: lambda *_: "VOLATILE",
229-
exp.WeekStart: lambda self, e: f"WEEK({self.sql(e, 'this')})",
230229
exp.WithJournalTableProperty: lambda self, e: f"WITH JOURNAL TABLE={self.sql(e, 'this')}",
231230
exp.WithProcedureOptions: lambda self, e: f"WITH {self.expressions(e, flat=True)}",
232231
exp.WithSchemaBindingProperty: lambda self, e: f"WITH SCHEMA {self.sql(e, 'this')}",
@@ -5456,3 +5455,11 @@ def localtime_sql(self, expression: exp.Localtime) -> str:
54565455
def localtimestamp_sql(self, expression: exp.Localtime) -> str:
    """Render LOCALTIMESTAMP, as a function call only when the node carries an argument."""
    # NOTE(review): the parameter is annotated as exp.Localtime -- confirm whether a
    # dedicated LOCALTIMESTAMP expression type was intended here.
    arg = expression.this
    if not arg:
        return "LOCALTIMESTAMP"

    return self.func("LOCALTIMESTAMP", arg)
5458+
5459+
def weekstart_sql(self, expression: exp.WeekStart) -> str:
    """
    Render a WeekStart unit as WEEK(<day>), or as a bare WEEK when the two are equivalent.

    The bare form is used only when the day is SUNDAY and the dialect's WEEK_OFFSET is -1.
    """
    start_day = expression.this
    day_name = start_day.name.upper()
    sunday_based_dialect = self.dialect.WEEK_OFFSET == -1

    if sunday_based_dialect and day_name == "SUNDAY":
        # BigQuery specific optimization since WEEK(SUNDAY) == WEEK
        return "WEEK"

    return self.func("WEEK", start_day)

tests/dialects/test_bigquery.py

Lines changed: 76 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3232,7 +3232,7 @@ def test_week(self):
32323232
self.validate_identity("DATE_TRUNC(date, WEEK(MONDAY))")
32333233
self.validate_identity(
32343234
"LAST_DAY(DATETIME '2008-11-10 15:30:00', WEEK(SUNDAY))",
3235-
"LAST_DAY(CAST('2008-11-10 15:30:00' AS DATETIME), WEEK(SUNDAY))",
3235+
"LAST_DAY(CAST('2008-11-10 15:30:00' AS DATETIME), WEEK)",
32363236
)
32373237
self.validate_identity("DATE_DIFF('2017-12-18', '2017-12-17', WEEK(SATURDAY))")
32383238
self.validate_identity("DATETIME_DIFF('2017-12-18', '2017-12-17', WEEK(MONDAY))")
@@ -3241,6 +3241,81 @@ def test_week(self):
32413241
"EXTRACT(WEEK(THURSDAY) FROM CAST('2013-12-25' AS DATE))",
32423242
)
32433243

3244+
# BigQuery → DuckDB transpilation tests for DATE_DIFF with week units
3245+
self.validate_all(
3246+
"SELECT DATE_DIFF('2024-06-15', '2024-01-08', WEEK(MONDAY))",
3247+
write={
3248+
"bigquery": "SELECT DATE_DIFF('2024-06-15', '2024-01-08', WEEK(MONDAY))",
3249+
"duckdb": "SELECT DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST('2024-01-08' AS DATE)), DATE_TRUNC('WEEK', CAST('2024-06-15' AS DATE)))",
3250+
},
3251+
)
3252+
self.validate_all(
3253+
"SELECT DATE_DIFF('2026-01-15', '2024-01-08', WEEK(SUNDAY))",
3254+
write={
3255+
"bigquery": "SELECT DATE_DIFF('2026-01-15', '2024-01-08', WEEK)",
3256+
"duckdb": "SELECT DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST('2024-01-08' AS DATE) + INTERVAL '1' DAY), DATE_TRUNC('WEEK', CAST('2026-01-15' AS DATE) + INTERVAL '1' DAY))",
3257+
},
3258+
)
3259+
self.validate_all(
3260+
"SELECT DATE_DIFF('2024-01-15', '2022-04-28', WEEK(SATURDAY))",
3261+
write={
3262+
"bigquery": "SELECT DATE_DIFF('2024-01-15', '2022-04-28', WEEK(SATURDAY))",
3263+
"duckdb": "SELECT DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST('2022-04-28' AS DATE) + INTERVAL '-5' DAY), DATE_TRUNC('WEEK', CAST('2024-01-15' AS DATE) + INTERVAL '-5' DAY))",
3264+
},
3265+
)
3266+
self.validate_all(
3267+
"SELECT DATE_DIFF('2024-01-15', '2024-01-08', WEEK)",
3268+
write={
3269+
"bigquery": "SELECT DATE_DIFF('2024-01-15', '2024-01-08', WEEK)",
3270+
"duckdb": "SELECT DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST('2024-01-08' AS DATE) + INTERVAL '1' DAY), DATE_TRUNC('WEEK', CAST('2024-01-15' AS DATE) + INTERVAL '1' DAY))",
3271+
},
3272+
)
3273+
# Test WEEK - Saturday to Sunday boundary (critical test for Sunday-start weeks)
3274+
# In BigQuery: Saturday -> Sunday crosses week boundary = 1 week
3275+
# Without fix: DuckDB treats as Monday-start weeks = 0 weeks (both in same week)
3276+
self.validate_all(
3277+
"SELECT DATE_DIFF('2024-01-07', '2024-01-06', WEEK)",
3278+
write={
3279+
"bigquery": "SELECT DATE_DIFF('2024-01-07', '2024-01-06', WEEK)",
3280+
"duckdb": "SELECT DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST('2024-01-06' AS DATE) + INTERVAL '1' DAY), DATE_TRUNC('WEEK', CAST('2024-01-07' AS DATE) + INTERVAL '1' DAY))",
3281+
},
3282+
)
3283+
self.validate_all(
3284+
"SELECT DATE_DIFF('2024-01-15', '2024-01-08', ISOWEEK)",
3285+
write={
3286+
"bigquery": "SELECT DATE_DIFF('2024-01-15', '2024-01-08', ISOWEEK)",
3287+
"duckdb": "SELECT DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST('2024-01-08' AS DATE)), DATE_TRUNC('WEEK', CAST('2024-01-15' AS DATE)))",
3288+
},
3289+
)
3290+
self.validate_all(
3291+
"SELECT DATE_DIFF(DATE '2024-09-15', DATE '2024-01-08', WEEK(MONDAY))",
3292+
write={
3293+
"bigquery": "SELECT DATE_DIFF(CAST('2024-09-15' AS DATE), CAST('2024-01-08' AS DATE), WEEK(MONDAY))",
3294+
"duckdb": "SELECT DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST('2024-01-08' AS DATE)), DATE_TRUNC('WEEK', CAST('2024-09-15' AS DATE)))",
3295+
},
3296+
)
3297+
self.validate_all(
3298+
"SELECT DATE_DIFF(DATE '2024-01-01', DATE '2024-01-15', WEEK(SUNDAY))",
3299+
write={
3300+
"bigquery": "SELECT DATE_DIFF(CAST('2024-01-01' AS DATE), CAST('2024-01-15' AS DATE), WEEK)",
3301+
"duckdb": "SELECT DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST('2024-01-15' AS DATE) + INTERVAL '1' DAY), DATE_TRUNC('WEEK', CAST('2024-01-01' AS DATE) + INTERVAL '1' DAY))",
3302+
},
3303+
)
3304+
self.validate_all(
3305+
"SELECT DATE_DIFF(DATE '2023-05-01', DATE '2024-01-15', ISOWEEK)",
3306+
write={
3307+
"bigquery": "SELECT DATE_DIFF(CAST('2023-05-01' AS DATE), CAST('2024-01-15' AS DATE), ISOWEEK)",
3308+
"duckdb": "SELECT DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST('2024-01-15' AS DATE)), DATE_TRUNC('WEEK', CAST('2023-05-01' AS DATE)))",
3309+
},
3310+
)
3311+
self.validate_all(
3312+
"SELECT DATE_DIFF(DATE '2024-01-01', DATE '2024-01-15', DAY)",
3313+
write={
3314+
"bigquery": "SELECT DATE_DIFF(CAST('2024-01-01' AS DATE), CAST('2024-01-15' AS DATE), DAY)",
3315+
"duckdb": "SELECT DATE_DIFF('DAY', CAST('2024-01-15' AS DATE), CAST('2024-01-01' AS DATE))",
3316+
},
3317+
)
3318+
32443319
def test_approx_qunatiles(self):
32453320
self.validate_identity("APPROX_QUANTILES(foo, 2)")
32463321
self.validate_identity("APPROX_QUANTILES(DISTINCT foo, 2 RESPECT NULLS)")

0 commit comments

Comments
 (0)