From 5949dedd58bc3669a8056f3f45007674948c8f04 Mon Sep 17 00:00:00 2001 From: Piotr Szul Date: Thu, 30 Apr 2026 11:55:29 +1000 Subject: [PATCH 1/5] Add spec tests for repeat with complex nesting scenarios Co-Authored-By: Claude Sonnet 4.6 (1M context) --- tests/spec-tests/repeat.json | 1035 ++++++++++++++++++++++++++++++++++ 1 file changed, 1035 insertions(+) create mode 100644 tests/spec-tests/repeat.json diff --git a/tests/spec-tests/repeat.json b/tests/spec-tests/repeat.json new file mode 100644 index 0000000..5b9c642 --- /dev/null +++ b/tests/spec-tests/repeat.json @@ -0,0 +1,1035 @@ +{ + "title": "repeat", + "description": "Recursive traversal with repeat directive", + "fhirVersion": ["5.0.0", "4.0.1", "3.0.2"], + "resources": [ + { + "resourceType": "Questionnaire", + "id": "q1", + "item": [ + { + "linkId": "g1", + "text": "Group 1", + "type": "group", + "item": [ + { + "linkId": "g1.1", + "text": "Question 1.1", + "type": "string", + "item": [ + { + "linkId": "g1.1.1", + "text": "Sub-question 1.1.1", + "type": "string" + } + ] + } + ] + }, + { + "linkId": "g2", + "text": "Group 2", + "type": "group" + } + ] + }, + { + "resourceType": "QuestionnaireResponse", + "id": "qr1", + "item": [ + { + "linkId": "1", + "text": "Group 1", + "item": [ + { + "linkId": "1.1", + "text": "Question 1.1", + "answer": [ + { + "valueString": "Answer 1.1", + "item": [ + { + "linkId": "1.1.1", + "text": "Follow-up to 1.1" + } + ] + } + ] + }, + { + "linkId": "1.2", + "text": "Question 1.2", + "item": [ + { + "linkId": "1.2.1", + "text": "Question 1.2.1" + } + ] + } + ] + }, + { + "linkId": "2", + "text": "Group 2" + } + ] + } + ], + "tests": [ + { + "title": "basic", + "tags": ["shareable"], + "view": { + "resource": "QuestionnaireResponse", + "status": "active", + "select": [ + { + "column": [ + { + "name": "id", + "path": "id", + "type": "id" + } + ] + }, + { + "repeat": ["item"], + "column": [ + { + "name": "linkId", + "path": "linkId", + "type": "string" + }, + { + "name": 
"text", + "path": "text", + "type": "string" + } + ] + } + ] + }, + "expect": [ + { + "id": "qr1", + "linkId": "1", + "text": "Group 1" + }, + { + "id": "qr1", + "linkId": "1.1", + "text": "Question 1.1" + }, + { + "id": "qr1", + "linkId": "1.2", + "text": "Question 1.2" + }, + { + "id": "qr1", + "linkId": "1.2.1", + "text": "Question 1.2.1" + }, + { + "id": "qr1", + "linkId": "2", + "text": "Group 2" + } + ] + }, + { + "title": "item and answer.item", + "tags": ["shareable"], + "view": { + "resource": "QuestionnaireResponse", + "status": "active", + "select": [ + { + "column": [ + { + "name": "id", + "path": "id", + "type": "id" + } + ] + }, + { + "repeat": ["item", "answer.item"], + "column": [ + { + "name": "linkId", + "path": "linkId", + "type": "string" + }, + { + "name": "text", + "path": "text", + "type": "string" + } + ] + } + ] + }, + "expect": [ + { + "id": "qr1", + "linkId": "1", + "text": "Group 1" + }, + { + "id": "qr1", + "linkId": "1.1", + "text": "Question 1.1" + }, + { + "id": "qr1", + "linkId": "1.1.1", + "text": "Follow-up to 1.1" + }, + { + "id": "qr1", + "linkId": "1.2", + "text": "Question 1.2" + }, + { + "id": "qr1", + "linkId": "1.2.1", + "text": "Question 1.2.1" + }, + { + "id": "qr1", + "linkId": "2", + "text": "Group 2" + } + ] + }, + { + "title": "empty expression", + "tags": ["shareable"], + "view": { + "resource": "QuestionnaireResponse", + "status": "active", + "select": [ + { + "column": [ + { + "name": "id", + "path": "id", + "type": "id" + } + ] + }, + { + "repeat": ["jurisdiction"], + "column": [ + { + "name": "code", + "path": "coding.code", + "type": "code" + } + ] + } + ] + }, + "expect": [] + }, + { + "title": "empty child expression", + "tags": ["shareable"], + "view": { + "resource": "QuestionnaireResponse", + "status": "active", + "select": [ + { + "column": [ + { + "name": "id", + "path": "id", + "type": "id" + } + ] + }, + { + "repeat": ["item"], + "column": [ + { + "name": "linkId", + "path": "linkId", + "type": "string" 
+ }, + { + "name": "definition", + "path": "definition", + "type": "uri" + } + ] + } + ] + }, + "expect": [ + { + "id": "qr1", + "linkId": "1", + "definition": null + }, + { + "id": "qr1", + "linkId": "1.1", + "definition": null + }, + { + "id": "qr1", + "linkId": "1.2", + "definition": null + }, + { + "id": "qr1", + "linkId": "1.2.1", + "definition": null + }, + { + "id": "qr1", + "linkId": "2", + "definition": null + } + ] + }, + { + "title": "combined with forEach", + "tags": ["shareable"], + "view": { + "resource": "QuestionnaireResponse", + "status": "active", + "select": [ + { + "column": [ + { + "name": "id", + "path": "id", + "type": "id" + } + ] + }, + { + "repeat": ["item"], + "select": [ + { + "column": [ + { + "name": "linkId", + "path": "linkId", + "type": "string" + } + ] + }, + { + "forEach": "answer", + "column": [ + { + "name": "answerValue", + "path": "value.ofType(string)", + "type": "string" + } + ] + } + ] + } + ] + }, + "expect": [ + { + "id": "qr1", + "linkId": "1.1", + "answerValue": "Answer 1.1" + } + ] + }, + { + "title": "combined with forEachOrNull", + "tags": ["shareable"], + "view": { + "resource": "QuestionnaireResponse", + "status": "active", + "select": [ + { + "column": [ + { + "name": "id", + "path": "id", + "type": "id" + } + ] + }, + { + "repeat": ["item"], + "select": [ + { + "column": [ + { + "name": "linkId", + "path": "linkId", + "type": "string" + } + ] + }, + { + "forEachOrNull": "answer", + "column": [ + { + "name": "answerValue", + "path": "value.ofType(string)", + "type": "string" + } + ] + } + ] + } + ] + }, + "expect": [ + { + "id": "qr1", + "linkId": "1", + "answerValue": null + }, + { + "id": "qr1", + "linkId": "1.1", + "answerValue": "Answer 1.1" + }, + { + "id": "qr1", + "linkId": "1.2", + "answerValue": null + }, + { + "id": "qr1", + "linkId": "1.2.1", + "answerValue": null + }, + { + "id": "qr1", + "linkId": "2", + "answerValue": null + } + ] + }, + { + "title": "combined with unionAll", + "tags": ["shareable"], 
+ "view": { + "resource": "QuestionnaireResponse", + "status": "active", + "select": [ + { + "column": [ + { + "name": "id", + "path": "id", + "type": "id" + } + ] + }, + { + "unionAll": [ + { + "repeat": ["item"], + "column": [ + { + "name": "type", + "path": "'item'", + "type": "string" + }, + { + "name": "linkId", + "path": "linkId", + "type": "string" + }, + { + "name": "text", + "path": "text", + "type": "string" + } + ] + }, + { + "repeat": ["item", "answer.item"], + "column": [ + { + "name": "type", + "path": "'answer-item'", + "type": "string" + }, + { + "name": "linkId", + "path": "linkId", + "type": "string" + }, + { + "name": "text", + "path": "text", + "type": "string" + } + ] + } + ] + } + ] + }, + "expect": [ + { + "id": "qr1", + "type": "item", + "linkId": "1", + "text": "Group 1" + }, + { + "id": "qr1", + "type": "item", + "linkId": "1.1", + "text": "Question 1.1" + }, + { + "id": "qr1", + "type": "item", + "linkId": "1.2", + "text": "Question 1.2" + }, + { + "id": "qr1", + "type": "item", + "linkId": "1.2.1", + "text": "Question 1.2.1" + }, + { + "id": "qr1", + "type": "item", + "linkId": "2", + "text": "Group 2" + }, + { + "id": "qr1", + "type": "answer-item", + "linkId": "1", + "text": "Group 1" + }, + { + "id": "qr1", + "type": "answer-item", + "linkId": "1.1", + "text": "Question 1.1" + }, + { + "id": "qr1", + "type": "answer-item", + "linkId": "1.1.1", + "text": "Follow-up to 1.1" + }, + { + "id": "qr1", + "type": "answer-item", + "linkId": "1.2", + "text": "Question 1.2" + }, + { + "id": "qr1", + "type": "answer-item", + "linkId": "1.2.1", + "text": "Question 1.2.1" + }, + { + "id": "qr1", + "type": "answer-item", + "linkId": "2", + "text": "Group 2" + } + ] + }, + { + "title": "repeat inside forEach", + "tags": ["shareable"], + "view": { + "resource": "Questionnaire", + "status": "active", + "select": [ + { + "column": [ + { + "name": "id", + "path": "id", + "type": "id" + } + ] + }, + { + "forEach": "item", + "select": [ + { + "column": [ + 
{ + "name": "groupLinkId", + "path": "linkId", + "type": "string" + } + ] + }, + { + "repeat": ["item"], + "column": [ + { + "name": "linkId", + "path": "linkId", + "type": "string" + }, + { + "name": "text", + "path": "text", + "type": "string" + } + ] + } + ] + } + ] + }, + "expect": [ + { + "id": "q1", + "groupLinkId": "g1", + "linkId": "g1.1", + "text": "Question 1.1" + }, + { + "id": "q1", + "groupLinkId": "g1", + "linkId": "g1.1.1", + "text": "Sub-question 1.1.1" + } + ] + }, + { + "title": "repeat inside repeat", + "tags": ["shareable"], + "view": { + "resource": "Questionnaire", + "status": "active", + "select": [ + { + "column": [ + { + "name": "id", + "path": "id", + "type": "id" + } + ] + }, + { + "repeat": ["item"], + "select": [ + { + "column": [ + { + "name": "ancestorLinkId", + "path": "linkId", + "type": "string" + } + ] + }, + { + "repeat": ["item"], + "column": [ + { + "name": "descendantLinkId", + "path": "linkId", + "type": "string" + } + ] + } + ] + } + ] + }, + "expect": [ + { + "id": "q1", + "ancestorLinkId": "g1", + "descendantLinkId": "g1.1" + }, + { + "id": "q1", + "ancestorLinkId": "g1", + "descendantLinkId": "g1.1.1" + }, + { + "id": "q1", + "ancestorLinkId": "g1.1", + "descendantLinkId": "g1.1.1" + } + ] + }, + { + "title": "repeat inside forEachOrNull", + "tags": ["shareable"], + "view": { + "resource": "Questionnaire", + "status": "active", + "select": [ + { + "column": [ + { + "name": "id", + "path": "id", + "type": "id" + } + ] + }, + { + "forEachOrNull": "item", + "select": [ + { + "column": [ + { + "name": "groupLinkId", + "path": "linkId", + "type": "string" + } + ] + }, + { + "repeat": ["item"], + "column": [ + { + "name": "linkId", + "path": "linkId", + "type": "string" + } + ] + } + ] + } + ] + }, + "expect": [ + { + "id": "q1", + "groupLinkId": "g1", + "linkId": "g1.1" + }, + { + "id": "q1", + "groupLinkId": "g1", + "linkId": "g1.1.1" + } + ] + }, + { + "title": "sibling repeats at top level", + "tags": ["shareable"], + 
"view": { + "resource": "Questionnaire", + "status": "active", + "select": [ + { + "column": [ + { + "name": "id", + "path": "id", + "type": "id" + } + ] + }, + { + "repeat": ["item"], + "column": [ + { + "name": "linkIdA", + "path": "linkId", + "type": "string" + } + ] + }, + { + "repeat": ["item"], + "column": [ + { + "name": "linkIdB", + "path": "linkId", + "type": "string" + } + ] + } + ] + }, + "expect": [ + {"id": "q1", "linkIdA": "g1", "linkIdB": "g1"}, + {"id": "q1", "linkIdA": "g1", "linkIdB": "g1.1"}, + {"id": "q1", "linkIdA": "g1", "linkIdB": "g1.1.1"}, + {"id": "q1", "linkIdA": "g1", "linkIdB": "g2"}, + {"id": "q1", "linkIdA": "g1.1", "linkIdB": "g1"}, + {"id": "q1", "linkIdA": "g1.1", "linkIdB": "g1.1"}, + {"id": "q1", "linkIdA": "g1.1", "linkIdB": "g1.1.1"}, + {"id": "q1", "linkIdA": "g1.1", "linkIdB": "g2"}, + {"id": "q1", "linkIdA": "g1.1.1", "linkIdB": "g1"}, + {"id": "q1", "linkIdA": "g1.1.1", "linkIdB": "g1.1"}, + {"id": "q1", "linkIdA": "g1.1.1", "linkIdB": "g1.1.1"}, + {"id": "q1", "linkIdA": "g1.1.1", "linkIdB": "g2"}, + {"id": "q1", "linkIdA": "g2", "linkIdB": "g1"}, + {"id": "q1", "linkIdA": "g2", "linkIdB": "g1.1"}, + {"id": "q1", "linkIdA": "g2", "linkIdB": "g1.1.1"}, + {"id": "q1", "linkIdA": "g2", "linkIdB": "g2"} + ] + }, + { + "title": "sibling repeats inside forEach", + "tags": ["shareable"], + "view": { + "resource": "Questionnaire", + "status": "active", + "select": [ + { + "column": [ + { + "name": "id", + "path": "id", + "type": "id" + } + ] + }, + { + "forEach": "item", + "select": [ + { + "column": [ + { + "name": "groupLinkId", + "path": "linkId", + "type": "string" + } + ] + }, + { + "repeat": ["item"], + "column": [ + { + "name": "linkIdA", + "path": "linkId", + "type": "string" + } + ] + }, + { + "repeat": ["item"], + "column": [ + { + "name": "linkIdB", + "path": "linkId", + "type": "string" + } + ] + } + ] + } + ] + }, + "expect": [ + {"id": "q1", "groupLinkId": "g1", "linkIdA": "g1.1", "linkIdB": "g1.1"}, + {"id": "q1", 
"groupLinkId": "g1", "linkIdA": "g1.1", "linkIdB": "g1.1.1"}, + {"id": "q1", "groupLinkId": "g1", "linkIdA": "g1.1.1", "linkIdB": "g1.1"}, + {"id": "q1", "groupLinkId": "g1", "linkIdA": "g1.1.1", "linkIdB": "g1.1.1"} + ] + }, + { + "title": "top-level repeat with sibling forEach containing repeat", + "tags": ["shareable"], + "view": { + "resource": "Questionnaire", + "status": "active", + "select": [ + { + "column": [ + { + "name": "id", + "path": "id", + "type": "id" + } + ] + }, + { + "repeat": ["item"], + "column": [ + { + "name": "topLinkId", + "path": "linkId", + "type": "string" + } + ] + }, + { + "forEach": "item", + "select": [ + { + "column": [ + { + "name": "groupLinkId", + "path": "linkId", + "type": "string" + } + ] + }, + { + "repeat": ["item"], + "column": [ + { + "name": "innerLinkId", + "path": "linkId", + "type": "string" + } + ] + } + ] + } + ] + }, + "expect": [ + {"id": "q1", "topLinkId": "g1", "groupLinkId": "g1", "innerLinkId": "g1.1"}, + {"id": "q1", "topLinkId": "g1", "groupLinkId": "g1", "innerLinkId": "g1.1.1"}, + {"id": "q1", "topLinkId": "g1.1", "groupLinkId": "g1", "innerLinkId": "g1.1"}, + {"id": "q1", "topLinkId": "g1.1", "groupLinkId": "g1", "innerLinkId": "g1.1.1"}, + {"id": "q1", "topLinkId": "g1.1.1", "groupLinkId": "g1", "innerLinkId": "g1.1"}, + {"id": "q1", "topLinkId": "g1.1.1", "groupLinkId": "g1", "innerLinkId": "g1.1.1"}, + {"id": "q1", "topLinkId": "g2", "groupLinkId": "g1", "innerLinkId": "g1.1"}, + {"id": "q1", "topLinkId": "g2", "groupLinkId": "g1", "innerLinkId": "g1.1.1"} + ] + }, + { + "title": "forEach with repeat with forEach (triple nesting)", + "tags": ["shareable"], + "view": { + "resource": "QuestionnaireResponse", + "status": "active", + "select": [ + { + "column": [ + { + "name": "id", + "path": "id", + "type": "id" + } + ] + }, + { + "forEach": "item", + "select": [ + { + "column": [ + { + "name": "outerLinkId", + "path": "linkId", + "type": "string" + } + ] + }, + { + "repeat": ["item"], + "select": [ + { + 
"column": [ + { + "name": "midLinkId", + "path": "linkId", + "type": "string" + } + ] + }, + { + "forEach": "answer", + "column": [ + { + "name": "answerValue", + "path": "value.ofType(string)", + "type": "string" + } + ] + } + ] + } + ] + } + ] + }, + "expect": [ + { + "id": "qr1", + "outerLinkId": "1", + "midLinkId": "1.1", + "answerValue": "Answer 1.1" + } + ] + }, + { + "title": "repeat with forEach with repeat (triple nesting)", + "tags": ["shareable"], + "view": { + "resource": "QuestionnaireResponse", + "status": "active", + "select": [ + { + "column": [ + { + "name": "id", + "path": "id", + "type": "id" + } + ] + }, + { + "repeat": ["item"], + "select": [ + { + "column": [ + { + "name": "outerLinkId", + "path": "linkId", + "type": "string" + } + ] + }, + { + "forEach": "answer", + "select": [ + { + "column": [ + { + "name": "midValue", + "path": "value.ofType(string)", + "type": "string" + } + ] + }, + { + "repeat": ["item"], + "column": [ + { + "name": "innerLinkId", + "path": "linkId", + "type": "string" + } + ] + } + ] + } + ] + } + ] + }, + "expect": [ + { + "id": "qr1", + "outerLinkId": "1.1", + "midValue": "Answer 1.1", + "innerLinkId": "1.1.1" + } + ] + } + ] +} From 103c62f218aea6b7804a0eaa20467c7176297644 Mon Sep 17 00:00:00 2001 From: Piotr Szul Date: Thu, 30 Apr 2026 12:12:13 +1000 Subject: [PATCH 2/5] Implement repeat with fixed-depth list_cat unrolling. The vectorised list_cat approach processes all rows in a single pipeline pass. Depth defaults to 5 and is configurable via _nestedRepeatDepth. 
Co-Authored-By: Claude Sonnet 4.6 (1M context) --- src/ddb-sql-builder.js | 341 +++++++++++++++++++++++++++++++++++++++-- src/query-builder.js | 39 ++++- src/view-parser.js | 22 ++- tests/view.test.js | 54 +++++++ 4 files changed, 436 insertions(+), 20 deletions(-) diff --git a/src/ddb-sql-builder.js b/src/ddb-sql-builder.js index 238e1e8..0e95b6f 100644 --- a/src/ddb-sql-builder.js +++ b/src/ddb-sql-builder.js @@ -17,11 +17,27 @@ export function tablesToSql(tables) { } -export function astToSql(node, inLambda, inputType={}) { +let _astToSqlOptions = {}; + +export function astToSql(node, inLambda, inputType={}, options) { + if (options !== undefined) _astToSqlOptions = options; function flattenSql(querySegments) { if (!querySegments) return; if (!Array.isArray(querySegments)) querySegments = [querySegments]; + + // _repeat substitution: when a segment is marked isRepeat, replace __INPUT__ + // in its SQL with the joined SQL of preceding nav segments, then drop those + // preceding segments (they are now embedded in the repeat expression). + for (let i = 0; i < querySegments.length; i++) { + if (querySegments[i] && querySegments[i].outputType && querySegments[i].outputType.isRepeat) { + const prevSql = querySegments.slice(0, i).map(s => s && s.sql).filter(s => !!s).join("."); + querySegments[i].sql = querySegments[i].sql.replace(/__INPUT__/g, prevSql); + querySegments.splice(0, i); + break; + } + } + //group nav path items with parens let inNav; querySegments.map( (s,i) => { @@ -69,14 +85,32 @@ export function astToSql(node, inLambda, inputType={}) { case 'nav': if (inLambda) { - sql = `(el.${node.value})` + const lambdaVar = _astToSqlOptions._lambdaVar || 'el'; + if (_astToSqlOptions._jsonScope) { + // JSON-scope: lambda var holds a JSON value (because the iterated array + // is a top-level field retyped to JSON[] for an enclosing repeat). Use + // ->> for scalar leaves (returns VARCHAR) and -> for nested/array access + // (returns JSON). 
+ const op = (node.type.isArray || (node.type.fhirType && node.type.fhirType[0] === node.type.fhirType[0].toUpperCase())) + ? '->' + : '->>'; + sql = `(${lambdaVar}${op}'${node.value}')`; + } else { + sql = `(${lambdaVar}.${node.value})`; + } outputType = {fhirType: node.type.fhirType, isArray: node.type.isArray, isNav: false} } else if (inputType.fhirType && inputType.isArray) { sql = `list_transform(el -> el.${node.value})${node.type.isArray ? ".flatten()" : ""}`; outputType = {fhirType: node.type.fhirType, isArray: true, isNav: false} } else { sql = node.value; - outputType = {fhirType: node.type.fhirType, isArray: node.type.isArray, isNav: true}; + const isJsonField = _astToSqlOptions._jsonScopeFields && _astToSqlOptions._jsonScopeFields.has(node.value); + outputType = { + fhirType: node.type.fhirType, + isArray: node.type.isArray, + isNav: true, + json: isJsonField || undefined + }; } return {sql, outputType} @@ -115,7 +149,7 @@ export function astToSql(node, inLambda, inputType={}) { case 'this': return { - sql: inLambda ? "el" : "", + sql: inLambda ? 
(_astToSqlOptions._lambdaVar || "el") : "", outputType: inputType } @@ -133,7 +167,10 @@ export function astToSql(node, inLambda, inputType={}) { sql = `list_aggregate('string_agg', ${(firstArg && firstArg.value) || "''"}).ifnull2('')`; return {sql, outputType: {fhirType: "string", isArray: false}} - case 'where': + case 'where': { + const savedLv = _astToSqlOptions._lambdaVar; + _astToSqlOptions._lambdaVar = undefined; + let sql, outputType; if (inputType && inputType.isArray) { sql = `list_filter(el -> ${flattenSql(astToSql(firstArg, true)).sql})`; outputType = {fhirType: inputType.fhirType, isArray: true} @@ -141,10 +178,12 @@ export function astToSql(node, inLambda, inputType={}) { sql = `as_list().list_filter(el -> ${flattenSql(astToSql(firstArg, true)).sql}).slice(1)`; outputType = {fhirType: inputType.fhirType, isArray: false} } else { - sql = flattenSql(astToSql(firstArg)).sql; + sql = flattenSql(astToSql(firstArg)).sql; outputType = {fhirType: "boolean_expr", isArray: false} } - return {sql, outputType} + _astToSqlOptions._lambdaVar = savedLv; + return {sql, outputType}; + } case 'not': sql = inputType.isArray @@ -179,7 +218,7 @@ export function astToSql(node, inLambda, inputType={}) { outputType: {isArray: true, fhirType: "string"} } : { - sql: `${inLambda ? "el." : ""}parse_path('/')[${firstArg.value}]`, + sql: `${inLambda ? `${_astToSqlOptions._lambdaVar || 'el'}.` : ""}parse_path('/')[${firstArg.value}]`, outputType: {isArray: false, fhirType: "string"} } @@ -208,27 +247,129 @@ export function astToSql(node, inLambda, inputType={}) { //non-standard case '_forEach': - case '_forEachOrNull': + case '_forEachOrNull': { + const orNullSql = node.name == "_forEachOrNull" ? ".ifnull2([NULL])" : "" - //TODO: error if each arg is not a col function - const orNullSql = node.name == "_forEachOrNull" - ? 
".ifnull2([NULL])" - : "" - if (!inputType.fhirType) { const cols = node.args.map(a => astToSql(a, inLambda, inputType)).map(flattenSql).map(a => a.sql).join(","); sql = `{${cols}}`; outputType = {fhirType: inputType.fhirType, isArray: false}; } else if (inputType.fhirType && !inputType.isArray) { + const savedLv = _astToSqlOptions._lambdaVar; + const savedScope = _astToSqlOptions._inLambdaScope; + _astToSqlOptions._lambdaVar = undefined; + _astToSqlOptions._inLambdaScope = true; const cols = node.args.map(a => astToSql(a, true, inputType)).map(flattenSql).map(a => a.sql).join(","); + _astToSqlOptions._lambdaVar = savedLv; + _astToSqlOptions._inLambdaScope = savedScope; sql = `as_list().list_transform(el -> {${cols}})${orNullSql}`; outputType = {fhirType: inputType.fhirType, isArray: true}; } else { + const lv = _astToSqlOptions._lambdaVar || 'el'; + const savedLv = _astToSqlOptions._lambdaVar; + const savedScope = _astToSqlOptions._inLambdaScope; + const savedJsonScope = _astToSqlOptions._jsonScope; + _astToSqlOptions._lambdaVar = undefined; + _astToSqlOptions._inLambdaScope = true; + if (inputType.json) _astToSqlOptions._jsonScope = true; const cols = node.args.map(a => astToSql(a, true, inputType)).map(flattenSql).map(a => a.sql).join(","); - sql = `${inLambda ? "el.as_list()." : ""}list_transform(el -> {${cols}})${orNullSql}`; + _astToSqlOptions._lambdaVar = savedLv; + _astToSqlOptions._inLambdaScope = savedScope; + _astToSqlOptions._jsonScope = savedJsonScope; + sql = `${inLambda ? `${lv}.as_list().` : ""}list_transform(el -> {${cols}})${orNullSql}`; outputType = {fhirType: inputType.fhirType, isArray: true}; } return {sql, outputType} + } + + //non-standard + case '_repeat': { + // Split args into repeat-path string literals (positional 0..N-1) and column args (the rest). 
+ const repeatPaths = []; + const repeatColArgs = []; + for (const arg of node.args) { + const firstNode = arg[0]; + if (firstNode && firstNode.segmentType === 'literal' && firstNode.type.fhirType === 'string') { + repeatPaths.push(firstNode.value.replace(/^'|'$/g, '')); + } else { + repeatColArgs.push(arg); + } + } + + // Detect nesting depth via the existing _lambdaVar. Top-level _repeat sees + // _lambdaVar unset (or non-_ri); a nested _repeat sees the outer's '_ri' + // (or '_ri2', '_ri3'...). We allocate a unique lambda var per nesting depth + // because DuckDB lambdas don't reliably shadow same-named outer columns — + // `_ri.linkId` inside a `_ri -> ...` lambda may bind to the outer projection's + // `_ri` column rather than the lambda's. Numbering avoids the collision. + const outerLv = _astToSqlOptions._lambdaVar; + // Track repeat nesting independently of _lambdaVar, because an intervening + // _forEach resets _lambdaVar to undefined. Without this, a `repeat > forEach + // > repeat` shape would re-use `_ri` for the inner repeat's lambda, clashing + // with the outer top-level repeat's SELECT-scope `AS _ri` column alias — + // DuckDB then parses `_ri -> {...}` as a JSON arrow on the outer column + // rather than a lambda definition. + const nestedDepth = (_astToSqlOptions._repeatLevel || 0) + 1; + const lv = nestedDepth === 1 ? '_ri' : `_ri${nestedDepth}`; + const lvJson = `${lv}_json`; + + // Build the column projection with `lv` as lambda var. + // `lv` (e.g. _ri) holds the result of json_transform, which is a typed + // STRUCT — not raw JSON — so reset _jsonScope while building the + // projection. Without this reset, an enclosing forEach's _jsonScope=true + // would leak in and cause JSON arrows (->/->>) to be emitted on `_ri`, + // which fails for nested struct/array fields. 
+ const savedJsonScope = _astToSqlOptions._jsonScope; + const savedRepeatLevel = _astToSqlOptions._repeatLevel; + _astToSqlOptions._lambdaVar = lv; + _astToSqlOptions._jsonScope = false; + _astToSqlOptions._repeatLevel = nestedDepth; + const projection = repeatColArgs.map(a => astToSql(a, true, inputType)).map(flattenSql).map(a => a.sql).join(", "); + _astToSqlOptions._lambdaVar = outerLv; + _astToSqlOptions._jsonScope = savedJsonScope; + _astToSqlOptions._repeatLevel = savedRepeatLevel; + + // Build the json_transform schema for `lv` from the column args. + const jsonSchema = buildRepeatTransformSchema(repeatColArgs); + + // Fixed-depth list_cat unrolling. Each level applies the recursive step + // once to the previous level's items, then list_cat merges every level + // into a single flat array. Vectorised: DuckDB processes all rows + // through each level in a single pipeline pass (vs. inline WITH RECURSIVE, + // which executes once per row as a correlated subquery and OOMs on + // large datasets). Trade-off: depth is bounded; nodes deeper than the + // configured depth are silently dropped. Override via + // `_astToSqlOptions._nestedRepeatDepth`. + const depth = _astToSqlOptions._nestedRepeatDepth || 5; + const stepVar = `n${nestedDepth}`; + const stepParts = repeatPaths.map(p => jsonNav(stepVar, p)); + const step = stepParts.length === 1 + ? stepParts[0] + : `list_cat(${stepParts.join(', ')})`; + + let level = `coalesce(CAST(__INPUT__ AS JSON[]), CAST([] AS JSON[]))`; + const levels = [level]; + for (let d = 1; d < depth; d++) { + level = `flatten(list_transform(${level}, ${stepVar} -> ${step}))`; + levels.push(level); + } + const allItems = levels.length === 1 + ? 
/**
 * Translate a dotted path into a DuckDB expression that navigates JSON arrays.
 *
 * The first segment is read from `nodeRef` with `->` and cast to `JSON[]`;
 * every later segment is read from each element of the previous result and
 * the per-element lists are flattened. A NULL result at any level is replaced
 * by an empty `JSON[]`, so the expression always yields a list.
 *
 * Segments are embedded in single-quoted SQL string literals, so embedded
 * single quotes are doubled to keep the generated SQL well-formed.
 *
 * @param {string} nodeRef - SQL expression (e.g. a lambda variable) holding a JSON value.
 * @param {string} pathStr - Dot-separated path, e.g. `item` or `answer.item`.
 * @returns {string} SQL expression evaluating to a `JSON[]`.
 */
function jsonNav(nodeRef, pathStr) {
  const emptyList = "CAST([] AS JSON[])";
  return pathStr.split(".").reduce((expr, seg, idx) => {
    // Double any single quote so the segment stays inside its SQL literal.
    const key = seg.replace(/'/g, "''");
    if (idx === 0) {
      return `coalesce(CAST(${expr}->'${key}' AS JSON[]), ${emptyList})`;
    }
    return `coalesce(flatten(list_transform(${expr}, _a -> coalesce(CAST(_a->'${key}' AS JSON[]), ${emptyList}))), ${emptyList})`;
  }, nodeRef);
}

/**
 * Build the json_transform schema string for the element type of a `_repeat`.
 *
 * Walks the column ASTs passed to `_repeat` and records every navigated field
 * in a tree of `{fhirType, isArray, children, json?}` nodes:
 *   - nav segments of a `_col` / `_col_collection` path become (nested) fields;
 *   - a nav segment followed by `_forEach` / `_forEachOrNull` becomes an array
 *     whose children are collected from the forEach arguments;
 *   - a nav segment followed by `_repeat` is declared as a JSON array, so the
 *     value is left as JSON for the inner repeat to consume.
 *
 * @param {Array<Array<object>>} colArgs - One AST (array of segments) per column argument.
 * @returns {string} JSON structure description, e.g. `{"linkId":"VARCHAR"}`.
 */
function buildRepeatTransformSchema(colArgs) {
  const tree = {}; // {fieldName: {fhirType, isArray, children: {...}, json?}}

  const isEachFn = (node) =>
    !!node && node.segmentType === 'fn' &&
    (node.name === '_forEach' || node.name === '_forEachOrNull');

  const isRepeatFn = (node) =>
    !!node && node.segmentType === 'fn' && node.name === '_repeat';

  // Register the nav segments of one path under `root`, nesting each segment
  // inside the previous one. Existing entries are kept untouched.
  function addPath(pathAst, root) {
    let cursor = root;
    for (const segment of pathAst) {
      if (segment.segmentType !== 'nav') continue;
      if (!cursor[segment.value]) {
        cursor[segment.value] = {
          fhirType: segment.type.fhirType,
          isArray: segment.type.isArray,
          children: {}
        };
      }
      cursor = cursor[segment.value].children;
    }
  }

  // Collect typed substructure from one AST into `subTree`.
  function walk(arg, subTree) {
    if (!Array.isArray(arg)) return;
    for (let i = 0; i < arg.length; i++) {
      const node = arg[i];
      if (!node) continue;

      if (node.segmentType === 'nav') {
        const next = arg[i + 1];
        if (isEachFn(next)) {
          if (!subTree[node.value]) {
            subTree[node.value] = {
              fhirType: node.type.fhirType,
              isArray: true,
              children: {}
            };
          }
          const children = subTree[node.value].children;
          next.args.forEach(a => walk(a, children));
          i++; // skip the consumed _forEach / _forEachOrNull
        } else if (isRepeatFn(next)) {
          // Inner repeat: keep the entry field as a JSON list so the inner
          // _repeat seed can cast and traverse it itself.
          subTree[node.value] = {
            fhirType: 'JSON',
            isArray: true,
            children: {},
            json: true
          };
          i++; // skip the consumed _repeat
        }
      } else if (node.segmentType === 'fn') {
        if (node.name === '_col' || node.name === '_col_collection') {
          // Order matters: walk must run before addPath. Both only create
          // missing keys, so whichever runs first decides the node's shape.
          // walk installs a nav-then-_forEach key as an array with children;
          // if addPath ran first it would install it using the segment's own
          // isArray flag instead.
          walk(node.args[1], subTree);
          addPath(node.args[1], subTree);
        } else if (node.name === '_forEach' || node.name === '_forEachOrNull') {
          node.args.forEach(a => walk(a, subTree));
        }
      }
    }
  }

  colArgs.forEach(a => walk(a, tree));

  return formatJsonTransformSchema(tree);
}

/**
 * Serialise a field tree (as built by buildRepeatTransformSchema) into a
 * json_transform structure string.
 *
 * Nodes flagged `json` are emitted as "JSON"; nodes with children become
 * nested objects; leaves map decimal -> DOUBLE, boolean -> BOOLEAN,
 * integer -> INTEGER and everything else -> VARCHAR. Array nodes are wrapped
 * in `[...]`.
 *
 * @param {object} tree - Map of field name to `{fhirType, isArray, children, json?}`.
 * @returns {string} Structure string such as `{"answer":[{"valueString":"VARCHAR"}]}`.
 */
function formatJsonTransformSchema(tree) {
  function leafType(fhirType) {
    switch (fhirType) {
      case 'decimal': return 'DOUBLE';
      case 'boolean': return 'BOOLEAN';
      case 'integer': return 'INTEGER';
      default: return 'VARCHAR';
    }
  }

  function objectType(fields) {
    const members = Object.entries(fields).map(([k, v]) => `"${k}":${nodeType(v)}`);
    return `{${members.join(',')}}`;
  }

  function nodeType(node) {
    let element;
    if (node.json) {
      // "JSON" leaves the value as JSON instead of coercing it to a typed scalar.
      element = `"JSON"`;
    } else if (Object.keys(node.children).length > 0) {
      element = objectType(node.children);
    } else {
      element = `"${leafType(node.fhirType)}"`;
    }
    return node.isArray ? `[${element}]` : element;
  }

  return objectType(tree);
}
`resourceType = '${vd.resource}'` : null]) @@ -23,18 +26,46 @@ export function buildQuery(vd, schema, filterByResourceType, verbose, vars) { }).join(" and "); const schemaPaths = extractPathsFromAst({asts: [fpAst].concat(whereAsts)}); + + // For each repeat block, retype the entry path's first segment to JSON[] + // in the source schema so read_json_auto preserves the full nested data + // for JSON traversal in the recursive CTE. + for (const field of repeatEntryFields) { + const entryNode = schemaPaths.find(n => n.value === field); + if (entryNode) { + entryNode.fhirType = 'JSON'; + entryNode.children = []; + } + } + const schemaSql = pathsToSchema(schemaPaths) const outputSql = tablesToSql(parsedVd.tables); return {pathSql: fpSql, schemaSql, outputSql, whereSql} } +function collectRepeatEntryFields(vd) { + // Walk the VD tree, return the set of top-level field names that are + // entry paths for any repeat block (the first segment of repeat[0]). + const fields = new Set(); + function walk(node) { + if (!node || typeof node !== 'object') return; + if (node.repeat && Array.isArray(node.repeat) && node.repeat[0]) { + fields.add(node.repeat[0].split('.')[0]); + } + if (node.select) node.select.forEach(walk); + if (node.unionAll) node.unionAll.forEach(walk); + } + walk(vd); + return Array.from(fields); +} + //TODO: consider replacing this with a full template language export function templateToQuery(vd, schema, template, args=[], verbose, filterByResourceType, customMacros=null, vars=null) { - //Setting filterByResourceType to btrue can only be used if the schema for the + //Setting filterByResourceType to true can only be used if the schema for the //elements being use is compatible between all of the resources being read //(e.g., element with the same names have the same structure). This is used //in some of the tests that mix resource types. - + const queryParts = buildQuery(vd, schema, filterByResourceType, verbose, vars); const whereSql = queryParts.whereSql ? 
"WHERE " + queryParts.whereSql : ""; const schemaSql = queryParts.schemaSql ? `, columns=${queryParts.schemaSql}` : ""; @@ -61,4 +92,4 @@ export function templateToQuery(vd, schema, template, args=[], verbose, filterBy }) return template; -} \ No newline at end of file +} diff --git a/src/view-parser.js b/src/view-parser.js index 14afe9a..cce6379 100644 --- a/src/view-parser.js +++ b/src/view-parser.js @@ -20,7 +20,18 @@ export function validateVd(vd) { if (!node.select && !node.column && !node.unionAll) throw new Error("forEach and forEachOrNull elements must be used together with a column, select or unionAll element"); } - + + if (node.repeat) { + if (!Array.isArray(node.repeat) || !node.repeat.every(r => typeof r === "string")) + throw new Error("repeat must be an array of strings"); + if (node.repeat.length === 0) + throw new Error("repeat must contain at least one path"); + if (node.forEach || node.forEachOrNull) + throw new Error("repeat cannot be used with forEach or forEachOrNull"); + if (!node.select && !node.column && !node.unionAll) + throw new Error("repeat must be used with a column, select or unionAll element"); + } + //collection must be boolean if (node.select) { if (!Array.isArray(node.select)) @@ -88,6 +99,15 @@ export function parseVd(vd, skipValidation) { } function parseNode(node, isRoot, inUnion, parentTable) { + if (node.repeat) { + const repeatTable = !inUnion ? addTable("each", parentTable, false) : parentTable; + const rest = parseNode({...node, repeat: undefined}, false, false, repeatTable); + const firstPath = node.repeat[0]; + const pathArgs = node.repeat.map(p => `'${p}'`).join(", "); + const path = `${firstPath}._repeat(${pathArgs}, ${rest})`; + return !inUnion ? `_col_collection('${repeatTable}', ${path})` : path; + } + if (node.forEach || node.forEachOrNull) { const eachTable = !inUnion ? addTable(node.forEach ? 
"each" : "nullEach", parentTable, !!node.forEachOrNull) : parentTable; if (inUnion && node.forEachOrNull) updateTable(eachTable, true); diff --git a/tests/view.test.js b/tests/view.test.js index 8e87578..2be2a83 100644 --- a/tests/view.test.js +++ b/tests/view.test.js @@ -92,5 +92,59 @@ describe("parse view definitions into superpath", () => { }).toThrow(); }); + test("repeat must be an array of strings", () => { + expect(() => parseVd({resource: "QuestionnaireResponse", select: [{repeat: "item", column: [{name: "x"}]}]})).toThrow(/array of strings/); + expect(() => parseVd({resource: "QuestionnaireResponse", select: [{repeat: [1], column: [{name: "x"}]}]})).toThrow(/array of strings/); + }); + + test("repeat must contain at least one path", () => { + expect(() => parseVd({resource: "QuestionnaireResponse", select: [{repeat: [], column: [{name: "x"}]}]})).toThrow(/at least one path/); + }); + + test("repeat cannot be used with forEach or forEachOrNull", () => { + expect(() => parseVd({resource: "QuestionnaireResponse", select: [{repeat: ["item"], forEach: "item", column: [{name: "x"}]}]})).toThrow(/cannot be used with forEach/); + expect(() => parseVd({resource: "QuestionnaireResponse", select: [{repeat: ["item"], forEachOrNull: "item", column: [{name: "x"}]}]})).toThrow(/cannot be used with forEach/); + }); + + test("repeat must be used with column, select, or unionAll", () => { + expect(() => parseVd({resource: "QuestionnaireResponse", select: [{repeat: ["item"]}]})).toThrow(/repeat must be used with/); + }); + + test("repeat emits _repeat fhirpath function", () => { + const view = { + select: [{ + repeat: ["item"], + column: [{name: "linkId"}, {name: "text"}] + }] + }; + const result = parseVd(view, true).path; + const fp = `_forEach( + _col_collection('e_1', + item._repeat('item', + _col('linkId', linkId), + _col('text', text) + ) + ) + )`; + expect(result.replace(/\s*/g, "")).toEqual(fp.replace(/\s*/g, "")); + }); + + test("repeat with multiple paths emits all 
paths as args", () => { + const view = { + select: [{ + repeat: ["item", "answer.item"], + column: [{name: "linkId"}] + }] + }; + const result = parseVd(view, true).path; + const fp = `_forEach( + _col_collection('e_1', + item._repeat('item', 'answer.item', + _col('linkId', linkId) + ) + ) + )`; + expect(result.replace(/\s*/g, "")).toEqual(fp.replace(/\s*/g, "")); + }); }); \ No newline at end of file From 85c980784ef15b829496005b0af56ef335ca906c Mon Sep 17 00:00:00 2001 From: Piotr Szul Date: Thu, 30 Apr 2026 14:38:03 +1000 Subject: [PATCH 3/5] Optimise repeat SQL generation with lateral column aliases For top-level repeats (not inside a lambda), emit each depth level as a lateral SELECT column alias instead of inlining the full expression into every list_cat argument. This avoids re-evaluating ancestor levels and makes DuckDB's deduplication explicit rather than relying on the optimizer. Falls back to inline unrolling inside lambda scope (forEach, nested repeat) where lateral aliases cannot reference lambda variables. Also removes the double null-guard: _col_collection no longer appends .ifnull2([]) when the value's outputType.nullHandled is already true, eliminating the redundant coalesce(...,[]).ifnull2([]) chain emitted by _repeat. Benchmarks show no measurable runtime difference on the QuestionnaireResponse repeat_view (ndjson ~9.6s, parquet ~97s), indicating DuckDB already CSEs the inline expressions. The structural improvement still stands for correctness and future query plan legibility. 
Co-Authored-By: Claude Sonnet 4.6 (1M context) --- src/ddb-sql-builder.js | 66 +++++++++++++++++++++++++++++++++++------ src/query-builder.js | 10 +++++-- templates/csv.sql | 2 +- templates/dbt_model.sql | 2 +- templates/explore.sql | 2 +- templates/ndjson.sql | 2 +- templates/parquet.sql | 2 +- tests/test-util.js | 2 +- 8 files changed, 70 insertions(+), 18 deletions(-) diff --git a/src/ddb-sql-builder.js b/src/ddb-sql-builder.js index 0e95b6f..0e74762 100644 --- a/src/ddb-sql-builder.js +++ b/src/ddb-sql-builder.js @@ -33,6 +33,13 @@ export function astToSql(node, inLambda, inputType={}, options) { if (querySegments[i] && querySegments[i].outputType && querySegments[i].outputType.isRepeat) { const prevSql = querySegments.slice(0, i).map(s => s && s.sql).filter(s => !!s).join("."); querySegments[i].sql = querySegments[i].sql.replace(/__INPUT__/g, prevSql); + // Substitute __INPUT__ in lateral defs and register them with the accumulator. + if (querySegments[i].lateralDefs) { + for (const def of querySegments[i].lateralDefs) { + def.sql = def.sql.replace(/__INPUT__/g, prevSql); + } + (_astToSqlOptions._lateralDefs ||= []).push(...querySegments[i].lateralDefs); + } querySegments.splice(0, i); break; } @@ -240,7 +247,7 @@ export function astToSql(node, inLambda, inputType={}, options) { //if array of non-array type then slice by default (should this be a setting?) if (colValue.segmentType == "nav" && colValueSql.outputType.isArray && node.name !== "_col_collection") { colValueSql.sql += ".as_value()" - } else if (node.name == "_col_collection") { + } else if (node.name == "_col_collection" && !colValueSql.outputType.nullHandled) { colValueSql.sql += ".ifnull2([])" } return {sql: `${colName}: ${colValueSql.sql}`, outputType: colValueSql.outputType}; @@ -332,14 +339,9 @@ export function astToSql(node, inLambda, inputType={}, options) { // Build the json_transform schema for `lv` from the column args. 
const jsonSchema = buildRepeatTransformSchema(repeatColArgs); - // Fixed-depth list_cat unrolling. Each level applies the recursive step - // once to the previous level's items, then list_cat merges every level - // into a single flat array. Vectorised: DuckDB processes all rows - // through each level in a single pipeline pass (vs. inline WITH RECURSIVE, - // which executes once per row as a correlated subquery and OOMs on - // large datasets). Trade-off: depth is bounded; nodes deeper than the - // configured depth are silently dropped. Override via - // `_astToSqlOptions._nestedRepeatDepth`. + // Fixed-depth list_cat unrolling. + // Trade-off: depth is bounded; nodes deeper than the configured + // depth are silently dropped. Override via `_nestedRepeatDepth`. const depth = _astToSqlOptions._nestedRepeatDepth || 5; const stepVar = `n${nestedDepth}`; const stepParts = repeatPaths.map(p => jsonNav(stepVar, p)); @@ -347,6 +349,52 @@ export function astToSql(node, inLambda, inputType={}, options) { ? stepParts[0] : `list_cat(${stepParts.join(', ')})`; + // Lateral alias optimisation: hoist level columns to the CTE SELECT + // so each level is computed once per row. Only valid when __INPUT__ + // is a bare column reference (not inside a lambda), because lateral + // aliases are evaluated at the top-level row context, not inside a + // list_transform callback. Inside a lambda (_inLambdaScope or a + // non-null lambdaVar) we must inline instead. + const canUseLateral = !_astToSqlOptions._inLambdaScope && outerLv === undefined; + + if (canUseLateral) { + const repeatId = ((_astToSqlOptions._nextRepeatId || 0) + 1); + _astToSqlOptions._nextRepeatId = repeatId; + const prefix = `_r${repeatId}`; + + const lateralDefs = []; + // Level 0: base cast — __INPUT__ is substituted by flattenSql. 
+ const l0 = `${prefix}_l0`; + lateralDefs.push({name: l0, sql: `coalesce(CAST(__INPUT__ AS JSON[]), CAST([] AS JSON[]))`}); + // Levels 1..depth-1: each references the previous lateral alias. + let prevAlias = l0; + for (let d = 1; d < depth; d++) { + const ln = `${prefix}_l${d}`; + lateralDefs.push({name: ln, sql: `flatten(list_transform(${prevAlias}, ${stepVar} -> ${step}))`}); + prevAlias = ln; + } + // Collect all levels into one flat alias. + const allAlias = `${prefix}_all`; + lateralDefs.push({name: allAlias, sql: `list_cat(${lateralDefs.map(d => d.name).join(', ')})`}); + + sql = `coalesce(` + + `list_transform(` + + `list_transform(${allAlias}, ${lvJson} -> json_transform(${lvJson}, '${jsonSchema}')), ` + + `${lv} -> {${projection}}` + + `), [])`; + outputType = { + fhirType: inputType.fhirType || 'BackboneElement', + isArray: true, + isRepeat: true, + nullHandled: true + }; + return {sql, outputType, lateralDefs}; + } + + // Inline unrolling fallback (used when inside a lambda scope). + // Each level is inlined into the next, so the base expression is + // re-evaluated once per depth level. Necessary because lateral + // aliases cannot reference lambda-scope variables (e.g. `el`). 
let level = `coalesce(CAST(__INPUT__ AS JSON[]), CAST([] AS JSON[]))`; const levels = [level]; for (let d = 1; d < depth; d++) { diff --git a/src/query-builder.js b/src/query-builder.js index d93843b..e9a57f0 100644 --- a/src/query-builder.js +++ b/src/query-builder.js @@ -10,8 +10,9 @@ export function buildQuery(vd, schema, filterByResourceType, verbose, vars) { const fpAst = fhirpathToAst(parsedVd.path, vd.resource, schema, vars); const repeatEntryFields = collectRepeatEntryFields(vd); - const options = {_jsonScopeFields: new Set(repeatEntryFields)}; + const options = {_jsonScopeFields: new Set(repeatEntryFields), _lateralDefs: []}; const fpSql = astToSql(fpAst, false, {}, options).sql; + const lateralDefs = options._lateralDefs; const whereAsts = (vd.where||[]).map(w => w.path) .concat([filterByResourceType ? `resourceType = '${vd.resource}'` : null]) @@ -40,7 +41,7 @@ export function buildQuery(vd, schema, filterByResourceType, verbose, vars) { const schemaSql = pathsToSchema(schemaPaths) const outputSql = tablesToSql(parsedVd.tables); - return {pathSql: fpSql, schemaSql, outputSql, whereSql} + return {pathSql: fpSql, schemaSql, outputSql, whereSql, lateralDefs} } function collectRepeatEntryFields(vd) { @@ -69,6 +70,9 @@ export function templateToQuery(vd, schema, template, args=[], verbose, filterBy const queryParts = buildQuery(vd, schema, filterByResourceType, verbose, vars); const whereSql = queryParts.whereSql ? "WHERE " + queryParts.whereSql : ""; const schemaSql = queryParts.schemaSql ? `, columns=${queryParts.schemaSql}` : ""; + const lateralColsSql = queryParts.lateralDefs && queryParts.lateralDefs.length > 0 + ? queryParts.lateralDefs.map(d => `${d.sql} AS ${d.name}`).join(',\n\t\t') + ',\n\t\t' + : ''; // Concatenate base macros with custom macros const allMacros = customMacros ? 
macros + '\n' + customMacros : macros; @@ -77,7 +81,7 @@ export function templateToQuery(vd, schema, template, args=[], verbose, filterBy ["fq_input_dir", process.cwd()], ["fq_output_dir", process.cwd()], ["fq_where_filter", whereSql], - ["fq_sql_transform_expression", queryParts.pathSql], + ["fq_sql_transform_expression", lateralColsSql + queryParts.pathSql], ["fq_sql_input_schema", schemaSql], ["fq_sql_flattening_cols", queryParts.outputSql.fieldSql], ["fq_sql_flattening_tables", queryParts.outputSql.joinSql], diff --git a/templates/csv.sql b/templates/csv.sql index 7ba3fcc..e33dfe7 100644 --- a/templates/csv.sql +++ b/templates/csv.sql @@ -2,7 +2,7 @@ COPY ( WITH transformed AS ( - SELECT {{fq_sql_transform_expression}} AS result + SELECT {{fq_sql_transform_expression}} AS result FROM read_json_auto( '{{fq_input_dir}}/**/*{{fq_vd_resource}}*.ndjson' {{fq_sql_input_schema}} diff --git a/templates/dbt_model.sql b/templates/dbt_model.sql index e82ed51..7f0fffe 100644 --- a/templates/dbt_model.sql +++ b/templates/dbt_model.sql @@ -1,5 +1,5 @@ WITH transformed AS ( - SELECT {{fq_sql_transform_expression}} AS result + SELECT {{fq_sql_transform_expression}} AS result FROM {{ source('fhir_db', '{{fq_vd_resource}}') }} {{fq_where_filter}} ) diff --git a/templates/explore.sql b/templates/explore.sql index 9097991..514ab2c 100644 --- a/templates/explore.sql +++ b/templates/explore.sql @@ -1,7 +1,7 @@ {{fq_sql_macros}} WITH transformed AS ( - SELECT {{fq_sql_transform_expression}} AS result + SELECT {{fq_sql_transform_expression}} AS result FROM read_json_auto( '{{fq_input_dir}}/**/*{{fq_vd_resource}}*.ndjson' {{fq_sql_input_schema}} diff --git a/templates/ndjson.sql b/templates/ndjson.sql index 434e314..bd79fee 100644 --- a/templates/ndjson.sql +++ b/templates/ndjson.sql @@ -2,7 +2,7 @@ COPY ( WITH transformed AS ( - SELECT {{fq_sql_transform_expression}} AS result + SELECT {{fq_sql_transform_expression}} AS result FROM read_json_auto( 
'{{fq_input_dir}}/**/*{{fq_vd_resource}}*.ndjson' {{fq_sql_input_schema}} diff --git a/templates/parquet.sql b/templates/parquet.sql index 25f2ab5..fe6551e 100644 --- a/templates/parquet.sql +++ b/templates/parquet.sql @@ -2,7 +2,7 @@ COPY ( WITH transformed AS ( - SELECT {{fq_sql_transform_expression}} AS result + SELECT {{fq_sql_transform_expression}} AS result FROM read_json_auto( '{{fq_input_dir}}/**/*{{fq_vd_resource}}*.ndjson' {{fq_sql_input_schema}} diff --git a/tests/test-util.js b/tests/test-util.js index 7444ec5..c085220 100644 --- a/tests/test-util.js +++ b/tests/test-util.js @@ -3,7 +3,7 @@ import macros from "../templates/duck-macros"; export const testQueryTemplate = ` WITH transformed AS ( - SELECT {{fq_sql_transform_expression}} AS result + SELECT {{fq_sql_transform_expression}} AS result FROM read_json_auto( '{{test_file_path}}' {{fq_sql_input_schema}} From 3495214f7e9fed6e33abe2a9d0cae1df6b41c4cb Mon Sep 17 00:00:00 2001 From: Piotr Szul Date: Fri, 1 May 2026 19:48:34 +1000 Subject: [PATCH 4/5] Fix unionAll inside repeat and mixed repeat/non-repeat unionAll branches Three bugs in ddb-sql-builder.js: 1. buildRepeatTransformSchema didn't walk _unionAll branches, producing an empty JSON schema ({}) when a repeat's column arg contained a unionAll. 2. A standalone _forEach inside a _repeat lambda (e.g. a unionAll branch inside repeat) incorrectly tried to iterate via _ri.as_list().list_transform instead of projecting the current item as a plain struct. 3. nav with inputType.json=true (a JSON[] repeat entry field accessed outside a lambda) generated el.field instead of el->>'field', causing a DuckDB conversion error when a non-repeat unionAll branch navigated into that field. 
Co-Authored-By: Claude Sonnet 4.6 --- src/ddb-sql-builder.js | 18 ++- tests/spec-tests/repeat.json | 235 +++++++++++++++++++++++++++++++++++ 2 files changed, 252 insertions(+), 1 deletion(-) diff --git a/src/ddb-sql-builder.js b/src/ddb-sql-builder.js index 0e74762..020888e 100644 --- a/src/ddb-sql-builder.js +++ b/src/ddb-sql-builder.js @@ -107,7 +107,12 @@ export function astToSql(node, inLambda, inputType={}, options) { } outputType = {fhirType: node.type.fhirType, isArray: node.type.isArray, isNav: false} } else if (inputType.fhirType && inputType.isArray) { - sql = `list_transform(el -> el.${node.value})${node.type.isArray ? ".flatten()" : ""}`; + if (inputType.json) { + const op = node.type.isArray ? '->' : '->>'; + sql = `list_transform(el -> (el${op}'${node.value}'))${node.type.isArray ? ".flatten()" : ""}`; + } else { + sql = `list_transform(el -> el.${node.value})${node.type.isArray ? ".flatten()" : ""}`; + } outputType = {fhirType: node.type.fhirType, isArray: true, isNav: false} } else { sql = node.value; @@ -271,6 +276,13 @@ export function astToSql(node, inLambda, inputType={}, options) { _astToSqlOptions._inLambdaScope = savedScope; sql = `as_list().list_transform(el -> {${cols}})${orNullSql}`; outputType = {fhirType: inputType.fhirType, isArray: true}; + } else if (inLambda && _astToSqlOptions._lambdaVar) { + // Standalone _forEach inside a _repeat lambda (e.g., a unionAll branch + // inside repeat). The caller's lambda var (_ri) already binds the current + // item — just project its fields as a struct without iterating. 
+ const cols = node.args.map(a => astToSql(a, inLambda, inputType)).map(flattenSql).map(a => a.sql).join(","); + sql = `{${cols}}`; + outputType = {fhirType: inputType.fhirType, isArray: false}; } else { const lv = _astToSqlOptions._lambdaVar || 'el'; const savedLv = _astToSqlOptions._lambdaVar; @@ -600,6 +612,8 @@ function buildRepeatTransformSchema(colArgs) { addPathToTree(node.args[1], tree); } else if (node.name === '_forEach' || node.name === '_forEachOrNull') { node.args.forEach(walk); + } else if (node.name === '_unionAll') { + node.args.forEach(branch => walk(branch)); } } } @@ -638,6 +652,8 @@ function buildRepeatTransformSchema(colArgs) { addPathIntoTree(node.args[1], subTree); } else if (node.name === '_forEach' || node.name === '_forEachOrNull') { node.args.forEach(a => walkInto(a, subTree)); + } else if (node.name === '_unionAll') { + node.args.forEach(branch => walkInto(branch, subTree)); } } } diff --git a/tests/spec-tests/repeat.json b/tests/spec-tests/repeat.json index 5b9c642..8fbf3f3 100644 --- a/tests/spec-tests/repeat.json +++ b/tests/spec-tests/repeat.json @@ -1030,6 +1030,241 @@ "innerLinkId": "1.1.1" } ] + }, + { + "title": "unionAll inside repeat", + "tags": ["shareable"], + "view": { + "resource": "Questionnaire", + "status": "active", + "select": [ + { + "column": [ + { + "name": "id", + "path": "id", + "type": "id" + } + ] + }, + { + "repeat": ["item"], + "unionAll": [ + { + "column": [ + { + "name": "kind", + "path": "'link'", + "type": "string" + }, + { + "name": "value", + "path": "linkId", + "type": "string" + } + ] + }, + { + "column": [ + { + "name": "kind", + "path": "'text'", + "type": "string" + }, + { + "name": "value", + "path": "text", + "type": "string" + } + ] + } + ] + } + ] + }, + "expect": [ + { "id": "q1", "kind": "link", "value": "g1" }, + { "id": "q1", "kind": "text", "value": "Group 1" }, + { "id": "q1", "kind": "link", "value": "g1.1" }, + { "id": "q1", "kind": "text", "value": "Question 1.1" }, + { "id": "q1", 
"kind": "link", "value": "g1.1.1" }, + { "id": "q1", "kind": "text", "value": "Sub-question 1.1.1" }, + { "id": "q1", "kind": "link", "value": "g2" }, + { "id": "q1", "kind": "text", "value": "Group 2" } + ] + }, + { + "title": "repeat inside repeat inside repeat", + "tags": ["shareable"], + "view": { + "resource": "Questionnaire", + "status": "active", + "select": [ + { + "column": [ + { + "name": "id", + "path": "id", + "type": "id" + } + ] + }, + { + "repeat": ["item"], + "select": [ + { + "column": [ + { + "name": "level1", + "path": "linkId", + "type": "string" + } + ] + }, + { + "repeat": ["item"], + "select": [ + { + "column": [ + { + "name": "level2", + "path": "linkId", + "type": "string" + } + ] + }, + { + "repeat": ["item"], + "column": [ + { + "name": "level3", + "path": "linkId", + "type": "string" + } + ] + } + ] + } + ] + } + ] + }, + "expect": [ + { + "id": "q1", + "level1": "g1", + "level2": "g1.1", + "level3": "g1.1.1" + } + ] + }, + { + "title": "multi-path repeat inside forEach", + "tags": ["shareable"], + "view": { + "resource": "QuestionnaireResponse", + "status": "active", + "select": [ + { + "column": [ + { + "name": "id", + "path": "id", + "type": "id" + } + ] + }, + { + "forEach": "item", + "select": [ + { + "column": [ + { + "name": "groupLinkId", + "path": "linkId", + "type": "string" + } + ] + }, + { + "repeat": ["item", "answer.item"], + "column": [ + { + "name": "linkId", + "path": "linkId", + "type": "string" + } + ] + } + ] + } + ] + }, + "expect": [ + { "id": "qr1", "groupLinkId": "1", "linkId": "1.1" }, + { "id": "qr1", "groupLinkId": "1", "linkId": "1.1.1" }, + { "id": "qr1", "groupLinkId": "1", "linkId": "1.2" }, + { "id": "qr1", "groupLinkId": "1", "linkId": "1.2.1" } + ] + }, + { + "title": "unionAll with repeat and non-repeat branches", + "tags": ["shareable"], + "view": { + "resource": "Questionnaire", + "status": "active", + "select": [ + { + "column": [ + { + "name": "id", + "path": "id", + "type": "id" + } + ] + }, + { + 
"unionAll": [ + { + "column": [ + { + "name": "kind", + "path": "'root'", + "type": "string" + }, + { + "name": "linkId", + "path": "item.linkId.first()", + "type": "string" + } + ] + }, + { + "repeat": ["item"], + "column": [ + { + "name": "kind", + "path": "'item'", + "type": "string" + }, + { + "name": "linkId", + "path": "linkId", + "type": "string" + } + ] + } + ] + } + ] + }, + "expect": [ + { "id": "q1", "kind": "root", "linkId": "g1" }, + { "id": "q1", "kind": "item", "linkId": "g1" }, + { "id": "q1", "kind": "item", "linkId": "g1.1" }, + { "id": "q1", "kind": "item", "linkId": "g1.1.1" }, + { "id": "q1", "kind": "item", "linkId": "g2" } + ] } ] } From 70460e43488faf239d7c8b068c846f05f31c821d Mon Sep 17 00:00:00 2001 From: Piotr Szul Date: Fri, 1 May 2026 20:13:00 +1000 Subject: [PATCH 5/5] Add --repeat-depth CLI option to configure repeat traversal depth Exposes the previously internal _nestedRepeatDepth option as a proper user-facing flag. Threads repeatDepth through templateToQuery and buildQuery into astToSql options, and documents it in the README. Co-Authored-By: Claude Sonnet 4.6 --- README.md | 1 + src/cli.js | 5 ++++- src/ddb-sql-builder.js | 5 ++--- src/query-builder.js | 7 ++++--- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 231184d..aee241a 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,7 @@ Additional steps if you would like to run scripts, unit tests or edit the projec | `--macros` | | | Experimental - Path to file(s) or directory(ies) containing additional SQL macros. Prefix with `@` to reference files in the templates directory. This argument may be repeated. See [details below](#macros---macros-parameter).| | `--param` | | | `name=value` pair of user defined variables to be used when generating SQL with a [custom template](#custom-templates). This argument may be repeated. 
| | `--var` | | | `name=value` pair of FHIRPath variables for use in ViewDefinition expressions (referenced as `%name`). This argument may be repeated. | +| `--repeat-depth` | | `5` | Maximum nesting depth for `repeat` traversal. Nodes deeper than this limit are silently dropped. Increase this value if your data has deeply nested recursive structures (e.g. `--repeat-depth 10`). | | `--verbose` | | false | Print debugging information to the console when running FlatQuack. | #### Modes (--mode parameter) diff --git a/src/cli.js b/src/cli.js index 7842796..fb6e0d8 100644 --- a/src/cli.js +++ b/src/cli.js @@ -29,6 +29,7 @@ Options: --macros Custom macro file or directory (can be repeated) --var Values for FHIRPath constants in ViewDefinition (can be repeated) --param Template parameters (can be used repeated) + --repeat-depth Maximum nesting depth for repeat traversal (default: 5) --verbose Enable verbose output --help Show this help message --version Show version information @@ -167,6 +168,7 @@ const args = parseArgs({ "mode": {type: "string", short: "m", default: "preview"}, "param": {type: "string", multiple: true}, "var": {type: "string", multiple: true}, + "repeat-depth": {type: "string"}, "help": {type: "boolean"}, "version": {type: "boolean"} } @@ -213,7 +215,8 @@ for (const file of glob.scanSync(args.values["view-path"],{onlyFiles:true})) { const outputPath = path.join(path.dirname(inputPath), basename + ".sql"); const view = JSON.parse(fs.readFileSync(inputPath)); - const query = templateToQuery(view, schema, template, params, args.values["verbose"], undefined, customMacros, vars); + const repeatDepth = args.values["repeat-depth"] != null ? 
parseInt(args.values["repeat-depth"], 10) : null; + const query = templateToQuery(view, schema, template, params, args.values["verbose"], undefined, customMacros, vars, repeatDepth); const formattedQuery = formatSQL(query); if (args.values["mode"] == "build") { diff --git a/src/ddb-sql-builder.js b/src/ddb-sql-builder.js index 020888e..7ec793c 100644 --- a/src/ddb-sql-builder.js +++ b/src/ddb-sql-builder.js @@ -351,9 +351,8 @@ export function astToSql(node, inLambda, inputType={}, options) { // Build the json_transform schema for `lv` from the column args. const jsonSchema = buildRepeatTransformSchema(repeatColArgs); - // Fixed-depth list_cat unrolling. - // Trade-off: depth is bounded; nodes deeper than the configured - // depth are silently dropped. Override via `_nestedRepeatDepth`. + // Fixed-depth list_cat unrolling. Nodes deeper than `depth` are + // silently dropped. Configurable via the --repeat-depth CLI flag. const depth = _astToSqlOptions._nestedRepeatDepth || 5; const stepVar = `n${nestedDepth}`; const stepParts = repeatPaths.map(p => jsonNav(stepVar, p)); diff --git a/src/query-builder.js b/src/query-builder.js index e9a57f0..9cb849e 100644 --- a/src/query-builder.js +++ b/src/query-builder.js @@ -3,7 +3,7 @@ import {astToSql, pathsToSchema, tablesToSql} from "./ddb-sql-builder.js" import {parseVd, extractPathsFromAst} from "./view-parser.js"; import macros from "../templates/duck-macros.js"; -export function buildQuery(vd, schema, filterByResourceType, verbose, vars) { +export function buildQuery(vd, schema, filterByResourceType, verbose, vars, repeatDepth) { const parsedVd = parseVd(vd); if (verbose) console.log(parsedVd.path) @@ -11,6 +11,7 @@ export function buildQuery(vd, schema, filterByResourceType, verbose, vars) { const repeatEntryFields = collectRepeatEntryFields(vd); const options = {_jsonScopeFields: new Set(repeatEntryFields), _lateralDefs: []}; + if (repeatDepth != null) options._nestedRepeatDepth = repeatDepth; const fpSql = 
astToSql(fpAst, false, {}, options).sql; const lateralDefs = options._lateralDefs; @@ -61,13 +62,13 @@ function collectRepeatEntryFields(vd) { } //TODO: consider replacing this with a full template language -export function templateToQuery(vd, schema, template, args=[], verbose, filterByResourceType, customMacros=null, vars=null) { +export function templateToQuery(vd, schema, template, args=[], verbose, filterByResourceType, customMacros=null, vars=null, repeatDepth=null) { //Setting filterByResourceType to true can only be used if the schema for the //elements being use is compatible between all of the resources being read //(e.g., element with the same names have the same structure). This is used //in some of the tests that mix resource types. - const queryParts = buildQuery(vd, schema, filterByResourceType, verbose, vars); + const queryParts = buildQuery(vd, schema, filterByResourceType, verbose, vars, repeatDepth); const whereSql = queryParts.whereSql ? "WHERE " + queryParts.whereSql : ""; const schemaSql = queryParts.schemaSql ? `, columns=${queryParts.schemaSql}` : ""; const lateralColsSql = queryParts.lateralDefs && queryParts.lateralDefs.length > 0