Skip to content

Commit 1727e7f

Browse files
committed
GenericDialect: support colon operator for JsonAccess
- Port the JsonAccess colon operator from the Snowflake dialect to the Generic dialect.
- This will be used for variant data type support in DataFusion.
- See the discussion in datafusion-contrib/datafusion-variant#2.
1 parent 326f111 commit 1727e7f

File tree

5 files changed

+143
-113
lines changed

5 files changed

+143
-113
lines changed

src/dialect/mod.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -759,6 +759,13 @@ pub trait Dialect: Debug + Any {
759759
Token::DoubleColon | Token::ExclamationMark | Token::LBracket | Token::CaretAt => {
760760
Ok(p!(DoubleColon))
761761
}
762+
Token::Colon => match parser.peek_nth_token(1).token {
763+
// When colon is followed by a string or a number, it's usually in MAP syntax.
764+
Token::SingleQuotedString(_) | Token::Number(_, _) => Ok(self.prec_unknown()),
765+
// In other cases, it's used in semi-structured data traversal like in variant or JSON
766+
// string columns. See `JsonAccess`.
767+
_ => Ok(p!(Pipe)),
768+
},
762769
Token::Arrow
763770
| Token::LongArrow
764771
| Token::HashArrow

src/dialect/mssql.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,15 @@ impl Dialect for MsSqlDialect {
148148
None
149149
}
150150
}
151+
152+
fn get_next_precedence(&self, parser: &Parser) -> Option<Result<u8, ParserError>> {
153+
let token = parser.peek_token();
154+
match token.token {
155+
// Use the lowest precedence so that `:` is not parsed as a binary operator.
156+
Token::Colon => Some(Ok(self.prec_unknown())),
157+
_ => None,
158+
}
159+
}
151160
}
152161

153162
impl MsSqlDialect {

src/dialect/postgresql.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,8 @@ impl Dialect for PostgreSqlDialect {
136136
| Token::ShiftRight
137137
| Token::ShiftLeft
138138
| Token::CustomBinaryOperator(_) => Some(Ok(PG_OTHER_PREC)),
139+
// Use the lowest precedence so that `:` is not parsed as a binary operator.
140+
Token::Colon => Some(Ok(self.prec_unknown())),
139141
_ => None,
140142
}
141143
}

tests/sqlparser_common.rs

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17905,3 +17905,126 @@ fn test_parse_set_session_authorization() {
1790517905
}))
1790617906
);
1790717907
}
17908+
17909+
// https://docs.snowflake.com/en/user-guide/querying-semistructured
17910+
#[test]
17911+
fn parse_semi_structured_data_traversal() {
17912+
let dialects = TestedDialects::new(vec![
17913+
Box::new(GenericDialect {}),
17914+
Box::new(SnowflakeDialect {}),
17915+
]);
17916+
17917+
// most basic case
17918+
let sql = "SELECT a:b FROM t";
17919+
let select = dialects.verified_only_select(sql);
17920+
assert_eq!(
17921+
SelectItem::UnnamedExpr(Expr::JsonAccess {
17922+
value: Box::new(Expr::Identifier(Ident::new("a"))),
17923+
path: JsonPath {
17924+
path: vec![JsonPathElem::Dot {
17925+
key: "b".to_owned(),
17926+
quoted: false
17927+
}]
17928+
},
17929+
}),
17930+
select.projection[0]
17931+
);
17932+
17933+
// identifier can be quoted
17934+
let sql = r#"SELECT a:"my long object key name" FROM t"#;
17935+
let select = dialects.verified_only_select(sql);
17936+
assert_eq!(
17937+
SelectItem::UnnamedExpr(Expr::JsonAccess {
17938+
value: Box::new(Expr::Identifier(Ident::new("a"))),
17939+
path: JsonPath {
17940+
path: vec![JsonPathElem::Dot {
17941+
key: "my long object key name".to_owned(),
17942+
quoted: true
17943+
}]
17944+
},
17945+
}),
17946+
select.projection[0]
17947+
);
17948+
17949+
dialects.verified_stmt("SELECT a:b::INT FROM t");
17950+
17951+
// unquoted keywords are permitted in the object key
17952+
let sql = "SELECT a:select, a:from FROM t";
17953+
let select = dialects.verified_only_select(sql);
17954+
assert_eq!(
17955+
vec![
17956+
SelectItem::UnnamedExpr(Expr::JsonAccess {
17957+
value: Box::new(Expr::Identifier(Ident::new("a"))),
17958+
path: JsonPath {
17959+
path: vec![JsonPathElem::Dot {
17960+
key: "select".to_owned(),
17961+
quoted: false
17962+
}]
17963+
},
17964+
}),
17965+
SelectItem::UnnamedExpr(Expr::JsonAccess {
17966+
value: Box::new(Expr::Identifier(Ident::new("a"))),
17967+
path: JsonPath {
17968+
path: vec![JsonPathElem::Dot {
17969+
key: "from".to_owned(),
17970+
quoted: false
17971+
}]
17972+
},
17973+
})
17974+
],
17975+
select.projection
17976+
);
17977+
17978+
// multiple levels can be traversed
17979+
// https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation
17980+
let sql = r#"SELECT a:foo."bar".baz"#;
17981+
let select = dialects.verified_only_select(sql);
17982+
assert_eq!(
17983+
vec![SelectItem::UnnamedExpr(Expr::JsonAccess {
17984+
value: Box::new(Expr::Identifier(Ident::new("a"))),
17985+
path: JsonPath {
17986+
path: vec![
17987+
JsonPathElem::Dot {
17988+
key: "foo".to_owned(),
17989+
quoted: false,
17990+
},
17991+
JsonPathElem::Dot {
17992+
key: "bar".to_owned(),
17993+
quoted: true,
17994+
},
17995+
JsonPathElem::Dot {
17996+
key: "baz".to_owned(),
17997+
quoted: false,
17998+
}
17999+
]
18000+
},
18001+
})],
18002+
select.projection
18003+
);
18004+
18005+
// dot and bracket notation can be mixed (starting with : case)
18006+
// https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation
18007+
let sql = r#"SELECT a:foo[0].bar"#;
18008+
let select = dialects.verified_only_select(sql);
18009+
assert_eq!(
18010+
vec![SelectItem::UnnamedExpr(Expr::JsonAccess {
18011+
value: Box::new(Expr::Identifier(Ident::new("a"))),
18012+
path: JsonPath {
18013+
path: vec![
18014+
JsonPathElem::Dot {
18015+
key: "foo".to_owned(),
18016+
quoted: false,
18017+
},
18018+
JsonPathElem::Bracket {
18019+
key: Expr::value(number("0")),
18020+
},
18021+
JsonPathElem::Dot {
18022+
key: "bar".to_owned(),
18023+
quoted: false,
18024+
}
18025+
]
18026+
},
18027+
})],
18028+
select.projection
18029+
);
18030+
}

tests/sqlparser_snowflake.rs

Lines changed: 2 additions & 113 deletions
Original file line numberDiff line numberDiff line change
@@ -1265,37 +1265,8 @@ fn parse_lateral_flatten() {
12651265
// https://docs.snowflake.com/en/user-guide/querying-semistructured
12661266
#[test]
12671267
fn parse_semi_structured_data_traversal() {
1268-
// most basic case
1269-
let sql = "SELECT a:b FROM t";
1270-
let select = snowflake().verified_only_select(sql);
1271-
assert_eq!(
1272-
SelectItem::UnnamedExpr(Expr::JsonAccess {
1273-
value: Box::new(Expr::Identifier(Ident::new("a"))),
1274-
path: JsonPath {
1275-
path: vec![JsonPathElem::Dot {
1276-
key: "b".to_owned(),
1277-
quoted: false
1278-
}]
1279-
},
1280-
}),
1281-
select.projection[0]
1282-
);
1283-
1284-
// identifier can be quoted
1285-
let sql = r#"SELECT a:"my long object key name" FROM t"#;
1286-
let select = snowflake().verified_only_select(sql);
1287-
assert_eq!(
1288-
SelectItem::UnnamedExpr(Expr::JsonAccess {
1289-
value: Box::new(Expr::Identifier(Ident::new("a"))),
1290-
path: JsonPath {
1291-
path: vec![JsonPathElem::Dot {
1292-
key: "my long object key name".to_owned(),
1293-
quoted: true
1294-
}]
1295-
},
1296-
}),
1297-
select.projection[0]
1298-
);
1268+
// see `tests/sqlparser_common.rs` -> `parse_semi_structured_data_traversal` for more test
1269+
// cases. This test covers only Snowflake-specific syntax, such as array access.
12991270

13001271
// expressions are allowed in bracket notation
13011272
let sql = r#"SELECT a[2 + 2] FROM t"#;
@@ -1316,88 +1287,6 @@ fn parse_semi_structured_data_traversal() {
13161287
select.projection[0]
13171288
);
13181289

1319-
snowflake().verified_stmt("SELECT a:b::INT FROM t");
1320-
1321-
// unquoted keywords are permitted in the object key
1322-
let sql = "SELECT a:select, a:from FROM t";
1323-
let select = snowflake().verified_only_select(sql);
1324-
assert_eq!(
1325-
vec![
1326-
SelectItem::UnnamedExpr(Expr::JsonAccess {
1327-
value: Box::new(Expr::Identifier(Ident::new("a"))),
1328-
path: JsonPath {
1329-
path: vec![JsonPathElem::Dot {
1330-
key: "select".to_owned(),
1331-
quoted: false
1332-
}]
1333-
},
1334-
}),
1335-
SelectItem::UnnamedExpr(Expr::JsonAccess {
1336-
value: Box::new(Expr::Identifier(Ident::new("a"))),
1337-
path: JsonPath {
1338-
path: vec![JsonPathElem::Dot {
1339-
key: "from".to_owned(),
1340-
quoted: false
1341-
}]
1342-
},
1343-
})
1344-
],
1345-
select.projection
1346-
);
1347-
1348-
// multiple levels can be traversed
1349-
// https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation
1350-
let sql = r#"SELECT a:foo."bar".baz"#;
1351-
let select = snowflake().verified_only_select(sql);
1352-
assert_eq!(
1353-
vec![SelectItem::UnnamedExpr(Expr::JsonAccess {
1354-
value: Box::new(Expr::Identifier(Ident::new("a"))),
1355-
path: JsonPath {
1356-
path: vec![
1357-
JsonPathElem::Dot {
1358-
key: "foo".to_owned(),
1359-
quoted: false,
1360-
},
1361-
JsonPathElem::Dot {
1362-
key: "bar".to_owned(),
1363-
quoted: true,
1364-
},
1365-
JsonPathElem::Dot {
1366-
key: "baz".to_owned(),
1367-
quoted: false,
1368-
}
1369-
]
1370-
},
1371-
})],
1372-
select.projection
1373-
);
1374-
1375-
// dot and bracket notation can be mixed (starting with : case)
1376-
// https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation
1377-
let sql = r#"SELECT a:foo[0].bar"#;
1378-
let select = snowflake().verified_only_select(sql);
1379-
assert_eq!(
1380-
vec![SelectItem::UnnamedExpr(Expr::JsonAccess {
1381-
value: Box::new(Expr::Identifier(Ident::new("a"))),
1382-
path: JsonPath {
1383-
path: vec![
1384-
JsonPathElem::Dot {
1385-
key: "foo".to_owned(),
1386-
quoted: false,
1387-
},
1388-
JsonPathElem::Bracket {
1389-
key: Expr::value(number("0")),
1390-
},
1391-
JsonPathElem::Dot {
1392-
key: "bar".to_owned(),
1393-
quoted: false,
1394-
}
1395-
]
1396-
},
1397-
})],
1398-
select.projection
1399-
);
1400-
14011290
// dot and bracket notation can be mixed (starting with bracket case)
14021291
// https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation
14031292
let sql = r#"SELECT a[0].foo.bar"#;

0 commit comments

Comments
 (0)