Skip to content

Commit 7cc4dc4

Browse files
committed
update RIGHT and REGEXP_LIKE string functions
1 parent 6592285 commit 7cc4dc4

File tree

6 files changed

+81
-28
lines changed

6 files changed

+81
-28
lines changed

documentation/functions_string.md

Lines changed: 29 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ SELECT SUBSTRING('abcdef' FROM 4) AS s;
124124

125125
### Function: LEFT
126126
**Description:**
127-
Leftmost characters.
127+
Returns the leftmost characters from a string.
128128

129129
**Inputs:**
130130
- `str` (`VARCHAR`) `,`|`FOR` `length` (`INT`)
@@ -140,7 +140,10 @@ SELECT LEFT('abcdef', 3) AS l;
140140

141141
### Function: RIGHT
142142
**Description:**
143-
Rightmost characters.
143+
Returns the rightmost characters from a string.
144+
If `length` exceeds the length of the string, the full string is returned.
145+
If `length = 0`, an empty string is returned.
146+
If `length < 0`, a validation error is raised.
144147

145148
**Inputs:**
146149
- `str` (`VARCHAR`) `,`|`FOR` `length` (`INT`)
@@ -174,7 +177,7 @@ SELECT CONCAT(firstName, ' ', lastName) AS full FROM users;
174177

175178
### Function: REPLACE
176179
**Description:**
177-
Replace substring occurrences.
180+
Replaces all occurrences of a substring with another substring.
178181

179182
**Inputs:**
180183
- `str, search, replace`
@@ -188,26 +191,27 @@ SELECT REPLACE('Mr. John', 'Mr. ', '') AS r;
188191
-- Result: 'John'
189192
```
190193

191-
### Function: REPLACE
194+
### Function: REVERSE
192195
**Description:**
193-
Replace substring occurrences.
196+
Reverses the characters in a string.
194197

195198
**Inputs:**
196-
- `str, search, replace`
199+
- `str` (`VARCHAR`)
197200

198201
**Output:**
199202
- `VARCHAR`
200203

201204
**Example:**
202205
```sql
203-
SELECT REPLACE('Mr. John', 'Mr. ', '') AS r;
204-
-- Result: 'John'
206+
SELECT REVERSE('abcdef') AS r;
207+
-- Result: 'fedcba'
205208
```
206209

207210
### Function: POSITION / STRPOS
208211
**Description:**
209-
1-based index, 0 if not found.
210-
The first position of the `substr` in the `str`, starting at the optional `FROM` position (1-based).
212+
Returns the 1-based position of the first occurrence of a substring in a string.
213+
If the substring is not found, returns 0.
214+
An optional FROM position (1-based) can be provided to start the search.
211215

212216
**Inputs:**
213217
- `substr` `,` | `IN` `str` optional `,` | `FROM` `INT`
@@ -229,17 +233,28 @@ SELECT POSITION('z' IN 'Elasticsearch') AS pos;
229233

230234
### Function: REGEXP_LIKE / RLIKE
231235
**Description:**
232-
Regex match predicate.
236+
`REGEXP_LIKE(string, pattern [, match_param])`
237+
238+
Returns `TRUE` if the input string contains a match for the regular expression `pattern` (partial match; anchor the pattern with `^` and `$` to require a full-string match).
239+
By default, the match is case-sensitive.
233240

234241
**Inputs:**
235-
- `str, pattern`
242+
- `string`: The input string to test.
243+
- `pattern`: A regular expression pattern.
244+
- `match_param` *(optional)*: A string of one or more flag characters controlling the regex matching behavior. Flags can be combined (e.g. `'im'`).
245+
- `'i'`: Case-insensitive match.
246+
- `'c'`: Case-sensitive match (default).
247+
- `'m'`: Multi-line mode.
248+
- `'n'`: Allows the `.` to match newline characters.
236249

237250
**Output:**
238251
- `BOOLEAN`
239252

240-
**Example:**
253+
**Examples:**
241254
```sql
242-
SELECT REGEXP_LIKE(email, '.*@example\.com') AS ok FROM users;
255+
SELECT REGEXP_LIKE('Hello', 'HEL'); -- false
256+
SELECT REGEXP_LIKE('Hello', 'HEL', 'i'); -- true
257+
SELECT REGEXP_LIKE('abc\nxyz', '^xyz', 'm'); -- true
243258
```
244259

245260
[Back to index](./README.md)

es6/sql-bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2481,7 +2481,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers {
24812481
| "r": {
24822482
| "script": {
24832483
| "lang": "painless",
2484-
| "source": "(def arg0 = (!doc.containsKey('identifier2') || doc['identifier2'].empty ? null : doc['identifier2'].value); (arg0 == null) ? null : 3 == 0 ? \"\" : 3 > arg0.length() ? null : arg0.substring(arg0.length() - 3))"
2484+
| "source": "(def arg0 = (!doc.containsKey('identifier2') || doc['identifier2'].empty ? null : doc['identifier2'].value); (arg0 == null) ? null : 3 == 0 ? \"\" : arg0.substring(arg0.length() - Math.min(3, arg0.length())))"
24852485
| }
24862486
| },
24872487
| "rep": {
@@ -2505,7 +2505,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers {
25052505
| "reg": {
25062506
| "script": {
25072507
| "lang": "painless",
2508-
| "source": "(def arg0 = (!doc.containsKey('identifier2') || doc['identifier2'].empty ? null : doc['identifier2'].value); (arg0 == null) ? null : arg0.matches(\"soft\"))"
2508+
| "source": "(def arg0 = (!doc.containsKey('identifier2') || doc['identifier2'].empty ? null : doc['identifier2'].value); (arg0 == null) ? null : java.util.regex.Pattern.compile(\"soft\", java.util.regex.Pattern.CASE_INSENSITIVE | java.util.regex.Pattern.MULTILINE).matcher(arg0).find())"
25092509
| }
25102510
| }
25112511
| },
@@ -2553,6 +2553,9 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers {
25532553
.replaceAll(":(\\d)", " : $1")
25542554
.replaceAll("new", "new ")
25552555
.replaceAll(""",\\"le""", """, \\"le""")
2556+
.replaceAll(":arg", " : arg")
2557+
.replaceAll(",java", ", java")
2558+
.replaceAll("\\|java", " | java")
25562559
}
25572560

25582561
it should "handle top hits aggregation" in {

sql/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2470,7 +2470,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers {
24702470
| "r": {
24712471
| "script": {
24722472
| "lang": "painless",
2473-
| "source": "(def arg0 = (!doc.containsKey('identifier2') || doc['identifier2'].empty ? null : doc['identifier2'].value); (arg0 == null) ? null : 3 == 0 ? \"\" : 3 > arg0.length() ? null : arg0.substring(arg0.length() - 3))"
2473+
| "source": "(def arg0 = (!doc.containsKey('identifier2') || doc['identifier2'].empty ? null : doc['identifier2'].value); (arg0 == null) ? null : 3 == 0 ? \"\" : arg0.substring(arg0.length() - Math.min(3, arg0.length())))"
24742474
| }
24752475
| },
24762476
| "rep": {
@@ -2494,7 +2494,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers {
24942494
| "reg": {
24952495
| "script": {
24962496
| "lang": "painless",
2497-
| "source": "(def arg0 = (!doc.containsKey('identifier2') || doc['identifier2'].empty ? null : doc['identifier2'].value); (arg0 == null) ? null : arg0.matches(\"soft\"))"
2497+
| "source": "(def arg0 = (!doc.containsKey('identifier2') || doc['identifier2'].empty ? null : doc['identifier2'].value); (arg0 == null) ? null : java.util.regex.Pattern.compile(\"soft\", java.util.regex.Pattern.CASE_INSENSITIVE | java.util.regex.Pattern.MULTILINE).matcher(arg0).find())"
24982498
| }
24992499
| }
25002500
| },
@@ -2542,6 +2542,9 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers {
25422542
.replaceAll(":(\\d)", " : $1")
25432543
.replaceAll("new", "new ")
25442544
.replaceAll(""",\\"le""", """, \\"le""")
2545+
.replaceAll(":arg", " : arg")
2546+
.replaceAll(",java", ", java")
2547+
.replaceAll("\\|java", " | java")
25452548
}
25462549

25472550
it should "handle top hits aggregation" in {

sql/src/main/scala/app/softnetwork/elastic/sql/function/string/package.scala

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -54,11 +54,31 @@ package object string {
5454
override lazy val words: List[String] = List(sql, "STRPOS")
5555
override def painless: String = ".indexOf"
5656
}
57+
5758
case object RegexpLike extends Expr("REGEXP_LIKE") with StringOp {
5859
override lazy val words: List[String] = List(sql, "REGEXP")
5960
override def painless: String = ".matches"
6061
}
6162

63+
case class MatchFlags(flags: String) extends PainlessScript {
64+
override def sql: String = s"'$flags'"
65+
override def painless: String = flags.toCharArray
66+
.map {
67+
case 'i' => "java.util.regex.Pattern.CASE_INSENSITIVE"
68+
case 'c' => "0"
69+
case 'n' => "java.util.regex.Pattern.DOTALL"
70+
case 'm' => "java.util.regex.Pattern.MULTILINE"
71+
case _ => ""
72+
}
73+
.filter(_.nonEmpty)
74+
.mkString(" | ") match {
75+
case "" => "0"
76+
case s => s
77+
}
78+
79+
override def nullable: Boolean = false
80+
}
81+
6282
sealed trait StringFunction[Out <: SQLType]
6383
extends TransformFunction[SQLVarchar, Out]
6484
with FunctionWithIdentifier {
@@ -191,7 +211,7 @@ package object string {
191211
override def toPainlessCall(callArgs: List[String]): String = {
192212
callArgs match {
193213
case List(arg0, arg1) =>
194-
s"""$arg1 == 0 ? "" : $arg1 > $arg0.length() ? null : $arg0.substring($arg0.length() - $arg1)"""
214+
s"""$arg1 == 0 ? "" : $arg0.substring($arg0.length() - Math.min($arg1, $arg0.length()))"""
195215
case _ => throw new IllegalArgumentException("RIGHT requires 2 arguments")
196216
}
197217
}
@@ -274,20 +294,25 @@ package object string {
274294
override def toSQL(base: String): String = sql
275295
}
276296

277-
case class RegexpLike(str: PainlessScript, pattern: PainlessScript)
278-
extends StringFunction[SQLBool] {
297+
case class RegexpLike(
298+
str: PainlessScript,
299+
pattern: PainlessScript,
300+
matchFlags: Option[MatchFlags] = None
301+
) extends StringFunction[SQLBool] {
279302
override def outputType: SQLBool = SQLTypes.Boolean
280303

281304
override def stringOp: StringOp = RegexpLike
282305

283-
override def args: List[PainlessScript] = List(str, pattern)
306+
override def args: List[PainlessScript] = List(str, pattern) ++ matchFlags.toList
284307

285308
override def nullable: Boolean = str.nullable || pattern.nullable
286309

287310
override def toPainlessCall(callArgs: List[String]): String = {
288311
callArgs match {
289-
case List(arg0, arg1) => s"$arg0.matches($arg1)"
290-
case _ => throw new IllegalArgumentException("REGEXP_LIKE requires 2 arguments")
312+
case List(arg0, arg1) => s"java.util.regex.Pattern.compile($arg1).matcher($arg0).find()"
313+
case List(arg0, arg1, arg2) =>
314+
s"java.util.regex.Pattern.compile($arg1, $arg2).matcher($arg0).find()"
315+
case _ => throw new IllegalArgumentException("REGEXP_LIKE requires 2 or 3 arguments")
291316
}
292317
}
293318

sql/src/main/scala/app/softnetwork/elastic/sql/parser/function/string/package.scala

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,9 +52,16 @@ package object string {
5252
}
5353

5454
def regexp: PackratParser[StringFunction[SQLBool]] =
55-
RegexpLike.regex ~ start ~ valueExpr ~ separator ~ valueExpr ~ end ^^ {
56-
case _ ~ _ ~ str ~ _ ~ pattern ~ _ =>
57-
RegexpLike(str, pattern)
55+
RegexpLike.regex ~ start ~ valueExpr ~ separator ~ valueExpr ~ (separator ~ literal).? ~ end ^^ {
56+
case _ ~ _ ~ str ~ _ ~ pattern ~ flags ~ _ =>
57+
RegexpLike(
58+
str,
59+
pattern,
60+
flags match {
61+
case Some(_ ~ f) => Some(MatchFlags(f.value))
62+
case _ => None
63+
}
64+
)
5865
}
5966

6067
def stringFunctionWithIdentifier: PackratParser[Identifier] =

sql/src/test/scala/app/softnetwork/elastic/sql/SQLParserSpec.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ object Queries {
163163
"SELECT identifier, (ABS(identifier) + 1.0) * 2, CEIL(identifier), FLOOR(identifier), SQRT(identifier), EXP(identifier), LOG(identifier), LOG10(identifier), POW(identifier, 3), ROUND(identifier), ROUND(identifier, 2), SIGN(identifier), COS(identifier), ACOS(identifier), SIN(identifier), ASIN(identifier), TAN(identifier), ATAN(identifier), ATAN2(identifier, 3.0) FROM Table WHERE SQRT(identifier) > 100.0"
164164

165165
val string: String =
166-
"SELECT identifier, LENGTH(identifier2) AS len, LOWER(identifier2) AS low, UPPER(identifier2) AS upp, SUBSTRING(identifier2, 1, 3) AS sub, TRIM(identifier2) AS tr, LTRIM(identifier2) AS ltr, RTRIM(identifier2) AS rtr, CONCAT(identifier2, '_test', 1) AS con, LEFT(identifier2, 5) AS l, RIGHT(identifier2, 3) AS r, REPLACE(identifier2, 'el', 'le') AS rep, REVERSE(identifier2) AS rev, POSITION('soft', identifier2, 1) AS pos, REGEXP_LIKE(identifier2, 'soft') AS reg FROM Table WHERE LENGTH(TRIM(identifier2)) > 10"
166+
"SELECT identifier, LENGTH(identifier2) AS len, LOWER(identifier2) AS low, UPPER(identifier2) AS upp, SUBSTRING(identifier2, 1, 3) AS sub, TRIM(identifier2) AS tr, LTRIM(identifier2) AS ltr, RTRIM(identifier2) AS rtr, CONCAT(identifier2, '_test', 1) AS con, LEFT(identifier2, 5) AS l, RIGHT(identifier2, 3) AS r, REPLACE(identifier2, 'el', 'le') AS rep, REVERSE(identifier2) AS rev, POSITION('soft', identifier2, 1) AS pos, REGEXP_LIKE(identifier2, 'soft', 'im') AS reg FROM Table WHERE LENGTH(TRIM(identifier2)) > 10"
167167

168168
val topHits: String =
169169
"SELECT department AS dept, firstName, CAST(hire_date AS DATE) AS hire_date, COUNT(DISTINCT salary) AS cnt, FIRST_VALUE(salary) OVER (PARTITION BY department ORDER BY hire_date ASC) AS first_salary, LAST_VALUE(salary) OVER (PARTITION BY department ORDER BY hire_date ASC) AS last_salary, ARRAY_AGG(name) OVER (PARTITION BY department ORDER BY hire_date ASC, salary DESC LIMIT 1000) AS employees FROM emp"

0 commit comments

Comments
 (0)