Skip to content

Commit be0a0cf

Browse files
committed
refactor(content-type): rewrite parser to use lexmatch and handle quoted parameters
The previous implementation used manual string splitting and trimming, which was error-prone for quoted parameter values containing special characters like semicolons. The new implementation uses lexmatch for more robust tokenization and properly handles quoted values, allowing semicolons within quoted strings. Add test cases to verify quoted parameter parsing and invalid media type handling.
1 parent 79f19ee commit be0a0cf

1 file changed

Lines changed: 65 additions & 21 deletions

File tree

content_type.mbt

Lines changed: 65 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -7,31 +7,53 @@ priv struct ContentType {
77

88
///|
99
fn parse_content_type(s : StringView) -> ContentType? {
10-
let parts = s.split(";").to_array()
11-
if parts.is_empty() {
12-
None
13-
} else {
14-
let main_part_str = parts[0].trim()
15-
let media_type_parts = main_part_str.split("/").to_array()
16-
if media_type_parts.length() != 2 {
17-
None
10+
fn dequote(value : StringView) -> StringView {
11+
if value is ['"', .. rest, '"'] {
12+
rest
1813
} else {
19-
let media_type = media_type_parts[0].trim()
20-
let subtype = media_type_parts[1].trim()
21-
let params = {}
22-
for i in 1..<parts.length() {
23-
let param_part = parts[i].trim()
24-
let eq_index = param_part.find("=")
25-
26-
if eq_index is Some(idx) {
27-
let key = param_part[0:idx].trim()
28-
let value = param_part[idx + 1:].trim()
29-
params[key] = value
30-
}
14+
value
15+
}
16+
}
17+
18+
let params = Map::new()
19+
let (media_type, subtype, rest) = lexmatch s with longest {
20+
(
21+
"[ \t]*"
22+
("[^ \t/;=]+" as media_type)
23+
"[ \t]*"
24+
"/"
25+
"[ \t]*"
26+
("[^ \t/;=]+" as subtype)
27+
"[ \t]*",
28+
rest
29+
) => (media_type, subtype, rest)
30+
_ => return None
31+
}
32+
33+
for curr = rest {
34+
lexmatch curr with longest {
35+
("[ \t]+", rest) => continue rest
36+
(
37+
";"
38+
"[ \t]*"
39+
("[^ \t=;]+" as key)
40+
"[ \t]*"
41+
"="
42+
"[ \t]*"
43+
("(\"[^\"]*\"|[^ \t;]*)" as value)
44+
"[ \t]*",
45+
rest
46+
) => {
47+
params.set(key, dequote(value))
48+
continue rest
3149
}
32-
Some({ media_type, subtype, params })
50+
(";" "[ \t]*", rest) => continue rest
51+
"" => break
52+
_ => break
3353
}
3454
}
55+
56+
Some({ media_type, subtype, params })
3557
}
3658

3759
///|
@@ -44,6 +66,28 @@ test "parse_content_type" {
4466
)
4567
}
4668

69+
///|
70+
test "parse_content_type_with_quoted_params" {
71+
inspect(
72+
parse_content_type(
73+
"multipart/form-data; boundary=\"foo;bar\"; charset = utf-8",
74+
),
75+
content=(
76+
#|Some({media_type: "multipart", subtype: "form-data", params: {"boundary": "foo;bar", "charset": "utf-8"}})
77+
),
78+
)
79+
}
80+
81+
///|
82+
test "parse_content_type_invalid" {
83+
inspect(
84+
parse_content_type("application"),
85+
content=(
86+
#|None
87+
),
88+
)
89+
}
90+
4791
///|
4892
test "parse_form_data" {
4993
inspect(

0 commit comments

Comments
 (0)