Skip to content

Commit a72fe5f

Browse files
committed
fix(printf): implement selective dollar-quoting for %q format
Fixes CI failure from PR uutils#9640 where GNU test suite update (commit ba3442f4f) exposed fundamental design flaws in printf %q shell-quoting implementation. Problem: Original code pre-scanned for control characters and wrapped ENTIRE strings in $'...' if ANY control char was present (e.g., "a\r" → $'a\r'). Solution: Implemented selective quoting that only wraps control characters themselves (e.g., "a\r" → a$'\r'), matching GNU coreutils behavior. Key Changes: - Removed has_control_chars() pre-scanning logic - Never start in dollar mode - enter/exit dynamically - Exit dollar mode when encountering regular chars (selective quoting) - Keep consecutive control chars in single dollar quote - Handle apostrophes by exiting dollar mode and using \' escape - Updated test expectations to match selective quoting behavior Examples: - 'a\tb' → a$'\t'b (not $'a\tb') - '\x01\x02\x03' → $'\001\002\003' (not $'\001'$'\002'$'\003') - 'hello\x01world' → hello$'\001'world (not $'hello\001world')
1 parent 084a4c2 commit a72fe5f

File tree

2 files changed

+43
-44
lines changed

2 files changed

+43
-44
lines changed

src/uucore/src/lib/features/quoting_style/shell_quoter.rs

Lines changed: 36 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -126,55 +126,49 @@ impl<'a> EscapedShellQuoter<'a> {
126126
let (quotes, must_quote) = initial_quoting(reference, dirname, always_quote);
127127

128128
// commit_dollar_mode controls quoting strategy:
129-
// true (printf %q): committed dollar mode - wrap entire string in $'...' when control chars present
130-
// false (ls): selective dollar mode - only wrap individual control chars in $'...'
129+
// true (printf %q): use selective dollar-quoting (same as ls)
130+
// false (ls): use selective dollar-quoting
131+
// Both modes use selective quoting: enter $'...' only for control chars
131132
let commit_dollar = commit_dollar_mode;
132133

133-
// Pre-scan for control chars or non-ASCII if we're in committed mode
134-
let has_control_chars = commit_dollar
135-
&& reference
136-
.iter()
137-
.any(|&b| b.is_ascii_control() || !b.is_ascii());
138-
139-
let mut buffer = Vec::with_capacity(size_hint);
140-
if has_control_chars {
141-
buffer.extend(b"$'");
142-
}
143-
144134
Self {
145135
reference,
146136
quotes,
147137
always_quote,
148138
commit_dollar,
149139
must_quote,
150-
in_dollar: has_control_chars,
140+
in_dollar: false, // Never start in dollar mode - enter it dynamically
151141
in_quote_section: false,
152-
buffer,
142+
buffer: Vec::with_capacity(size_hint),
153143
}
154144
}
155145

156146
fn enter_dollar(&mut self) {
157147
if !self.in_dollar {
158-
if self.buffer.is_empty() {
159-
// Starting with dollar quote - prepend empty quotes to indicate no prefix
160-
// GNU coreutils does this for strings that start with only invalid bytes
161-
self.buffer.extend(b"''$'");
162-
} else {
163-
// Had previous content
164-
if self.in_quote_section {
165-
// Close the existing quote section
166-
self.buffer.push(b'\'');
167-
self.in_quote_section = false;
148+
if self.in_quote_section {
149+
// Close any existing quote section first
150+
self.buffer.push(b'\'');
151+
self.in_quote_section = false;
152+
} else if !self.commit_dollar {
153+
// ls mode: handle buffer content before entering dollar mode
154+
if self.buffer.is_empty() {
155+
// Empty buffer: add empty quotes to match GNU (e.g., ''$'\360\237\230\201')
156+
self.buffer.extend(b"''");
168157
} else {
169-
// We have unquoted content - need to quote it first
170-
let mut temp = Vec::with_capacity(self.buffer.len() + 2);
171-
temp.push(b'\'');
172-
temp.extend(&self.buffer);
173-
temp.push(b'\'');
174-
self.buffer = temp;
158+
// Has content: wrap it in quotes
159+
let quote = if self.quotes == Quotes::Single {
160+
b'\''
161+
} else {
162+
b'"'
163+
};
164+
let mut quoted = Vec::with_capacity(self.buffer.len() + 2);
165+
quoted.push(quote);
166+
quoted.extend_from_slice(&self.buffer);
167+
quoted.push(quote);
168+
self.buffer = quoted;
175169
}
176-
self.buffer.extend(b"$'");
177170
}
171+
self.buffer.extend(b"$'");
178172
self.in_dollar = true;
179173
}
180174
}
@@ -196,18 +190,20 @@ impl Quoter for EscapedShellQuoter<'_> {
196190
// Single quotes need escaping - check BEFORE general Char(x)
197191
EscapeState::Backslash('\'') | EscapeState::Char('\'') => {
198192
if self.in_dollar {
199-
// Inside an existing $'...' section, escape as \'
193+
// Inside $'...' section - need to exit, then handle apostrophe
194+
self.exit_dollar();
200195
self.must_quote = true;
196+
// Backslash-escape the apostrophe
201197
self.buffer.extend(b"\\'");
202198
} else if self.commit_dollar {
203-
// printf %q mode (commit_dollar), not in dollar section:
204-
// Check if this is a standalone single quote (buffer still empty = first char)
199+
// printf %q mode, not in dollar section
200+
// Check if this is a standalone single quote
205201
if self.buffer.is_empty() && self.reference.len() == 1 {
206202
// Standalone single quote uses double quotes: "'"
207203
self.must_quote = true;
208204
self.buffer.extend(b"\"'\"");
209205
} else {
210-
// Embedded quote (like in "it's") - backslash escape
206+
// Embedded quote - backslash escape
211207
self.must_quote = true;
212208
self.buffer.extend(b"\\'");
213209
}
@@ -237,7 +233,8 @@ impl Quoter for EscapedShellQuoter<'_> {
237233
EscapeState::Char(x) => {
238234
if self.in_dollar {
239235
if self.commit_dollar {
240-
// In committed dollar mode (printf), regular chars are literal
236+
// In committed dollar mode (printf), exit and write regular char as-is
237+
self.exit_dollar();
241238
self.buffer.extend(x.to_string().as_bytes());
242239
} else {
243240
// In selective dollar mode (ls), exit dollar and start new quoted section
@@ -253,7 +250,7 @@ impl Quoter for EscapedShellQuoter<'_> {
253250
}
254251
} else {
255252
// Not in dollar mode - just add the character
256-
// Outer quoting will be handled in finalize if needed
253+
// Quoting will be handled by enter_dollar (when control chars appear) or finalize
257254
self.buffer.extend(x.to_string().as_bytes());
258255
}
259256
}
@@ -286,6 +283,7 @@ impl Quoter for EscapedShellQuoter<'_> {
286283
self.enter_dollar();
287284
self.must_quote = true;
288285
self.buffer.extend(escaped.collect::<String>().as_bytes());
286+
// Don't exit - let regular chars exit when needed for selective quoting
289287
}
290288
}
291289
}

tests/by-util/test_printf.rs

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -186,10 +186,11 @@ fn sub_q_control_chars_basic() {
186186
#[test]
187187
fn sub_q_control_chars_with_apostrophe() {
188188
// Bug #9638: control char + apostrophe + control char
189+
// GNU outputs: $'\001'\'$'\001'
189190
new_ucmd!()
190191
.args(&["%q\n", "\x01'\x01"])
191192
.succeeds()
192-
.stdout_only("$'\\001\\'\\001'\n");
193+
.stdout_only("$'\\001'\\'$'\\001'\n");
193194
}
194195

195196
#[test]
@@ -257,31 +258,31 @@ fn sub_q_newline() {
257258
new_ucmd!()
258259
.args(&["%q\n", "a\nb"])
259260
.succeeds()
260-
.stdout_only("$'a\\nb'\n");
261+
.stdout_only("a$'\\n'b\n");
261262
}
262263

263264
#[test]
264265
fn sub_q_tab() {
265266
new_ucmd!()
266267
.args(&["%q\n", "a\tb"])
267268
.succeeds()
268-
.stdout_only("$'a\\tb'\n");
269+
.stdout_only("a$'\\t'b\n");
269270
}
270271

271272
#[test]
272273
fn sub_q_mixed_control_and_text() {
273274
new_ucmd!()
274275
.args(&["%q\n", "hello\x01world"])
275276
.succeeds()
276-
.stdout_only("$'hello\\001world'\n");
277+
.stdout_only("hello$'\\001'world\n");
277278
}
278279

279280
#[test]
280281
fn sub_q_apostrophe_in_dollar_quote() {
281282
new_ucmd!()
282283
.args(&["%q\n", "it's\na\nthing"])
283284
.succeeds()
284-
.stdout_only("$'it\\'s\\na\\nthing'\n");
285+
.stdout_only("it\\'s$'\\n'a$'\\n'thing\n");
285286
}
286287

287288
#[test]
@@ -329,7 +330,7 @@ fn sub_q_mixed_escapes() {
329330
new_ucmd!()
330331
.args(&["%q\n", "a b\nc"])
331332
.succeeds()
332-
.stdout_only("$'a b\\nc'\n");
333+
.stdout_only("a\\ b$'\\n'c\n");
333334
}
334335

335336
#[test]

0 commit comments

Comments
 (0)