Skip to content

Commit ac19251

Browse files
committed
fix(printf): implement bash-compatible %q format (fixes #9638)
- Created PrintfQuoter with a proper shell quoting algorithm:
  - Empty strings: ''
  - Simple text: unchanged
  - Shell metacharacters: backslash escape
  - Control characters: $'...' ANSI-C quoting format
- Fixed shell_quoter apostrophe handling bug
- Added 18 comprehensive tests covering edge cases
1 parent 4a1b969 commit ac19251

File tree

6 files changed

+346
-11
lines changed

6 files changed

+346
-11
lines changed

.vscode/cspell.dictionaries/jargon.wordlist.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ lossily
8484
lstat
8585
mebi
8686
mebibytes
87+
metacharacters
8788
mergeable
8889
microbenchmark
8990
microbenchmarks

src/uucore/src/lib/features/format/spec.rs

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,7 @@ use super::{
1313
},
1414
parse_escape_only,
1515
};
16-
use crate::{
17-
format::FormatArguments,
18-
os_str_as_bytes,
19-
quoting_style::{QuotingStyle, locale_aware_escape_name},
20-
};
16+
use crate::{format::FormatArguments, os_str_as_bytes};
2117
use std::{io::Write, num::NonZero, ops::ControlFlow};
2218

2319
/// A parsed specification for formatting a value
@@ -403,10 +399,7 @@ impl Spec {
403399
writer.write_all(&parsed).map_err(FormatError::IoError)
404400
}
405401
Self::QuotedString { position } => {
406-
let s = locale_aware_escape_name(
407-
args.next_string(position),
408-
QuotingStyle::SHELL_ESCAPE,
409-
);
402+
let s = crate::quoting_style::printf_quote(args.next_string(position));
410403
let bytes = os_str_as_bytes(&s)?;
411404
writer.write_all(bytes).map_err(FormatError::IoError)
412405
}

src/uucore/src/lib/features/quoting_style/mod.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ pub use escaped_char::{EscapeState, EscapedChar};
1818

1919
mod c_quoter;
2020
mod literal_quoter;
21+
mod printf_quoter;
2122
mod shell_quoter;
2223

2324
/// The quoting style to use when escaping a name.
@@ -228,6 +229,18 @@ pub fn locale_aware_escape_dir_name(name: &OsStr, style: QuotingStyle) -> OsStri
228229
escape_dir_name(name, style, i18n::get_locale_encoding())
229230
}
230231

232+
/// Escape a string for printf %q format specifier (bash-compatible shell quoting).
233+
/// This uses a simpler algorithm than SHELL_ESCAPE:
234+
/// - Empty strings become ''
235+
/// - Simple alphanumeric strings are unchanged
236+
/// - Strings with shell metacharacters but no control chars use backslash escaping
237+
/// - Strings with control characters use $'...' ANSI-C quoting
238+
pub fn printf_quote(name: &OsStr) -> OsString {
239+
let name = crate::os_str_as_bytes_lossy(name);
240+
crate::os_string_from_vec(printf_quoter::PrintfQuoter::quote(&name))
241+
.expect("all byte sequences should be valid for platform")
242+
}
243+
231244
impl fmt::Display for QuotingStyle {
232245
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
233246
match *self {
Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
// This file is part of the uutils coreutils package.
2+
//
3+
// For the full copyright and license information, please view the LICENSE
4+
// file that was distributed with this source code.
5+
6+
//! Quoter for `printf %q` format specifier.
7+
//!
8+
//! This implements bash-compatible shell quoting for the `%q` format in printf.
9+
//! Unlike the shell escape style used by `ls`, this uses a simpler algorithm:
10+
//! 1. For empty strings: output ''
11+
//! 2. For strings with only printable chars and shell-safe chars: no quotes
12+
//! 3. For strings with shell metacharacters but no control chars: backslash escape
13+
//! 4. For strings with control characters: use $'...' ANSI-C quoting
14+
15+
use super::Quoter;
16+
17+
/// Characters that need escaping in shell context
18+
/// Note: Tilde (~) is NOT escaped by bash printf %q
19+
const SHELL_META_CHARS: &str = " \t\n'\"\\`$&|;()<>[]{}*?!#";
20+
21+
/// Return `true` when `b` is an ASCII control character, i.e. a byte in
/// `0x00..=0x1F` or the DEL byte `0x7F`.
///
/// Bytes `>= 0x80` are never treated as control characters.
fn is_control(b: u8) -> bool {
    // The standard library already defines exactly this set.
    b.is_ascii_control()
}
26+
27+
/// Check if string contains any control characters
28+
fn has_control_chars(s: &[u8]) -> bool {
29+
s.iter().any(|&b| is_control(b))
30+
}
31+
32+
/// Check if string needs any quoting at all
33+
fn needs_quoting(s: &[u8]) -> bool {
34+
s.is_empty()
35+
|| s.iter()
36+
.any(|&b| SHELL_META_CHARS.as_bytes().contains(&b) || is_control(b))
37+
}
38+
39+
pub(super) struct PrintfQuoter;
40+
41+
impl PrintfQuoter {
42+
/// Quote a string for printf %q
43+
pub fn quote(input: &[u8]) -> Vec<u8> {
44+
// Empty string special case
45+
if input.is_empty() {
46+
return b"''".to_vec();
47+
}
48+
49+
// If no quoting needed, return as-is
50+
if !needs_quoting(input) {
51+
return input.to_vec();
52+
}
53+
54+
// If has control characters, use $'...' format
55+
if has_control_chars(input) {
56+
return Self::quote_ansi_c(input);
57+
}
58+
59+
// Otherwise use simple backslash escaping
60+
Self::quote_backslash(input)
61+
}
62+
63+
/// Quote using backslash escaping for printable special characters
64+
fn quote_backslash(input: &[u8]) -> Vec<u8> {
65+
let mut result = Vec::with_capacity(input.len() * 2);
66+
67+
for &b in input {
68+
if SHELL_META_CHARS.as_bytes().contains(&b) {
69+
result.push(b'\\');
70+
}
71+
result.push(b);
72+
}
73+
74+
result
75+
}
76+
77+
/// Quote using $'...' ANSI-C quoting
78+
fn quote_ansi_c(input: &[u8]) -> Vec<u8> {
79+
let mut result = Vec::with_capacity(input.len() * 2 + 3);
80+
result.extend(b"$'");
81+
82+
for &b in input {
83+
match b {
84+
b'\x07' => result.extend(b"\\a"),
85+
b'\x08' => result.extend(b"\\b"),
86+
b'\t' => result.extend(b"\\t"),
87+
b'\n' => result.extend(b"\\n"),
88+
b'\x0B' => result.extend(b"\\v"),
89+
b'\x0C' => result.extend(b"\\f"),
90+
b'\r' => result.extend(b"\\r"),
91+
b'\'' => result.extend(b"\\'"),
92+
b'\\' => result.extend(b"\\\\"),
93+
_ if is_control(b) => {
94+
// Use octal escape for other control characters
95+
result.push(b'\\');
96+
result.push(b'0' + (b >> 6));
97+
result.push(b'0' + ((b >> 3) & 7));
98+
result.push(b'0' + (b & 7));
99+
}
100+
_ => result.push(b),
101+
}
102+
}
103+
104+
result.push(b'\'');
105+
result
106+
}
107+
}
108+
109+
// `PrintfQuoter` quotes whole strings through `PrintfQuoter::quote`; it keeps
// no incremental state, so every method of the `Quoter` trait panics.
impl Quoter for PrintfQuoter {
    /// Unsupported: always panics.
    fn push_char(&mut self, _ch: char) {
        unimplemented!("PrintfQuoter processes entire strings, not character by character")
    }

    /// Unsupported: always panics.
    fn push_invalid(&mut self, _bytes: &[u8]) {
        unimplemented!("PrintfQuoter processes entire strings, not character by character")
    }

    /// Unsupported: always panics.
    fn finalize(self: Box<Self>) -> Vec<u8> {
        unimplemented!("PrintfQuoter processes entire strings, not character by character")
    }
}
123+
124+
#[cfg(test)]
mod tests {
    use super::*;

    /// The empty string is rendered as a pair of single quotes.
    #[test]
    fn test_empty() {
        assert_eq!(PrintfQuoter::quote(b""), b"''");
    }

    /// Strings without metacharacters or control characters pass through.
    #[test]
    fn test_simple() {
        assert_eq!(PrintfQuoter::quote(b"a"), b"a");
        assert_eq!(PrintfQuoter::quote(b"abc"), b"abc");
        assert_eq!(PrintfQuoter::quote(b"abc123"), b"abc123");
    }

    /// Printable metacharacters are prefixed with a backslash.
    #[test]
    fn test_backslash_escape() {
        assert_eq!(PrintfQuoter::quote(b"a b"), b"a\\ b");
        assert_eq!(PrintfQuoter::quote(b"a'b"), b"a\\'b");
        assert_eq!(PrintfQuoter::quote(b"a\"b"), b"a\\\"b");
        assert_eq!(PrintfQuoter::quote(b"a$b"), b"a\\$b");
        assert_eq!(PrintfQuoter::quote(b"a`b"), b"a\\`b");
        assert_eq!(PrintfQuoter::quote(b"a&b"), b"a\\&b");
    }

    /// Any control character switches the whole string to `$'...'`.
    #[test]
    fn test_ansi_c() {
        assert_eq!(PrintfQuoter::quote(b"\n"), b"$'\\n'");
        assert_eq!(PrintfQuoter::quote(b"\t"), b"$'\\t'");
        assert_eq!(PrintfQuoter::quote(b"\x01"), b"$'\\001'");
        assert_eq!(PrintfQuoter::quote(b"a\x01b"), b"$'a\\001b'");
    }

    #[test]
    fn test_issue_9638() {
        // The actual failing test case from the issue
        assert_eq!(PrintfQuoter::quote(b"\x01'\x01"), b"$'\\001\\'\\001'");
    }

    #[test]
    fn test_tilde_not_escaped() {
        // Tilde is not in the metacharacter table, so it is left alone.
        assert_eq!(PrintfQuoter::quote(b"test~"), b"test~");
        assert_eq!(PrintfQuoter::quote(b"~/.bashrc"), b"~/.bashrc");
    }

    /// Control characters with a short escape use it instead of octal.
    #[test]
    fn test_ansi_c_named_escapes() {
        assert_eq!(
            PrintfQuoter::quote(b"\x07\x08\x0B\x0C\r"),
            b"$'\\a\\b\\v\\f\\r'"
        );
    }

    /// DEL (0x7F) is a control character and takes the octal path.
    #[test]
    fn test_delete_uses_octal() {
        assert_eq!(PrintfQuoter::quote(b"\x7F"), b"$'\\177'");
        assert_eq!(PrintfQuoter::quote(b"\x1F"), b"$'\\037'");
    }

    /// A backslash is doubled in both quoting modes.
    #[test]
    fn test_backslash_in_both_modes() {
        assert_eq!(PrintfQuoter::quote(b"a\\b"), b"a\\\\b");
        assert_eq!(PrintfQuoter::quote(b"\x01\\"), b"$'\\001\\\\'");
    }

    /// Inside `$'...'`, printable metacharacters other than `'` and `\` are
    /// copied without a backslash.
    #[test]
    fn test_ansi_c_leaves_printable_metacharacters_bare() {
        assert_eq!(PrintfQuoter::quote(b"a b\n"), b"$'a b\\n'");
        assert_eq!(PrintfQuoter::quote(b"$x\t"), b"$'$x\\t'");
    }

    /// Bytes >= 0x80 are neither metacharacters nor control characters.
    #[test]
    fn test_non_ascii_bytes_unchanged() {
        assert_eq!(PrintfQuoter::quote(b"\xC3\xA9"), b"\xC3\xA9");
        assert_eq!(PrintfQuoter::quote(b"\xC3\xA9 x"), b"\xC3\xA9\\ x");
    }
}

src/uucore/src/lib/features/quoting_style/shell_quoter.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,8 +150,8 @@ impl Quoter for EscapedShellQuoter<'_> {
150150
// if the string also contains double quotes. In that case, they
151151
// must be handled separately.
152152
EscapeState::Backslash('\'') => {
153+
self.exit_dollar();
153154
self.must_quote = true;
154-
self.in_dollar = false;
155155
self.buffer.extend(b"'\\''");
156156
}
157157
_ => {

0 commit comments

Comments
 (0)