|
| 1 | +// This file is part of the uutils coreutils package. |
| 2 | +// |
| 3 | +// For the full copyright and license information, please view the LICENSE |
| 4 | +// file that was distributed with this source code. |
| 5 | + |
| 6 | +//! Quoter for `printf %q` format specifier. |
| 7 | +//! |
| 8 | +//! This implements bash-compatible shell quoting for the `%q` format in printf. |
| 9 | +//! Unlike the shell escape style used by `ls`, this uses a simpler algorithm: |
| 10 | +//! 1. For empty strings: output '' |
| 11 | +//! 2. For strings with only printable chars and shell-safe chars: no quotes |
| 12 | +//! 3. For strings with shell metacharacters but no control chars: backslash escape |
| 13 | +//! 4. For strings with control characters: use $'...' ANSI-C quoting |
| 14 | +
|
| 15 | +use super::Quoter; |
| 16 | + |
/// Bytes that receive a backslash (or force quoting) when they appear in the
/// input: blanks, both quote styles, backslash, backtick, `$`, the shell
/// control operators, redirections, bracket/brace pairs, glob characters,
/// `!` and `#`.
///
/// Tilde (`~`) is deliberately absent, so it is never escaped.
/// NOTE(review): bash may escape a leading `~` for `printf %q`; confirm
/// against the reference shell.
const SHELL_META_CHARS: &str = " \t\n'\"\\`$&|;()<>[]{}*?!#";
| 20 | + |
/// Report whether `b` is an ASCII control byte.
///
/// That is the range `0x00..=0x1F` together with DEL (`0x7F`); bytes with
/// the high bit set are not considered control bytes.
#[inline]
fn is_control(b: u8) -> bool {
    matches!(b, 0x00..=0x1F | 0x7F)
}
| 26 | + |
| 27 | +/// Check if string contains any control characters |
| 28 | +fn has_control_chars(s: &[u8]) -> bool { |
| 29 | + s.iter().any(|&b| is_control(b)) |
| 30 | +} |
| 31 | + |
| 32 | +/// Check if string needs any quoting at all |
| 33 | +fn needs_quoting(s: &[u8]) -> bool { |
| 34 | + s.is_empty() |
| 35 | + || s.iter() |
| 36 | + .any(|&b| SHELL_META_CHARS.as_bytes().contains(&b) || is_control(b)) |
| 37 | +} |
| 38 | + |
| 39 | +pub(super) struct PrintfQuoter; |
| 40 | + |
| 41 | +impl PrintfQuoter { |
| 42 | + /// Quote a string for printf %q |
| 43 | + pub fn quote(input: &[u8]) -> Vec<u8> { |
| 44 | + // Empty string special case |
| 45 | + if input.is_empty() { |
| 46 | + return b"''".to_vec(); |
| 47 | + } |
| 48 | + |
| 49 | + // If no quoting needed, return as-is |
| 50 | + if !needs_quoting(input) { |
| 51 | + return input.to_vec(); |
| 52 | + } |
| 53 | + |
| 54 | + // If has control characters, use $'...' format |
| 55 | + if has_control_chars(input) { |
| 56 | + return Self::quote_ansi_c(input); |
| 57 | + } |
| 58 | + |
| 59 | + // Otherwise use simple backslash escaping |
| 60 | + Self::quote_backslash(input) |
| 61 | + } |
| 62 | + |
| 63 | + /// Quote using backslash escaping for printable special characters |
| 64 | + fn quote_backslash(input: &[u8]) -> Vec<u8> { |
| 65 | + let mut result = Vec::with_capacity(input.len() * 2); |
| 66 | + |
| 67 | + for &b in input { |
| 68 | + if SHELL_META_CHARS.as_bytes().contains(&b) { |
| 69 | + result.push(b'\\'); |
| 70 | + } |
| 71 | + result.push(b); |
| 72 | + } |
| 73 | + |
| 74 | + result |
| 75 | + } |
| 76 | + |
| 77 | + /// Quote using $'...' ANSI-C quoting |
| 78 | + fn quote_ansi_c(input: &[u8]) -> Vec<u8> { |
| 79 | + let mut result = Vec::with_capacity(input.len() * 2 + 3); |
| 80 | + result.extend(b"$'"); |
| 81 | + |
| 82 | + for &b in input { |
| 83 | + match b { |
| 84 | + b'\x07' => result.extend(b"\\a"), |
| 85 | + b'\x08' => result.extend(b"\\b"), |
| 86 | + b'\t' => result.extend(b"\\t"), |
| 87 | + b'\n' => result.extend(b"\\n"), |
| 88 | + b'\x0B' => result.extend(b"\\v"), |
| 89 | + b'\x0C' => result.extend(b"\\f"), |
| 90 | + b'\r' => result.extend(b"\\r"), |
| 91 | + b'\'' => result.extend(b"\\'"), |
| 92 | + b'\\' => result.extend(b"\\\\"), |
| 93 | + _ if is_control(b) => { |
| 94 | + // Use octal escape for other control characters |
| 95 | + result.push(b'\\'); |
| 96 | + result.push(b'0' + (b >> 6)); |
| 97 | + result.push(b'0' + ((b >> 3) & 7)); |
| 98 | + result.push(b'0' + (b & 7)); |
| 99 | + } |
| 100 | + _ => result.push(b), |
| 101 | + } |
| 102 | + } |
| 103 | + |
| 104 | + result.push(b'\''); |
| 105 | + result |
| 106 | + } |
| 107 | +} |
| 108 | + |
| 109 | +impl Quoter for PrintfQuoter { |
| 110 | + fn push_char(&mut self, _input: char) { |
| 111 | + // Not used - we process the entire string at once |
| 112 | + unimplemented!("PrintfQuoter processes entire strings, not character by character") |
| 113 | + } |
| 114 | + |
| 115 | + fn push_invalid(&mut self, _input: &[u8]) { |
| 116 | + unimplemented!("PrintfQuoter processes entire strings, not character by character") |
| 117 | + } |
| 118 | + |
| 119 | + fn finalize(self: Box<Self>) -> Vec<u8> { |
| 120 | + unimplemented!("PrintfQuoter processes entire strings, not character by character") |
| 121 | + } |
| 122 | +} |
| 123 | + |
#[cfg(test)]
mod tests {
    use super::*;

    /// Assert that quoting `input` yields exactly `expected`.
    #[track_caller]
    fn assert_quoted(input: &[u8], expected: &[u8]) {
        assert_eq!(PrintfQuoter::quote(input), expected);
    }

    #[test]
    fn test_empty() {
        assert_quoted(b"", b"''");
    }

    #[test]
    fn test_simple() {
        // Plain alphanumerics pass through untouched.
        assert_quoted(b"a", b"a");
        assert_quoted(b"abc", b"abc");
        assert_quoted(b"abc123", b"abc123");
    }

    #[test]
    fn test_backslash_escape() {
        // Printable metacharacters get a single leading backslash.
        assert_quoted(b"a b", b"a\\ b");
        assert_quoted(b"a'b", b"a\\'b");
        assert_quoted(b"a\"b", b"a\\\"b");
        assert_quoted(b"a$b", b"a\\$b");
        assert_quoted(b"a`b", b"a\\`b");
        assert_quoted(b"a&b", b"a\\&b");
    }

    #[test]
    fn test_ansi_c() {
        // Any control byte switches the whole string to $'...' quoting.
        assert_quoted(b"\n", b"$'\\n'");
        assert_quoted(b"\t", b"$'\\t'");
        assert_quoted(b"\x01", b"$'\\001'");
        assert_quoted(b"a\x01b", b"$'a\\001b'");
    }

    #[test]
    fn test_issue_9638() {
        // Regression input from the issue: a quote between control bytes.
        assert_quoted(b"\x01'\x01", b"$'\\001\\'\\001'");
    }

    #[test]
    fn test_tilde_not_escaped() {
        // Tilde is not in the metacharacter set, so it is left alone.
        assert_quoted(b"test~", b"test~");
        assert_quoted(b"~/.bashrc", b"~/.bashrc");
    }
}
0 commit comments