Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 40 additions & 10 deletions src/uu/ptx/src/ptx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
use regex::Regex;
use thiserror::Error;
use uucore::display::Quotable;
use uucore::error::{FromIo, UError, UResult, UUsageError};
use uucore::error::{FromIo, UError, UResult, USimpleError, UUsageError};
use uucore::format_usage;
use uucore::translate;

Expand All @@ -43,6 +43,7 @@
context_regex: String,
line_width: usize,
gap_size: usize,
sentence_regex: Option<String>,
}

impl Default for Config {
Expand All @@ -59,6 +60,7 @@
context_regex: "\\w+".to_owned(),
line_width: 72,
gap_size: 3,
sentence_regex: None,
}
}
}
Expand Down Expand Up @@ -197,9 +199,6 @@

/// Error values declared in this file; the `Display` text of each variant is
/// supplied by the `#[error(...)]` attribute from `thiserror`.
#[derive(Debug, Error)]
enum PtxError {
/// Carries the name of a feature as a static string; the message is the
/// `ptx-error-not-implemented` translation with that name passed as `feature`.
#[error("{}", translate!("ptx-error-not-implemented", "feature" => (*.0)))]
NotImplemented(&'static str),

/// Wraps a `ParseIntError`; the message is the wrapped error's own display text.
#[error("{0}")]
ParseError(ParseIntError),
}
Expand All @@ -214,8 +213,18 @@
config.format = OutFormat::Roff;
"[^ \t\n]+".clone_into(&mut config.context_regex);
}
if matches.contains_id(options::SENTENCE_REGEXP) {
return Err(PtxError::NotImplemented("-S").into());
if let Some(regex) = matches.get_one::<String>(options::SENTENCE_REGEXP) {
config.sentence_regex = Some(regex.clone());

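// Reject a pattern that can match the empty string. Note: a pattern that fails to
// compile is NOT rejected at this point (the `Err` case is skipped below).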
if let Ok(re) = Regex::new(regex) {
if re.is_match("") {
return Err(USimpleError::new(
1,
"A regular expression cannot match a length zero string",
));
}
}
}
config.auto_ref = matches.get_flag(options::AUTO_REFERENCE);
config.input_ref = matches.get_flag(options::REFERENCES);
Expand Down Expand Up @@ -271,17 +280,38 @@

type FileMap = HashMap<OsString, FileContent>;

fn read_input(input_files: &[OsString]) -> std::io::Result<FileMap> {
fn read_input(input_files: &[OsString], config: &Config) -> std::io::Result<FileMap> {
let mut file_map: FileMap = HashMap::new();
let mut offset: usize = 0;

let sentence_splitter =
if let Some(re_str) = &config.sentence_regex {
Some(Regex::new(re_str).map_err(|_| {
std::io::Error::new(std::io::ErrorKind::InvalidInput, "Invalid regex")
})?)
} else {
None
};

for filename in input_files {
let reader: BufReader<Box<dyn Read>> = BufReader::new(if filename == "-" {
let mut reader: BufReader<Box<dyn Read>> = BufReader::new(if filename == "-" {
Box::new(stdin())
} else {
let file = File::open(Path::new(filename))?;
Box::new(file)
});
let lines: Vec<String> = reader.lines().collect::<std::io::Result<Vec<String>>>()?;

let lines = if let Some(re) = &sentence_splitter {
let mut buffer = String::new();
reader.read_to_string(&mut buffer)?;

re.split(&buffer)
.map(|s| s.replace("\n", " ")) // ptx behavior: newlines become spaces inside sentences

Check failure on line 309 in src/uu/ptx/src/ptx.rs

View workflow job for this annotation

GitHub Actions / Style and Lint (ubuntu-24.04, unix)

ERROR: `cargo clippy`: single-character string constant used as pattern (file:'src/uu/ptx/src/ptx.rs', line:309)

Check failure on line 309 in src/uu/ptx/src/ptx.rs

View workflow job for this annotation

GitHub Actions / Style and Lint (unix)

ERROR: `cargo clippy`: single-character string constant used as pattern (file:'src/uu/ptx/src/ptx.rs', line:309)
.filter(|s| !s.is_empty()) // remove empty sentences
.collect()
} else {
reader.lines().collect::<std::io::Result<Vec<String>>>()?
};

// Indexing a UTF-8 string requires walking from the beginning, which can hurt performance badly when the line is long.
// Since we will be jumping around the line a lot, we dump the content into a Vec<char>, which can be indexed in constant time.
Expand Down Expand Up @@ -877,7 +907,7 @@
}

let word_filter = WordFilter::new(&matches, &config)?;
let file_map = read_input(&input_files).map_err_context(String::new)?;
let file_map = read_input(&input_files, &config).map_err_context(String::new)?;
let word_set = create_word_set(&config, &word_filter, &file_map);
write_traditional_output(&mut config, &file_map, &word_set, &output_file)
}
Expand Down
38 changes: 38 additions & 0 deletions tests/by-util/test_ptx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,44 @@ fn test_utf8() {
.stdout_only("\\xx {}{it’s}{disabled}{}{}\n\\xx {}{}{it’s}{ disabled}{}\n");
}

// With `-S` set to a literal-dot pattern, the run must succeed and both
// "Hello" and "World" must appear somewhere on stdout.
#[test]
fn test_sentence_regexp_basic() {
    let argv = ["-G", "-S", r"\."];
    let outcome = new_ucmd!().args(&argv).pipe_in("Hello. World.").succeeds();
    outcome.stdout_contains("Hello");
    outcome.stdout_contains("World");
}

// With `-S` set to the character class `[.!]` and `-w 50`, the run must
// succeed and stdout must contain both "One sentence" and "Two sentence".
#[test]
fn test_sentence_regexp_split_behavior() {
    let argv = ["-G", "-w", "50", "-S", "[.!]"];
    let outcome = new_ucmd!()
        .args(&argv)
        .pipe_in("One sentence. Two sentence!")
        .succeeds();
    outcome.stdout_contains("One sentence");
    outcome.stdout_contains("Two sentence");
}

// A `-S` pattern of `^` (which matches the empty string) must make the
// command fail and print the zero-length-match diagnostic on stderr.
#[test]
fn test_sentence_regexp_empty_match_failure() {
    let argv = ["-G", "-S", "^"];
    let outcome = new_ucmd!().args(&argv).pipe_in("Input").fails();
    outcome.stderr_contains("A regular expression cannot match a length zero string");
}

// Input with a newline before the `-S` delimiter: the run must succeed and
// stdout must contain the text with that newline shown as a space.
#[test]
fn test_sentence_regexp_newlines_are_spaces() {
    let argv = ["-G", "-S", r"\."];
    let outcome = new_ucmd!()
        .args(&argv)
        .pipe_in("Start of\nsentence.")
        .succeeds();
    outcome.stdout_contains("Start of sentence");
}

#[test]
fn test_gnu_mode_dumb_format() {
// Test GNU mode (dumb format) - the default mode without -G flag
Expand Down
Loading