Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 53 additions & 24 deletions src/sed/compiler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@ use std::rc::Rc;
use terminal_size::{Width, terminal_size};
use uucore::error::{UResult, USimpleError};

const DEFAULT_OUTPUT_WIDTH: usize = 60;
/// Default line-wrap width for the `l` command when no terminal, COLS, or
/// `-l` flag is available. Matches GNU sed's compiled-in default of 70.
const DEFAULT_OUTPUT_WIDTH: usize = 70;

// Handling required after processing a command
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
Expand Down Expand Up @@ -252,8 +254,10 @@ fn compile_sequence(

// According to POSIX: "If the first two characters in the script are
// "#n", the default output shall be suppressed".
// This only applies to the very first line of the first script source.
if !line.eol()
&& line.current() == '#'
&& lines.get_source_index() == 0
&& lines.get_line_number() == 1
&& line.get_pos() == 0
{
Expand Down Expand Up @@ -511,25 +515,33 @@ fn parse_number(
}

/// Parse the end of a command, failing with an error on extra characters.
/// Valid command terminators are: EOL, ';', '}', and '#' (start of comment).
fn parse_command_ending(
lines: &ScriptLineProvider,
line: &mut ScriptCharProvider,
cmd: &mut Command,
) -> UResult<()> {
if !line.eol() && line.current() == ';' {
line.advance();
line.eat_spaces();

if line.eol() {
return Ok(());
}

if !line.eol() {
return compilation_error(
match line.current() {
';' => {
line.advance();
Ok(())
}
'}' | '#' => {
// Don't consume — the caller (compile_sequence) handles these
Ok(())
}
_ => compilation_error(
lines,
line,
format!("extra characters at the end of the {} command", cmd.code),
);
),
}

Ok(())
}

/// Convert a primitive BRE pattern to a safe ERE-compatible pattern string.
Expand Down Expand Up @@ -710,7 +722,7 @@ pub fn compile_replacement(
if let Some(decoded) = parse_char_escape(line) {
literal.push(decoded);
} else {
literal.push('\\');
// Unknown escape: drop the backslash (GNU behavior)
literal.push(line.current());
line.advance();
}
Expand Down Expand Up @@ -931,6 +943,8 @@ pub fn compile_subst_flags(

';' | '\n' => break,

'}' | '#' => break, // closing brace and comment start are valid terminators

other => {
return compilation_error(
lines,
Expand Down Expand Up @@ -1068,7 +1082,16 @@ fn compile_label_command(
}

/// Return the width of the command's terminal or a default.
/// GNU sed checks COLS env var first, then terminal width, then defaults to 70.
/// COLS values <= 0 or non-numeric are ignored.
fn output_width() -> usize {
if let Ok(cols) = std::env::var("COLS")
&& let Ok(n) = cols.parse::<usize>()
&& n > 0
{
// Subtract 1 to avoid line wraps on terminals
return n - 1;
}
if let Some((Width(w), _)) = terminal_size() {
w as usize
} else {
Expand All @@ -1082,21 +1105,31 @@ fn compile_number_command(
lines: &mut ScriptLineProvider,
line: &mut ScriptCharProvider,
cmd: &mut Command,
_context: &mut ProcessingContext,
context: &mut ProcessingContext,
) -> UResult<CommandHandling> {
line.advance(); // Skip the command character
line.eat_spaces(); // Skip any leading whitespace

match parse_number(lines, line, false)? {
Some(n) => {
// 'l<N>' is a GNU extension, reject in POSIX mode
if cmd.code == 'l' && context.posix {
return compilation_error(lines, line, "extra characters after command");
}
cmd.data = CommandData::Number(n);
}
None => match cmd.code {
'q' | 'Q' => {
cmd.data = CommandData::Number(0);
}
'l' => {
cmd.data = CommandData::Number(output_width());
// Use -l flag value if set (non-zero), else auto-detect
let width = if context.length > 0 {
context.length
} else {
output_width()
};
cmd.data = CommandData::Number(width);
}
_ => panic!("invalid number-expecting command"),
},
Expand Down Expand Up @@ -1438,7 +1471,7 @@ mod tests {
let err = result.unwrap_err();
let msg = err.to_string();

assert!(msg.contains("test.sed:42:5: error: unexpected token"));
assert!(msg.contains("test.sed:42: unexpected token"));
}

#[test]
Expand All @@ -1455,7 +1488,7 @@ mod tests {
let err = result.unwrap_err();
let msg = err.to_string();

assert_eq!(msg, "input.txt:3:1: error: invalid command 'x'");
assert_eq!(msg, "input.txt:3: invalid command 'x'");
}

// get_verified_cmd_spec
Expand All @@ -1467,7 +1500,7 @@ mod tests {

assert!(result.is_err());
let msg = result.unwrap_err().to_string();
assert!(msg.contains("test.sed:1:1: error: command expected"));
assert!(msg.contains("test.sed:1: command expected"));
}

#[test]
Expand All @@ -1478,7 +1511,7 @@ mod tests {

assert!(result.is_err());
let msg = result.unwrap_err().to_string();
assert!(msg.contains("script.sed:2:1: error: invalid command code `@'"));
assert!(msg.contains("script.sed:2: invalid command code `@'"));
}

#[test]
Expand All @@ -1489,9 +1522,7 @@ mod tests {

assert!(result.is_err());
let msg = result.unwrap_err().to_string();
assert!(
msg.contains("input.sed:3:1: error: command q expects up to 1 address(es), found 2")
);
assert!(msg.contains("input.sed:3: command q expects up to 1 address(es), found 2"));
}

#[test]
Expand All @@ -1511,9 +1542,7 @@ mod tests {
let result = get_verified_cmd_spec(&lines, &line, 2, true);
assert!(result.is_err());
let msg = result.unwrap_err().to_string();
assert!(
msg.contains("input.sed:1:1: error: command i expects up to 1 address(es), found 2")
);
assert!(msg.contains("input.sed:1: command i expects up to 1 address(es), found 2"));
}

// parse_number
Expand Down Expand Up @@ -1892,7 +1921,7 @@ mod tests {

assert_eq!(cmd.location.line_number, 1);
assert_eq!(cmd.location.column_number, 1);
assert_eq!(cmd.location.input_name.as_ref(), "<script argument 1>");
assert_eq!(cmd.location.input_name.as_ref(), "-e expression #1");

assert!(cmd.next.is_none());
}
Expand All @@ -1909,14 +1938,14 @@ mod tests {
assert_eq!(cmd.code, 'l');
assert_eq!(cmd.location.line_number, 1);
assert_eq!(cmd.location.column_number, 1);
assert_eq!(cmd.location.input_name.as_ref(), "<script argument 1>");
assert_eq!(cmd.location.input_name.as_ref(), "-e expression #1");

let binding2 = cmd.next.clone().unwrap();
let cmd2 = binding2.borrow();
assert_eq!(cmd2.code, 'q');
assert_eq!(cmd2.location.line_number, 1);
assert_eq!(cmd2.location.column_number, 3);
assert_eq!(cmd2.location.input_name.as_ref(), "<script argument 1>");
assert_eq!(cmd2.location.input_name.as_ref(), "-e expression #1");

assert!(cmd2.next.is_none());
}
Expand Down
54 changes: 37 additions & 17 deletions src/sed/error_handling.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,36 +47,53 @@ impl ScriptLocation {

/// Fail with msg as a compile error at the provider location.
/// The error's exit code is 1 (compilation phase).
/// Format matches GNU sed: `-e expression #N, char C: message` for string
/// scripts, `file:line: message` for file scripts.
pub fn compilation_error<T>(
lines: &ScriptLineProvider,
line: &ScriptCharProvider,
msg: impl ToString,
) -> UResult<T> {
Err(USimpleError::new(
1,
let input_name = lines.get_input_name();
let message = if input_name.starts_with("-e expression") {
// GNU format: char position is 1-based within the current line
format!(
"{}:{}:{}: error: {}",
lines.get_input_name(),
lines.get_line_number(),
"{}, char {}: {}",
input_name,
line.get_pos() + 1,
msg.to_string()
),
))
)
} else {
format!(
"{}:{}: {}",
input_name,
lines.get_line_number(),
msg.to_string()
)
};
Err(USimpleError::new(1, message))
}

/// Fail with msg as a compilation error at the command's location.
/// The error's exit code is as specified.
/// Format matches GNU sed conventions.
fn location_error<T>(location: &ScriptLocation, msg: impl ToString, exit_code: i32) -> UResult<T> {
Err(USimpleError::new(
exit_code,
let message = if location.input_name.starts_with("-e expression") {
format!(
"{}:{}:{}: error: {}",
"{}, char {}: {}",
location.input_name,
location.line_number,
location.column_number,
msg.to_string()
),
))
)
} else {
format!(
"{}:{}: {}",
location.input_name,
location.line_number,
msg.to_string()
)
};
Err(USimpleError::new(exit_code, message))
}

/// Fail with msg as a compilation error at the command's location.
Expand All @@ -101,13 +118,16 @@ pub fn input_runtime_error<T>(
context: &ProcessingContext,
msg: impl ToString,
) -> UResult<T> {
let loc_str = if location.input_name.starts_with("-e expression") {
format!("{}, char {}", location.input_name, location.column_number)
} else {
format!("{}:{}", location.input_name, location.line_number)
};
Err(USimpleError::new(
2,
format!(
"{}:{}:{}: {}:{} error: {}",
location.input_name,
location.line_number,
location.column_number,
"{}: {}:{} error: {}",
loc_str,
context.input_name,
context.line_number,
msg.to_string()
Expand Down
8 changes: 5 additions & 3 deletions src/sed/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,9 @@ fn build_context(matches: &ArgMatches) -> ProcessingContext {
in_place_suffix: matches
.get_one::<String>("in-place")
.and_then(|s| if s.is_empty() { None } else { Some(s.clone()) }),
length: matches.get_one::<u32>("length").map_or(70, |v| *v as usize),
// 0 means "not explicitly set via -l"; the l command then
// falls back to COLS, terminal width, or DEFAULT_OUTPUT_WIDTH.
length: matches.get_one::<u32>("length").map_or(0, |v| *v as usize),
quiet: matches.get_flag("quiet"),
posix: matches.get_flag("posix"),
separate: matches.get_flag("separate"),
Expand Down Expand Up @@ -334,7 +336,7 @@ mod tests {
assert!(!ctx.follow_symlinks);
assert!(!ctx.in_place);
assert_eq!(ctx.in_place_suffix, None);
assert_eq!(ctx.length, 70);
assert_eq!(ctx.length, 0);
assert!(!ctx.quiet);
assert!(!ctx.posix);
assert!(!ctx.separate);
Expand Down Expand Up @@ -403,7 +405,7 @@ mod tests {
let ctx_default = build_context(&matches_default);
let ctx_custom = build_context(&matches_custom);

assert_eq!(ctx_default.length, 70);
assert_eq!(ctx_default.length, 0);
assert_eq!(ctx_custom.length, 120);
}
}
16 changes: 12 additions & 4 deletions src/sed/script_line_provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,14 @@ impl ScriptLineProvider {
}
}

/// Report which script source is currently being read, as a zero-based index.
///
/// When the provider is in its active state the stored `index` is returned;
/// in every other state there is no current source and the result is 0.
pub fn get_source_index(&self) -> usize {
    let State::Active { index, .. } = &self.state else {
        return 0;
    };
    *index
}

/// Return the currently processed script descriptive name.
pub fn get_input_name(&self) -> &str {
match &self.state {
Expand Down Expand Up @@ -118,7 +126,7 @@ impl ScriptLineProvider {
self.state = State::Active {
index: next_index,
reader: Box::new(BufReader::new(cursor)),
input_name: format!("<script argument {}>", next_index + 1),
input_name: format!("-e expression #{}", next_index + 1),
line_number: 0,
};
}
Expand Down Expand Up @@ -268,23 +276,23 @@ mod tests {
if let Some(line) = provider.next_line().unwrap() {
assert_eq!(line.trim(), "l1");
assert_eq!(provider.get_line_number(), 1);
assert_eq!(provider.get_input_name(), "<script argument 1>");
assert_eq!(provider.get_input_name(), "-e expression #1");
} else {
panic!("Expected a line");
}

if let Some(line) = provider.next_line().unwrap() {
assert_eq!(line.trim(), "l2");
assert_eq!(provider.get_line_number(), 2);
assert_eq!(provider.get_input_name(), "<script argument 1>");
assert_eq!(provider.get_input_name(), "-e expression #1");
} else {
panic!("Expected a line");
}

if let Some(line) = provider.next_line().unwrap() {
assert_eq!(line.trim(), "l3");
assert_eq!(provider.get_line_number(), 1);
assert_eq!(provider.get_input_name(), "<script argument 2>");
assert_eq!(provider.get_input_name(), "-e expression #2");
} else {
panic!("Expected a line");
}
Expand Down
Loading
Loading