Skip to content

Commit

Permalink
fix(parser): calculate correct error column in presence of unicode (#353)
Browse files Browse the repository at this point in the history
  • Loading branch information
martinohmann authored May 24, 2024
1 parent c975e96 commit e23f90c
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 10 deletions.
24 changes: 14 additions & 10 deletions crates/hcl-edit/src/parser/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -115,33 +115,37 @@ impl Location {
self.offset
}
}

fn locate_error<'a>(err: &'a ParseError<Input<'a>, ContextError>) -> (&'a [u8], Location) {
let offset = err.offset();
let input = err.input().as_bytes();
let remaining_input = &input[offset..];
let consumed_input = &input[..offset];
let offset = err.offset().min(input.len() - 1);
let column_offset = err.offset() - offset;

// Find the line that includes the subslice:
// Find the *last* newline before the remaining input starts
let line_begin = consumed_input
// Find the start of the line containing the error.
let line_begin = input[..offset]
.iter()
.rev()
.position(|&b| b == b'\n')
.map_or(0, |pos| offset - pos);

// Find the full line after that newline
// Use the full line containing the error as context for later printing.
let line_context = input[line_begin..]
.iter()
.position(|&b| b == b'\n')
.map_or(&input[line_begin..], |pos| {
&input[line_begin..line_begin + pos]
});

// Count the number of newlines in the first `offset` bytes of input
let line = consumed_input.iter().filter(|&&b| b == b'\n').count() + 1;
// Count the number of newlines in the input before the line containing the error to calculate
// the line number.
let line = input[..line_begin].iter().filter(|&&b| b == b'\n').count() + 1;

// The (1-indexed) column number is the offset of the remaining input into that line.
let column = remaining_input.offset_from(&line_context) + 1;
// This also takes multi-byte unicode characters into account.
let column = std::str::from_utf8(&input[line_begin..=offset])
.map(|s| s.chars().count())
.unwrap_or_else(|_| offset - line_begin + 1)
+ column_offset;

(
line_context,
Expand Down
11 changes: 11 additions & 0 deletions crates/hcl-edit/tests/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -150,4 +150,15 @@ fn invalid_exprs() {
|
= expected `)` or expression"#}
);

assert_error!(
"unicodé_ident = '4",
indoc! {r#"
--> HCL parse error in line 1, column 17
|
1 | unicodé_ident = '4
| ^---
|
= invalid expression; expected `"`, `[`, `{`, `-`, `!`, `(`, `_`, `<`, letter or digit"#}
);
}

0 comments on commit e23f90c

Please sign in to comment.