Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(parser): switch from &[u8] to &str based stream #351

Merged
merged 1 commit into from
May 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions crates/hcl-edit/src/parser/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -115,10 +115,9 @@ impl Location {
self.offset
}
}

fn locate_error<'a>(err: &ParseError<Input<'a>, ContextError>) -> (&'a [u8], Location) {
fn locate_error<'a>(err: &'a ParseError<Input<'a>, ContextError>) -> (&'a [u8], Location) {
let offset = err.offset();
let input = err.input();
let input = err.input().as_bytes();
let remaining_input = &input[offset..];
let consumed_input = &input[..offset];

Expand Down
207 changes: 102 additions & 105 deletions crates/hcl-edit/src/parser/expr.rs

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions crates/hcl-edit/src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ mod prelude {
pub(super) use winnow::stream::Stream;
pub(super) use winnow::{dispatch, PResult, Parser};

pub(super) type Input<'a> = winnow::stream::Located<&'a [u8]>;
pub(super) type Input<'a> = winnow::stream::Located<&'a str>;
}

use self::prelude::*;
Expand Down Expand Up @@ -67,7 +67,7 @@ fn parse_complete<'a, P, O>(input: &'a str, mut parser: P) -> Result<O, Error>
where
P: Parser<Input<'a>, O, ContextError>,
{
let input = Input::new(input.as_bytes());
let input = Input::new(input);

parser
.parse(input)
Expand Down
22 changes: 6 additions & 16 deletions crates/hcl-edit/src/parser/number.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
use super::prelude::*;

use super::string::from_utf8_unchecked;

use crate::Number;

use std::str::FromStr;
Expand All @@ -18,27 +16,19 @@ pub(super) fn number(input: &mut Input) -> PResult<Number> {
}

fn integer(input: &mut Input) -> PResult<u64> {
digit1
.try_map(|s: &[u8]| {
u64::from_str(unsafe { from_utf8_unchecked(s, "`digit1` filters out non-ascii") })
})
.parse_next(input)
digit1.try_map(|s: &str| u64::from_str(s)).parse_next(input)
}

fn float(input: &mut Input) -> PResult<f64> {
let fraction = preceded(b'.', digit1);
let fraction = preceded('.', digit1);

terminated(digit1, alt((terminated(fraction, opt(exponent)), exponent)))
.recognize()
.try_map(|s: &[u8]| {
f64::from_str(unsafe {
from_utf8_unchecked(s, "`digit1` and `exponent` filter out non-ascii")
})
})
.try_map(|s: &str| f64::from_str(s))
.parse_next(input)
}

fn exponent<'a>(input: &mut Input<'a>) -> PResult<&'a [u8]> {
fn exponent<'a>(input: &mut Input<'a>) -> PResult<&'a str> {
(
one_of(b"eE"),
opt(one_of(b"+-")),
Expand All @@ -63,7 +53,7 @@ mod tests {
];

for (input, expected) in tests {
let parsed = integer.parse(Input::new(input.as_bytes()));
let parsed = integer.parse(Input::new(input));
assert!(parsed.is_ok(), "expected `{input}` to parse correctly");
assert_eq!(parsed.unwrap(), *expected);
}
Expand All @@ -81,7 +71,7 @@ mod tests {
];

for (input, expected) in tests {
let parsed = float.parse(Input::new(input.as_bytes()));
let parsed = float.parse(Input::new(input));
assert!(parsed.is_ok(), "expected `{input}` to parse correctly");
assert_eq!(parsed.unwrap(), *expected);
}
Expand Down
74 changes: 21 additions & 53 deletions crates/hcl-edit/src/parser/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@ use super::trivia::void;

use crate::{Decorated, Ident, RawString};

use hcl_primitives::ident::{is_id_continue, is_id_start};
use std::borrow::Cow;
use winnow::combinator::{alt, cut_err, delimited, empty, fail, not, opt, preceded, repeat};
use winnow::stream::AsChar;
use winnow::token::{any, one_of, take, take_while};

pub(super) fn string(input: &mut Input) -> PResult<String> {
delimited(b'"', opt(build_string(quoted_string_fragment)), b'"')
delimited('"', opt(build_string(quoted_string_fragment)), '"')
.map(Option::unwrap_or_default)
.output_into()
.parse_next(input)
Expand Down Expand Up @@ -102,8 +102,8 @@ where
/// interpolation/directive start markers.
fn string_literal<'a>(input: &mut Input<'a>) -> PResult<&'a str> {
let literal_end = dispatch! {any;
b'\"' | b'\\' => empty.value(true),
b'$' | b'%' => b'{'.value(true),
'\"' | '\\' => empty.value(true),
'$' | '%' => '{'.value(true),
_ => fail,
};
any_until(literal_end).parse_next(input)
Expand All @@ -118,35 +118,34 @@ where
preceded(not(alt((escaped_marker.void(), literal_end.void()))), any),
))
.recognize()
.try_map(std::str::from_utf8)
}

/// Parse an escaped start marker for a template interpolation or directive.
fn escaped_marker(input: &mut Input) -> PResult<EscapedMarker> {
dispatch! {take::<_, Input, _>(3usize);
b"$${" => empty.value(EscapedMarker::Interpolation),
b"%%{" => empty.value(EscapedMarker::Directive),
"$${" => empty.value(EscapedMarker::Interpolation),
"%%{" => empty.value(EscapedMarker::Directive),
_ => fail,
}
.parse_next(input)
}

/// Parse an escaped character: `\n`, `\t`, `\r`, `\u00AC`, etc.
fn escaped_char(input: &mut Input) -> PResult<char> {
b'\\'.parse_next(input)?;
'\\'.parse_next(input)?;

dispatch! {any;
b'n' => empty.value('\n'),
b'r' => empty.value('\r'),
b't' => empty.value('\t'),
b'\\' => empty.value('\\'),
b'"' => empty.value('"'),
b'/' => empty.value('/'),
b'b' => empty.value('\u{08}'),
b'f' => empty.value('\u{0C}'),
b'u' => cut_err(hexescape::<4>)
'n' => empty.value('\n'),
'r' => empty.value('\r'),
't' => empty.value('\t'),
'\\' => empty.value('\\'),
'"' => empty.value('"'),
'/' => empty.value('/'),
'b' => empty.value('\u{08}'),
'f' => empty.value('\u{0C}'),
'u' => cut_err(hexescape::<4>)
.context(StrContext::Label("unicode 4-digit hex code")),
b'U' => cut_err(hexescape::<8>)
'U' => cut_err(hexescape::<8>)
.context(StrContext::Label("unicode 8-digit hex code")),
_ => cut_err(fail)
.context(StrContext::Label("escape sequence"))
Expand All @@ -165,14 +164,9 @@ fn escaped_char(input: &mut Input) -> PResult<char> {

fn hexescape<const N: usize>(input: &mut Input) -> PResult<char> {
let parse_hex =
take_while(1..=N, |c: u8| c.is_ascii_hexdigit()).verify(|hex: &[u8]| hex.len() == N);
take_while(1..=N, |c: char| c.is_ascii_hexdigit()).verify(|hex: &str| hex.len() == N);

let parse_u32 = parse_hex.try_map(|hex: &[u8]| {
u32::from_str_radix(
unsafe { from_utf8_unchecked(hex, "`is_ascii_hexdigit` filters out non-ascii") },
16,
)
});
let parse_u32 = parse_hex.try_map(|hex: &str| u32::from_str_radix(hex, 16));

parse_u32.verify_map(std::char::from_u32).parse_next(input)
}
Expand All @@ -193,19 +187,14 @@ pub(super) fn ident(input: &mut Input) -> PResult<Decorated<Ident>> {
pub(super) fn str_ident<'a>(input: &mut Input<'a>) -> PResult<&'a str> {
(one_of(is_id_start), take_while(0.., is_id_continue))
.recognize()
.map(|s: &[u8]| unsafe {
from_utf8_unchecked(s, "`is_id_start` and `is_id_continue` filter out non-utf8")
})
.parse_next(input)
}

pub(super) fn cut_char<'a>(c: char) -> impl Parser<Input<'a>, char, ContextError> {
cut_err(c)
.map(AsChar::as_char)
.context(StrContext::Expected(StrContextValue::CharLiteral(c)))
cut_err(c).context(StrContext::Expected(StrContextValue::CharLiteral(c)))
}

pub(super) fn cut_tag<'a>(tag: &'static str) -> impl Parser<Input<'a>, &'a [u8], ContextError> {
pub(super) fn cut_tag<'a>(tag: &'static str) -> impl Parser<Input<'a>, &'a str, ContextError> {
cut_err(tag).context(StrContext::Expected(StrContextValue::StringLiteral(tag)))
}

Expand All @@ -224,24 +213,3 @@ pub(super) fn cut_str_ident<'a>(input: &mut Input<'a>) -> PResult<&'a str> {
)))
.parse_next(input)
}

#[inline]
pub(super) fn is_id_start(b: u8) -> bool {
hcl_primitives::ident::is_id_start(b.as_char())
}

#[inline]
fn is_id_continue(b: u8) -> bool {
hcl_primitives::ident::is_id_continue(b.as_char())
}

pub(super) unsafe fn from_utf8_unchecked<'b>(
bytes: &'b [u8],
safety_justification: &'static str,
) -> &'b str {
if cfg!(debug_assertions) {
std::str::from_utf8(bytes).expect(safety_justification)
} else {
std::str::from_utf8_unchecked(bytes)
}
}
9 changes: 5 additions & 4 deletions crates/hcl-edit/src/parser/structure.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@ use super::prelude::*;
use super::expr::expr;
use super::repr::{decorated, prefix_decorated, suffix_decorated};
use super::state::BodyParseState;
use super::string::{cut_char, cut_str_ident, ident, is_id_start, raw_string, string};
use super::string::{cut_char, cut_str_ident, ident, raw_string, string};
use super::trivia::{line_comment, sp, void, ws};

use crate::expr::Expression;
use crate::structure::{Attribute, Block, BlockLabel, Body, Structure};
use crate::{Decorate, Decorated, Ident, SetSpan};

use hcl_primitives::ident::is_id_start;
use std::cell::RefCell;
use winnow::ascii::line_ending;
use winnow::combinator::{
Expand Down Expand Up @@ -64,7 +65,7 @@ fn structure<'i, 's>(
let suffix = raw_string(sp).parse_next(input)?;

let mut structure = match peek(any).parse_next(input)? {
b'=' => {
'=' => {
if state.borrow_mut().is_redefined(ident) {
input.reset(&checkpoint);
return cut_err(fail)
Expand All @@ -82,7 +83,7 @@ fn structure<'i, 's>(
let attr = Attribute::new(ident, expr);
Structure::Attribute(attr)
}
b'{' => {
'{' => {
let body = block_body(input)?;
let mut ident = Decorated::new(Ident::new_unchecked(ident));
ident.decor_mut().set_suffix(suffix);
Expand All @@ -91,7 +92,7 @@ fn structure<'i, 's>(
block.body = body;
Structure::Block(block)
}
ch if ch == b'"' || is_id_start(ch) => {
ch if ch == '"' || is_id_start(ch) => {
let labels = block_labels(input)?;
let body = block_body(input)?;
let mut ident = Decorated::new(Ident::new_unchecked(ident));
Expand Down
38 changes: 16 additions & 22 deletions crates/hcl-edit/src/parser/template.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ use super::prelude::*;
use super::expr::expr;
use super::repr::{decorated, spanned};
use super::string::{
build_string, cut_char, cut_ident, cut_tag, from_utf8_unchecked, quoted_string_fragment,
raw_string, template_string_fragment,
build_string, cut_char, cut_ident, cut_tag, quoted_string_fragment, raw_string,
template_string_fragment,
};
use super::trivia::ws;

Expand All @@ -19,13 +19,13 @@ use winnow::ascii::{line_ending, space0};
use winnow::combinator::{alt, delimited, opt, preceded, repeat, separated_pair, terminated};

pub(super) fn string_template(input: &mut Input) -> PResult<StringTemplate> {
delimited(b'"', elements(build_string(quoted_string_fragment)), b'"')
delimited('"', elements(build_string(quoted_string_fragment)), '"')
.output_into()
.parse_next(input)
}

pub(super) fn template(input: &mut Input) -> PResult<Template> {
let literal_end = alt((b"${", b"%{"));
let literal_end = alt(("${", "%{"));
let literal = template_literal(literal_end);
elements(literal).output_into().parse_next(input)
}
Expand All @@ -45,15 +45,12 @@ pub(super) fn heredoc_template<'a>(
// Handling this case via parser combinators is quite tricky and thus we'll manually add
// the line ending to the last template element below.
let heredoc_end = (line_ending, space0, delim).recognize();
let literal_end = alt((b"${", b"%{", heredoc_end));
let literal_end = alt(("${", "%{", heredoc_end));
let literal = template_literal(literal_end);

// Use `opt` to handle an empty template.
opt((elements(literal), line_ending.with_span()).map(
|(mut elements, (line_ending, line_ending_span))| {
let line_ending = unsafe {
from_utf8_unchecked(line_ending, "`line_ending` filters out non-ascii")
};
// If there is a trailing literal, update its span and append the line ending to
// it. Otherwise just add a new literal containing only the line ending.
if let Some(Element::Literal(lit)) = elements.last_mut() {
Expand Down Expand Up @@ -97,7 +94,7 @@ where
}

fn interpolation(input: &mut Input) -> PResult<Interpolation> {
control(b"${", decorated(ws, expr, ws))
control("${", decorated(ws, expr, ws))
.map(|(expr, strip)| {
let mut interp = Interpolation::new(expr);
interp.strip = strip;
Expand All @@ -117,8 +114,8 @@ fn directive(input: &mut Input) -> PResult<Directive> {
fn if_directive(input: &mut Input) -> PResult<IfDirective> {
let if_expr = (
control(
b"%{",
(terminated(raw_string(ws), b"if"), decorated(ws, expr, ws)),
"%{",
(terminated(raw_string(ws), "if"), decorated(ws, expr, ws)),
),
spanned(template),
)
Expand All @@ -130,10 +127,7 @@ fn if_directive(input: &mut Input) -> PResult<IfDirective> {
});

let else_expr = (
control(
b"%{",
separated_pair(raw_string(ws), b"else", raw_string(ws)),
),
control("%{", separated_pair(raw_string(ws), "else", raw_string(ws))),
spanned(template),
)
.map(|(((preamble, trailing), strip), template)| {
Expand All @@ -145,7 +139,7 @@ fn if_directive(input: &mut Input) -> PResult<IfDirective> {
});

let endif_expr = control(
b"%{",
"%{",
separated_pair(raw_string(ws), cut_tag("endif"), raw_string(ws)),
)
.map(|((preamble, trailing), strip)| {
Expand All @@ -164,11 +158,11 @@ fn if_directive(input: &mut Input) -> PResult<IfDirective> {
fn for_directive(input: &mut Input) -> PResult<ForDirective> {
let for_expr = (
control(
b"%{",
"%{",
(
terminated(raw_string(ws), b"for"),
terminated(raw_string(ws), "for"),
decorated(ws, cut_ident, ws),
opt(preceded(b',', decorated(ws, cut_ident, ws))),
opt(preceded(',', decorated(ws, cut_ident, ws))),
preceded(cut_tag("in"), decorated(ws, expr, ws)),
),
),
Expand All @@ -189,7 +183,7 @@ fn for_directive(input: &mut Input) -> PResult<ForDirective> {
);

let endfor_expr = control(
b"%{",
"%{",
separated_pair(raw_string(ws), cut_tag("endfor"), raw_string(ws)),
)
.map(|((preamble, trailing), strip)| {
Expand All @@ -214,9 +208,9 @@ where
P: Parser<Input<'a>, O2, ContextError>,
{
(
preceded(intro, opt(b'~')),
preceded(intro, opt('~')),
inner,
terminated(opt(b'~'), cut_char('}')),
terminated(opt('~'), cut_char('}')),
)
.map(|(strip_start, output, strip_end)| {
(
Expand Down
Loading
Loading