Skip to content

Commit

Permalink
Significant refactoring of grammar (#45)
Browse files Browse the repository at this point in the history
This is a large batch of changes that significantly improve output:

- Fix string and backtick parsing
- Add support for escape sequences
- Simplify recipe matching
- Fix single variadic parameter handling
- Simplify shebang parsing
- Add recipe line prefixes
- Fix handling of a leading `/` in paths
- Add more labels everywhere
- Update highlighting where needed
- Add a lot more highlighting tests
- Add script to convert from tree-sitter flavor bindings to nvim
- Verify no tests include parsed errors
  • Loading branch information
tgross35 authored Jan 4, 2024
1 parent 4a34816 commit ba36c85
Show file tree
Hide file tree
Showing 18 changed files with 8,222 additions and 5,119 deletions.
25 changes: 21 additions & 4 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ on:
pull_request:
push:


jobs:
deno:
name: Deno format and lint
Expand Down Expand Up @@ -37,9 +38,25 @@ jobs:
git diff pre-updates --exit-code || failed=true
if ! [ "$failed" = "false" ]; then
echo '::error::Generated files are out of date!'
echo '::error::run `npm run gen` and commit the changes'
exit "$exitcode"
echo '::warning::Generated files are out of date!'
echo '::warning::run `npm run gen` and commit the changes'
fi
- run: npm test
# FIXME: also parse tests/*.just

static-validation:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: taiki-e/install-action@just
- name: Verify Just can parse test files
shell: bash
run: |
# skip readme.just because it is broken but works for testing
find . -type f -iregex '.*[\./]just[^\./]*' |
grep -v readme.just | while read -r fname
do
echo checking "$fname"
just --list --unstable --justfile "$fname"
done
- name: Look for tests that contain errors
run: "! grep -r -E '(ERROR|MISSING|UNEXPECTED)' test"
21 changes: 21 additions & 0 deletions build-queries-nvim.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/usr/bin/env python3
"""Generate NeoVim queries from the tree-sitter ones.

The files under `queries/` are written in tree-sitter syntax, as defined at
<https://tree-sitter.github.io/tree-sitter/syntax-highlighting#theme>. NVim,
however, uses a slightly different syntax.

Converting here means two sets of files don't need to be maintained.

<https://docs.helix-editor.com/master/themes.html#syntax-highlighting> is a
somewhat better guide for these parameters than the tree-sitter documentation.
"""

# Ordered pairs of (tree-sitter capture name, NeoVim capture name).
# Presumably each pair is applied as a textual substitution over the query
# files -- the code that consumes this list is not part of this block.
REPLACEMENTS = [
    ("@local.definition", "@definition.variable"),
    # NOTE(review): Helix documents this scope as `constant.builtin.boolean`
    # (singular "constant") -- confirm this matches what `queries/` emits.
    ("@constants.builtin.boolean", "@boolean"),
    ("@keyword.control.conditional", "@conditional"),
    ("@variable.parameter", "@parameter"),
    ("@comment.line", "@comment"),
]
215 changes: 129 additions & 86 deletions grammar.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,26 @@ function array(item) {
"array",
seq(
"[",
optional(seq(comma_sep1(array_item), optional(array_item))),
field(
"contents",
optional(seq(comma_sep1(array_item), optional(array_item))),
),
"]",
),
);
}

module.exports = grammar({
name: "just",

externals: ($) => [$.INDENT, $.DEDENT, $.NEWLINE, $.LINE],
externals: ($) => [$._indent, $._dedent, $._newline],
inline: (
$,
) => [
$._dependency_with_args,
$._expression_braced,
$._expression_recurse,
],
word: ($) => $.identifier,

rules: {
// justfile : item* EOF
Expand Down Expand Up @@ -49,14 +59,25 @@ module.exports = grammar({

// eol : NEWLINE
// | COMMENT NEWLINE
eol: ($) => choice($.NEWLINE, $.comment),
eol: ($) => choice($._newline, $.comment),

// alias : 'alias' NAME ':=' NAME
alias: ($) =>
seq("alias", field("left", $.NAME), ":=", field("right", $.NAME)),
seq(
"alias",
field("left", $.identifier),
":=",
field("right", $.identifier),
),

// assignment : NAME ':=' expression eol
assignment: ($) => seq($.NAME, ":=", $.expression, $.eol),
assignment: ($) =>
seq(
field("left", $.identifier),
":=",
field("right", $.expression),
$.eol,
),

// export : 'export' assignment
export: ($) => seq("export", $.assignment),
Expand All @@ -65,7 +86,7 @@ module.exports = grammar({
import: ($) => seq("import", optional("?"), $.string),

// module : 'mod' '?'? string?
module: ($) => seq("mod", optional("?"), $.NAME, optional($.string)),
module: ($) => seq("mod", optional("?"), $.identifier, optional($.string)),

// setting : 'set' 'dotenv-load' boolean?
// | 'set' 'export' boolean?
Expand All @@ -75,7 +96,7 @@ module.exports = grammar({
choice(
seq(
"set",
field("name", $.NAME),
field("left", $.identifier),
field(
"right",
optional(
Expand All @@ -100,24 +121,31 @@ module.exports = grammar({
// | value '/' expression
// | value '+' expression
// | value
expression: ($) =>
expression: ($) => seq(optional("/"), $._expression_inner),

_expression_inner: ($) =>
choice(
seq(
"if",
$.condition,
"{",
field("if", $.expression),
"}",
"else",
"{",
field("else", $.expression),
"}",
),
seq($.value, "+", $.expression),
seq($.value, "/", $.expression),
$.if_expression,
prec.left(2, seq($._expression_recurse, "+", $._expression_recurse)),
prec.left(1, seq($._expression_recurse, "/", $._expression_recurse)),
$.value,
),

// We can't mark `_expression_inner` inline because it causes an infinite
// loop at generation, so we just alias it.
_expression_recurse: ($) => alias($._expression_inner, "expression"),

if_expression: ($) =>
seq(
"if",
$.condition,
$._braced_expr,
repeat(seq("else", "if", $.condition, $._braced_expr)),
optional(seq("else", $._braced_expr)),
),

_braced_expr: ($) => seq("{", field("braced_body", $.expression), "}"),

// condition : expression '==' expression
// | expression '!=' expression
// | expression '=~' expression
Expand All @@ -126,6 +154,8 @@ module.exports = grammar({
seq($.expression, "==", $.expression),
seq($.expression, "!=", $.expression),
seq($.expression, "=~", $.expression),
// verify whether this is valid
$.expression,
),

// value : NAME '(' sequence? ')'
Expand All @@ -136,124 +166,137 @@ module.exports = grammar({
// | '(' expression ')'
value: ($) =>
prec.left(
0,
choice($.call, $.cmd, $.NAME, $.string, seq("(", $.expression, ")")),
choice(
$.function_call,
$.command,
$.identifier,
$.string,
seq("(", $.expression, ")"),
),
),

function_call: ($) =>
seq(
field("name", $.identifier),
"(",
field("arguments", optional($.sequence)),
")",
),

call: ($) => seq($.NAME, "(", optional($.sequence), ")"),
command: ($) => choice(seq($.backticked), seq($.indented_backticked)),

cmd: ($) => choice(seq($.BACKTICK), seq($.INDENTED_BACKTICK)),
// sequence : expression ',' sequence
// | expression ','?
sequence: ($) => comma_sep1($.expression),

// string : STRING
// | INDENTED_STRING
// | RAW_STRING
// | INDENTED_RAW_STRING
string: ($) =>
choice($.STRING, $.INDENTED_STRING, $.RAW_STRING, $.INDENTED_RAW_STRING),

// sequence : expression ',' sequence
// | expression ','?
sequence: ($) =>
choice(
seq($.expression, ",", $.sequence),
seq($.expression, optional(",")),
$.basic_string,
$.basic_string_indented,
$.raw_string,
$.raw_string_indented,
),

attribute: ($) => seq("[", $.NAME, "]", $.eol),
attribute: ($) =>
seq("[", field("contents", comma_sep1($.identifier)), "]", $.eol),

// A complete recipe
// recipe : attribute? '@'? NAME parameter* variadic_parameters? ':' dependency* body?
recipe: ($) =>
seq(optional($.attribute), $.recipe_header, $.NEWLINE, optional($.body)),
seq(
repeat($.attribute),
$.recipe_header,
$._newline,
optional($.recipe_body),
),

recipe_header: ($) =>
seq(
optional("@"),
$.NAME,
$.identifier,
optional($.parameters),
":",
optional(" "),
optional($.dependencies),
repeat($.dependency),
),

parameters: ($) =>
seq(repeat1($.parameter), optional($.variadic_parameters)),
seq(repeat($.parameter), choice($.parameter, $.variadic_parameter)),

// parameter : '$'? NAME
// | '$'? NAME '=' value
parameter: ($) =>
choice(
seq(optional("$"), $.NAME),
seq(optional("$"), $.NAME, "=", $.value),
seq(
optional("$"),
field("param", $.identifier),
optional(seq("=", field("default", $.value))),
),

// variadic_parameters : '*' parameter
// | '+' parameter
variadic_parameters: ($) =>
choice(seq("*", $.parameter), seq("+", $.parameter)),
variadic_parameter: ($) =>
seq(field("kleene", choice("*", "+")), $.parameter),

dependencies: ($) => repeat1($.dependency),

// dependency : NAME
// | '(' NAME expression* ')'
dependency: ($) => choice($.NAME, seq("(", $.depcall, ")")),
dependency: ($) =>
choice(
field("recipe", $.identifier),
field("call", seq("(", $._dependency_with_args, ")")),
),

depcall: ($) => seq($.NAME, repeat($.expression)),
// contents of `(recipe expression)`
_dependency_with_args: ($) =>
seq(
field("recipe", $.identifier),
repeat(field("expression", $.expression)),
),

// body : INDENT line+ DEDENT
body: ($) =>
recipe_body: ($) =>
seq(
$.INDENT,
choice($.shebang_recipe, optional($.recipe_body)),
$.DEDENT,
$._indent,
field("contents", seq(optional($.shebang), repeat($.recipe_line))),
$._dedent,
),
// seq($.INDENT, $.recipebody, $.DEDENT),

shebang_recipe: ($) => seq($.shebang, $.shebang_body),
shebang_body: ($) => repeat1($.line),

shebang: ($) =>
recipe_line: ($) =>
seq(
"#!",
/.*/,
// choice(
// seq(/.*\//, field("interpreter", $.TEXT)),
// seq("/usr/bin/env", field("interpreter", $.TEXT)),
// ),
$.NEWLINE,
optional($.recipe_line_prefix),
repeat(choice($.text, $.interpolation)),
$._newline,
),

recipe_body: ($) => repeat1($.line),
recipe_line_prefix: (_) => choice("@-", "-@", "@", "-"),

line: ($) => choice($.comment, $.recipeline),
// line: ($) => choice($.comment, $.recipeline, $.shebang),
shebang: ($) => seq(/\s*#!.*/, $._newline),

// FIXME: detecting interpolation doesn't work
recipeline: ($) =>
seq(
$.notcomment,
// repeat(choice($.interpolation, $.notinterpolation)),
repeat(choice($.interpolation, $.TEXT)),
$.NEWLINE,
),
// notcomment: ($) => /[^#\s{]\S*/,
notcomment: (_) => /[^#\s]\S*/,
comment: ($) => seq(/#.*/, $.NEWLINE),
// `# ...` comment
comment: ($) => seq(/#.*/, $._newline),

// notinterpolation: ($) => /[^{][^{]\S*/,
notinterpolation: (_) => /[^\s{][^\s{]\S*/,

// interpolation : '{{' expression '}}'
interpolation: ($) => seq("{{", $.expression, "}}"),

BACKTICK: (_) => /`[^`]*`/,
INDENTED_BACKTICK: (_) => /```[^(```)]*```/,
// COMMENT: (_) => /\#([^!].*)?/, // /\#([^!].*)?$/, // FIXME: '$' Regex assertions not supported, could cause misparses
NAME: (_) => /[a-zA-Z_][a-zA-Z0-9_-]*/,
RAW_STRING: (_) => /'[^']*'/,
INDENTED_RAW_STRING: (_) => /'''[^(''')]*'''/,

STRING: (_) => /"[^"]*"/, // # also processes \n \r \t \" \\ escapes
INDENTED_STRING: (_) => /"""[^("""]*"""/, // # also processes \n \r \t \" \\ escapes
TEXT: (_) => /\S+/, //recipe TEXT, only matches in a recipe body
identifier: (_) => /[a-zA-Z_][a-zA-Z0-9_-]*/,

backticked: (_) => seq("`", repeat(/./), "`"),
indented_backticked: (_) => seq("```", repeat(/./), "```"),
raw_string: (_) => /'[^']*'/,
raw_string_indented: (_) => seq("'''", repeat(/./), "'''"),
basic_string: ($) =>
seq('"', repeat(choice($.string_escape, /[^\\"]+/)), '"'),
basic_string_indented: ($) =>
seq('"""', repeat(choice($.string_escape, /[^\\"]+/)), '"""'),
string_escape: (_) => /\\[nrt"\\]/,
text: (_) => /.+/, //recipe TEXT, only matches in a recipe body
// text: (_) => /\S+/, //recipe TEXT, only matches in a recipe body
},
});
Loading

0 comments on commit ba36c85

Please sign in to comment.