forked from zaach/jison
-
Notifications
You must be signed in to change notification settings - Fork 20
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
a4324ab
commit d614873
Showing
1 changed file
with
378 additions
and
0 deletions.
There are no files selected for viewing
378 changes: 378 additions & 0 deletions
378
examples/codegen-feature-tester-no-default-action.jison
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,378 @@ | ||
|
||
/* | ||
* description: Parses and executes mathematical expressions. | ||
* Self-contained example which runs a series of tests in a performance benchmark: | ||
* see main() at the bottom | ||
*/ | ||
|
||
|
||
|
||
/* lexical grammar */ | ||
%lex | ||
|
||
// %options backtrack_lexer | ||
|
||
%s PERCENT_ALLOWED | ||
|
||
%% | ||
|
||
// `%`: the grammar is not LALR(1) unless we make the lexer smarter and have | ||
// it disambiguate the `%` between `percent` and `modulo` functionality by | ||
// additional look-ahead: | ||
// we introduce a lexical predicate here to disambiguate the `%` and thus | ||
// keep the grammar LALR(1)! | ||
// https://developer.mozilla.org/en/docs/Web/JavaScript/Guide/Regular_Expressions | ||
// we also use an (inclusive) lexical scope which turns this rule on only | ||
// immediately after a number was lexed previously. | ||
|
||
<PERCENT_ALLOWED>"%"(?=\s*(?:[^0-9)]|E\b|PI\b|$)) | ||
// followed by another operator, i.e. anything that's | ||
// not a number, or The End: then this is a unary | ||
// `percent` operator. | ||
// | ||
// `1%-2` would be ambiguous but isn't: the `-` is | ||
// considered as a unary minus and thus `%` is a | ||
// `modulo` operator. | ||
// | ||
// `1%*5` thus is treated the same: any operator | ||
// following the `%` is assumed to be a *binary* | ||
// operator. Hence `1% times 5` which brings us to | ||
// operators which only exist in unary form: `!`, and | ||
// values which are not numbers, e.g. `PI` and `E`: | ||
// how about | ||
// - `1%E` -> modulo E, | ||
// - `1%!0` -> modulo 1 (as !0 -> 1) | ||
// | ||
// Of course, the easier way to handle this would be to | ||
// keep the lexer itself dumb and put this additional | ||
// logic inside a post_lex handler which should then be | ||
// able to obtain additional look-ahead tokens and queue | ||
// them for later, while using those to inspect and | ||
// adjust the lexer output now -- a trick which is used | ||
// in the cockroachDB SQL parser code, for example. | ||
// | ||
// The above regex solution however is a more local | ||
// extra-lookahead solution and thus should cost us less | ||
// overhead than the suggested post_lex alternative, but | ||
// it comes at a cost itself: complex regex and | ||
// duplication of language knowledge in the lexer itself, | ||
// plus inclusion of *grammar* (syntactic) knowledge in | ||
// the lexer too, where it doesn't belong in an ideal | ||
// world... | ||
console.log('percent: ', yytext); | ||
return '%'; | ||
|
||
<PERCENT_ALLOWED>. | ||
this.popState(); | ||
this.unput(yytext); | ||
// this.unput(yytext); can be used here instead of | ||
// this.reject(); which would only work when we set the | ||
// backtrack_lexer option | ||
|
||
|
||
\s+ /* skip whitespace */ | ||
|
||
[0-9]+("."[0-9]+)?\b | ||
this.pushState('PERCENT_ALLOWED'); | ||
return 'NUMBER'; | ||
|
||
"*" return '*'; | ||
"/" return '/'; | ||
"-" return '-'; | ||
"+" return '+'; | ||
"^" return '^'; | ||
"!" return '!'; | ||
"%" return 'MOD'; | ||
"(" return '('; | ||
")" return ')'; | ||
"PI" return 'PI'; | ||
"E" return 'E'; | ||
<<EOF>> return 'EOF'; | ||
. return 'INVALID'; | ||
|
||
/lex | ||
|
||
|
||
|
||
|
||
|
||
/* operator associations and precedence */ | ||
|
||
%left '+' '-' | ||
%left MOD '*' '/' | ||
%right '^' // it really doesn't matter, but we ASSUME most expressions with chained power expressions, e.g. `10^3^2`, have the nearer-to-one(1) integer? values, which makes us guess it's slightly better, given the restrictions of floating point accuracy, to calculate the uppermost power part first, i.e. `3^2` instead of `10^3` in the given example. | ||
%right '!' | ||
%right '%' | ||
|
||
%token INVALID | ||
|
||
|
||
|
||
%start expressions | ||
|
||
%options parser-errors-are-recoverable lexer-errors-are-recoverable | ||
|
||
|
||
|
||
%% /* language grammar */ | ||
|
||
|
||
|
||
expressions | ||
: e EOF | ||
{ | ||
print('### expression result:', $1); | ||
// No need to `return $1;`: the value is automatically carried to the outside | ||
// (UNLESS it is 'undefined', in which case the parser is assumed | ||
// to be a recognizer, but that is not the case here!) | ||
$$ = $1; | ||
} | ||
| e error EOF | ||
{ | ||
//print('~~~ (...) error: ', { '$1': $1, '#1': #1, yytext: yytext, '$$': $$, '@$': @$, token: parser.describeSymbol(#$), 'yystack': yystack, 'yyvstack': yyvstack, 'yylstack': yylstack, last_error: yy.lastErrorMessage}); | ||
print('~~~', parser.describeSymbol(#error), ' error: ', { '$1': $1, '$2': $2, yytext: yytext, '@error': @error, token: parser.describeSymbol(#error)}, yy.lastErrorMessage); | ||
yyerrok; | ||
yyclearin; | ||
$$ = $e + 3; | ||
// ^-- every error recovery rule in this grammar adds a different value | ||
// so we can track which error rule(s) were executed during the parse | ||
// of (intentionally) erroneous test expressions. | ||
print($1, $2, $3, '==>', $$); | ||
} | ||
; | ||
|
||
e | ||
: e '+' e | ||
{ | ||
$$ = $1 + $3; | ||
print($1, $2, $3, '==>', $$); | ||
} | ||
| e '-' e | ||
{ | ||
$$ = $1 - $3; | ||
print($1, $2, $3, '==>', $$); | ||
} | ||
| m | ||
; | ||
|
||
m | ||
: m MOD m | ||
{ | ||
$$ = $1 % $3; | ||
print($1, $2, $3, '==>', $$); | ||
} | ||
| m '/' m | ||
{ | ||
$$ = $1 / $3; | ||
print($1, $2, $3, '==>', $$); | ||
} | ||
| m '*' m | ||
{ | ||
$$ = $1 * $3; | ||
print($1, $2, $3, '==>', $$); | ||
} | ||
| p | ||
; | ||
|
||
p | ||
: p '^' p | ||
{ | ||
$$ = Math.pow($1, $3); | ||
print($1, $2, $3, '==>', $$); | ||
} | ||
| u | ||
; | ||
|
||
u | ||
: u '!' // 'factorial' | ||
{ | ||
$$ = (function fact(n) { | ||
n = Math.max(0, n | 0); | ||
var rv = 1; | ||
for (var i = 2; i <= n; i++) { | ||
rv *= i; | ||
} | ||
return rv; | ||
})($u); | ||
print($1, $2, '==>', $$); | ||
} | ||
| '!' u // 'not' | ||
{ | ||
$$ = ($u ? 0 : 1); | ||
print($1, $2, '==>', $$); | ||
} | ||
// the PERCENT `%` operator only accepts direct values with optional sign: | ||
| NUMBER '%' | ||
{ | ||
$$ = $NUMBER / 100; | ||
print($1, $2, '==>', $$); | ||
} | ||
| '-' u // doesn't need the `%prec UMINUS` tweak as the grammar ruleset enforces the precedence implicitly | ||
{ | ||
$$ = -$u; | ||
print($1, $2, '==>', $$); | ||
} | ||
| '+' u // doesn't need the `%prec UMINUS` tweak as the grammar ruleset enforces the precedence implicitly | ||
{ | ||
$$ = $u; | ||
print($1, $2, '==>', $$); | ||
} | ||
| '(' e ')' | ||
{ | ||
$$ = $2; | ||
print($1, $2, $3, '==>', $$); | ||
} | ||
| v | ||
; | ||
|
||
v | ||
: NUMBER | ||
{ | ||
$$ = Number(yytext); | ||
print($1, '==>', $$); | ||
} | ||
| E | ||
{ | ||
$$ = Math.E; | ||
print($1, '==>', $$); | ||
} | ||
| PI | ||
{ | ||
$$ = Math.PI; | ||
print($1, '==>', $$); | ||
} | ||
| error | ||
{ | ||
//print('~~~ (...) error: ', { '$1': $1, '#1': #1, yytext: yytext, '$$': $$, '@$': @$, token: parser.describeSymbol(#$), 'yystack': yystack, 'yyvstack': yyvstack, 'yylstack': yylstack, last_error: yy.lastErrorMessage}); | ||
print('~~~', parser.describeSymbol(#$), ' error: ', { '$1': $1, yytext: yytext, '@$': @$, token: parser.describeSymbol(#$), 'yyvstack': yyvstack }, yy.lastErrorMessage, yy.lastErrorHash.token, yysp); | ||
yyerrok; | ||
//yyclearin; | ||
$$ = 5; | ||
// ^-- every error recovery rule in this grammar adds a different value | ||
// so we can track which error rule(s) were executed during the parse | ||
// of (intentionally) erroneous test expressions. | ||
print($1, '==>', $$); | ||
} | ||
; | ||
|
||
|
||
|
||
|
||
|
||
// ---------------------------------------------------------------------------------------- | ||
|
||
%% | ||
// feature of the GH fork: specify your own main. | ||
// | ||
// compile with | ||
// | ||
// jison -o test.js --main that/will/be/me.jison | ||
// | ||
// then run | ||
// | ||
// node ./test.js | ||
// | ||
// to see the output. | ||
var assert = require("assert"); | ||
var print = (typeof console !== 'undefined' ? function __print__() { | ||
console.log.apply(null, [' '].concat(Array.prototype.slice.call(arguments, 0))); | ||
} : function __dummy__() {}); | ||
parser.pre_parse = function (yy) { | ||
print("parsing: ", yy.lexer.upcomingInput(-1 /* i.e. produce the entire (unparsed) input string */)); | ||
parser.lexer.options.post_lex = function (token) { | ||
print("lex() ==> ", token, '[' + this.yytext + ']', parser.describeSymbol(token)); | ||
}; | ||
}; | ||
if (0) { | ||
parser.trace = function () { | ||
print.apply(null, ['TRACE: '].concat(Array.prototype.slice.call(arguments, 0))); | ||
}; | ||
} | ||
parser.yy.parseError = function parseError(str, hash, ExceptionClass) { | ||
assert(hash.yy); | ||
assert(this); | ||
assert(this !== parser.yy); | ||
assert(this === hash.yy.parser || this === hash.yy.lexer); | ||
if (hash.recoverable) { | ||
hash.yy.parser.trace(str); | ||
hash.yy.lastErrorMessage = str; | ||
hash.yy.lastErrorHash = hash; | ||
} else { | ||
console.error(str, hash && hash.exception); | ||
throw new ExceptionClass(str, hash); | ||
} | ||
}; | ||
%include benchmark.js | ||
parser.main = function () { | ||
print("Running benchmark..."); | ||
var t1 = perf.start(); | ||
var basenum = 1; | ||
function test() { | ||
const formulas_and_expectations = [ | ||
basenum + '+2*(3-5--+--+6!)-7/-8%', NaN, | ||
basenum + '+2*0.7%^PI^2+4+5', NaN, /* this bets on JS floating point calculations discarding the small difference with this integer value... */ | ||
basenum + '+(2+3*++++)+5+6+7+8+9 9', NaN, | ||
basenum + '+2*(3!-5!-6!)/7/8', NaN, | ||
]; | ||
basenum++; | ||
for (var i = 0, len = formulas_and_expectations.length; i < len; i += 2) { | ||
var formula = formulas_and_expectations[i]; | ||
var expectation = formulas_and_expectations[i + 1]; | ||
var rv = parser.parse(formula); | ||
print("'" + formula + "' ==> ", rv, "\n"); | ||
if (isNaN(rv) && isNaN(expectation)) { | ||
assert(1); | ||
} else { | ||
assert.equal(rv, expectation); | ||
} | ||
} | ||
return formulas_and_expectations.length / 2; | ||
} | ||
if (0) { | ||
print = function dummy() {}; | ||
} | ||
if (01) { | ||
test(); | ||
} else { | ||
bench(test); | ||
} | ||
// if you get past the assert(), you're good. | ||
print("tested OK @", r(perf.mark(), 2), " ms"); | ||
}; | ||