-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlexer.mll
120 lines (106 loc) · 3.53 KB
/
lexer.mll
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
{
open Lexing
open Ast
open Parser
open Errors
open Int32
(* Raised by the lexing rules of this file, with a short message, when the
   input cannot be tokenized. *)
exception Lexing_error of string
(* Not raised by any rule visible in this file: the token rule reports the
   end of input through the EOF token instead. *)
exception Eof
(* Reserved words of the language, each paired with the parser token it
   produces. *)
let kwd_tbl =
  [ "char",   CHAR;
    "else",   ELSE;
    "for",    FOR;
    "if",     IF;
    "int",    INT;
    "return", RETURN;
    "sizeof", SIZEOF;
    "struct", STRUCT;
    "union",  UNION;
    "void",   VOID;
    "while",  WHILE ]

(* [id_or_kwd s] returns the keyword token bound to [s] in [kwd_tbl], or
   [IDENT s] when [s] is not a reserved word.
   Only [Not_found] is handled: a catch-all handler would also turn
   unrelated exceptions into an identifier token. *)
let id_or_kwd s =
  try List.assoc s kwd_tbl with Not_found -> IDENT s
(* Accumulator for the contents of the string literal currently being read
   by the tokstring rule; it holds the empty string between literals. *)
let localstring : string ref = ref ""
(* [newline lexbuf] records a line break in the current position of
   [lexbuf]: the line number is incremented and the beginning-of-line
   offset is set to the current character offset. *)
let newline lexbuf =
  let here = lexbuf.lex_curr_p in
  let next_line = here.pos_lnum + 1 in
  lexbuf.lex_curr_p <-
    { here with pos_lnum = next_line; pos_bol = here.pos_cnum }
(* [pr line] writes [line] followed by a line feed on standard output. *)
let pr line = Printf.printf "%s\n" line
}
(* Named regular expressions shared by the rules below. *)
let chiffre = ['0'-'9']                  (* decimal digit *)
let pchif   = ['1'-'9']                  (* non-zero decimal digit *)
let nombre  = pchif chiffre*             (* decimal number with no leading zero *)
let alpha   = ['a'-'z' 'A'-'Z']          (* ASCII letter *)
let ident   = (alpha | '_') (alpha | chiffre | '_')*
let space   = [' ' '\t']                 (* blank or tab *)
let doctal  = ['0'-'7']                  (* octal digit *)
let dhex    = ['0'-'9' 'a'-'f' 'A'-'F']  (* hexadecimal digit *)
(* Main entry point: returns the next token of the input.
   Blanks, line feeds and both comment forms are skipped by re-entering a
   rule; string and character literals are delegated to [tokstring] and
   [tokchar].
   Raises [Lexing_error] when no rule matches the next character, and when
   an integer literal is rejected by [Int32.of_string] (the digits are
   already validated by the pattern, so this means the value is out of
   range). *)
rule token = parse
  (* Octal literal: a leading zero followed by octal digits. *)
  | '0' (doctal+ as n)
      { try INTEGER (Int32.of_string ("0o" ^ n))
        with Failure _ -> raise (Lexing_error "integer literal out of range") }
  | '\n'                 { newline lexbuf; token lexbuf }
  (* Keywords are recognized as identifiers and sorted out by id_or_kwd. *)
  | ident as id          { id_or_kwd id }
  (* Decimal literal, or a lone zero. *)
  | (nombre | '0') as n
      { try INTEGER (Int32.of_string n)
        with Failure _ -> raise (Lexing_error "integer literal out of range") }
  (* Hexadecimal literal. *)
  | "0x" (dhex+ as n)
      { try INTEGER (Int32.of_string ("0x" ^ n))
        with Failure _ -> raise (Lexing_error "integer literal out of range") }
  (* Start from an empty accumulator, so that text left behind by a string
     literal that failed earlier is never prepended to this one. *)
  | '"'                  { localstring := ""; tokstring lexbuf }
  | '''                  { CHARACTER (tokchar lexbuf) }
  | space+               { token lexbuf }
  (* The operator rules below could be factored the way keywords are, but
     that would not give an automaton with fewer states: a more complicated
     regular expression would have to be matched, so the factoring would
     not be as useful as the one done for ident. *)
  | "->"                 { ARROW }
  | '+'                  { PLUS }
  | '*'                  { STAR }
  | '-'                  { MINUS }
  | '/'                  { DIV }
  | '%'                  { MOD }
  | "<="                 { LEQ }
  | ">="                 { GEQ }
  | "<"                  { LT }
  | ">"                  { GT }
  | "=="                 { EQUAL }
  | "!="                 { DIFF }
  | '='                  { GETS }
  | "||"                 { OR }
  | "&&"                 { AND }
  | '!'                  { NOT }
  | "++"                 { INCR }
  | "--"                 { DECR }
  | "&"                  { AMP }
  | '('                  { LPAREN }
  | ')'                  { RPAREN }
  | '['                  { LBRA }
  | ']'                  { RBRA }
  | '{'                  { LCUR }
  | '}'                  { RCUR }
  | '.'                  { DOT }
  | "/*"                 { comment lexbuf }
  | "//"                 { commentendline lexbuf }
  | ','                  { COMMA }
  | ';'                  { SC }
  | eof                  { EOF }
  | _                    { raise (Lexing_error "syntax error") }
(* Reads the rest of a string literal, the opening double quote having been
   consumed by [token], and returns the STRING token carrying its contents.
   The contents are accumulated in [localstring], which is emptied again
   both when the literal is closed and before any error is raised, so a
   failed literal cannot leak into the next one.
   Raises [Lexing_error] on end of input and on any character that no rule
   accepts (a bare quote, a line feed, or a backslash that starts no known
   escape). *)
and tokstring = parse
  (* Run of ordinary characters.  At least one is required, so this rule
     can never match the empty string. *)
  | [^ '\\' '"' ''' '\n']+ as s
      { localstring := !localstring ^ s; tokstring lexbuf }
  (* Closing double quote: hand over the contents and reset the
     accumulator. *)
  | '"'
      { let contents = !localstring in
        localstring := "";
        STRING contents }
  | "\\\""
      { localstring := !localstring ^ "\""; tokstring lexbuf }
  | "\\\'"
      { localstring := !localstring ^ "\'"; tokstring lexbuf }
  (* NOTE(review): this escape is stored as the two characters backslash
     and n rather than as a line feed, which makes it indistinguishable
     from an escaped backslash followed by n.  Presumably a later stage
     interprets it; confirm before changing. *)
  | "\\n"
      { localstring := !localstring ^ "\\n"; tokstring lexbuf }
  | "\\\\"
      { localstring := !localstring ^ "\\"; tokstring lexbuf }
  (* Two hexadecimal digits give a code between 0 and 255, which is always
     a valid argument for char_of_int. *)
  | "\\x" (dhex dhex as s)
      { let byte = char_of_int (int_of_string ("0x" ^ s)) in
        localstring := !localstring ^ String.make 1 byte;
        tokstring lexbuf }
  | eof
      { localstring := "";
        raise (Lexing_error "Unterminated string") }
  | _ as c
      { localstring := "";
        let shown = if c = '\n' then "newline" else String.make 1 c in
        raise (Lexing_error (Printf.sprintf "Character %s forbidden" shown)) }
(* Reads the rest of a character literal, the opening quote having been
   consumed by [token], and returns the character it denotes; the closing
   quote is consumed as well.
   Accepted forms: a hexadecimal escape with two digits, the escapes for
   backslash, quote, double quote and backslash-n (the same escapes that
   tokstring accepts), or any single character other than a backslash.
   Raises [Lexing_error] on end of input and on anything else. *)
and tokchar = parse
  | "\\x" (dhex dhex as s) "'"  { char_of_int (int_of_string ("0x" ^ s)) }
  | "\\\\" "'"                  { '\\' }
  | "\\\'" "'"                  { '\'' }
  | "\\\"" "'"                  { '"' }
  (* A character literal has to denote one character, so this escape yields
     the line feed itself. *)
  | "\\n" "'"                   { '\n' }
  (* NOTE(review): this class also lets an unescaped quote, double quote or
     line feed through; kept as is so that inputs accepted before are still
     accepted. *)
  | [^ '\\'] as c "'"           { c }
  | eof                         { raise (Lexing_error "Unterminated char") }
  | _                           { raise (Lexing_error "Invalid character") }
(* Skips the rest of a one-line comment, then resumes normal lexing.
   The three cases are disjoint: an ordinary character is dropped, a line
   feed ends the comment (and is counted), and end of input hands control
   back to [token], which then reports it. *)
and commentendline = parse
  | [^ '\n']  { commentendline lexbuf }
  | '\n'      { newline lexbuf; token lexbuf }
  | eof       { token lexbuf }
(* Skips the body of a block comment, then resumes normal lexing after the
   closing delimiter.  Line feeds inside the comment are counted.
   Raises [Lexing_error] if the input ends before the comment is closed. *)
and comment = parse
  | eof       { raise (Lexing_error ("Unterminated comment")) }
  | '\n'      { newline lexbuf; comment lexbuf }
  (* The closing delimiter is two characters long, so it wins over the
     one-character rule below by longest match. *)
  | "*/"      { token lexbuf }
  | [^ '\n']  { comment lexbuf }