Revert "Introduce a combinator to combine utilities in sequence."

This reverts commit fd2e9e8.
colis-anr · Apr 3, 2019 · 2441635
1 parent fd2e9e8
commit 2441635
Show file tree

Hide file tree

Showing 5 changed files with 39 additions and 100 deletions.
diff --git a/src/dune b/src/dune
@@ -13,14 +13,14 @@
 (library
  (name morbig)
  (public_name morbig)
- (libraries str menhirLib ppx_deriving_yojson.runtime visitors.runtime unix)
+ (libraries str menhirLib ppx_deriving_yojson.runtime visitors.runtime)
  (preprocess (pps ppx_deriving_yojson visitors.ppx))
  (flags :standard -w -3) ; FIXME: remove this when Yojson and its PPX are fixed.
  (modules :standard \ morbigDriver))
 
 (executable
  (name morbigDriver)
  (public_name morbig)
- (ocamlopt_flags :standard -p)
- (libraries morbig unix)
+ (ocamlopt_flags :standard)
+ (libraries morbig)
  (modules morbigDriver))
diff --git a/src/engine.ml b/src/engine.ml
@@ -410,30 +410,12 @@ module Lexer (U : sig end) : Lexer = struct
         pos_cnum = p.pos_cnum
     }
 
-  let remember_token token =
-    tokens := match !tokens with
-    | [] -> [token]
-    | [token'] | [token'; _] -> [token; token']
-    | _ -> assert false (* We only need to remember at most the last two tokens. *)
-
-  let show_line_number =
-    let timestamp = ref (Unix.gettimeofday ()) in
-    let last = ref 0 in
-    fun pos ->
-    if pos.Lexing.pos_lnum > !last then (
-      let now = Unix.gettimeofday () in
-      last := pos.Lexing.pos_lnum;
-      Printf.eprintf "%f Line %d\n" (now -. !timestamp) !last;
-      timestamp := now
-    )
-
   let next_token ({ aliases; checkpoint } as state) =
     let curr_p = copy_position (lexbuf ()).Lexing.lex_curr_p in
-    show_line_number curr_p;
     let state' = { aliases; checkpoint } in
     let (raw, _, _, aliases) as token = next_token state' in
     let state = { state with aliases } in
-    remember_token raw;
+    tokens := raw :: !tokens;
     last_state := Some (state, token, curr_p);
     token
 

diff --git a/src/extPervasives.ml b/src/extPervasives.ml
@@ -24,7 +24,7 @@ let comment f message =
   y
 
 let string_of_channel cin =
-  let b = Buffer.create (12 * 16384) in
+  let b = Buffer.create 16384 in
   let rec aux () =
     Buffer.add_channel b cin 1;
     aux ()
@@ -144,15 +144,6 @@ let count_end_character c s =
   in
   aux 0 (String.length s - 1)
 
-let is_digit s =
-  let len = String.length s in
-  let rec aux i =
-    (i >= len)
-    || let c = s.[i] in
-       ((c >= '0' && c <= '9') && aux (i + 1))
-  in
-  aux 0
-
 (** [strip s] returns a copy of s, without any final newline *)
 let string_strip s =
   let n = String.length s in
@@ -190,10 +181,10 @@ let rec take n l =
 
 let take_until pred l =
   let rec aux accu = function
-  | [] -> [], l, false
-  | (x :: xs) as l ->
+  | [] -> [], l
+  | x :: xs ->
     if pred x then
-      List.rev accu, l, true
+      List.rev accu, x :: xs
     else
       aux (x :: accu) xs
   in
@@ -281,18 +272,11 @@ let ( <$> ) x f =
 let list_last l =
   list_hd_opt (List.rev l)
 
-let contains_newline s =
-  String.contains s '\010'
+let newline_regexp =
+  Str.regexp "\010"
 
 let lines s =
-  String.split_on_char '\010' s
+  Str.split_delim newline_regexp s
 
 let string_last_line s =
   lines s |> list_last
-
-let time what f =
-  let start = Unix.gettimeofday () in
-  let y = f () in
-  let stop = Unix.gettimeofday () in
-  Printf.eprintf "%s: %f\n" what (stop -. start);
-  y
diff --git a/src/keyword.ml b/src/keyword.ml
@@ -77,13 +77,19 @@ let must_be_well_delimited flag = function
   | Rbrace | Do | Done | Then | Else | Elif | Fi | Esac -> flag
   | _ -> true
 
-let recognize_reserved_word_if_relevant well_delimited checkpoint (_, pstart, pstop) w =
+let recognize_reserved_word_if_relevant well_delimited checkpoint p w =
+  let (_, pstart, pstop) = p in
   let valid_token kwd =
     accepted_token checkpoint (kwd, pstart, pstop) <> Wrong
     && must_be_well_delimited well_delimited kwd
   in
   FirstSuccessMonad.(
-    (keyword_of_string w >>= fun kwd ->
-     return_if (valid_token kwd) kwd
-    ) +> (return_if (Name.is_name w) (NAME (CST.Name w)))
+    let as_keyword =
+      keyword_of_string w >>= fun kwd ->
+      return_if (valid_token kwd) kwd
+    in
+    let as_name =
+      return_if (Name.is_name w) (NAME (CST.Name w))
+    in
+    as_keyword +> as_name
   )
diff --git a/src/prelexerState.ml b/src/prelexerState.ml
@@ -34,39 +34,22 @@ module AtomBuffer : sig
   val is_empty : t -> bool
   val push_string : t -> string -> t
   val last_line : t -> string
-  val count_end_character : char -> t -> int
 end = struct
   type t = {
       mutable buffer  : atom list;
       mutable strings_len : int;
       mutable strings : string list;
     }
 
-  let too_many_strings_threshold = 128
-
-  let too_many_strings = ref too_many_strings_threshold
+  let too_many_strings = 1024
 
   let compact_strings strings =
-    let n = List.fold_left (fun a s -> a + String.length s) 0 strings in
-    let f = Bytes.create n in
-    let copy i s =
-      let j = i - String.length s in
-      for k = j to i - 1 do
-        Bytes.set f k s.[k - j]
-      done;
-      j
-    in
-    let rec aux i = function
-        | s :: ss -> aux (copy i s) ss
-        | [] -> Bytes.to_string f
-    in
-    aux n strings
+    [String.concat "" (List.rev strings)]
 
   let compact b =
-    if b.strings_len > !too_many_strings then (
-      too_many_strings := min max_int (2 * !too_many_strings);
+    if b.strings_len > too_many_strings then (
       b.strings_len <- 1;
-      b.strings <- [compact_strings b.strings]
+      b.strings <- compact_strings b.strings
     );
     b
 
@@ -81,9 +64,8 @@ end = struct
 
   let normalize b =
     if b.strings <> [] then begin
-        let s = compact_strings b.strings in
+        let s = String.concat "" (List.rev b.strings) in
         let buffer = push_string b.buffer s in
-        too_many_strings := too_many_strings_threshold;
         b.strings <- [];
         b.strings_len <- 0;
         b.buffer <- buffer
@@ -118,7 +100,7 @@ end = struct
     let last_line_of_strings ss =
       let rec aux accu = function
         | s :: ss ->
-           if ExtPervasives.contains_newline s then
+           if Str.string_match ExtPervasives.newline_regexp s 0 then
              match ExtPervasives.(list_last (lines s)) with
                | None -> assert false (* By the if-condition. *)
                | Some s -> s :: accu
@@ -134,23 +116,6 @@ end = struct
     else
       last_line_of_strings (buffer_as_strings b.buffer)
 
-  let count_end_character c b =
-    let rec count split l =
-      match split l with
-      | Some (s, ss) ->
-         let d = ExtPervasives.count_end_character c s in
-         if d < String.length s then d + count split ss else d
-      | None ->
-         0
-    in
-
-    let count_in_strings l =
-      count (function s :: ss -> Some (s, ss) | _ -> None) l
-    and count_in_buffer l =
-      count (function (WordComponent (s, _)) :: ss -> Some (s, ss) | _ -> None) l
-    in
-    if b.strings <> [] then count_in_strings b.strings else count_in_buffer b.buffer
-
 end
 
 type prelexer_state = {
@@ -321,8 +286,8 @@ let is_assignment_mark = function
   | _ -> false
 
 let recognize_assignment current =
-  let rhs, prefix, found = take_until is_assignment_mark (buffer current) in
-  if not found then (
+  let rhs, prefix = take_until is_assignment_mark (buffer current) in
+  if prefix = buffer current then (
     current
   ) else
     let buffer = AtomBuffer.make (rhs @ List.tl prefix) in
@@ -375,15 +340,14 @@ let recognize_assignment current =
     the buffer.
 
 *)
+let digit_regexp = Str.regexp "^[0-9]+$"
+
 let return ?(with_newline=false) lexbuf (current : prelexer_state) tokens =
-(*
   assert (
       not (List.exists (function (Pretoken.PreWord _)->true |_-> false) tokens)
     );
-*)
-  let current =
-     recognize_assignment current
-  in
+
+  let current = recognize_assignment current in
 
   let flush_word b =
     let buf = Buffer.create 13 in
@@ -394,6 +358,9 @@ let return ?(with_newline=false) lexbuf (current : prelexer_state) tokens =
     (* FIXME: Positions are not updated properly. *)
     (token, lexbuf.Lexing.lex_start_p, lexbuf.Lexing.lex_curr_p)
   in
+  let is_digit d =
+    Str.(string_match digit_regexp d 0)
+  in
   let followed_by_redirection = Parser.(function
     | Pretoken.Operator (LESSAND |  GREATAND | DGREAT | DLESS _
                          | CLOBBER | LESS | GREAT | LESSGREAT) :: _ ->
@@ -428,7 +395,7 @@ let return ?(with_newline=false) lexbuf (current : prelexer_state) tokens =
     match flush_word current with
     | "" ->
       []
-    | w when ExtPervasives.is_digit w && followed_by_redirection tokens ->
+    | w when is_digit w && followed_by_redirection tokens ->
       [Pretoken.IoNumber w]
     | w ->
       let csts =
@@ -476,6 +443,7 @@ let provoke_error current lexbuf =
 
 *)
 let escape_analysis ?(for_backquote=false) ?(for_dquotes=false) level current =
+  let current = AtomBuffer.last_line current.buffer in
   let number_of_backslashes_to_escape = Nesting.(
     (* FIXME: We will be looking for the general pattern here. *)
     match level with
@@ -503,7 +471,6 @@ let escape_analysis ?(for_backquote=false) ?(for_dquotes=false) level current =
   )
   in
   if Options.debug () then (
-    let current = AtomBuffer.last_line current.buffer in
     let current' = List.(concat (map rev (map string_to_char_list [current]))) in
     Printf.eprintf "N = %s | %s\n"
       (String.concat " "
@@ -512,7 +479,7 @@ let escape_analysis ?(for_backquote=false) ?(for_dquotes=false) level current =
       (string_of_char_list current')
   );
 
-  let backslashes_before = AtomBuffer.count_end_character '\\' current.buffer in
+  let backslashes_before = ExtPervasives.count_end_character '\\' current in
 
   if List.exists (fun k ->
          backslashes_before >= k && (k - backslashes_before) mod (k + 1) = 0