Skip to content

Commit

Permalink
feat: show diffs when #guard_msgs fails (#3912)
Browse files Browse the repository at this point in the history
Adds the ability to show a diff when `guard_msgs` fails, using the
histogram diff algorithm pioneered in jgit. This algorithm tends to
produce more user-friendly diffs, but it can be quadratic in the worst
case. Empirically, the quadratic case of this implementation doesn't
seem to be slow enough to matter for messages smaller than hundreds of
megabytes, but if it's ever a problem, we can mitigate it the same way
jgit does by falling back to Myers diff.

See lean/run/guard_msgs.lean in the tests directory for some examples of
its output.
  • Loading branch information
david-christiansen authored Apr 18, 2024
1 parent 0c9f9ab commit b6d77be
Show file tree
Hide file tree
Showing 11 changed files with 626 additions and 1 deletion.
4 changes: 4 additions & 0 deletions RELEASES.md
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,10 @@ v4.8.0 (development in progress)
and `#guard_msgs (ordering := sorted) in cmd` sorts the messages in lexicographic order before checking.
PR [#3883](https://github.com/leanprover/lean4/pull/3883).

* The `#guard_msgs` command now supports showing a diff between the expected and actual outputs. This feature is currently
disabled by default, but can be enabled with `set_option guard_msgs.diff true`. Depending on user feedback, this option
may default to `true` in a future version of Lean.

Breaking changes:

* Automatically generated equational theorems are now named using suffix `.eq_<idx>` instead of `._eq_<idx>`, and `.def` instead of `._unfold`. Example:
Expand Down
1 change: 1 addition & 0 deletions src/Init/Data.lean
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import Init.Data.String
import Init.Data.List
import Init.Data.Int
import Init.Data.Array
import Init.Data.Array.Subarray.Split
import Init.Data.ByteArray
import Init.Data.FloatArray
import Init.Data.Fin
Expand Down
6 changes: 6 additions & 0 deletions src/Init/Data/Array/Subarray.lean
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@ namespace Subarray
def size (s : Subarray α) : Nat :=
s.stop - s.start

theorem size_le_array_size {s : Subarray α} : s.size ≤ s.array.size := by
let {array, start, stop, start_le_stop, stop_le_array_size} := s
simp [size]
apply Nat.le_trans (Nat.sub_le stop start)
assumption

def get (s : Subarray α) (i : Fin s.size) : α :=
have : s.start + i.val < s.array.size := by
apply Nat.lt_of_lt_of_le _ s.stop_le_array_size
Expand Down
71 changes: 71 additions & 0 deletions src/Init/Data/Array/Subarray/Split.lean
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
/-
Copyright (c) 2024 Lean FRO, LLC. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Authors: David Thrane Christiansen
-/

prelude
import Init.Data.Array.Basic
import Init.Data.Array.Subarray
import Init.Omega

/-
This module contains splitting operations on subarrays that crucially rely on `omega` for proof
automation. Placing them in another module breaks an import cycle, because `omega` itself uses the
array library.
-/

namespace Subarray
/--
Splits a subarray into two parts.
-/
def split (s : Subarray α) (i : Fin s.size.succ) : (Subarray α × Subarray α) :=
let ⟨i', isLt⟩ := i
have := s.start_le_stop
have := s.stop_le_array_size
have : i' ≤ s.stop - s.start := Nat.lt_succ.mp isLt
have : s.start + i' ≤ s.stop := by omega
have : s.start + i' ≤ s.array.size := by omega
have : s.start + i' ≤ s.stop := by
simp only [size] at isLt
omega
let pre := {s with
stop := s.start + i',
start_le_stop := by omega,
stop_le_array_size := by assumption
}
let post := {s with
start := s.start + i'
start_le_stop := by assumption
}
(pre, post)

/--
Removes the first `i` elements of the subarray. If there are `i` or fewer elements, the resulting
subarray is empty.
-/
def drop (arr : Subarray α) (i : Nat) : Subarray α where
array := arr.array
start := min (arr.start + i) arr.stop
stop := arr.stop
start_le_stop := by
rw [Nat.min_def]
split <;> simp only [Nat.le_refl, *]
stop_le_array_size := arr.stop_le_array_size

/--
Keeps only the first `i` elements of the subarray. If there are `i` or fewer elements, the resulting
subarray is empty.
-/
def take (arr : Subarray α) (i : Nat) : Subarray α where
array := arr.array
start := arr.start
stop := min (arr.start + i) arr.stop
start_le_stop := by
have := arr.start_le_stop
rw [Nat.min_def]
split <;> omega
stop_le_array_size := by
have := arr.stop_le_array_size
rw [Nat.min_def]
split <;> omega
14 changes: 13 additions & 1 deletion src/Lean/Elab/GuardMsgs.lean
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ Authors: Kyle Miller
-/
prelude
import Lean.Elab.Notation
import Lean.Util.Diff
import Lean.Server.CodeActions.Attr

/-! `#guard_msgs` command for testing commands
Expand All @@ -15,6 +16,12 @@ See the docstring on the `#guard_msgs` command.

open Lean Parser.Tactic Elab Command

register_builtin_option guard_msgs.diff : Bool := {
defValue := false
descr := "When true, show a diff between expected and actual messages if they don't match. "
}


namespace Lean.Elab.Tactic.GuardMsgs

/-- Gives a string representation of a message without source position information.
Expand Down Expand Up @@ -151,7 +158,12 @@ def MessageOrdering.apply (mode : MessageOrdering) (msgs : List String) : List S
else
-- Failed. Put all the messages back on the message log and add an error
modify fun st => { st with messages := initMsgs ++ msgs }
logErrorAt tk m!"❌ Docstring on `#guard_msgs` does not match generated message:\n\n{res}"
let feedback :=
if (← getOptions).getBool `guard_msgs.diff false then
let diff := Diff.diff (expected.split (· == '\n')).toArray (res.split (· == '\n')).toArray
Diff.linesToString diff
else res
logErrorAt tk m!"❌ Docstring on `#guard_msgs` does not match generated message:\n\n{feedback}"
pushInfoLeaf (.ofCustomInfo { stx := ← getRef, value := Dynamic.mk (GuardMsgFailure.mk res) })
| _ => throwUnsupportedSyntax

Expand Down
198 changes: 198 additions & 0 deletions src/Lean/Util/Diff.lean
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
/-
Copyright (c) 2024 Lean FRO, LLC. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Authors: David Thrane Christiansen
-/
prelude
import Init.Data
import Lean.Data.HashMap
import Init.Omega

namespace Lean.Diff
/--
An action in an edit script to transform one source sequence into a target sequence.
-/
inductive Action where
/-- Insert the item into the source -/
| insert
/-- Delete the item from the source -/
| delete
/-- Leave the item in the source -/
| skip
deriving Repr, BEq, Hashable, Repr

instance : ToString Action where
toString
| .insert => "insert"
| .delete => "delete"
| .skip => "skip"

/--
Determines a prefix to show on a given line when printing diff results.
For `delete`, the prefix is `"-"`, for `insert`, it is `"+"`, and for `skip` it is `" "`.
-/
def Action.linePrefix : (action : Action) → String
| .delete => "-"
| .insert => "+"
| .skip => " "

/--
A histogram entry consists of the number of times the element occurs in the left and right input
arrays along with an index at which the element can be found, if applicable.
-/
structure Histogram.Entry (α : Type u) (lsize rsize : Nat) where
/-- How many times the element occurs in the left array -/
leftCount : Nat
/-- An index of the element in the left array, if applicable -/
leftIndex : Option (Fin lsize)
/-- Invariant: the count is non-zero iff there is a saved index -/
leftWF : leftCount = 0 ↔ leftIndex = none
/-- How many times the element occurs in the right array -/
rightCount : Nat
/-- An index of the element in the right array, if applicable -/
rightIndex : Option (Fin rsize)
/-- Invariant: the count is non-zero iff there is a saved index -/
rightWF : rightCount = 0 ↔ rightIndex = none

/-- A histogram for arrays maps each element to a count and, if applicable, an index.-/
def Histogram (α : Type u) (lsize rsize : Nat) [BEq α] [Hashable α] :=
Lean.HashMap α (Histogram.Entry α lsize rsize)


section

variable [BEq α] [Hashable α]

/-- Add an element from the left array to a histogram -/
def Histogram.addLeft (histogram : Histogram α lsize rsize) (index : Fin lsize) (val : α)
: Histogram α lsize rsize :=
match histogram.find? val with
| none => histogram.insert val {
leftCount := 1, leftIndex := some index,
leftWF := by simp,
rightCount := 0, rightIndex := none
rightWF := by simp
}
| some x => histogram.insert val {x with
leftCount := x.leftCount + 1, leftIndex := some index, leftWF := by simp
}

/-- Add an element from the right array to a histogram -/
def Histogram.addRight (histogram : Histogram α lsize rsize) (index : Fin rsize) (val : α)
: Histogram α lsize rsize :=
match histogram.find? val with
| none => histogram.insert val {
leftCount := 0, leftIndex := none,
leftWF := by simp,
rightCount := 1, rightIndex := some index,
rightWF := by simp
}
| some x => histogram.insert val {x with
rightCount := x.leftCount + 1, rightIndex := some index,
rightWF := by simp
}

/-- Given two `Subarray`s, find their common prefix and return their differing suffixes -/
def matchPrefix (left right : Subarray α) : Array α × Subarray α × Subarray α :=
let rec go (pref : Array α) : Array α × Subarray α × Subarray α :=
let i := pref.size
if _ : i < left.size ∧ i < right.size then
if left[i]'(by omega) == right[i]'(by omega) then
go <| pref.push <| left[i]'(by omega)
else (pref, left.drop pref.size, right.drop pref.size)
else (pref, left.drop pref.size, right.drop pref.size)
termination_by left.size - pref.size
go #[]


/-- Given two `Subarray`s, find their common suffix and return their differing prefixes -/
def matchSuffix (left right : Subarray α) : Subarray α × Subarray α × Array α :=
let rec go (i : Nat) : Subarray α × Subarray α × Array α :=
if _ : i < left.size ∧ i < right.size then
if left[left.size - i - 1]'(by omega) == right[right.size - i - 1]'(by omega) then
go i.succ
else
(left.take (left.size - i), right.take (right.size - i), (left.drop (left.size - i)).toArray)
else
(left.take (left.size - i), right.take (right.size - i), (left.drop (left.size - i)).toArray)
termination_by left.size - i
go 0

/--
Finds the least common subsequence of two arrays using a variant of jgit's histogram diff algorithm.
Unlike jgit's implementation, this one does not perform a cutoff on histogram bucket sizes, nor does
it fall back to the Myers diff algorithm. This means that it has quadratic worst-case complexity.
Empirically, however, even quadratic cases of this implementation can handle hundreds of megabytes
in a second or so, and it is intended to be used for short strings like error messages. Be
cautious when applying it to larger workloads.
-/
partial def lcs (left right : Subarray α) : Array α := Id.run do
let (pref, left, right) := matchPrefix left right
let (left, right, suff) := matchSuffix left right
let mut hist : Histogram α left.size right.size := .empty
for h : i in [0:left.size] do
hist := hist.addLeft ⟨i, Membership.get_elem_helper h rfl⟩ left[i]
for h : i in [0:right.size] do
hist := hist.addRight ⟨i, Membership.get_elem_helper h rfl⟩ right[i]
let mut best := none
for (k, v) in hist.toList do
if let {leftCount := lc, leftIndex := some li, rightCount := rc, rightIndex := some ri, ..} := v then
match best with
| none =>
best := some ((lc + rc), k, li, ri)
| some (count, _) =>
if lc + rc < count then
best := some ((lc + rc), k, li, ri)
let some (_, v, li, ri) := best
| return pref ++ suff

let lefts := left.split ⟨li.val, by cases li; simp_arith; omega⟩
let rights := right.split ⟨ri.val, by cases ri; simp_arith; omega⟩

pref ++ lcs lefts.1 rights.1 ++ #[v] ++ lcs (lefts.2.drop 1) (rights.2.drop 1) ++ suff

/--
Computes an edit script to transform `left` into `right`.
-/
def diff [Inhabited α] (original edited : Array α) : Array (Action × α) :=
if h : ¬(0 < original.size) then
edited.map (.insert, ·)
else if h' : ¬(0 < edited.size) then
original.map (.delete, ·)
else Id.run do
have : original.size > 0 := by omega
have : edited.size > 0 := by omega
let mut out := #[]
let ds := lcs original.toSubarray edited.toSubarray
let mut i := 0
let mut j := 0
for d in ds do
while i < original.size && original[i]! != d do
out := out.push <| (.delete, original[i]!)
i := i.succ
while j < edited.size && edited[j]! != d do
out := out.push <| (.insert, edited[j]!)
j := j.succ
out := out.push <| (.skip, d)
i := i.succ
j := j.succ
while h : i < original.size do
out := out.push <| (.delete, original[i])
i := i.succ
while h : j < edited.size do
out := out.push <| (.insert, edited[j])
j := j.succ
out

/-- Shows a line-by-line edit script with markers for added and removed lines. -/
def linesToString [ToString α] (lines : Array (Action × α)) : String := Id.run do
let mut out : String := ""
for (act, line) in lines do
let lineStr := toString line
if lineStr == "" then
out := out ++ s!"{act.linePrefix}\n"
else
out := out ++ s!"{act.linePrefix} {lineStr}\n"
out
Loading

0 comments on commit b6d77be

Please sign in to comment.