From c0200b3b95ed329fd6284b8b4a314c8fe2cba278 Mon Sep 17 00:00:00 2001 From: zeramorphic Date: Fri, 12 Apr 2024 01:41:34 +0100 Subject: [PATCH] Create unseen definition tracker Signed-off-by: zeramorphic --- .gitignore | 1 + DependencyGraph.lean | 86 +++++++++++++++++++++++++++++++++----------- lake-manifest.json | 2 +- 3 files changed, 67 insertions(+), 22 deletions(-) diff --git a/.gitignore b/.gitignore index 50dff19efd..f216b4c58f 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ /.lake/* **.olean depgraph.* +unseen_defs.txt diff --git a/DependencyGraph.lean b/DependencyGraph.lean index 9a64376281..a36ad81a24 100644 --- a/DependencyGraph.lean +++ b/DependencyGraph.lean @@ -4,13 +4,6 @@ import ConNF open Lean -/-- Extracts the names of the declarations in `env` on which `decl` depends. -/ --- source: --- https://leanprover.zulipchat.com/#narrow/stream/287929-mathlib4/topic/Counting.20prerequisites.20of.20a.20theorem/near/425370265 -def getVisited (env : Environment) (decl : Name) := - let (_, { visited, .. }) := Elab.Command.CollectAxioms.collect decl |>.run env |>.run {} - visited - def nameCode (n : Name) : String := if n = .anonymous then "anonymous" @@ -20,9 +13,10 @@ def nameCode (n : Name) : String := def nameDisplay (n : Name) : String := n.components.getLast!.toString -def printDeps₁ (k : Name) (v : Array Name) (print : String → IO Unit) : IO Unit := do +def printDeps₁ (k : Name) (_v : Array Name) (print : String → IO Unit) : IO Unit := do let n := k.componentsRev[1]! - print (nameCode k ++ " [label=\"" ++ nameDisplay k ++ "\"" ++ " group=\"" ++ n.toString ++ "\"]" ++ ";\n") + print (nameCode k ++ " [label=\"" ++ nameDisplay k ++ "\"" ++ + " group=\"" ++ n.toString ++ "\"]" ++ ";\n") def printDeps₂ (k : Name) (v : Array Name) (print : String → IO Unit) : IO Unit := do for val in v do @@ -42,22 +36,72 @@ elab "#deptree " : command => do let env ← getEnv let imports := env.importGraph IO.FS.withFile "depgraph.dot" IO.FS.Mode.write fun h => do - h.write "digraph {\n".toAsciiByteArray - h.write "compound=true;\n".toAsciiByteArray + h.write "digraph {\n".toUTF8 + h.write "compound=true;\n".toUTF8 for (gp, _) in groups imports do - h.write ("subgraph cluster_" ++ nameCode gp ++ " {\n").toAsciiByteArray + h.write ("subgraph cluster_" ++ nameCode gp ++ " {\n").toUTF8 for (k, v) in imports do if (`ConNF).isPrefixOf k && group k = gp then do - printDeps₁ k v (fun s => h.write s.toAsciiByteArray) - h.write ("label = \"" ++ gp.toString ++ "\";\n").toAsciiByteArray - h.write ("margin = 32;\n").toAsciiByteArray - h.write ("pad = 32;\n").toAsciiByteArray - h.write ("penwidth = 5;\n").toAsciiByteArray - h.write ("color = cyan4;\n").toAsciiByteArray - h.write "}\n".toAsciiByteArray + printDeps₁ k v (fun s => h.write s.toUTF8) + h.write ("label = \"" ++ gp.toString ++ "\";\n").toUTF8 + h.write ("margin = 32;\n").toUTF8 + h.write ("pad = 32;\n").toUTF8 + h.write ("penwidth = 5;\n").toUTF8 + h.write ("color = cyan4;\n").toUTF8 + h.write "}\n".toUTF8 for (k, v) in imports do if (`ConNF).isPrefixOf k then do - printDeps₂ k v (fun s => h.write s.toAsciiByteArray) - h.write "}\n".toAsciiByteArray + printDeps₂ k v (fun s => h.write s.toUTF8) + h.write "}\n".toUTF8 #deptree + +/-- Extracts the names of the declarations in `env` on which `decl` depends. -/ +-- source: +-- https://leanprover.zulipchat.com/#narrow/stream/287929-mathlib4/topic/Counting.20prerequisites.20of.20a.20theorem/near/425370265 +def getVisited (env : Environment) (decl : Name) := + let (_, { visited, .. }) := Elab.Command.CollectAxioms.collect decl |>.run env |>.run {} + visited.erase decl + +partial def allDeclsIn (module : Name) : Elab.Command.CommandElabM (Array Name) := do + let mFile ← findOLean module + unless (← mFile.pathExists) do + logError m!"object file '{mFile}' of module {module} does not exist" + let (md, _) ← readModuleData mFile + let decls ← md.constNames.filterM fun d => return !(← d.isBlackListed) + return decls + +def allFiles (env : Environment) : List Name := + (env.importGraph.fold (fun xs k _ => if (`ConNF).isPrefixOf k then k :: xs else xs) []).mergeSort + (toString · < toString ·) + +def allDecls (env : Environment) : Elab.Command.CommandElabM NameSet := + (fun l => RBTree.ofList (l.map (fun a => a.toList)).join) <$> + (mapM allDeclsIn (allFiles env)) + +def seenIn (env : Environment) (allDecls : NameSet) (decl : Name) : NameSet := + (getVisited env decl).fold + (fun decls x => if allDecls.contains x then decls.insert x else decls) RBTree.empty + +/-- `#unseen` computes a list of the declarations of the current file that are +defined but not used in the current file. The list is stored in `unseen_defs.txt`. +The average runtime on my computer is 1 minute, with 16 threads. -/ +elab "#unseen " : command => do + let env ← getEnv + let allDecls ← allDecls env + let mut unseen := allDecls + let timeStart ← IO.monoMsNow + let tasks := (fun l => Task.spawn (fun _ => seenIn env allDecls l)) <$> + allDecls.toList.mergeSort (toString · < toString ·) + for task in tasks do + for v in task.get do + unseen := unseen.erase v + IO.FS.withFile "unseen_defs.txt" IO.FS.Mode.write (fun h => do + for v in unseen.toList.mergeSort (toString · < toString ·) do + h.write (v.toString ++ "\n").toUTF8) + let timeEnd ← IO.monoMsNow + logInfo m!"operation took {(timeEnd - timeStart) / 1000}s" + +/-! Uncommenting the following line will run the `#unseen` program, which will take ~1 minute on a +reasonably powerful system with about 16 threads. -/ +-- #unseen diff --git a/lake-manifest.json b/lake-manifest.json index a8bb3495f0..3bfd105832 100644 --- a/lake-manifest.json +++ b/lake-manifest.json @@ -58,7 +58,7 @@ {"url": "https://github.com/leanprover-community/mathlib4.git", "type": "git", "subDir": null, - "rev": "bf1fcf395bad9fc870f5d2fc5c4528de1b19877a", + "rev": "5dd5b451e8a2d630886762f6209782a4f37ccbdb", "name": "mathlib", "manifestFile": "lake-manifest.json", "inputRev": null,