From 15ae9bc492fdb545b9350e6fae60a5d9c3ad9778 Mon Sep 17 00:00:00 2001 From: JOSHCLUNE Date: Sun, 26 Nov 2023 00:37:52 -0500 Subject: [PATCH] Removed instance 5 from portfolio mode --- Duper/Interface.lean | 27 +++++++++++++-------------- Duper/Tests/test_regression.lean | 2 +- README.md | 2 +- 3 files changed, 15 insertions(+), 16 deletions(-) diff --git a/Duper/Interface.lean b/Duper/Interface.lean index 15baf7b..615fa53 100644 --- a/Duper/Interface.lean +++ b/Duper/Interface.lean @@ -111,7 +111,10 @@ The `facts` supplied to `duper` are separated by commas and can include: By default, `duper` will call multiple instances of itself with different option configurations in an attempt to find the option configuration that is best suited for the current problem. This behavior can be changed by including the option -`portfolioMode := false` in the comma separated options list. +`portfolioMode := false` in the comma separated options list. Running `duper` with `portfolioMode` enabled means that +each instance of `duper` will be given less time to run. To increase the amount of time allocated to `duper`, use +`set_option maxHeartbeats N` (setting `N` to 3 times the default value will ensure that each `duper` instance is given +as much time as it would receive if run with `portfolioMode` disabled). The variant `duper?` will call `duper` and, if a proof is found, return a `Try this` suggestion that will call `duper` using just the option configuration that succeeded in finding a proof. If the suggestion is used, then on all subsequent @@ -394,11 +397,11 @@ def runDuper (formulas : List (Expr × Expr × Array Name × Bool)) (instanceMax return contradictionProof | Result.saturated => printSaturation state - throwError "Prover saturated." + throwError "Duper saturated." | Result.unknown => - /- Note: If this error message changes, make sure to edit Tactic.lean since `runDuperPortfolioMode` uses this error message to - determine whether Duper threw an error due to an actual problem or due to a timeout -/ - throwError "Prover was terminated." + /- Note: If this error message changes, make sure to grep the current message and change any code that uses the content + of this error message to determine whether Duper threw an error due to an actual problem or due to timeout. -/ + throwError "Duper was terminated." /- Note for converting between Duper's formulas format and Auto's lemmas format. If `hp : p`, then Duper stores the formula `(p, eq_true hp, #[], isFromGoal)` whereas Auto stores the lemma `⟨hp, p, #[]⟩`. Importantly, Duper stores the proof of `p = True` and @@ -1109,22 +1112,18 @@ def runDuperPortfolioMode (formulas : List (Expr × Expr × Array Name × Bool)) | none => #[(1, runDuperInstance1 formulas declName?), (3, runDuperInstance3 formulas declName?), - (5, runDuperInstance5 formulas declName?), (7, runDuperInstance7 formulas declName?)] | some FullPreprocessing => -- Replace instance 7 which has preprocessing disabled #[(1, runDuperInstance1 formulas declName?), (3, runDuperInstance3 formulas declName?), - (5, runDuperInstance5 formulas declName?), (15, runDuperInstance15 formulas declName?)] - | some NoPreprocessing => -- Replaces instances 1, 3, and 5 which have full preprocessing enabled + | some NoPreprocessing => -- Replaces instances 1 and 3 which have full preprocessing enabled #[(9, runDuperInstance9 formulas declName?), (11, runDuperInstance11 formulas declName?), - (13, runDuperInstance13 formulas declName?), (7, runDuperInstance7 formulas declName?)] | some Monomorphization => -- Replace all instances with corresponding instances that have only monomorphization enabled #[(17, runDuperInstance17 formulas declName?), (19, runDuperInstance19 formulas declName?), - (21, runDuperInstance21 formulas declName?), (23, runDuperInstance23 formulas declName?)] let numInstances := instances.size let mut maxInstanceHeartbeats := maxHeartbeats / numInstances -- Allocate total heartbeats among all instances @@ -1170,7 +1169,7 @@ def runDuperPortfolioMode (formulas : List (Expr × Expr × Array Name × Bool)) If Duper saturates or fails proof reconstruction specifically because inhabitation reasoning is disabled, `proofOption` will be `none` and `retryWithInhabitationReasoning` will be true. - If Duper times out (achieving ProverM.Result.unknown and throwing the error "Prover was terminated.") then `proofOption` + If Duper times out (achieving ProverM.Result.unknown and throwing the error "Duper was terminated.") then `proofOption` will be `none` and `retryWithInhabitationReasoning` will be false. If Duper fails for any other reason, then Duper will either continue to the next instance or throw an error depending on @@ -1195,7 +1194,7 @@ def runDuperPortfolioMode (formulas : List (Expr × Expr × Array Name × Bool)) pure (none, false) -- Don't retry with inhabitation reasoning because the user specifically indicated not to else pure (none, true) -- Attempting to solve this problem with inhabitation reasoning disabled leads to failed proof reconstruction - else if errorMessage.startsWith "Prover was terminated" then + else if errorMessage.startsWith "Duper was terminated" then pure (none, false) -- No reason to retry with inhabitation reasoning, portfolio mode should just move on to the next instance in the loop else if ← getThrowPortfolioErrorsM then throw e -- Throw the error because it doesn't appear to pertain to inhabitation reasoning or a timeout @@ -1235,7 +1234,7 @@ def runDuperPortfolioMode (formulas : List (Expr × Expr × Array Name × Bool)) pure $ some proof catch e => -- Only `e` is an error arising from the Duper instance timing out, it should be caught. Otherwise, it should be thrown. - if (← e.toMessageData.toString).startsWith "Prover was terminated" then pure none -- Duper instance just timed out, try again with the next instance + if (← e.toMessageData.toString).startsWith "Duper was terminated" then pure none -- Duper instance just timed out, try again with the next instance else if ← getThrowPortfolioErrorsM then throw e -- Error unrelated to timeout, and inhabitation reasoning is already enabled, so throw the error @@ -1250,4 +1249,4 @@ def runDuperPortfolioMode (formulas : List (Expr × Expr × Array Name × Bool)) mkDuperCallSuggestion duperStxRef origSpan facts withDuperStar duperInstanceNum return proof | none => continue -- Duper timed out or otherwise failed, try the next instance - throwError "Prover failed to solve the goal in allotted time" + throwError "Duper failed to solve the goal in the allotted time. To rerun Duper for longer, use set_option maxHeartbeats N." diff --git a/Duper/Tests/test_regression.lean b/Duper/Tests/test_regression.lean index b2c887a..c13f9d3 100644 --- a/Duper/Tests/test_regression.lean +++ b/Duper/Tests/test_regression.lean @@ -352,7 +352,7 @@ tptp PUZ012_1 "../TPTP-v8.0.0/Problems/PUZ/PUZ012_1.p" -- Tests that (in the current commit at least) use positive simplify reflect set_option trace.Rule.simplifyReflect true in tptp NUN004_5 "../TPTP-v8.0.0/Problems/NUN/NUN004_5.p" - by duper [*] {portfolioInstance := 7} -- Runs out of time if run in portfolio mode + by duper [*] set_option trace.Rule.simplifyReflect true in tptp ITP209_2 "../TPTP-v8.0.0/Problems/ITP/ITP209_2.p" diff --git a/README.md b/README.md index bb8c3d8..ac36e1d 100644 --- a/README.md +++ b/README.md @@ -40,7 +40,7 @@ If the `[facts]` argument are omitted from the Duper call, then Duper will only Each of the `options` supplied to Duper have the form `option := value` and are separated by commas. Options that can be used to customize a Duper call include: - `portfolioInstance`: Can be set to a natural number from 0 to 24. Each of these numbers corresponds to an exact set of options by which Duper can be called, so specifying the portfolioInstance option makes specifying any additional options redundant. The primary use for the portfolioInstance option is to ensure that when `duper?` succeeds, a short proof script of the form `by duper [facts] {portfolioInstance := n}` can be recommended to the user. -- `portfolioMode`: Can be set to `true` or `false` and is set to `true` by default. If portfolioMode is enabled, then Duper will call 4-5 instances of itself with different option configurations, and if portfolioMode is disabled, then Duper will only call a single instance of itself. The benefit of enabling portfolioMode is that different option configurations are better suited to different problems, while the downside is that if portfolioMode is enabled, each instance of Duper is given less time to run before timing out. Generally, we recommend users keep portfolioMode enabled by default and use `set_option maxHeartbeats` to control how long they would like each Duper instance to run before giving up (if `maxHeartbeats` is set to `n`, each instance will be allocated approximately `n/4` heartbeats). +- `portfolioMode`: Can be set to `true` or `false` and is set to `true` by default. If portfolioMode is enabled, then Duper will call 3-4 instances of itself with different option configurations, and if portfolioMode is disabled, then Duper will only call a single instance of itself. The benefit of enabling portfolioMode is that different option configurations are better suited to different problems, while the downside is that if portfolioMode is enabled, each instance of Duper is given less time to run before timing out. Generally, we recommend users keep portfolioMode enabled by default and use `set_option maxHeartbeats` to control how long they would like each Duper instance to run before giving up (if `maxHeartbeats` is set to `n`, each instance will be allocated approximately `n/3` heartbeats). - `preprocessing`: Can be set to `full`, `monomorphization`, or `no_preprocessing`. This option will control the extent to which lemmas are preprocessed before being given to Duper. When portfolioMode is enabled and the preprocessing option is not specified, 3-4 instances will have full preprocessing and 1 instance will have no preprocessing. But when the preprocessing option is enabled, all attempted instances will use the specified preprocessing option. Generally, we recommend users do not specify the preprocessing option unless specifically prompted to by Duper. - `inhabitationReasoning`: Can be set to `true` or `false`. If the goal or any provided lemmas include potentially empty types, Duper has to perform additional reasoning to guarantee the correctness of its proofs. If the user is confident that all types in the problem are provably nonempty, even outside the context of the current problem, then setting inhabitationReasoning to false can help Duper reason more efficiently (though during proof reconstruction, Duper will still verify that any assumptions made about type inhabitation are valid). Likewise, if the user is confident that there is at least one type in the problem that is either empty or can only be verified as nonempty using facts in the current context, then setting inhabitationReasoning to true will ensure that all Duper instances perform the necessary type inhabitation reasoning from the getgo. - `includeExpensiveRules`: Can be set to `true` or `false`. Some of the rules that Duper uses to reason about higher-order problems can significantly worsen Duper's performance. If the user believes the problem they are attempting to solve only requires first-order reasoning, then setting includeExpensiveRules to false can make Duper's search more efficient.