Only run perf once instead of per sample
Running perf on every sample would probably slow down all benchmarks significantly for no gain, as we only store the last recorded perf result.
Zentrik committed Dec 30, 2023
1 parent 2bcc4ee commit 504eed3
Showing 4 changed files with 71 additions and 96 deletions.
113 changes: 57 additions & 56 deletions src/execution.jl
@@ -9,6 +9,7 @@ gcscrub() = (GC.gc(); GC.gc(); GC.gc(); GC.gc())

mutable struct Benchmark
samplefunc
linux_perf_func
quote_vals
params::Parameters
end
@@ -106,15 +107,21 @@ function _run(b::Benchmark, p::Parameters; verbose=false, pad="", kwargs...)
start_time = Base.time()
trial = Trial(params)
params.gcsample && gcscrub()
trial_contents = b.samplefunc(b.quote_vals, params)
push!(trial, trial_contents)
return_val = trial_contents.return_val
s = b.samplefunc(b.quote_vals, params)
push!(trial, s[1:(end - 1)]...)
return_val = s[end]
iters = 2
while (Base.time() - start_time) < params.seconds && iters ≤ params.samples
params.gcsample && gcscrub()
push!(trial, b.samplefunc(b.quote_vals, params))
push!(trial, b.samplefunc(b.quote_vals, params)[1:(end - 1)]...)
iters += 1
end

if p.experimental_enable_linux_perf
params.gcsample && gcscrub()
trial.linux_perf_stats = b.linux_perf_func(b.quote_vals, params)
end

return trial, return_val
end
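
To make the new convention concrete: the generated sample function now returns a flat tuple whose trailing element is the benchmarked expression's return value, so `_run` can splat the measurements into the trial and keep the return value separately. A minimal sketch with a stand-in sample function (the tuple layout is taken from the generated code below; the names here are illustrative):

    # Stand-in for the generated samplefunc: (time, gctime, memory, allocs, return_val)
    fake_samplefunc() = (1.0e3, 0.0, 64, 1, "result")

    s = fake_samplefunc()
    measurements = s[1:(end - 1)]  # the four numbers pushed into the Trial
    return_val = s[end]            # kept out of the per-sample storage

Note that perf is no longer touched inside the sampling loop at all; it runs exactly once, after sampling, guarded by `p.experimental_enable_linux_perf`.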

@@ -178,7 +185,7 @@ function _lineartrial(b::Benchmark, p::Parameters=b.params; maxevals=RESOLUTION,
for evals in eachindex(estimates)
params.gcsample && gcscrub()
params.evals = evals
estimates[evals] = b.samplefunc(b.quote_vals, params).time
estimates[evals] = first(b.samplefunc(b.quote_vals, params))
completed += 1
((time() - start_time) > params.seconds) && break
end
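
`first` works here because the time is the first element of the sample tuple; the old code read a `.time` field off the now-removed `TrialContents` struct. Illustratively:

    s = (1.0e3, 0.0, 64, 1, "result")  # (time, gctime, memory, allocs, return_val)
    first(s)                            # 1000.0, the measured time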
@@ -506,6 +513,7 @@ function generate_benchmark_definition(
@nospecialize
corefunc = gensym("core")
samplefunc = gensym("sample")
linux_perf_func = gensym("perf")
type_vars = [gensym() for i in 1:(length(quote_vars) + length(setup_vars))]
signature = Expr(:call, corefunc, quote_vars..., setup_vars...)
signature_def = Expr(
@@ -572,64 +580,57 @@
__evals,
),
)
if $(params.experimental_enable_linux_perf)
# Based on https://github.com/JuliaPerf/LinuxPerf.jl/blob/a7fee0ff261a5b5ce7a903af7b38d1b5c27dd931/src/LinuxPerf.jl#L1043-L1061
__linux_perf_groups = BenchmarkTools.LinuxPerf.set_default_spaces(
$(params.linux_perf_options.events),
$(params.linux_perf_options.spaces),
return __time, __gctime, __memory, __allocs, __return_val
end
@noinline function $(linux_perf_func)(
$(Expr(:tuple, quote_vars...)), __params::$BenchmarkTools.Parameters
)
# Based on https://github.com/JuliaPerf/LinuxPerf.jl/blob/a7fee0ff261a5b5ce7a903af7b38d1b5c27dd931/src/LinuxPerf.jl#L1043-L1061
__linux_perf_groups = $LinuxPerf.set_default_spaces(
eval(__params.linux_perf_options.events),
eval(__params.linux_perf_options.spaces),
)
__linux_perf_bench = nothing
try
__linux_perf_bench = $LinuxPerf.make_bench_threaded(
__linux_perf_groups;
threads=eval(__params.linux_perf_options.threads),
)
__linux_perf_bench = nothing
try
__linux_perf_bench = BenchmarkTools.LinuxPerf.make_bench_threaded(
__linux_perf_groups;
threads=$(params.linux_perf_options.threads),
)
catch e
if e isa ErrorException &&
startswith(e.msg, "perf_event_open error : ")
@warn "Perf is disabled"
else
rethrow()
end
catch e
if e isa ErrorException &&
startswith(e.msg, "perf_event_open error : ")
@warn "Perf is disabled" # Really we only want to do this if we defaulted to running with perf, otherwise we should just throw.
# Given we now more accurately determine if perf is available can we do away with this hack?
else
rethrow()
end
end

if !isnothing(__linux_perf_bench)
try
$(setup)
BenchmarkTools.LinuxPerf.enable!(__linux_perf_bench)
# We'll just run it one time.
__return_val_2 = $(invocation)
BenchmarkTools.LinuxPerf.disable!(__linux_perf_bench)
# trick the compiler not to eliminate the code
if rand() < 0
__linux_perf_stats = __return_val_2
else
__linux_perf_stats = BenchmarkTools.LinuxPerf.Stats(
__linux_perf_bench
)
end
catch
rethrow()
finally
close(__linux_perf_bench)
$(teardown)
if !isnothing(__linux_perf_bench)
$(setup)
try
$LinuxPerf.enable!(__linux_perf_bench)
# We'll just run it one time.
__return_val_2 = $(invocation)
$LinuxPerf.disable!(__linux_perf_bench)
# trick the compiler not to eliminate the code
if rand() < 0
__linux_perf_stats = __return_val_2
else
__linux_perf_stats = $LinuxPerf.Stats(__linux_perf_bench)
end
return __linux_perf_stats
catch
rethrow()
finally
close(__linux_perf_bench)
$(teardown)
end
else
__return_val_2 = nothing
__linux_perf_stats = nothing
end
return BenchmarkTools.TrialContents(
__time,
__gctime,
__memory,
__allocs,
__return_val,
__return_val_2,
__linux_perf_stats,
)
end
$BenchmarkTools.Benchmark($(samplefunc), $(quote_vals), $(params))
$BenchmarkTools.Benchmark(
$(samplefunc), $(linux_perf_func), $(quote_vals), $(params)
)
end,
)
end
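
Stripped of the metaprogramming, the generated `linux_perf_func` follows the usual LinuxPerf.jl enable/run/disable/collect pattern. A rough standalone sketch, assuming LinuxPerf.jl is available and perf events are permitted; `groups` stands for an already-built event-group spec (e.g. the result of `LinuxPerf.set_default_spaces`, whose construction is elided here):

    using LinuxPerf

    function measure_once(f, groups; threads=false)
        bench = LinuxPerf.make_bench_threaded(groups; threads=threads)
        try
            LinuxPerf.enable!(bench)
            ret = f()                    # run the workload exactly once
            LinuxPerf.disable!(bench)
            return LinuxPerf.Stats(bench), ret
        finally
            close(bench)                 # always release the perf file descriptors
        end
    end

The real generated code additionally keeps the `rand() < 0` dead branch so the compiler cannot prove the workload's result unused and eliminate the invocation.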
2 changes: 1 addition & 1 deletion src/parameters.jl
@@ -30,7 +30,7 @@ function perf_available()
try
opts = DEFAULT_LINUX_PERF_OPTIONS
groups = LinuxPerf.set_default_spaces(eval(opts.events), eval(opts.spaces))
bench = LinuxPerf.make_bench_threaded(groups, threads = eval(opts.threads))
bench = LinuxPerf.make_bench_threaded(groups; threads=eval(opts.threads))
return true
catch
return false
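The parameters.jl change is style-only: Julia accepts keyword arguments after either a comma or a semicolon, and the formatter prefers the explicit semicolon with no spaces around `=`. For instance:

    f(x; threads=false) = (x, threads)
    f(1, threads=true)   # comma also parses as a keyword argument here
    f(1; threads=true)   # equivalent, and the preferred style
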
30 changes: 3 additions & 27 deletions src/trials.jl
@@ -11,16 +11,6 @@ mutable struct Trial
linux_perf_stats::Union{LinuxPerf.Stats,Nothing}
end

struct TrialContents{A,B}
time::Float64
gctime::Float64
memory::Int
allocs::Int
return_val::A
return_val_2::B
linux_perf_stats::Union{LinuxPerf.Stats,Nothing}
end

function Trial(params::Parameters)
return Trial(params, Float64[], Float64[], typemax(Int), typemax(Int), nothing)
end
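
The constructor seeds `memory` and `allocs` with `typemax(Int)` as a "no sample yet" sentinel, so the minimum-tracking in `push!` below is correct from the first sample, and the new perf slot starts empty. Assuming this commit's BenchmarkTools:

    t = BenchmarkTools.Trial(BenchmarkTools.Parameters())
    t.memory == typemax(Int)        # sentinel until the first sample arrives
    t.linux_perf_stats === nothing  # filled in at most once, after sampling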
@@ -44,16 +34,11 @@ function Base.copy(t::Trial)
)
end

function Base.push!(t::Trial, trial_contents::TrialContents)
time = trial_contents.time
gctime = trial_contents.gctime
memory = trial_contents.memory
allocs = trial_contents.allocs
function Base.push!(t::Trial, time, gctime, memory, allocs)
push!(t.times, time)
push!(t.gctimes, gctime)
memory < t.memory && (t.memory = memory)
allocs < t.allocs && (t.allocs = allocs)
t.linux_perf_stats = trial_contents.linux_perf_stats
return t
end
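
With `TrialContents` gone, a sample is pushed as four positional numbers, and `memory`/`allocs` record the minimum seen across samples. Continuing the sketch above:

    push!(t, 2.0, 1.0, 4, 5)     # time, gctime, memory, allocs
    push!(t, 21.0, 0.0, 41, 51)
    length(t)  # 2
    t.memory   # 4  (the minimum, not the last value)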

@@ -65,17 +50,8 @@ end

Base.length(t::Trial) = length(t.times)
function Base.getindex(t::Trial, i::Number)
return push!(
Trial(t.params),
TrialContents(
t.times[i],
t.gctimes[i],
t.memory,
t.allocs,
nothing,
nothing,
t.linux_perf_stats,
),
return Trial(
t.params, [t.times[i]], [t.gctimes[i]], t.memory, t.allocs, t.linux_perf_stats
)
end
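
Scalar indexing now constructs the one-sample `Trial` directly instead of pushing into an empty one, carrying over `memory`, `allocs`, and the single stored `linux_perf_stats`. With `t` from the sketch above:

    s = t[2]
    s.times   # [21.0]
    s.memory  # 4, inherited from the whole trial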
function Base.getindex(t::Trial, i)
22 changes: 10 additions & 12 deletions test/TrialsTests.jl
@@ -1,21 +1,21 @@
module TrialsTests

using BenchmarkTools
using BenchmarkTools: TrialContents
using Test

#########
# Trial #
#########

trial1 = BenchmarkTools.Trial(BenchmarkTools.Parameters(; evals=2))
push!(trial1, TrialContents(2.0, 1.0, 4, 5, nothing, nothing, nothing))
push!(trial1, TrialContents(21.0, 0.0, 41, 51, nothing, nothing, nothing))
push!(trial1, 2, 1, 4, 5)
push!(trial1, 21, 0, 41, 51)

trial2 = BenchmarkTools.Trial(BenchmarkTools.Parameters(; time_tolerance=0.15))
push!(trial2, TrialContents(21.0, 0.0, 41, 51, nothing, nothing, nothing))
push!(trial2, TrialContents(2.0, 1.0, 4, 5, nothing, nothing, nothing))
push!(trial2, 21, 0, 41, 51)
push!(trial2, 2, 1, 4, 5)

push!(trial2, TrialContents(21.0, 0.0, 41, 51, nothing, nothing, nothing))
push!(trial2, 21, 0, 41, 51)
@test length(trial2) == 3
deleteat!(trial2, 3)
@test length(trial1) == length(trial2) == 2
@@ -33,10 +33,8 @@ trial2.params = trial1.params

@test trial1 == trial2

@test trial1[2] == push!(
BenchmarkTools.Trial(BenchmarkTools.Parameters(; evals=2)),
TrialContents(21.0, 0.0, 4, 5, nothing, nothing, nothing),
)
@test trial1[2] ==
push!(BenchmarkTools.Trial(BenchmarkTools.Parameters(; evals=2)), 21, 0, 4, 5)
@test trial1[1:end] == trial1

@test time(trial1) == time(trial2) == 2.0
@@ -63,11 +61,11 @@ rmskew!(trial3)
randtrial = BenchmarkTools.Trial(BenchmarkTools.Parameters())

for _ in 1:40
push!(randtrial, TrialContents(rand(1.0:20.0), 1.0, 1, 1, nothing, nothing, nothing))
push!(randtrial, rand(1:20), 1, 1, 1)
end

while mean(randtrial) <= median(randtrial)
push!(randtrial, TrialContents(rand(10.0:20.0), 1.0, 1, 1, nothing, nothing, nothing))
push!(randtrial, rand(10:20), 1, 1, 1)
end

rmskew!(randtrial)
