From 504eed38b15c4bbe3e50aeac5fcdff57e12da3ce Mon Sep 17 00:00:00 2001
From: Zentrik
Date: Sat, 30 Dec 2023 22:55:58 +0000
Subject: [PATCH] Only run perf once instead of per sample

Running perf on every sample would probably slow down all benchmarks
significantly for no gain, as we only store the last recorded perf
result.
---
 src/execution.jl    | 113 ++++++++++++++++++++++----------------------
 src/parameters.jl   |   2 +-
 src/trials.jl       |  30 ++----------
 test/TrialsTests.jl |  22 ++++-----
 4 files changed, 71 insertions(+), 96 deletions(-)

diff --git a/src/execution.jl b/src/execution.jl
index 7df2aa02..cbaaa750 100644
--- a/src/execution.jl
+++ b/src/execution.jl
@@ -9,6 +9,7 @@ gcscrub() = (GC.gc(); GC.gc(); GC.gc(); GC.gc())
 
 mutable struct Benchmark
     samplefunc
+    linux_perf_func
     quote_vals
     params::Parameters
 end
@@ -106,15 +107,21 @@ function _run(b::Benchmark, p::Parameters; verbose=false, pad="", kwargs...)
     start_time = Base.time()
     trial = Trial(params)
     params.gcsample && gcscrub()
-    trial_contents = b.samplefunc(b.quote_vals, params)
-    push!(trial, trial_contents)
-    return_val = trial_contents.return_val
+    s = b.samplefunc(b.quote_vals, params)
+    push!(trial, s[1:(end - 1)]...)
+    return_val = s[end]
     iters = 2
     while (Base.time() - start_time) < params.seconds && iters ≤ params.samples
         params.gcsample && gcscrub()
-        push!(trial, b.samplefunc(b.quote_vals, params))
+        push!(trial, b.samplefunc(b.quote_vals, params)[1:(end - 1)]...)
         iters += 1
     end
+
+    if p.experimental_enable_linux_perf
+        params.gcsample && gcscrub()
+        trial.linux_perf_stats = b.linux_perf_func(b.quote_vals, params)
+    end
+
     return trial, return_val
 end
 
@@ -178,7 +185,7 @@ function _lineartrial(b::Benchmark, p::Parameters=b.params; maxevals=RESOLUTION,
     for evals in eachindex(estimates)
         params.gcsample && gcscrub()
         params.evals = evals
-        estimates[evals] = b.samplefunc(b.quote_vals, params).time
+        estimates[evals] = first(b.samplefunc(b.quote_vals, params))
         completed += 1
         ((time() - start_time) > params.seconds) && break
     end
@@ -506,6 +513,7 @@ function generate_benchmark_definition(
     @nospecialize
     corefunc = gensym("core")
     samplefunc = gensym("sample")
+    linux_perf_func = gensym("perf")
     type_vars = [gensym() for i in 1:(length(quote_vars) + length(setup_vars))]
     signature = Expr(:call, corefunc, quote_vars..., setup_vars...)
     signature_def = Expr(
@@ -572,64 +580,57 @@ function generate_benchmark_definition(
                     __evals,
                 ),
             )
-            if $(params.experimental_enable_linux_perf)
-                # Based on https://github.com/JuliaPerf/LinuxPerf.jl/blob/a7fee0ff261a5b5ce7a903af7b38d1b5c27dd931/src/LinuxPerf.jl#L1043-L1061
-                __linux_perf_groups = BenchmarkTools.LinuxPerf.set_default_spaces(
-                    $(params.linux_perf_options.events),
-                    $(params.linux_perf_options.spaces),
+            return __time, __gctime, __memory, __allocs, __return_val
+        end
+        @noinline function $(linux_perf_func)(
+            $(Expr(:tuple, quote_vars...)), __params::$BenchmarkTools.Parameters
+        )
+            # Based on https://github.com/JuliaPerf/LinuxPerf.jl/blob/a7fee0ff261a5b5ce7a903af7b38d1b5c27dd931/src/LinuxPerf.jl#L1043-L1061
+            __linux_perf_groups = $LinuxPerf.set_default_spaces(
+                eval(__params.linux_perf_options.events),
+                eval(__params.linux_perf_options.spaces),
+            )
+            __linux_perf_bench = nothing
+            try
+                __linux_perf_bench = $LinuxPerf.make_bench_threaded(
+                    __linux_perf_groups;
+                    threads=eval(__params.linux_perf_options.threads),
                 )
-                __linux_perf_bench = nothing
-                try
-                    __linux_perf_bench = BenchmarkTools.LinuxPerf.make_bench_threaded(
-                        __linux_perf_groups;
-                        threads=$(params.linux_perf_options.threads),
-                    )
-                catch e
-                    if e isa ErrorException &&
-                        startswith(e.msg, "perf_event_open error : ")
-                        @warn "Perf is disabled"
-                    else
-                        rethrow()
-                    end
+            catch e
+                if e isa ErrorException &&
+                    startswith(e.msg, "perf_event_open error : ")
+                    @warn "Perf is disabled" # Really we only want to do this if we defaulted to running with perf; otherwise we should just throw.
+                    # Given that we now determine more accurately whether perf is available, can we do away with this hack?
+                else
+                    rethrow()
                 end
+            end
 
-                if !isnothing(__linux_perf_bench)
-                    try
-                        $(setup)
-                        BenchmarkTools.LinuxPerf.enable!(__linux_perf_bench)
-                        # We'll just run it one time.
-                        __return_val_2 = $(invocation)
-                        BenchmarkTools.LinuxPerf.disable!(__linux_perf_bench)
-                        # trick the compiler not to eliminate the code
-                        if rand() < 0
-                            __linux_perf_stats = __return_val_2
-                        else
-                            __linux_perf_stats = BenchmarkTools.LinuxPerf.Stats(
-                                __linux_perf_bench
-                            )
-                        end
-                    catch
-                        rethrow()
-                    finally
-                        close(__linux_perf_bench)
-                        $(teardown)
+            if !isnothing(__linux_perf_bench)
+                $(setup)
+                try
+                    $LinuxPerf.enable!(__linux_perf_bench)
+                    # We'll just run it one time.
+ __return_val_2 = $(invocation) + $LinuxPerf.disable!(__linux_perf_bench) + # trick the compiler not to eliminate the code + if rand() < 0 + __linux_perf_stats = __return_val_2 + else + __linux_perf_stats = $LinuxPerf.Stats(__linux_perf_bench) end + return __linux_perf_stats + catch + rethrow() + finally + close(__linux_perf_bench) + $(teardown) end - else - __return_val_2 = nothing - __linux_perf_stats = nothing end - return BenchmarkTools.TrialContents( - __time, - __gctime, - __memory, - __allocs, - __return_val, - __return_val_2, - __linux_perf_stats, - ) end - $BenchmarkTools.Benchmark($(samplefunc), $(quote_vals), $(params)) + $BenchmarkTools.Benchmark( + $(samplefunc), $(linux_perf_func), $(quote_vals), $(params) + ) end, ) end diff --git a/src/parameters.jl b/src/parameters.jl index 0c46c6bd..3b65a5de 100644 --- a/src/parameters.jl +++ b/src/parameters.jl @@ -30,7 +30,7 @@ function perf_available() try opts = DEFAULT_LINUX_PERF_OPTIONS groups = LinuxPerf.set_default_spaces(eval(opts.events), eval(opts.spaces)) - bench = LinuxPerf.make_bench_threaded(groups, threads = eval(opts.threads)) + bench = LinuxPerf.make_bench_threaded(groups; threads=eval(opts.threads)) return true catch return false diff --git a/src/trials.jl b/src/trials.jl index 9280bb9b..18a931de 100644 --- a/src/trials.jl +++ b/src/trials.jl @@ -11,16 +11,6 @@ mutable struct Trial linux_perf_stats::Union{LinuxPerf.Stats,Nothing} end -struct TrialContents{A,B} - time::Float64 - gctime::Float64 - memory::Int - allocs::Int - return_val::A - return_val_2::B - linux_perf_stats::Union{LinuxPerf.Stats,Nothing} -end - function Trial(params::Parameters) return Trial(params, Float64[], Float64[], typemax(Int), typemax(Int), nothing) end @@ -44,16 +34,11 @@ function Base.copy(t::Trial) ) end -function Base.push!(t::Trial, trial_contents::TrialContents) - time = trial_contents.time - gctime = trial_contents.gctime - memory = trial_contents.memory - allocs = trial_contents.allocs +function Base.push!(t::Trial, time, gctime, memory, allocs) push!(t.times, time) push!(t.gctimes, gctime) memory < t.memory && (t.memory = memory) allocs < t.allocs && (t.allocs = allocs) - t.linux_perf_stats = trial_contents.linux_perf_stats return t end @@ -65,17 +50,8 @@ end Base.length(t::Trial) = length(t.times) function Base.getindex(t::Trial, i::Number) - return push!( - Trial(t.params), - TrialContents( - t.times[i], - t.gctimes[i], - t.memory, - t.allocs, - nothing, - nothing, - t.linux_perf_stats, - ), + return Trial( + t.params, [t.times[i]], [t.gctimes[i]], t.memory, t.allocs, t.linux_perf_stats ) end function Base.getindex(t::Trial, i) diff --git a/test/TrialsTests.jl b/test/TrialsTests.jl index c666bf8e..3cd960d9 100644 --- a/test/TrialsTests.jl +++ b/test/TrialsTests.jl @@ -1,21 +1,21 @@ module TrialsTests using BenchmarkTools -using BenchmarkTools: TrialContents using Test ######### # Trial # ######### + trial1 = BenchmarkTools.Trial(BenchmarkTools.Parameters(; evals=2)) -push!(trial1, TrialContents(2.0, 1.0, 4, 5, nothing, nothing, nothing)) -push!(trial1, TrialContents(21.0, 0.0, 41, 51, nothing, nothing, nothing)) +push!(trial1, 2, 1, 4, 5) +push!(trial1, 21, 0, 41, 51) trial2 = BenchmarkTools.Trial(BenchmarkTools.Parameters(; time_tolerance=0.15)) -push!(trial2, TrialContents(21.0, 0.0, 41, 51, nothing, nothing, nothing)) -push!(trial2, TrialContents(2.0, 1.0, 4, 5, nothing, nothing, nothing)) +push!(trial2, 21, 0, 41, 51) +push!(trial2, 2, 1, 4, 5) -push!(trial2, TrialContents(21.0, 0.0, 41, 51, nothing, nothing, nothing)) 
+push!(trial2, 21, 0, 41, 51) @test length(trial2) == 3 deleteat!(trial2, 3) @test length(trial1) == length(trial2) == 2 @@ -33,10 +33,8 @@ trial2.params = trial1.params @test trial1 == trial2 -@test trial1[2] == push!( - BenchmarkTools.Trial(BenchmarkTools.Parameters(; evals=2)), - TrialContents(21.0, 0.0, 4, 5, nothing, nothing, nothing), -) +@test trial1[2] == + push!(BenchmarkTools.Trial(BenchmarkTools.Parameters(; evals=2)), 21, 0, 4, 5) @test trial1[1:end] == trial1 @test time(trial1) == time(trial2) == 2.0 @@ -63,11 +61,11 @@ rmskew!(trial3) randtrial = BenchmarkTools.Trial(BenchmarkTools.Parameters()) for _ in 1:40 - push!(randtrial, TrialContents(rand(1.0:20.0), 1.0, 1, 1, nothing, nothing, nothing)) + push!(randtrial, rand(1:20), 1, 1, 1) end while mean(randtrial) <= median(randtrial) - push!(randtrial, TrialContents(rand(10.0:20.0), 1.0, 1, 1, nothing, nothing, nothing)) + push!(randtrial, rand(10:20), 1, 1, 1) end rmskew!(randtrial)
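
A minimal usage sketch of the post-patch behaviour follows (illustrative only,
not part of the diff). It assumes `Parameters` carries the
`experimental_enable_linux_perf` flag that `_run` consults above, that the
public `run` wrapper still returns the `Trial` (internally `_run` now also
carries the benchmark's return value), and it uses `sum(rand(1000))` as a
stand-in workload.

    using BenchmarkTools

    b = @benchmarkable sum(rand(1000))
    tune!(b)

    # Enable the experimental perf integration on this benchmark's parameters;
    # _run reads this flag from the Parameters it is handed.
    b.params.experimental_enable_linux_perf = true

    # Timed samples are collected exactly as before; perf now wraps one extra
    # invocation after sampling instead of instrumenting every sample.
    trial = run(b)

    # Stats from that single perf-wrapped call, or `nothing` if perf could not
    # be started on this machine.
    trial.linux_perf_stats

    # The internal sample API changed to match: a Trial takes the four scalars
    # directly instead of a TrialContents wrapper (cf. test/TrialsTests.jl).
    t = BenchmarkTools.Trial(BenchmarkTools.Parameters())
    push!(t, 2.0, 1.0, 4, 5)  # time, gctime, memory, allocs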