diff --git a/src/execution.jl b/src/execution.jl
index 7df2aa02..cbaaa750 100644
--- a/src/execution.jl
+++ b/src/execution.jl
@@ -9,6 +9,7 @@ gcscrub() = (GC.gc(); GC.gc(); GC.gc(); GC.gc())
 
 mutable struct Benchmark
     samplefunc
+    linux_perf_func
     quote_vals
     params::Parameters
 end
@@ -106,15 +107,21 @@ function _run(b::Benchmark, p::Parameters; verbose=false, pad="", kwargs...)
     start_time = Base.time()
     trial = Trial(params)
     params.gcsample && gcscrub()
-    trial_contents = b.samplefunc(b.quote_vals, params)
-    push!(trial, trial_contents)
-    return_val = trial_contents.return_val
+    s = b.samplefunc(b.quote_vals, params)
+    push!(trial, s[1:(end - 1)]...)
+    return_val = s[end]
     iters = 2
     while (Base.time() - start_time) < params.seconds && iters ≤ params.samples
         params.gcsample && gcscrub()
-        push!(trial, b.samplefunc(b.quote_vals, params))
+        push!(trial, b.samplefunc(b.quote_vals, params)[1:(end - 1)]...)
         iters += 1
     end
+
+    if p.experimental_enable_linux_perf
+        params.gcsample && gcscrub()
+        trial.linux_perf_stats = b.linux_perf_func(b.quote_vals, params)
+    end
+
     return trial, return_val
 end
 
@@ -178,7 +185,7 @@ function _lineartrial(b::Benchmark, p::Parameters=b.params; maxevals=RESOLUTION,
     for evals in eachindex(estimates)
         params.gcsample && gcscrub()
         params.evals = evals
-        estimates[evals] = b.samplefunc(b.quote_vals, params).time
+        estimates[evals] = first(b.samplefunc(b.quote_vals, params))
         completed += 1
         ((time() - start_time) > params.seconds) && break
     end
@@ -506,6 +513,7 @@ function generate_benchmark_definition(
     @nospecialize
     corefunc = gensym("core")
     samplefunc = gensym("sample")
+    linux_perf_func = gensym("perf")
     type_vars = [gensym() for i in 1:(length(quote_vars) + length(setup_vars))]
     signature = Expr(:call, corefunc, quote_vars..., setup_vars...)
     signature_def = Expr(
@@ -572,64 +580,57 @@ function generate_benchmark_definition(
                         __evals,
                     ),
                 )
-                if $(params.experimental_enable_linux_perf)
-                    # Based on https://github.com/JuliaPerf/LinuxPerf.jl/blob/a7fee0ff261a5b5ce7a903af7b38d1b5c27dd931/src/LinuxPerf.jl#L1043-L1061
-                    __linux_perf_groups = BenchmarkTools.LinuxPerf.set_default_spaces(
-                        $(params.linux_perf_options.events),
-                        $(params.linux_perf_options.spaces),
+                return __time, __gctime, __memory, __allocs, __return_val
+            end
+            @noinline function $(linux_perf_func)(
+                $(Expr(:tuple, quote_vars...)), __params::$BenchmarkTools.Parameters
+            )
+                # Based on https://github.com/JuliaPerf/LinuxPerf.jl/blob/a7fee0ff261a5b5ce7a903af7b38d1b5c27dd931/src/LinuxPerf.jl#L1043-L1061
+                __linux_perf_groups = $LinuxPerf.set_default_spaces(
+                    eval(__params.linux_perf_options.events),
+                    eval(__params.linux_perf_options.spaces),
+                )
+                __linux_perf_bench = nothing
+                try
+                    __linux_perf_bench = $LinuxPerf.make_bench_threaded(
+                        __linux_perf_groups;
+                        threads=eval(__params.linux_perf_options.threads),
                     )
-                    __linux_perf_bench = nothing
-                    try
-                        __linux_perf_bench = BenchmarkTools.LinuxPerf.make_bench_threaded(
-                            __linux_perf_groups;
-                            threads=$(params.linux_perf_options.threads),
-                        )
-                    catch e
-                        if e isa ErrorException &&
-                            startswith(e.msg, "perf_event_open error : ")
-                            @warn "Perf is disabled"
-                        else
-                            rethrow()
-                        end
+                catch e
+                    if e isa ErrorException &&
+                        startswith(e.msg, "perf_event_open error : ")
+                        @warn "Perf is disabled" # Really we only want to do this if we defaulted to running with perf, otherwise we should just throw.
+                        # Given we now more accurately determine if perf is available can we do away with this hack?
+                    else
+                        rethrow()
                     end
+                end
 
-                    if !isnothing(__linux_perf_bench)
-                        try
-                            $(setup)
-                            BenchmarkTools.LinuxPerf.enable!(__linux_perf_bench)
-                            # We'll just run it one time.
-                            __return_val_2 = $(invocation)
-                            BenchmarkTools.LinuxPerf.disable!(__linux_perf_bench)
-                            # trick the compiler not to eliminate the code
-                            if rand() < 0
-                                __linux_perf_stats = __return_val_2
-                            else
-                                __linux_perf_stats = BenchmarkTools.LinuxPerf.Stats(
-                                    __linux_perf_bench
-                                )
-                            end
-                        catch
-                            rethrow()
-                        finally
-                            close(__linux_perf_bench)
-                            $(teardown)
+                if !isnothing(__linux_perf_bench)
+                    $(setup)
+                    try
+                        $LinuxPerf.enable!(__linux_perf_bench)
+                        # We'll just run it one time.
+                        __return_val_2 = $(invocation)
+                        $LinuxPerf.disable!(__linux_perf_bench)
+                        # trick the compiler not to eliminate the code
+                        if rand() < 0
+                            __linux_perf_stats = __return_val_2
+                        else
+                            __linux_perf_stats = $LinuxPerf.Stats(__linux_perf_bench)
                         end
+                        return __linux_perf_stats
+                    catch
+                        rethrow()
+                    finally
+                        close(__linux_perf_bench)
+                        $(teardown)
                     end
-                else
-                    __return_val_2 = nothing
-                    __linux_perf_stats = nothing
                 end
-                return BenchmarkTools.TrialContents(
-                    __time,
-                    __gctime,
-                    __memory,
-                    __allocs,
-                    __return_val,
-                    __return_val_2,
-                    __linux_perf_stats,
-                )
             end
-            $BenchmarkTools.Benchmark($(samplefunc), $(quote_vals), $(params))
+            $BenchmarkTools.Benchmark(
+                $(samplefunc), $(linux_perf_func), $(quote_vals), $(params)
+            )
         end,
     )
 end
diff --git a/src/parameters.jl b/src/parameters.jl
index 0c46c6bd..3b65a5de 100644
--- a/src/parameters.jl
+++ b/src/parameters.jl
@@ -30,7 +30,7 @@ function perf_available()
     try
         opts = DEFAULT_LINUX_PERF_OPTIONS
        groups = LinuxPerf.set_default_spaces(eval(opts.events), eval(opts.spaces))
-        bench = LinuxPerf.make_bench_threaded(groups, threads = eval(opts.threads))
+        bench = LinuxPerf.make_bench_threaded(groups; threads=eval(opts.threads))
         return true
     catch
         return false
diff --git a/src/trials.jl b/src/trials.jl
index 9280bb9b..18a931de 100644
--- a/src/trials.jl
+++ b/src/trials.jl
@@ -11,16 +11,6 @@ mutable struct Trial
     linux_perf_stats::Union{LinuxPerf.Stats,Nothing}
 end
 
-struct TrialContents{A,B}
-    time::Float64
-    gctime::Float64
-    memory::Int
-    allocs::Int
-    return_val::A
-    return_val_2::B
-    linux_perf_stats::Union{LinuxPerf.Stats,Nothing}
-end
-
 function Trial(params::Parameters)
     return Trial(params, Float64[], Float64[], typemax(Int), typemax(Int), nothing)
 end
@@ -44,16 +34,11 @@ function Base.copy(t::Trial)
     )
 end
 
-function Base.push!(t::Trial, trial_contents::TrialContents)
-    time = trial_contents.time
-    gctime = trial_contents.gctime
-    memory = trial_contents.memory
-    allocs = trial_contents.allocs
+function Base.push!(t::Trial, time, gctime, memory, allocs)
     push!(t.times, time)
     push!(t.gctimes, gctime)
     memory < t.memory && (t.memory = memory)
     allocs < t.allocs && (t.allocs = allocs)
-    t.linux_perf_stats = trial_contents.linux_perf_stats
     return t
 end
 
@@ -65,17 +50,8 @@ end
 
 Base.length(t::Trial) = length(t.times)
 function Base.getindex(t::Trial, i::Number)
-    return push!(
-        Trial(t.params),
-        TrialContents(
-            t.times[i],
-            t.gctimes[i],
-            t.memory,
-            t.allocs,
-            nothing,
-            nothing,
-            t.linux_perf_stats,
-        ),
+    return Trial(
+        t.params, [t.times[i]], [t.gctimes[i]], t.memory, t.allocs, t.linux_perf_stats
     )
 end
 function Base.getindex(t::Trial, i)
diff --git a/test/TrialsTests.jl b/test/TrialsTests.jl
index c666bf8e..3cd960d9 100644
--- a/test/TrialsTests.jl
+++ b/test/TrialsTests.jl
@@ -1,21 +1,21 @@
 module TrialsTests
 
 using BenchmarkTools
-using BenchmarkTools: TrialContents
 using Test
 
 #########
 # Trial #
 #########
+
 trial1 = BenchmarkTools.Trial(BenchmarkTools.Parameters(; evals=2))
-push!(trial1, TrialContents(2.0, 1.0, 4, 5, nothing, nothing, nothing))
-push!(trial1, TrialContents(21.0, 0.0, 41, 51, nothing, nothing, nothing))
+push!(trial1, 2, 1, 4, 5)
+push!(trial1, 21, 0, 41, 51)
 
 trial2 = BenchmarkTools.Trial(BenchmarkTools.Parameters(; time_tolerance=0.15))
-push!(trial2, TrialContents(21.0, 0.0, 41, 51, nothing, nothing, nothing))
-push!(trial2, TrialContents(2.0, 1.0, 4, 5, nothing, nothing, nothing))
+push!(trial2, 21, 0, 41, 51)
+push!(trial2, 2, 1, 4, 5)
 
-push!(trial2, TrialContents(21.0, 0.0, 41, 51, nothing, nothing, nothing))
+push!(trial2, 21, 0, 41, 51)
 @test length(trial2) == 3
 deleteat!(trial2, 3)
 @test length(trial1) == length(trial2) == 2
@@ -33,10 +33,8 @@ trial2.params = trial1.params
 
 @test trial1 == trial2
 
-@test trial1[2] == push!(
-    BenchmarkTools.Trial(BenchmarkTools.Parameters(; evals=2)),
-    TrialContents(21.0, 0.0, 4, 5, nothing, nothing, nothing),
-)
+@test trial1[2] ==
+    push!(BenchmarkTools.Trial(BenchmarkTools.Parameters(; evals=2)), 21, 0, 4, 5)
 @test trial1[1:end] == trial1
 
 @test time(trial1) == time(trial2) == 2.0
@@ -63,11 +61,11 @@ rmskew!(trial3)
 
 randtrial = BenchmarkTools.Trial(BenchmarkTools.Parameters())
 for _ in 1:40
-    push!(randtrial, TrialContents(rand(1.0:20.0), 1.0, 1, 1, nothing, nothing, nothing))
+    push!(randtrial, rand(1:20), 1, 1, 1)
 end
 while mean(randtrial) <= median(randtrial)
-    push!(randtrial, TrialContents(rand(10.0:20.0), 1.0, 1, 1, nothing, nothing, nothing))
+    push!(randtrial, rand(10:20), 1, 1, 1)
 end
 rmskew!(randtrial)