diff --git a/kifmm/Cargo.toml b/kifmm/Cargo.toml
index 8b6664e4..2f98c914 100644
--- a/kifmm/Cargo.toml
+++ b/kifmm/Cargo.toml
@@ -67,6 +67,14 @@ harness = false
 name = "laplace_direct_f32"
 harness = false
 
+[[bench]]
+name = "helmholtz_direct_f64"
+harness = false
+
+[[bench]]
+name = "helmholtz_direct_f32"
+harness = false
+
 [package.metadata.docs.rs]
 cargo-args = ["-Zunstable-options", "-Zrustdoc-scrape-examples"]
 rustdoc-args = [ "--html-in-header", "./src/docs-header.html" ]
diff --git a/kifmm/benches/helmholtz_direct_f32.rs b/kifmm/benches/helmholtz_direct_f32.rs
new file mode 100644
--- /dev/null
+++ b/kifmm/benches/helmholtz_direct_f32.rs
@@ -0,0 +1,187 @@
+//! Criterion benchmarks for direct evaluation of the 3D Helmholtz kernel with
+//! `c32` charges and an `f32` wavenumber.
+
+use std::time::Duration;
+
+use criterion::{criterion_group, criterion_main, Criterion};
+use green_kernels::traits::Kernel;
+use green_kernels::{helmholtz_3d::Helmholtz3dKernel, types::GreenKernelEvalType};
+use kifmm::tree::helpers::points_fixture;
+use num::{Complex, One};
+use rlst::{c32, rlst_dynamic_array2, RawAccess, RawAccessMut};
+
+/// Benchmarks `Kernel::evaluate_mt` for N = 20000, 100000 and 500000 sources and targets.
+fn multithreaded_direct_f32(c: &mut Criterion) {
+    let mut group = c.benchmark_group("Multi Threaded Direct f32");
+
+    group
+        .sample_size(10)
+        .measurement_time(Duration::from_secs(15));
+
+    {
+        let nsources = 20000;
+        let ntargets = 20000;
+        let sources = points_fixture::<f32>(nsources, None, None, Some(0));
+        let targets = points_fixture::<f32>(ntargets, None, None, Some(1));
+
+        // Unit charges for a single charge vector
+        let nvecs = 1;
+        let tmp = vec![Complex::one(); nsources * nvecs];
+        let mut charges = rlst_dynamic_array2!(c32, [nsources, nvecs]);
+        charges.data_mut().copy_from_slice(&tmp);
+        let wavenumber = 1f32;
+
+        let kernel = Helmholtz3dKernel::new(wavenumber);
+
+        let mut result = rlst_dynamic_array2!(c32, [ntargets, nvecs]);
+
+        group.bench_function(format!("N={nsources}"), |b| {
+            b.iter(|| {
+                kernel.evaluate_mt(
+                    GreenKernelEvalType::Value,
+                    sources.data(),
+                    targets.data(),
+                    charges.data(),
+                    result.data_mut(),
+                )
+            })
+        });
+    }
+
+    {
+        let nsources = 100000;
+        let ntargets = 100000;
+        let sources = points_fixture::<f32>(nsources, None, None, Some(0));
+        let targets = points_fixture::<f32>(ntargets, None, None, Some(1));
+
+        // Unit charges for a single charge vector
+        let nvecs = 1;
+        let tmp = vec![Complex::one(); nsources * nvecs];
+        let mut charges = rlst_dynamic_array2!(c32, [nsources, nvecs]);
+        charges.data_mut().copy_from_slice(&tmp);
+        let wavenumber = 1f32;
+
+        let kernel = Helmholtz3dKernel::new(wavenumber);
+
+        let mut result = rlst_dynamic_array2!(c32, [ntargets, nvecs]);
+
+        group.bench_function(format!("N={nsources}"), |b| {
+            b.iter(|| {
+                kernel.evaluate_mt(
+                    GreenKernelEvalType::Value,
+                    sources.data(),
+                    targets.data(),
+                    charges.data(),
+                    result.data_mut(),
+                )
+            })
+        });
+    }
+
+    {
+        let nsources = 500000;
+        let ntargets = 500000;
+        let sources = points_fixture::<f32>(nsources, None, None, Some(0));
+        let targets = points_fixture::<f32>(ntargets, None, None, Some(1));
+
+        // Unit charges for a single charge vector
+        let nvecs = 1;
+        let tmp = vec![Complex::one(); nsources * nvecs];
+        let mut charges = rlst_dynamic_array2!(c32, [nsources, nvecs]);
+        charges.data_mut().copy_from_slice(&tmp);
+        let wavenumber = 1f32;
+
+        let kernel = Helmholtz3dKernel::new(wavenumber);
+
+        let mut result = rlst_dynamic_array2!(c32, [ntargets, nvecs]);
+
+        group.bench_function(format!("N={nsources}"), |b| {
+            b.iter(|| {
+                kernel.evaluate_mt(
+                    GreenKernelEvalType::Value,
+                    sources.data(),
+                    targets.data(),
+                    charges.data(),
+                    result.data_mut(),
+                )
+            })
+        });
+    }
+
+    group.finish();
+}
+
+/// Benchmarks `Kernel::evaluate_st` for N = 5000 and 20000 sources and targets.
+fn singlethreaded_direct_f32(c: &mut Criterion) {
+    let mut group = c.benchmark_group("Single Threaded Direct f32");
+
+    group
+        .sample_size(10)
+        .measurement_time(Duration::from_secs(15));
+
+    {
+        let nsources = 5000;
+        let ntargets = 5000;
+        let sources = points_fixture::<f32>(nsources, None, None, Some(0));
+        let targets = points_fixture::<f32>(ntargets, None, None, Some(1));
+
+        // Unit charges for a single charge vector
+        let nvecs = 1;
+        let tmp = vec![Complex::one(); nsources * nvecs];
+        let mut charges = rlst_dynamic_array2!(c32, [nsources, nvecs]);
+        charges.data_mut().copy_from_slice(&tmp);
+        let wavenumber = 1f32;
+
+        let kernel = Helmholtz3dKernel::new(wavenumber);
+
+        let mut result = rlst_dynamic_array2!(c32, [ntargets, nvecs]);
+
+        group.bench_function(format!("N={nsources}"), |b| {
+            b.iter(|| {
+                kernel.evaluate_st(
+                    GreenKernelEvalType::Value,
+                    sources.data(),
+                    targets.data(),
+                    charges.data(),
+                    result.data_mut(),
+                )
+            })
+        });
+    }
+
+    {
+        let nsources = 20000;
+        let ntargets = 20000;
+        let sources = points_fixture::<f32>(nsources, None, None, Some(0));
+        let targets = points_fixture::<f32>(ntargets, None, None, Some(1));
+
+        // Unit charges for a single charge vector
+        let nvecs = 1;
+        let tmp = vec![Complex::one(); nsources * nvecs];
+        let mut charges = rlst_dynamic_array2!(c32, [nsources, nvecs]);
+        charges.data_mut().copy_from_slice(&tmp);
+        let wavenumber = 1f32;
+
+        let kernel = Helmholtz3dKernel::new(wavenumber);
+
+        let mut result = rlst_dynamic_array2!(c32, [ntargets, nvecs]);
+
+        group.bench_function(format!("N={nsources}"), |b| {
+            b.iter(|| {
+                kernel.evaluate_st(
+                    GreenKernelEvalType::Value,
+                    sources.data(),
+                    targets.data(),
+                    charges.data(),
+                    result.data_mut(),
+                )
+            })
+        });
+    }
+
+    group.finish();
+}
+
+criterion_group!(d_f32, multithreaded_direct_f32, singlethreaded_direct_f32);
+
+criterion_main!(d_f32);
diff --git a/kifmm/benches/helmholtz_direct_f64.rs b/kifmm/benches/helmholtz_direct_f64.rs
new file mode 100644
--- /dev/null
+++ b/kifmm/benches/helmholtz_direct_f64.rs
@@ -0,0 +1,187 @@
+//! Criterion benchmarks for direct evaluation of the 3D Helmholtz kernel with
+//! `c64` charges and an `f64` wavenumber.
+
+use std::time::Duration;
+
+use criterion::{criterion_group, criterion_main, Criterion};
+use green_kernels::traits::Kernel;
+use green_kernels::{helmholtz_3d::Helmholtz3dKernel, types::GreenKernelEvalType};
+use kifmm::tree::helpers::points_fixture;
+use num::{Complex, One};
+use rlst::{c64, rlst_dynamic_array2, RawAccess, RawAccessMut};
+
+/// Benchmarks `Kernel::evaluate_mt` for N = 20000, 100000 and 500000 sources and targets.
+fn multithreaded_direct_f64(c: &mut Criterion) {
+    let mut group = c.benchmark_group("Multi Threaded Direct f64");
+
+    group
+        .sample_size(10)
+        .measurement_time(Duration::from_secs(15));
+
+    {
+        let nsources = 20000;
+        let ntargets = 20000;
+        let sources = points_fixture::<f64>(nsources, None, None, Some(0));
+        let targets = points_fixture::<f64>(ntargets, None, None, Some(1));
+
+        // Unit charges for a single charge vector
+        let nvecs = 1;
+        let tmp = vec![Complex::one(); nsources * nvecs];
+        let mut charges = rlst_dynamic_array2!(c64, [nsources, nvecs]);
+        charges.data_mut().copy_from_slice(&tmp);
+        let wavenumber = 1f64;
+
+        let kernel = Helmholtz3dKernel::new(wavenumber);
+
+        let mut result = rlst_dynamic_array2!(c64, [ntargets, nvecs]);
+
+        group.bench_function(format!("N={nsources}"), |b| {
+            b.iter(|| {
+                kernel.evaluate_mt(
+                    GreenKernelEvalType::Value,
+                    sources.data(),
+                    targets.data(),
+                    charges.data(),
+                    result.data_mut(),
+                )
+            })
+        });
+    }
+
+    {
+        let nsources = 100000;
+        let ntargets = 100000;
+        let sources = points_fixture::<f64>(nsources, None, None, Some(0));
+        let targets = points_fixture::<f64>(ntargets, None, None, Some(1));
+
+        // Unit charges for a single charge vector
+        let nvecs = 1;
+        let tmp = vec![Complex::one(); nsources * nvecs];
+        let mut charges = rlst_dynamic_array2!(c64, [nsources, nvecs]);
+        charges.data_mut().copy_from_slice(&tmp);
+        let wavenumber = 1f64;
+
+        let kernel = Helmholtz3dKernel::new(wavenumber);
+
+        let mut result = rlst_dynamic_array2!(c64, [ntargets, nvecs]);
+
+        group.bench_function(format!("N={nsources}"), |b| {
+            b.iter(|| {
+                kernel.evaluate_mt(
+                    GreenKernelEvalType::Value,
+                    sources.data(),
+                    targets.data(),
+                    charges.data(),
+                    result.data_mut(),
+                )
+            })
+        });
+    }
+
+    {
+        let nsources = 500000;
+        let ntargets = 500000;
+        let sources = points_fixture::<f64>(nsources, None, None, Some(0));
+        let targets = points_fixture::<f64>(ntargets, None, None, Some(1));
+
+        // Unit charges for a single charge vector
+        let nvecs = 1;
+        let tmp = vec![Complex::one(); nsources * nvecs];
+        let mut charges = rlst_dynamic_array2!(c64, [nsources, nvecs]);
+        charges.data_mut().copy_from_slice(&tmp);
+        let wavenumber = 1f64;
+
+        let kernel = Helmholtz3dKernel::new(wavenumber);
+
+        let mut result = rlst_dynamic_array2!(c64, [ntargets, nvecs]);
+
+        group.bench_function(format!("N={nsources}"), |b| {
+            b.iter(|| {
+                kernel.evaluate_mt(
+                    GreenKernelEvalType::Value,
+                    sources.data(),
+                    targets.data(),
+                    charges.data(),
+                    result.data_mut(),
+                )
+            })
+        });
+    }
+
+    group.finish();
+}
+
+/// Benchmarks `Kernel::evaluate_st` for N = 5000 and 20000 sources and targets.
+fn singlethreaded_direct_f64(c: &mut Criterion) {
+    let mut group = c.benchmark_group("Single Threaded Direct f64");
+
+    group
+        .sample_size(10)
+        .measurement_time(Duration::from_secs(15));
+
+    {
+        let nsources = 5000;
+        let ntargets = 5000;
+        let sources = points_fixture::<f64>(nsources, None, None, Some(0));
+        let targets = points_fixture::<f64>(ntargets, None, None, Some(1));
+
+        // Unit charges for a single charge vector
+        let nvecs = 1;
+        let tmp = vec![Complex::one(); nsources * nvecs];
+        let mut charges = rlst_dynamic_array2!(c64, [nsources, nvecs]);
+        charges.data_mut().copy_from_slice(&tmp);
+        let wavenumber = 1f64;
+
+        let kernel = Helmholtz3dKernel::new(wavenumber);
+
+        let mut result = rlst_dynamic_array2!(c64, [ntargets, nvecs]);
+
+        group.bench_function(format!("N={nsources}"), |b| {
+            b.iter(|| {
+                kernel.evaluate_st(
+                    GreenKernelEvalType::Value,
+                    sources.data(),
+                    targets.data(),
+                    charges.data(),
+                    result.data_mut(),
+                )
+            })
+        });
+    }
+
+    {
+        let nsources = 20000;
+        let ntargets = 20000;
+        let sources = points_fixture::<f64>(nsources, None, None, Some(0));
+        let targets = points_fixture::<f64>(ntargets, None, None, Some(1));
+
+        // Unit charges for a single charge vector
+        let nvecs = 1;
+        let tmp = vec![Complex::one(); nsources * nvecs];
+        let mut charges = rlst_dynamic_array2!(c64, [nsources, nvecs]);
+        charges.data_mut().copy_from_slice(&tmp);
+        let wavenumber = 1f64;
+
+        let kernel = Helmholtz3dKernel::new(wavenumber);
+
+        let mut result = rlst_dynamic_array2!(c64, [ntargets, nvecs]);
+
+        group.bench_function(format!("N={nsources}"), |b| {
+            b.iter(|| {
+                kernel.evaluate_st(
+                    GreenKernelEvalType::Value,
+                    sources.data(),
+                    targets.data(),
+                    charges.data(),
+                    result.data_mut(),
+                )
+            })
+        });
+    }
+
+    group.finish();
+}
+
+criterion_group!(d_f64, multithreaded_direct_f64, singlethreaded_direct_f64);
+
+criterion_main!(d_f64);
diff --git a/kifmm/benches/laplace_direct_f64.rs b/kifmm/benches/laplace_direct_f64.rs
index be2f9400..98b03822 100644
--- a/kifmm/benches/laplace_direct_f64.rs
+++ b/kifmm/benches/laplace_direct_f64.rs
@@ -6,8 +6,6 @@ use green_kernels::{laplace_3d::Laplace3dKernel, types::GreenKernelEvalType};
 use kifmm::tree::helpers::points_fixture;
 use rlst::{rlst_dynamic_array2, RawAccess, RawAccessMut};
 
-extern crate blas_src;
-extern crate lapack_src;
 
 fn multithreaded_direct_f64(c: &mut Criterion) {
     let mut group = c.benchmark_group("Multi Threaded Direct f64");