Note: the post-image blob hashes on the `index` lines of the new bench files are
carried over from the original patch and do not match the revised contents.

diff --git a/kifmm/Cargo.toml b/kifmm/Cargo.toml
index 7fc6a33a..2f98c914 100644
--- a/kifmm/Cargo.toml
+++ b/kifmm/Cargo.toml
@@ -59,6 +59,22 @@ harness = false
 name = "laplace_f64"
 harness = false
 
+[[bench]]
+name = "laplace_direct_f64"
+harness = false
+
+[[bench]]
+name = "laplace_direct_f32"
+harness = false
+
+[[bench]]
+name = "helmholtz_direct_f64"
+harness = false
+
+[[bench]]
+name = "helmholtz_direct_f32"
+harness = false
+
 [package.metadata.docs.rs]
 cargo-args = ["-Zunstable-options", "-Zrustdoc-scrape-examples"]
 rustdoc-args = [ "--html-in-header", "./src/docs-header.html" ]
diff --git a/kifmm/benches/helmholtz_direct_f32.rs b/kifmm/benches/helmholtz_direct_f32.rs
new file mode 100644
index 00000000..047ac11a
--- /dev/null
+++ b/kifmm/benches/helmholtz_direct_f32.rs
@@ -0,0 +1,79 @@
+//! Criterion benchmarks for direct evaluation of the single precision Helmholtz kernel.
+
+use std::time::Duration;
+
+use criterion::{criterion_group, criterion_main, Criterion};
+use green_kernels::traits::Kernel;
+use green_kernels::{helmholtz_3d::Helmholtz3dKernel, types::GreenKernelEvalType};
+use kifmm::tree::helpers::points_fixture;
+use num::{Complex, One};
+use rlst::{c32, rlst_dynamic_array2, RawAccess, RawAccessMut};
+
+/// Wavenumber passed to the Helmholtz kernel in every benchmark of this file.
+const WAVENUMBER: f32 = 1.0;
+
+/// Benchmarks direct kernel evaluation for every problem size in `sizes`.
+///
+/// Each size `n` uses `n` sources and `n` targets from `points_fixture` and a single
+/// vector of unit charges. `multithreaded` selects `evaluate_mt` over `evaluate_st`.
+/// Results are reported under `group_name` with the benchmark id `N={n}`.
+fn bench_direct(c: &mut Criterion, group_name: &str, sizes: &[usize], multithreaded: bool) {
+    let mut group = c.benchmark_group(group_name);
+
+    group
+        .sample_size(10)
+        .measurement_time(Duration::from_secs(15));
+
+    for &n in sizes {
+        let (nsources, ntargets) = (n, n);
+        let sources = points_fixture::<f32>(nsources, None, None, Some(0));
+        let targets = points_fixture::<f32>(ntargets, None, None, Some(1));
+
+        // A single vector of unit charges, one charge per source
+        let nvecs = 1;
+        let mut charges = rlst_dynamic_array2!(c32, [nsources, nvecs]);
+        charges.data_mut().fill(Complex::one());
+
+        let kernel = Helmholtz3dKernel::new(WAVENUMBER);
+
+        // One output value per target and charge vector
+        let mut result = rlst_dynamic_array2!(c32, [ntargets, nvecs]);
+
+        group.bench_function(format!("N={nsources}"), |b| {
+            b.iter(|| {
+                if multithreaded {
+                    kernel.evaluate_mt(
+                        GreenKernelEvalType::Value,
+                        sources.data(),
+                        targets.data(),
+                        charges.data(),
+                        result.data_mut(),
+                    )
+                } else {
+                    kernel.evaluate_st(
+                        GreenKernelEvalType::Value,
+                        sources.data(),
+                        targets.data(),
+                        charges.data(),
+                        result.data_mut(),
+                    )
+                }
+            })
+        });
+    }
+
+    group.finish();
+}
+
+/// Multi-threaded direct evaluation benchmarks.
+fn multithreaded_direct_f32(c: &mut Criterion) {
+    bench_direct(c, "Helmholtz Multi Threaded Direct f32", &[20000, 100000, 500000], true);
+}
+
+/// Single-threaded direct evaluation benchmarks.
+fn singlethreaded_direct_f32(c: &mut Criterion) {
+    bench_direct(c, "Helmholtz Single Threaded Direct f32", &[5000, 20000], false);
+}
+
+criterion_group!(d_f32, multithreaded_direct_f32, singlethreaded_direct_f32);
+criterion_main!(d_f32);
diff --git a/kifmm/benches/helmholtz_direct_f64.rs b/kifmm/benches/helmholtz_direct_f64.rs
new file mode 100644
index 00000000..c1f764c5
--- /dev/null
+++ b/kifmm/benches/helmholtz_direct_f64.rs
@@ -0,0 +1,79 @@
+//! Criterion benchmarks for direct evaluation of the double precision Helmholtz kernel.
+
+use std::time::Duration;
+
+use criterion::{criterion_group, criterion_main, Criterion};
+use green_kernels::traits::Kernel;
+use green_kernels::{helmholtz_3d::Helmholtz3dKernel, types::GreenKernelEvalType};
+use kifmm::tree::helpers::points_fixture;
+use num::{Complex, One};
+use rlst::{c64, rlst_dynamic_array2, RawAccess, RawAccessMut};
+
+/// Wavenumber passed to the Helmholtz kernel in every benchmark of this file.
+const WAVENUMBER: f64 = 1.0;
+
+/// Benchmarks direct kernel evaluation for every problem size in `sizes`.
+///
+/// Each size `n` uses `n` sources and `n` targets from `points_fixture` and a single
+/// vector of unit charges. `multithreaded` selects `evaluate_mt` over `evaluate_st`.
+/// Results are reported under `group_name` with the benchmark id `N={n}`.
+fn bench_direct(c: &mut Criterion, group_name: &str, sizes: &[usize], multithreaded: bool) {
+    let mut group = c.benchmark_group(group_name);
+
+    group
+        .sample_size(10)
+        .measurement_time(Duration::from_secs(15));
+
+    for &n in sizes {
+        let (nsources, ntargets) = (n, n);
+        let sources = points_fixture::<f64>(nsources, None, None, Some(0));
+        let targets = points_fixture::<f64>(ntargets, None, None, Some(1));
+
+        // A single vector of unit charges, one charge per source
+        let nvecs = 1;
+        let mut charges = rlst_dynamic_array2!(c64, [nsources, nvecs]);
+        charges.data_mut().fill(Complex::one());
+
+        let kernel = Helmholtz3dKernel::new(WAVENUMBER);
+
+        // One output value per target and charge vector
+        let mut result = rlst_dynamic_array2!(c64, [ntargets, nvecs]);
+
+        group.bench_function(format!("N={nsources}"), |b| {
+            b.iter(|| {
+                if multithreaded {
+                    kernel.evaluate_mt(
+                        GreenKernelEvalType::Value,
+                        sources.data(),
+                        targets.data(),
+                        charges.data(),
+                        result.data_mut(),
+                    )
+                } else {
+                    kernel.evaluate_st(
+                        GreenKernelEvalType::Value,
+                        sources.data(),
+                        targets.data(),
+                        charges.data(),
+                        result.data_mut(),
+                    )
+                }
+            })
+        });
+    }
+
+    group.finish();
+}
+
+/// Multi-threaded direct evaluation benchmarks.
+fn multithreaded_direct_f64(c: &mut Criterion) {
+    bench_direct(c, "Helmholtz Multi Threaded Direct f64", &[20000, 100000, 500000], true);
+}
+
+/// Single-threaded direct evaluation benchmarks.
+fn singlethreaded_direct_f64(c: &mut Criterion) {
+    bench_direct(c, "Helmholtz Single Threaded Direct f64", &[5000, 20000], false);
+}
+
+criterion_group!(d_f64, multithreaded_direct_f64, singlethreaded_direct_f64);
+criterion_main!(d_f64);
diff --git a/kifmm/benches/laplace_direct_f32.rs b/kifmm/benches/laplace_direct_f32.rs
new file mode 100644
index 00000000..6114f7d6
--- /dev/null
+++ b/kifmm/benches/laplace_direct_f32.rs
@@ -0,0 +1,75 @@
+//! Criterion benchmarks for direct evaluation of the single precision Laplace kernel.
+
+use std::time::Duration;
+
+use criterion::{criterion_group, criterion_main, Criterion};
+use green_kernels::traits::Kernel;
+use green_kernels::{laplace_3d::Laplace3dKernel, types::GreenKernelEvalType};
+use kifmm::tree::helpers::points_fixture;
+use rlst::{rlst_dynamic_array2, RawAccess, RawAccessMut};
+
+/// Benchmarks direct kernel evaluation for every problem size in `sizes`.
+///
+/// Each size `n` uses `n` sources and `n` targets from `points_fixture` and a single
+/// vector of unit charges. `multithreaded` selects `evaluate_mt` over `evaluate_st`.
+/// Results are reported under `group_name` with the benchmark id `N={n}`.
+fn bench_direct(c: &mut Criterion, group_name: &str, sizes: &[usize], multithreaded: bool) {
+    let mut group = c.benchmark_group(group_name);
+
+    group
+        .sample_size(10)
+        .measurement_time(Duration::from_secs(15));
+
+    for &n in sizes {
+        let (nsources, ntargets) = (n, n);
+        let sources = points_fixture::<f32>(nsources, None, None, Some(0));
+        let targets = points_fixture::<f32>(ntargets, None, None, Some(1));
+
+        // A single vector of unit charges, one charge per source
+        let nvecs = 1;
+        let mut charges = rlst_dynamic_array2!(f32, [nsources, nvecs]);
+        charges.data_mut().fill(1.0);
+
+        let kernel = Laplace3dKernel::new();
+
+        // One output value per target and charge vector
+        let mut result = rlst_dynamic_array2!(f32, [ntargets, nvecs]);
+
+        group.bench_function(format!("N={nsources}"), |b| {
+            b.iter(|| {
+                if multithreaded {
+                    kernel.evaluate_mt(
+                        GreenKernelEvalType::Value,
+                        sources.data(),
+                        targets.data(),
+                        charges.data(),
+                        result.data_mut(),
+                    )
+                } else {
+                    kernel.evaluate_st(
+                        GreenKernelEvalType::Value,
+                        sources.data(),
+                        targets.data(),
+                        charges.data(),
+                        result.data_mut(),
+                    )
+                }
+            })
+        });
+    }
+
+    group.finish();
+}
+
+/// Multi-threaded direct evaluation benchmarks.
+fn multithreaded_f32(c: &mut Criterion) {
+    bench_direct(c, "Laplace Multi Threaded Direct f32", &[20000, 100000, 500000], true);
+}
+
+/// Single-threaded direct evaluation benchmarks.
+fn singlethreaded_f32(c: &mut Criterion) {
+    bench_direct(c, "Laplace Single Threaded Direct f32", &[5000, 20000], false);
+}
+
+criterion_group!(d_f32, multithreaded_f32, singlethreaded_f32);
+criterion_main!(d_f32);
diff --git a/kifmm/benches/laplace_direct_f64.rs b/kifmm/benches/laplace_direct_f64.rs
new file mode 100644
index 00000000..98b03822
--- /dev/null
+++ b/kifmm/benches/laplace_direct_f64.rs
@@ -0,0 +1,75 @@
+//! Criterion benchmarks for direct evaluation of the double precision Laplace kernel.
+
+use std::time::Duration;
+
+use criterion::{criterion_group, criterion_main, Criterion};
+use green_kernels::traits::Kernel;
+use green_kernels::{laplace_3d::Laplace3dKernel, types::GreenKernelEvalType};
+use kifmm::tree::helpers::points_fixture;
+use rlst::{rlst_dynamic_array2, RawAccess, RawAccessMut};
+
+/// Benchmarks direct kernel evaluation for every problem size in `sizes`.
+///
+/// Each size `n` uses `n` sources and `n` targets from `points_fixture` and a single
+/// vector of unit charges. `multithreaded` selects `evaluate_mt` over `evaluate_st`.
+/// Results are reported under `group_name` with the benchmark id `N={n}`.
+fn bench_direct(c: &mut Criterion, group_name: &str, sizes: &[usize], multithreaded: bool) {
+    let mut group = c.benchmark_group(group_name);
+
+    group
+        .sample_size(10)
+        .measurement_time(Duration::from_secs(15));
+
+    for &n in sizes {
+        let (nsources, ntargets) = (n, n);
+        let sources = points_fixture::<f64>(nsources, None, None, Some(0));
+        let targets = points_fixture::<f64>(ntargets, None, None, Some(1));
+
+        // A single vector of unit charges, one charge per source
+        let nvecs = 1;
+        let mut charges = rlst_dynamic_array2!(f64, [nsources, nvecs]);
+        charges.data_mut().fill(1.0);
+
+        let kernel = Laplace3dKernel::new();
+
+        // One output value per target and charge vector
+        let mut result = rlst_dynamic_array2!(f64, [ntargets, nvecs]);
+
+        group.bench_function(format!("N={nsources}"), |b| {
+            b.iter(|| {
+                if multithreaded {
+                    kernel.evaluate_mt(
+                        GreenKernelEvalType::Value,
+                        sources.data(),
+                        targets.data(),
+                        charges.data(),
+                        result.data_mut(),
+                    )
+                } else {
+                    kernel.evaluate_st(
+                        GreenKernelEvalType::Value,
+                        sources.data(),
+                        targets.data(),
+                        charges.data(),
+                        result.data_mut(),
+                    )
+                }
+            })
+        });
+    }
+
+    group.finish();
+}
+
+/// Multi-threaded direct evaluation benchmarks.
+fn multithreaded_direct_f64(c: &mut Criterion) {
+    bench_direct(c, "Laplace Multi Threaded Direct f64", &[20000, 100000, 500000], true);
+}
+
+/// Single-threaded direct evaluation benchmarks.
+fn singlethreaded_direct_f64(c: &mut Criterion) {
+    bench_direct(c, "Laplace Single Threaded Direct f64", &[5000, 20000], false);
+}
+
+criterion_group!(d_f64, multithreaded_direct_f64, singlethreaded_direct_f64);
+criterion_main!(d_f64);