From 03942f3c1e4412992a702f634f8f5a2aee4ad8bb Mon Sep 17 00:00:00 2001 From: Tom Schammo Date: Thu, 14 Nov 2024 18:15:10 +0100 Subject: [PATCH] perf: Increase performance of `trace_beam` Increase performance of `trace_beam` using pointer arithmetic, bypassing the torch indexing. This saves ~81s in the rain simulation, making it ~44% faster. --- cpp/src/raytracing.cpp | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/cpp/src/raytracing.cpp b/cpp/src/raytracing.cpp index e328523..841ad62 100644 --- a/cpp/src/raytracing.cpp +++ b/cpp/src/raytracing.cpp @@ -55,13 +55,17 @@ constexpr tensor_size_t min_intersect_dist = 1; nf_split_factor) % (360 * nf_split_factor); - for (auto i = split_index[index].item(); - i < split_index[index + 1].item(); i++) { + const auto si_ptr = split_index.data_ptr(); + + // NOLINTBEGIN (*-pro-bounds-pointer-arithmetic) + for (auto i = static_cast(si_ptr[index]); + i < static_cast(si_ptr[index + 1]); i++) { const auto nf = noise_filter[i]; + const auto nf_ptr = nf.data_ptr(); const auto sphere = nf.index({Slice(0, 3)}); - const auto nf3_val = nf[3].item(); + const auto nf3_val = nf_ptr[3]; if (beam_length < nf3_val) { return -1; @@ -72,12 +76,13 @@ constexpr tensor_size_t min_intersect_dist = 1; if (const auto dist_beam_sphere = sqrt(nf3_val * nf3_val - length_beam_sphere * length_beam_sphere); - dist_beam_sphere < nf[4].item()) { + dist_beam_sphere < nf_ptr[4]) { return nf3_val; } } } + // NOLINTEND (*-pro-bounds-pointer-arithmetic) return -1; }