-
-
-
-
-
6 #ifndef BUFFER_MANAGEMENT_HPP
-
7 #define BUFFER_MANAGEMENT_HPP
-
-
-
-
-
-
-
-
-
-
-
18 #include <type_traits>
-
19 #include <unordered_map>
-
-
-
22 #ifdef CPPUDDLE_HAVE_HPX
-
23 #ifndef CPPUDDLE_HAVE_HPX_AWARE_ALLOCATORS
-
-
25 "Warning: CPPuddle build with HPX support but without HPX-aware allocators enabled. \
-
26 For better performance configure CPPuddle with CPPUDDLE_WITH_HPX_AWARE_ALLOCATORS=ON!"
-
-
-
29 #include <hpx/include/runtime.hpp>
-
-
-
-
33 #if defined(CPPUDDLE_HAVE_HPX) && defined(CPPUDDLE_HAVE_HPX_MUTEX)
-
-
35 #include <hpx/mutex.hpp>
-
-
-
38 #ifdef CPPUDDLE_HAVE_COUNTERS
-
39 #include <boost/core/demangle.hpp>
-
40 #if defined(CPPUDDLE_HAVE_HPX)
-
41 #include <hpx/include/performance_counters.hpp>
-
-
-
-
-
-
-
48 namespace memory_recycling {
-
-
50 namespace device_selection {
-
-
-
-
-
58 throw std::runtime_error(
-
59 "Allocators used in Multi-GPU builds need explicit Multi-GPU support "
-
60 "(by having a select_device_functor overload");
-
-
-
-
-
-
-
-
-
70 #if defined(CPPUDDLE_DEACTIVATE_BUFFER_RECYCLING)
-
-
-
-
74 "Warning: Building without buffer recycling! Use only for performance testing! \
-
75 For better performance configure CPPuddle with CPPUDDLE_WITH_BUFFER_RECYCLING=ON!"
-
-
77 template <
typename T,
typename Host_Allocator>
-
78 static T *
get(
size_t number_elements,
bool manage_content_lifetime =
false,
-
79 std::optional<size_t> location_hint = std::nullopt,
-
80 std::optional<size_t> device_id = std::nullopt) {
-
-
82 return Host_Allocator{}.allocate(number_elements);
-
-
85 template <
typename T,
typename Host_Allocator>
-
86 static void mark_unused(T *p,
size_t number_elements,
-
87 std::optional<size_t> location_hint = std::nullopt,
-
88 std::optional<size_t> device_id = std::nullopt) {
-
89 return Host_Allocator{}.deallocate(p, number_elements);
-
-
-
96 template <
typename T,
typename Host_Allocator>
-
97 static T *
get(
size_t number_elements,
bool manage_content_lifetime =
false,
-
98 std::optional<size_t> location_hint = std::nullopt,
-
99 std::optional<size_t> device_id = std::nullopt) {
-
-
101 return buffer_manager<T, Host_Allocator>::get(
-
102 number_elements, manage_content_lifetime, location_hint, device_id);
-
103 }
catch (
const std::exception &exc) {
-
104 std::cerr <<
"ERROR: Encountered unhandled exception in cppuddle get: " << exc.what() << std::endl;
-
105 std::cerr <<
"Rethrowing exception... " << std::endl;;
-
-
-
-
113 template <
typename T,
typename Host_Allocator>
-
-
115 std::optional<size_t> location_hint = std::nullopt,
-
116 std::optional<size_t> device_id = std::nullopt) {
-
-
118 return buffer_manager<T, Host_Allocator>::mark_unused(p, number_elements,
-
119 location_hint, device_id);
-
120 }
catch (
const std::exception &exc) {
-
121 std::cerr <<
"ERROR: Encountered unhandled exception in cppuddle mark_unused: " << exc.what() << std::endl;
-
122 std::cerr <<
"Rethrowing exception... " << std::endl;;
-
-
-
-
-
128 template <
typename T,
typename Host_Allocator>
-
-
130 #ifdef CPPUDDLE_HAVE_COUNTERS
-
131 buffer_manager<T, Host_Allocator>::register_counters_with_hpx();
-
-
133 std::cerr <<
"Warning: Trying to register allocator performance counters "
-
134 "with HPX but CPPuddle was built "
-
135 "without CPPUDDLE_WITH_COUNTERS -- operation will be ignored!"
-
-
-
-
-
-
142 std::lock_guard<mutex_t> guard(instance().callback_protection_mut);
-
143 for (
const auto &clean_function :
-
144 instance().total_cleanup_callbacks) {
-
-
-
-
-
150 std::lock_guard<mutex_t> guard(instance().callback_protection_mut);
-
151 for (
const auto &clean_function :
-
152 instance().partial_cleanup_callbacks) {
-
-
-
-
-
158 std::lock_guard<mutex_t> guard(instance().callback_protection_mut);
-
159 for (
const auto &finalize_function :
-
160 instance().finalize_callbacks) {
-
-
-
-
-
-
166 #ifdef CPPUDDLE_HAVE_COUNTERS
-
167 std::lock_guard<mutex_t> guard(instance().callback_protection_mut);
-
168 for (
const auto &print_function :
-
169 instance().print_callbacks) {
-
-
-
-
173 std::cerr <<
"Warning: Trying to print allocator performance counters but CPPuddle was built "
-
174 "without CPPUDDLE_WITH_COUNTERS -- operation will be ignored!"
-
-
-
-
-
-
-
-
-
-
-
-
188 std::list<std::function<void()>> print_callbacks;
-
191 std::list<std::function<void()>> finalize_callbacks;
-
194 std::list<std::function<void()>> total_cleanup_callbacks;
-
197 std::list<std::function<void()>> partial_cleanup_callbacks;
-
200 buffer_interface() =
default;
-
-
202 mutex_t callback_protection_mut;
-
204 static void add_total_cleanup_callback(
const std::function<
void()> &func) {
-
205 std::lock_guard<mutex_t> guard(instance().callback_protection_mut);
-
206 instance().total_cleanup_callbacks.push_back(func);
-
-
210 static void add_partial_cleanup_callback(
const std::function<
void()> &func) {
-
211 std::lock_guard<mutex_t> guard(instance().callback_protection_mut);
-
212 instance().partial_cleanup_callbacks.push_back(func);
-
-
216 static void add_finalize_callback(
const std::function<
void()> &func) {
-
217 std::lock_guard<mutex_t> guard(instance().callback_protection_mut);
-
218 instance().finalize_callbacks.push_back(func);
-
-
222 static void add_print_callback(
const std::function<
void()> &func) {
-
223 std::lock_guard<mutex_t> guard(instance().callback_protection_mut);
-
224 instance().print_callbacks.push_back(func);
-
-
-
-
-
-
-
-
233 template <
typename T,
typename Host_Allocator>
class buffer_manager {
-
-
-
-
-
238 using buffer_entry_type = std::tuple<T *, size_t, size_t, bool>;
-
-
-
-
243 static void clean() {
-
244 assert(instance() && !is_finalized);
-
-
246 std::lock_guard<mutex_t> guard(instance()[i].mut);
-
247 instance()[i].clean_all_buffers();
-
-
-
-
251 assert(instance() && !is_finalized);
-
-
253 std::lock_guard<mutex_t> guard(instance()[i].mut);
-
254 instance()[i].print_counters();
-
-
-
-
258 assert(instance() && !is_finalized);
-
-
-
261 std::lock_guard<mutex_t> guard(instance()[i].mut);
-
262 instance()[i].clean_all_buffers();
-
-
-
-
267 static void clean_unused_buffers_only() {
-
268 assert(instance() && !is_finalized);
-
-
270 std::lock_guard<mutex_t> guard(instance()[i].mut);
-
271 for (
auto &buffer_tuple : instance()[i].unused_buffer_list) {
-
272 Host_Allocator alloc;
-
273 if (std::get<3>(buffer_tuple)) {
-
274 std::destroy_n(std::get<0>(buffer_tuple), std::get<1>(buffer_tuple));
-
-
276 alloc.deallocate(std::get<0>(buffer_tuple), std::get<1>(buffer_tuple));
-
-
278 instance()[i].unused_buffer_list.clear();
-
-
-
281 #if defined(CPPUDDLE_HAVE_COUNTERS) && defined(CPPUDDLE_HAVE_HPX)
-
282 static size_t get_sum_number_recycling(
bool reset) {
-
-
284 sum_number_recycling = 0;
-
285 return sum_number_recycling;
-
-
287 static size_t get_sum_number_allocation(
bool reset) {
-
-
289 sum_number_allocation = 0;
-
290 return sum_number_allocation;
-
-
292 static size_t get_sum_number_creation(
bool reset) {
-
-
294 sum_number_creation = 0;
-
295 return sum_number_creation;
-
-
297 static size_t get_sum_number_deallocation(
bool reset) {
-
-
299 sum_number_deallocation = 0;
-
300 return sum_number_deallocation;
-
-
302 static size_t get_sum_number_wrong_hints(
bool reset) {
-
-
304 sum_number_wrong_hints = 0;
-
305 return sum_number_wrong_hints;
-
-
307 static size_t get_sum_number_wrong_device_hints(
bool reset) {
-
-
309 sum_number_wrong_hints = 0;
-
310 return sum_number_wrong_device_hints;
-
-
312 static size_t get_sum_number_bad_allocs(
bool reset) {
-
-
314 sum_number_bad_allocs = 0;
-
315 return sum_number_bad_allocs;
-
-
-
318 static void register_counters_with_hpx(
void) {
-
319 std::string alloc_name =
-
320 boost::core::demangle(
typeid(Host_Allocator).name()) +
-
321 std::string(
"_") + boost::core::demangle(
typeid(T).name());
-
322 hpx::performance_counters::install_counter_type(
-
323 std::string(
"/cppuddle/allocators/") + alloc_name + std::string(
"/number_recycling/"),
-
324 &get_sum_number_recycling,
-
325 "Number of allocations using a recycled buffer with this "
-
-
327 hpx::performance_counters::install_counter_type(
-
328 std::string(
"/cppuddle/allocators/") + alloc_name + std::string(
"/number_allocations/"),
-
329 &get_sum_number_allocation,
-
330 "Number of allocations with this allocator");
-
331 hpx::performance_counters::install_counter_type(
-
332 std::string(
"/cppuddle/allocators/") + alloc_name + std::string(
"/number_creations/"),
-
333 &get_sum_number_creation,
-
334 "Number of allocations not using a recycled buffer with this "
-
-
336 hpx::performance_counters::install_counter_type(
-
337 std::string(
"/cppuddle/allocators/") + alloc_name + std::string(
"/number_deallocations/"),
-
338 &get_sum_number_deallocation,
-
339 "Number of deallocations yielding buffers to be recycled with this "
-
-
341 hpx::performance_counters::install_counter_type(
-
342 std::string(
"/cppuddle/allocators/") + alloc_name + std::string(
"/number_wrong_hints/"),
-
343 &get_sum_number_wrong_hints,
-
344 "Number of wrong hints supplied to the dealloc method with this allocator");
-
345 hpx::performance_counters::install_counter_type(
-
346 std::string(
"/cppuddle/allocators/") + alloc_name + std::string(
"/number_wrong_device_hints/"),
-
347 &get_sum_number_wrong_device_hints,
-
348 "Number of wrong device hints supplied to the dealloc method with this allocator");
-
349 hpx::performance_counters::install_counter_type(
-
350 std::string(
"/cppuddle/allocators/") + alloc_name + std::string(
"/number_bad_allocs/"),
-
351 &get_sum_number_bad_allocs,
-
352 "Number of wrong bad allocs which triggered a cleanup of unused buffers");
-
-
-
-
357 static T *
get(
size_t number_of_elements,
bool manage_content_lifetime,
-
358 std::optional<size_t> location_hint = std::nullopt,
-
359 std::optional<size_t> gpu_device_id = std::nullopt) {
-
360 init_callbacks_once();
-
-
362 throw std::runtime_error(
"Tried allocation after finalization");
-
-
364 assert(instance() && !is_finalized);
-
-
366 size_t location_id = 0;
-
-
368 location_id = *location_hint;
-
-
-
371 throw std::runtime_error(
"Tried to create buffer with invalid location_id [get]");
-
-
373 size_t device_id = 0;
-
-
375 device_id = *gpu_device_id;
-
-
-
378 throw std::runtime_error(
"Tried to create buffer with invalid device id [get]! "
-
379 "Is multigpu support enabled with the correct number "
-
-
-
-
-
384 std::lock_guard<mutex_t> guard(instance()[location_id].mut);
-
-
-
387 #ifdef CPPUDDLE_HAVE_COUNTERS
-
388 instance()[location_id].number_allocation++;
-
389 sum_number_allocation++;
-
-
-
392 for (
auto iter = instance()[location_id].unused_buffer_list.begin();
-
393 iter != instance()[location_id].unused_buffer_list.end(); iter++) {
-
-
395 if (std::get<1>(tuple) == number_of_elements) {
-
396 instance()[location_id].unused_buffer_list.erase(iter);
-
-
-
-
400 if (manage_content_lifetime && !std::get<3>(tuple)) {
-
401 std::uninitialized_value_construct_n(std::get<0>(tuple),
-
-
403 std::get<3>(tuple) =
true;
-
404 }
else if (!manage_content_lifetime && std::get<3>(tuple)) {
-
405 std::destroy_n(std::get<0>(tuple), std::get<1>(tuple));
-
406 std::get<3>(tuple) =
false;
-
-
408 instance()[location_id].buffer_map.insert({std::get<0>(tuple), tuple});
-
409 #ifdef CPPUDDLE_HAVE_COUNTERS
-
410 instance()[location_id].number_recycling++;
-
411 sum_number_recycling++;
-
-
413 return std::get<0>(tuple);
-
-
-
-
-
-
-
420 T, Host_Allocator>{}(device_id);
-
421 Host_Allocator alloc;
-
422 T *buffer = alloc.allocate(number_of_elements);
-
423 instance()[location_id].buffer_map.insert(
-
424 {buffer, std::make_tuple(buffer, number_of_elements, 1,
-
425 manage_content_lifetime)});
-
426 #ifdef CPPUDDLE_HAVE_COUNTERS
-
427 instance()[location_id].number_creation++;
-
428 sum_number_creation++;
-
-
430 if (manage_content_lifetime) {
-
431 std::uninitialized_value_construct_n(buffer, number_of_elements);
-
-
-
434 }
catch (std::bad_alloc &e) {
-
-
-
437 <<
"Not enough memory left. Cleaning up unused buffers now..."
-
-
-
440 std::cerr <<
"Buffers cleaned! Try allocation again..." << std::endl;
-
-
-
-
444 Host_Allocator alloc;
-
-
446 T, Host_Allocator>{}(device_id);
-
447 T *buffer = alloc.allocate(number_of_elements);
-
448 instance()[location_id].buffer_map.insert(
-
449 {buffer, std::make_tuple(buffer, number_of_elements, 1,
-
450 manage_content_lifetime)});
-
451 #ifdef CPPUDDLE_HAVE_COUNTERS
-
452 instance()[location_id].number_creation++;
-
453 sum_number_creation++;
-
454 instance()[location_id].number_bad_alloc++;
-
455 sum_number_bad_allocs++;
-
-
457 std::cerr <<
"Second attempt allocation successful!" << std::endl;
-
458 if (manage_content_lifetime) {
-
459 std::uninitialized_value_construct_n(buffer, number_of_elements);
-
-
-
-
-
-
465 static void mark_unused(T *memory_location,
size_t number_of_elements,
-
466 std::optional<size_t> location_hint = std::nullopt,
-
467 std::optional<size_t> device_hint = std::nullopt) {
-
-
-
470 assert(instance() && !is_finalized);
-
-
472 size_t location_id = 0;
-
-
474 location_id = *location_hint;
-
-
476 throw std::runtime_error(
-
477 "Buffer recylcer received invalid location hint [mark_unused]");
-
-
-
480 size_t device_id = 0;
-
-
482 device_id = *device_hint;
-
-
484 throw std::runtime_error(
-
485 "Buffer recylcer received invalid devce hint [mark_unused]");
-
-
-
-
-
-
-
492 std::lock_guard<mutex_t> guard(instance()[location_id].mut);
-
493 if (instance()[location_id].buffer_map.find(memory_location) !=
-
494 instance()[location_id].buffer_map.end()) {
-
495 #ifdef CPPUDDLE_HAVE_COUNTERS
-
496 instance()[location_id].number_deallocation++;
-
497 sum_number_deallocation++;
-
-
499 auto it = instance()[location_id].buffer_map.find(memory_location);
-
500 assert(it != instance()[location_id].buffer_map.end());
-
501 auto &tuple = it->second;
-
-
503 assert(std::get<1>(tuple) == number_of_elements);
-
-
505 instance()[location_id].unused_buffer_list.push_front(tuple);
-
506 instance()[location_id].buffer_map.erase(memory_location);
-
-
-
-
510 #ifdef CPPUDDLE_HAVE_COUNTERS
-
511 instance()[location_id].number_wrong_hints++;
-
512 sum_number_wrong_hints++;
-
-
-
-
-
-
-
-
-
-
-
-
524 std::lock_guard<mutex_t> guard(instance()[location_id].mut);
-
525 if (instance()[location_id].buffer_map.find(memory_location) !=
-
526 instance()[location_id].buffer_map.end()) {
-
527 #ifdef CPPUDDLE_HAVE_COUNTERS
-
528 instance()[location_id].number_deallocation++;
-
529 sum_number_deallocation++;
-
-
531 auto it = instance()[location_id].buffer_map.find(memory_location);
-
532 assert(it != instance()[location_id].buffer_map.end());
-
533 auto &tuple = it->second;
-
-
535 assert(std::get<1>(tuple) == number_of_elements);
-
-
537 instance()[location_id].unused_buffer_list.push_front(tuple);
-
538 instance()[location_id].buffer_map.erase(memory_location);
-
-
-
-
-
543 #ifdef CPPUDDLE_HAVE_COUNTERS
-
-
545 sum_number_wrong_device_hints++;
-
-
-
-
-
-
-
552 if (local_device_id == device_id)
-
-
-
-
-
557 size_t location_id = location_hint.value() + local_device_id *
number_instances;
-
558 std::lock_guard<mutex_t> guard(instance()[location_id].mut);
-
559 if (instance()[location_id].buffer_map.find(memory_location) !=
-
560 instance()[location_id].buffer_map.end()) {
-
561 #ifdef CPPUDDLE_HAVE_COUNTERS
-
562 instance()[location_id].number_deallocation++;
-
563 sum_number_deallocation++;
-
-
565 auto it = instance()[location_id].buffer_map.find(memory_location);
-
566 assert(it != instance()[location_id].buffer_map.end());
-
567 auto &tuple = it->second;
-
-
569 assert(std::get<1>(tuple) == number_of_elements);
-
-
571 instance()[location_id].unused_buffer_list.push_front(tuple);
-
572 instance()[location_id].buffer_map.erase(memory_location);
-
-
-
-
-
-
-
-
580 if (*location_hint + local_device_id *
max_number_gpus == location_id) {
-
-
-
-
584 std::lock_guard<mutex_t> guard(instance()[location_id].mut);
-
585 if (instance()[location_id].buffer_map.find(memory_location) !=
-
586 instance()[location_id].buffer_map.end()) {
-
587 #ifdef CPPUDDLE_HAVE_COUNTERS
-
588 instance()[location_id].number_deallocation++;
-
589 sum_number_deallocation++;
-
-
591 auto it = instance()[location_id].buffer_map.find(memory_location);
-
592 assert(it != instance()[location_id].buffer_map.end());
-
593 auto &tuple = it->second;
-
-
595 assert(std::get<1>(tuple) == number_of_elements);
-
-
597 instance()[location_id].unused_buffer_list.push_front(tuple);
-
598 instance()[location_id].buffer_map.erase(memory_location);
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
616 <<
"Warning! Tried to delete non-existing buffer within CPPuddle!"
-
-
618 std::cerr <<
"Did you forget to call recycler::finalize?" << std::endl;
-
-
-
-
623 std::unordered_map<T *, buffer_entry_type> buffer_map{};
-
625 std::list<buffer_entry_type> unused_buffer_list{};
-
-
628 #ifdef CPPUDDLE_HAVE_COUNTERS
-
630 size_t number_allocation{0}, number_deallocation{0}, number_wrong_hints{0},
-
631 number_recycling{0}, number_creation{0}, number_bad_alloc{0};
-
-
633 static inline std::atomic<size_t> sum_number_allocation{0},
-
634 sum_number_deallocation{0}, sum_number_wrong_hints{0},
-
635 sum_number_wrong_device_hints{0}, sum_number_recycling{0},
-
636 sum_number_creation{0}, sum_number_bad_allocs{0};
-
-
640 buffer_manager() =
default;
-
-
642 operator=(buffer_manager<T, Host_Allocator>
const &other) =
default;
-
-
644 operator=(buffer_manager<T, Host_Allocator> &&other) =
delete;
-
645 static std::unique_ptr<buffer_manager[]>& instance(
void) {
-
646 static std::unique_ptr<buffer_manager[]> instances{
-
-
-
-
650 static void init_callbacks_once(
void) {
-
-
652 #if defined(CPPUDDLE_HAVE_HPX) && defined(CPPUDDLE_HAVE_HPX_MUTEX)
-
653 static hpx::once_flag flag;
-
654 hpx::call_once(flag, []() {
-
-
656 static std::once_flag flag;
-
657 std::call_once(flag, []() {
-
-
659 is_finalized =
false;
-
660 buffer_interface::add_total_cleanup_callback(clean);
-
661 buffer_interface::add_partial_cleanup_callback(
-
662 clean_unused_buffers_only);
-
663 buffer_interface::add_finalize_callback(
-
-
665 #ifdef CPPUDDLE_HAVE_COUNTERS
-
666 buffer_interface::add_print_callback(
-
-
-
-
-
671 static inline std::atomic<bool> is_finalized;
-
-
673 #ifdef CPPUDDLE_HAVE_COUNTERS
-
674 void print_counters(
void) {
-
675 if (number_allocation == 0)
-
-
-
678 size_t number_cleaned = unused_buffer_list.size() + buffer_map.size();
-
679 std::cout <<
"\nBuffer manager destructor for (Alloc: "
-
680 << boost::core::demangle(
typeid(Host_Allocator).name()) <<
", Type: "
-
681 << boost::core::demangle(
typeid(T).name())
-
-
683 <<
"--------------------------------------------------------------------"
-
-
685 <<
"--> Number of bad_allocs that triggered garbage "
-
-
687 << number_bad_alloc << std::endl
-
688 <<
"--> Number of buffers that got requested from this "
-
-
690 << number_allocation << std::endl
-
691 <<
"--> Number of times an unused buffer got recycled for a "
-
-
693 << number_recycling << std::endl
-
694 <<
"--> Number of times a new buffer had to be created for a "
-
-
696 << number_creation << std::endl
-
697 <<
"--> Number cleaned up buffers: "
-
-
699 << number_cleaned << std::endl
-
700 <<
"--> Number wrong deallocation hints: "
-
-
702 << number_wrong_hints << std::endl
-
703 <<
"--> Number of buffers that were marked as used upon "
-
-
705 << buffer_map.size() << std::endl
-
706 <<
"==> Recycle rate: "
-
-
708 <<
static_cast<float>(number_recycling) / number_allocation *
-
-
-
-
-
-
714 void clean_all_buffers(
void) {
-
715 #ifdef CPPUDDLE_HAVE_COUNTERS
-
716 if (number_allocation == 0 && number_recycling == 0 &&
-
717 number_bad_alloc == 0 && number_creation == 0 &&
-
718 unused_buffer_list.empty() && buffer_map.empty()) {
-
-
-
-
722 for (
auto &buffer_tuple : unused_buffer_list) {
-
723 Host_Allocator alloc;
-
724 if (std::get<3>(buffer_tuple)) {
-
725 std::destroy_n(std::get<0>(buffer_tuple), std::get<1>(buffer_tuple));
-
-
727 alloc.deallocate(std::get<0>(buffer_tuple), std::get<1>(buffer_tuple));
-
-
729 for (
auto &map_tuple : buffer_map) {
-
730 auto buffer_tuple = map_tuple.second;
-
731 Host_Allocator alloc;
-
732 if (std::get<3>(buffer_tuple)) {
-
733 std::destroy_n(std::get<0>(buffer_tuple), std::get<1>(buffer_tuple));
-
-
735 alloc.deallocate(std::get<0>(buffer_tuple), std::get<1>(buffer_tuple));
-
-
737 unused_buffer_list.clear();
-
-
739 #ifdef CPPUDDLE_HAVE_COUNTERS
-
740 number_allocation = 0;
-
741 number_recycling = 0;
-
742 number_bad_alloc = 0;
-
-
744 number_wrong_hints = 0;
-
-
-
-
-
-
-
-
-
-
-
-
756 buffer_manager<T, Host_Allocator>
const &other) =
delete;
-
-
758 buffer_manager<T, Host_Allocator> &&other) =
delete;
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
773 static_assert(std::is_same_v<value_type, typename underlying_allocator_type::value_type>);
-
-
-
-
777 #ifndef CPPUDDLE_HAVE_HPX_AWARE_ALLOCATORS
-
-
779 : dealloc_hint(std::nullopt), device_id(std::nullopt) {}
-
-
781 : dealloc_hint(std::nullopt), device_id(std::nullopt) {}
-
-
-
784 : dealloc_hint(std::nullopt), device_id(std::nullopt) {}
-
-
786 T *data = buffer_interface::get<T, Host_Allocator>(n);
-
-
-
-
790 buffer_interface::mark_unused<T, Host_Allocator>(p, n);
-
-
-
-
-
795 explicit recycle_allocator(
const size_t device_id) noexcept
-
796 : dealloc_hint(hpx::get_worker_thread_num() %
number_instances), device_id(device_id) {}
-
797 explicit recycle_allocator(
const size_t device_i,
const size_t location_id) noexcept
-
798 : dealloc_hint(location_id), device_id(device_id) {}
-
799 explicit recycle_allocator(
-
800 recycle_allocator<T, Host_Allocator>
const &other) noexcept
-
801 : dealloc_hint(other.dealloc_hint), device_id(other.device_id) {}
-
802 T *allocate(std::size_t n) {
-
803 T *data = buffer_interface::get<T, Host_Allocator>(
-
-
-
-
807 void deallocate(T *p, std::size_t n) {
-
808 buffer_interface::mark_unused<T, Host_Allocator>(p, n, dealloc_hint,
-
-
-
-
-
813 template <
typename... Args>
-
-
815 ::new (
static_cast<void *
>(p)) T(std::forward<Args>(args)...);
-
-
-
-
819 template <
typename T,
typename U,
typename Host_Allocator>
-
-
-
-
823 if constexpr (std::is_same_v<T, U>)
-
-
-
-
-
828 template <
typename T,
typename U,
typename Host_Allocator>
-
-
-
-
832 if constexpr (std::is_same_v<T, U>)
-
-
-
-
-
-
839 template <
typename T,
typename Host_Allocator>
-
-
-
-
843 static_assert(std::is_same_v<value_type, typename underlying_allocator_type::value_type>);
-
-
-
-
847 #ifndef CPPUDDLE_HAVE_HPX_AWARE_ALLOCATORS
-
-
849 : dealloc_hint(std::nullopt), device_id(std::nullopt) {}
-
-
851 : dealloc_hint(std::nullopt), device_id(std::nullopt) {}
-
-
-
854 : dealloc_hint(std::nullopt), device_id(std::nullopt) {}
-
-
856 T *data = buffer_interface::get<T, Host_Allocator>(
-
-
-
-
-
861 buffer_interface::mark_unused<T, Host_Allocator>(p, n);
-
-
-
-
-
866 explicit aggressive_recycle_allocator(
const size_t device_id) noexcept
-
867 : dealloc_hint(hpx::get_worker_thread_num() %
number_instances), device_id(device_id) {}
-
868 explicit aggressive_recycle_allocator(
const size_t device_id,
const size_t location_id) noexcept
-
869 : dealloc_hint(location_id), device_id(device_id) {}
-
870 explicit aggressive_recycle_allocator(
-
871 recycle_allocator<T, Host_Allocator>
const &other) noexcept
-
872 : dealloc_hint(other.dealloc_hint), device_id(other.device_id) {}
-
873 T *allocate(std::size_t n) {
-
874 T *data = buffer_interface::get<T, Host_Allocator>(
-
875 n,
true, dealloc_hint, device_id);
-
-
-
-
879 void deallocate(T *p, std::size_t n) {
-
880 buffer_interface::mark_unused<T, Host_Allocator>(p, n, dealloc_hint,
-
-
-
-
-
885 #ifndef CPPUDDLE_DEACTIVATE_AGGRESSIVE_ALLOCATORS
-
886 template <
typename... Args>
-
-
-
-
-
-
-
-
-
-
-
897 "Warning: Building without content reusage for aggressive allocators! \
-
898 For better performance configure with CPPUDDLE_WITH_AGGRESSIVE_CONTENT_RECYCLING=ON !"
-
899 template <
typename... Args>
-
900 inline void construct(T *p, Args... args) noexcept {
-
901 ::new (
static_cast<void *
>(p)) T(std::forward<Args>(args)...);
-
-
903 void destroy(T *p) { p->~T(); }
-
-
-
-
907 template <
typename T,
typename U,
typename Host_Allocator>
-
-
-
-
911 if constexpr (std::is_same_v<T, U>)
-
-
-
-
-
916 template <
typename T,
typename U,
typename Host_Allocator>
-
-
-
-
920 if constexpr (std::is_same_v<T, U>)
-
-
-
-
-
-
-
-
-
-
Singleton interface to all buffer_managers.
Definition: buffer_management.hpp:68
-
static void print_performance_counters()
Definition: buffer_management.hpp:165
+
Go to the documentation of this file.
+
+
+
+
+
6#ifndef BUFFER_MANAGEMENT_HPP
+
7#define BUFFER_MANAGEMENT_HPP
+
+
+
+
+
+
+
+
+
+
+
+
19#include <unordered_map>
+
+
+
22#ifdef CPPUDDLE_HAVE_HPX
+
23#ifndef CPPUDDLE_HAVE_HPX_AWARE_ALLOCATORS
+
+
25"Warning: CPPuddle build with HPX support but without HPX-aware allocators enabled. \
+
26For better performance configure CPPuddle with CPPUDDLE_WITH_HPX_AWARE_ALLOCATORS=ON!"
+
+
+
29#include <hpx/include/runtime.hpp>
+
+
+
+
33#if defined(CPPUDDLE_HAVE_HPX) && defined(CPPUDDLE_HAVE_HPX_MUTEX)
+
+
35#include <hpx/mutex.hpp>
+
+
+
38#ifdef CPPUDDLE_HAVE_COUNTERS
+
39#include <boost/core/demangle.hpp>
+
40#if defined(CPPUDDLE_HAVE_HPX)
+
41#include <hpx/include/performance_counters.hpp>
+
+
+
+
+
+
+
48namespace memory_recycling {
+
+
50namespace device_selection {
+
+
+
+
+
+
+
58 throw std::runtime_error(
+
59 "Allocators used in Multi-GPU builds need explicit Multi-GPU support "
+
60 "(by having a select_device_functor overload");
+
+
+
+
+
+
+
+
+
+
+
+
+
70#if defined(CPPUDDLE_DEACTIVATE_BUFFER_RECYCLING)
+
+
+
+
74"Warning: Building without buffer recycling! Use only for performance testing! \
+
75For better performance configure CPPuddle with CPPUDDLE_WITH_BUFFER_RECYCLING=ON!"
+
+
77 template <
typename T,
typename Host_Allocator>
+
78 static T *
get(
size_t number_elements,
bool manage_content_lifetime =
false,
+
79 std::optional<size_t> location_hint = std::nullopt,
+
80 std::optional<size_t> device_id = std::nullopt) {
+
+
82 return Host_Allocator{}.allocate(number_elements);
+
+
85 template <
typename T,
typename Host_Allocator>
+
86 static void mark_unused(T *p,
size_t number_elements,
+
87 std::optional<size_t> location_hint = std::nullopt,
+
88 std::optional<size_t> device_id = std::nullopt) {
+
89 return Host_Allocator{}.deallocate(p, number_elements);
+
+
+
96 template <
typename T,
typename Host_Allocator>
+
+
97 static T *
get(
size_t number_elements,
bool manage_content_lifetime =
false,
+
98 std::optional<size_t> location_hint = std::nullopt,
+
99 std::optional<size_t> device_id = std::nullopt) {
+
+
101 return buffer_manager<T, Host_Allocator>::get(
+
102 number_elements, manage_content_lifetime, location_hint, device_id);
+
103 }
catch (
const std::exception &exc) {
+
104 std::cerr <<
"ERROR: Encountered unhandled exception in cppuddle get: " << exc.what() << std::endl;
+
105 std::cerr <<
"Rethrowing exception... " << std::endl;;
+
+
+
+
+
113 template <
typename T,
typename Host_Allocator>
+
+
+
115 std::optional<size_t> location_hint = std::nullopt,
+
116 std::optional<size_t> device_id = std::nullopt) {
+
+
118 return buffer_manager<T, Host_Allocator>::mark_unused(p, number_elements,
+
119 location_hint, device_id);
+
120 }
catch (
const std::exception &exc) {
+
121 std::cerr <<
"ERROR: Encountered unhandled exception in cppuddle mark_unused: " << exc.what() << std::endl;
+
122 std::cerr <<
"Rethrowing exception... " << std::endl;;
+
+
+
+
+
+
128 template <
typename T,
typename Host_Allocator>
+
+
+
130#ifdef CPPUDDLE_HAVE_COUNTERS
+
131 buffer_manager<T, Host_Allocator>::register_counters_with_hpx();
+
+
133 std::cerr <<
"Warning: Trying to register allocator performance counters "
+
134 "with HPX but CPPuddle was built "
+
135 "without CPPUDDLE_WITH_COUNTERS -- operation will be ignored!"
+
+
+
+
+
+
+
+
142 std::lock_guard<mutex_t> guard(instance().callback_protection_mut);
+
143 for (
const auto &clean_function :
+
144 instance().total_cleanup_callbacks) {
+
+
+
+
+
+
+
150 std::lock_guard<mutex_t> guard(instance().callback_protection_mut);
+
151 for (
const auto &clean_function :
+
152 instance().partial_cleanup_callbacks) {
+
+
+
+
+
+
+
158 std::lock_guard<mutex_t> guard(instance().callback_protection_mut);
+
159 for (
const auto &finalize_function :
+
160 instance().finalize_callbacks) {
+
+
+
+
+
+
+
+
166#ifdef CPPUDDLE_HAVE_COUNTERS
+
167 std::lock_guard<mutex_t> guard(instance().callback_protection_mut);
+
168 for (
const auto &print_function :
+
169 instance().print_callbacks) {
+
+
+
+
173 std::cerr <<
"Warning: Trying to print allocator performance counters but CPPuddle was built "
+
174 "without CPPUDDLE_WITH_COUNTERS -- operation will be ignored!"
+
+
+
+
+
+
+
+
+
+
+
+
+
188 std::list<std::function<void()>> print_callbacks;
+
191 std::list<std::function<void()>> finalize_callbacks;
+
194 std::list<std::function<void()>> total_cleanup_callbacks;
+
197 std::list<std::function<void()>> partial_cleanup_callbacks;
+
200 buffer_interface() =
default;
+
+
202 mutex_t callback_protection_mut;
+
204 static void add_total_cleanup_callback(
const std::function<
void()> &func) {
+
205 std::lock_guard<mutex_t> guard(instance().callback_protection_mut);
+
206 instance().total_cleanup_callbacks.push_back(func);
+
+
210 static void add_partial_cleanup_callback(
const std::function<
void()> &func) {
+
211 std::lock_guard<mutex_t> guard(instance().callback_protection_mut);
+
212 instance().partial_cleanup_callbacks.push_back(func);
+
+
216 static void add_finalize_callback(
const std::function<
void()> &func) {
+
217 std::lock_guard<mutex_t> guard(instance().callback_protection_mut);
+
218 instance().finalize_callbacks.push_back(func);
+
+
222 static void add_print_callback(
const std::function<
void()> &func) {
+
223 std::lock_guard<mutex_t> guard(instance().callback_protection_mut);
+
224 instance().print_callbacks.push_back(func);
+
+
+
+
+
+
+
+
233 template <
typename T,
typename Host_Allocator>
class buffer_manager {
+
+
+
+
+
238 using buffer_entry_type = std::tuple<T *, size_t, size_t, bool>;
+
+
+
+
243 static void clean() {
+
244 assert(instance() && !is_finalized);
+
+
246 std::lock_guard<mutex_t> guard(instance()[i].mut);
+
247 instance()[i].clean_all_buffers();
+
+
+
250 static void print_performance_counters() {
+
251 assert(instance() && !is_finalized);
+
+
253 std::lock_guard<mutex_t> guard(instance()[i].mut);
+
254 instance()[i].print_counters();
+
+
+
257 static void finalize() {
+
258 assert(instance() && !is_finalized);
+
+
+
261 std::lock_guard<mutex_t> guard(instance()[i].mut);
+
262 instance()[i].clean_all_buffers();
+
+
+
+
267 static void clean_unused_buffers_only() {
+
268 assert(instance() && !is_finalized);
+
+
270 std::lock_guard<mutex_t> guard(instance()[i].mut);
+
271 for (
auto &buffer_tuple : instance()[i].unused_buffer_list) {
+
272 Host_Allocator alloc;
+
273 if (std::get<3>(buffer_tuple)) {
+
274 std::destroy_n(std::get<0>(buffer_tuple), std::get<1>(buffer_tuple));
+
+
276 alloc.deallocate(std::get<0>(buffer_tuple), std::get<1>(buffer_tuple));
+
+
278 instance()[i].unused_buffer_list.clear();
+
+
+
281#if defined(CPPUDDLE_HAVE_COUNTERS) && defined(CPPUDDLE_HAVE_HPX)
+
282 static size_t get_sum_number_recycling(
bool reset) {
+
+
284 sum_number_recycling = 0;
+
285 return sum_number_recycling;
+
+
287 static size_t get_sum_number_allocation(
bool reset) {
+
+
289 sum_number_allocation = 0;
+
290 return sum_number_allocation;
+
+
292 static size_t get_sum_number_creation(
bool reset) {
+
+
294 sum_number_creation = 0;
+
295 return sum_number_creation;
+
+
297 static size_t get_sum_number_deallocation(
bool reset) {
+
+
299 sum_number_deallocation = 0;
+
300 return sum_number_deallocation;
+
+
302 static size_t get_sum_number_wrong_hints(
bool reset) {
+
+
304 sum_number_wrong_hints = 0;
+
305 return sum_number_wrong_hints;
+
+
307 static size_t get_sum_number_wrong_device_hints(
bool reset) {
+
+
309 sum_number_wrong_hints = 0;
+
310 return sum_number_wrong_device_hints;
+
+
312 static size_t get_sum_number_bad_allocs(
bool reset) {
+
+
314 sum_number_bad_allocs = 0;
+
315 return sum_number_bad_allocs;
+
+
+
318 static void register_counters_with_hpx(
void) {
+
319 std::string alloc_name =
+
320 boost::core::demangle(
typeid(Host_Allocator).name()) +
+
321 std::string(
"_") + boost::core::demangle(
typeid(T).name());
+
322 hpx::performance_counters::install_counter_type(
+
323 std::string(
"/cppuddle/allocators/") + alloc_name + std::string(
"/number_recycling/"),
+
324 &get_sum_number_recycling,
+
325 "Number of allocations using a recycled buffer with this "
+
+
327 hpx::performance_counters::install_counter_type(
+
328 std::string(
"/cppuddle/allocators/") + alloc_name + std::string(
"/number_allocations/"),
+
329 &get_sum_number_allocation,
+
330 "Number of allocations with this allocator");
+
331 hpx::performance_counters::install_counter_type(
+
332 std::string(
"/cppuddle/allocators/") + alloc_name + std::string(
"/number_creations/"),
+
333 &get_sum_number_creation,
+
334 "Number of allocations not using a recycled buffer with this "
+
+
336 hpx::performance_counters::install_counter_type(
+
337 std::string(
"/cppuddle/allocators/") + alloc_name + std::string(
"/number_deallocations/"),
+
338 &get_sum_number_deallocation,
+
339 "Number of deallocations yielding buffers to be recycled with this "
+
+
341 hpx::performance_counters::install_counter_type(
+
342 std::string(
"/cppuddle/allocators/") + alloc_name + std::string(
"/number_wrong_hints/"),
+
343 &get_sum_number_wrong_hints,
+
344 "Number of wrong hints supplied to the dealloc method with this allocator");
+
345 hpx::performance_counters::install_counter_type(
+
346 std::string(
"/cppuddle/allocators/") + alloc_name + std::string(
"/number_wrong_device_hints/"),
+
347 &get_sum_number_wrong_device_hints,
+
348 "Number of wrong device hints supplied to the dealloc method with this allocator");
+
349 hpx::performance_counters::install_counter_type(
+
350 std::string(
"/cppuddle/allocators/") + alloc_name + std::string(
"/number_bad_allocs/"),
+
351 &get_sum_number_bad_allocs,
+
352 "Number of wrong bad allocs which triggered a cleanup of unused buffers");
+
+
+
+
357 static T *get(
size_t number_of_elements,
bool manage_content_lifetime,
+
358 std::optional<size_t> location_hint = std::nullopt,
+
359 std::optional<size_t> gpu_device_id = std::nullopt) {
+
360 init_callbacks_once();
+
+
362 throw std::runtime_error(
"Tried allocation after finalization");
+
+
364 assert(instance() && !is_finalized);
+
+
366 size_t location_id = 0;
+
+
368 location_id = *location_hint;
+
+
+
371 throw std::runtime_error(
"Tried to create buffer with invalid location_id [get]");
+
+
373 size_t device_id = 0;
+
+
375 device_id = *gpu_device_id;
+
+
+
378 throw std::runtime_error(
"Tried to create buffer with invalid device id [get]! "
+
379 "Is multigpu support enabled with the correct number "
+
+
+
+
+
384 std::lock_guard<mutex_t> guard(instance()[location_id].mut);
+
+
+
387#ifdef CPPUDDLE_HAVE_COUNTERS
+
388 instance()[location_id].number_allocation++;
+
389 sum_number_allocation++;
+
+
+
392 for (
auto iter = instance()[location_id].unused_buffer_list.begin();
+
393 iter != instance()[location_id].unused_buffer_list.end(); iter++) {
+
+
395 if (std::get<1>(tuple) == number_of_elements) {
+
396 instance()[location_id].unused_buffer_list.erase(iter);
+
+
+
+
400 if (manage_content_lifetime && !std::get<3>(tuple)) {
+
401 std::uninitialized_value_construct_n(std::get<0>(tuple),
+
+
403 std::get<3>(tuple) =
true;
+
404 }
else if (!manage_content_lifetime && std::get<3>(tuple)) {
+
405 std::destroy_n(std::get<0>(tuple), std::get<1>(tuple));
+
406 std::get<3>(tuple) =
false;
+
+
408 instance()[location_id].buffer_map.insert({std::get<0>(tuple), tuple});
+
409#ifdef CPPUDDLE_HAVE_COUNTERS
+
410 instance()[location_id].number_recycling++;
+
411 sum_number_recycling++;
+
+
413 return std::get<0>(tuple);
+
+
+
+
+
+
+
420 T, Host_Allocator>{}(device_id);
+
421 Host_Allocator alloc;
+
422 T *buffer = alloc.allocate(number_of_elements);
+
423 instance()[location_id].buffer_map.insert(
+
424 {buffer, std::make_tuple(buffer, number_of_elements, 1,
+
425 manage_content_lifetime)});
+
426#ifdef CPPUDDLE_HAVE_COUNTERS
+
427 instance()[location_id].number_creation++;
+
428 sum_number_creation++;
+
+
430 if (manage_content_lifetime) {
+
431 std::uninitialized_value_construct_n(buffer, number_of_elements);
+
+
+
434 }
catch (std::bad_alloc &e) {
+
+
+
437 <<
"Not enough memory left. Cleaning up unused buffers now..."
+
+
+
440 std::cerr <<
"Buffers cleaned! Try allocation again..." << std::endl;
+
+
+
+
444 Host_Allocator alloc;
+
+
446 T, Host_Allocator>{}(device_id);
+
447 T *buffer = alloc.allocate(number_of_elements);
+
448 instance()[location_id].buffer_map.insert(
+
449 {buffer, std::make_tuple(buffer, number_of_elements, 1,
+
450 manage_content_lifetime)});
+
451#ifdef CPPUDDLE_HAVE_COUNTERS
+
452 instance()[location_id].number_creation++;
+
453 sum_number_creation++;
+
454 instance()[location_id].number_bad_alloc++;
+
455 sum_number_bad_allocs++;
+
+
457 std::cerr <<
"Second attempt allocation successful!" << std::endl;
+
458 if (manage_content_lifetime) {
+
459 std::uninitialized_value_construct_n(buffer, number_of_elements);
+
+
+
+
+
+
465 static void mark_unused(T *memory_location,
size_t number_of_elements,
+
466 std::optional<size_t> location_hint = std::nullopt,
+
467 std::optional<size_t> device_hint = std::nullopt) {
+
+
+
470 assert(instance() && !is_finalized);
+
+
472 size_t location_id = 0;
+
+
474 location_id = *location_hint;
+
+
476 throw std::runtime_error(
+
477 "Buffer recylcer received invalid location hint [mark_unused]");
+
+
+
480 size_t device_id = 0;
+
+
482 device_id = *device_hint;
+
+
484 throw std::runtime_error(
+
485 "Buffer recylcer received invalid devce hint [mark_unused]");
+
+
+
+
+
+
+
492 std::lock_guard<mutex_t> guard(instance()[location_id].mut);
+
493 if (instance()[location_id].buffer_map.find(memory_location) !=
+
494 instance()[location_id].buffer_map.end()) {
+
495#ifdef CPPUDDLE_HAVE_COUNTERS
+
496 instance()[location_id].number_deallocation++;
+
497 sum_number_deallocation++;
+
+
499 auto it = instance()[location_id].buffer_map.find(memory_location);
+
500 assert(it != instance()[location_id].buffer_map.end());
+
501 auto &tuple = it->second;
+
+
503 assert(std::get<1>(tuple) == number_of_elements);
+
+
505 instance()[location_id].unused_buffer_list.push_front(tuple);
+
506 instance()[location_id].buffer_map.erase(memory_location);
+
+
+
+
510#ifdef CPPUDDLE_HAVE_COUNTERS
+
511 instance()[location_id].number_wrong_hints++;
+
512 sum_number_wrong_hints++;
+
+
+
+
+
+
+
+
+
+
+
+
524 std::lock_guard<mutex_t> guard(instance()[location_id].mut);
+
525 if (instance()[location_id].buffer_map.find(memory_location) !=
+
526 instance()[location_id].buffer_map.end()) {
+
527#ifdef CPPUDDLE_HAVE_COUNTERS
+
528 instance()[location_id].number_deallocation++;
+
529 sum_number_deallocation++;
+
+
531 auto it = instance()[location_id].buffer_map.find(memory_location);
+
532 assert(it != instance()[location_id].buffer_map.end());
+
533 auto &tuple = it->second;
+
+
535 assert(std::get<1>(tuple) == number_of_elements);
+
+
537 instance()[location_id].unused_buffer_list.push_front(tuple);
+
538 instance()[location_id].buffer_map.erase(memory_location);
+
+
+
+
+
543#ifdef CPPUDDLE_HAVE_COUNTERS
+
+
545 sum_number_wrong_device_hints++;
+
+
+
+
+
+
+
552 if (local_device_id == device_id)
+
+
+
+
+
557 size_t location_id = location_hint.value() + local_device_id *
number_instances;
+
558 std::lock_guard<mutex_t> guard(instance()[location_id].mut);
+
559 if (instance()[location_id].buffer_map.find(memory_location) !=
+
560 instance()[location_id].buffer_map.end()) {
+
561#ifdef CPPUDDLE_HAVE_COUNTERS
+
562 instance()[location_id].number_deallocation++;
+
563 sum_number_deallocation++;
+
+
565 auto it = instance()[location_id].buffer_map.find(memory_location);
+
566 assert(it != instance()[location_id].buffer_map.end());
+
567 auto &tuple = it->second;
+
+
569 assert(std::get<1>(tuple) == number_of_elements);
+
+
571 instance()[location_id].unused_buffer_list.push_front(tuple);
+
572 instance()[location_id].buffer_map.erase(memory_location);
+
+
+
+
+
+
+
+
580 if (*location_hint + local_device_id *
max_number_gpus == location_id) {
+
+
+
+
584 std::lock_guard<mutex_t> guard(instance()[location_id].mut);
+
585 if (instance()[location_id].buffer_map.find(memory_location) !=
+
586 instance()[location_id].buffer_map.end()) {
+
587#ifdef CPPUDDLE_HAVE_COUNTERS
+
588 instance()[location_id].number_deallocation++;
+
589 sum_number_deallocation++;
+
+
591 auto it = instance()[location_id].buffer_map.find(memory_location);
+
592 assert(it != instance()[location_id].buffer_map.end());
+
593 auto &tuple = it->second;
+
+
595 assert(std::get<1>(tuple) == number_of_elements);
+
+
597 instance()[location_id].unused_buffer_list.push_front(tuple);
+
598 instance()[location_id].buffer_map.erase(memory_location);
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
616 <<
"Warning! Tried to delete non-existing buffer within CPPuddle!"
+
+
618 std::cerr <<
"Did you forget to call recycler::finalize?" << std::endl;
+
+
+
+
623 std::unordered_map<T *, buffer_entry_type> buffer_map{};
+
625 std::list<buffer_entry_type> unused_buffer_list{};
+
+
628#ifdef CPPUDDLE_HAVE_COUNTERS
+
630 size_t number_allocation{0}, number_deallocation{0}, number_wrong_hints{0},
+
631 number_recycling{0}, number_creation{0}, number_bad_alloc{0};
+
+
633 static inline std::atomic<size_t> sum_number_allocation{0},
+
634 sum_number_deallocation{0}, sum_number_wrong_hints{0},
+
635 sum_number_wrong_device_hints{0}, sum_number_recycling{0},
+
636 sum_number_creation{0}, sum_number_bad_allocs{0};
+
+
640 buffer_manager() =
default;
+
+
642 operator=(buffer_manager<T, Host_Allocator>
const &other) =
default;
+
+
644 operator=(buffer_manager<T, Host_Allocator> &&other) =
delete;
+
645 static std::unique_ptr<buffer_manager[]>& instance(
void) {
+
646 static std::unique_ptr<buffer_manager[]> instances{
+
+
+
+
650 static void init_callbacks_once(
void) {
+
+
652#if defined(CPPUDDLE_HAVE_HPX) && defined(CPPUDDLE_HAVE_HPX_MUTEX)
+
653 static hpx::once_flag flag;
+
654 hpx::call_once(flag, []() {
+
+
656 static std::once_flag flag;
+
657 std::call_once(flag, []() {
+
+
659 is_finalized =
false;
+
660 buffer_interface::add_total_cleanup_callback(clean);
+
661 buffer_interface::add_partial_cleanup_callback(
+
662 clean_unused_buffers_only);
+
663 buffer_interface::add_finalize_callback(
+
+
665#ifdef CPPUDDLE_HAVE_COUNTERS
+
666 buffer_interface::add_print_callback(
+
667 print_performance_counters);
+
+
+
+
671 static inline std::atomic<bool> is_finalized;
+
+
673#ifdef CPPUDDLE_HAVE_COUNTERS
+
674 void print_counters(
void) {
+
675 if (number_allocation == 0)
+
+
+
678 size_t number_cleaned = unused_buffer_list.size() + buffer_map.size();
+
679 std::cout <<
"\nBuffer manager destructor for (Alloc: "
+
680 << boost::core::demangle(
typeid(Host_Allocator).name()) <<
", Type: "
+
681 << boost::core::demangle(
typeid(T).name())
+
+
683 <<
"--------------------------------------------------------------------"
+
+
685 <<
"--> Number of bad_allocs that triggered garbage "
+
+
687 << number_bad_alloc << std::endl
+
688 <<
"--> Number of buffers that got requested from this "
+
+
690 << number_allocation << std::endl
+
691 <<
"--> Number of times an unused buffer got recycled for a "
+
+
693 << number_recycling << std::endl
+
694 <<
"--> Number of times a new buffer had to be created for a "
+
+
696 << number_creation << std::endl
+
697 <<
"--> Number cleaned up buffers: "
+
+
699 << number_cleaned << std::endl
+
700 <<
"--> Number wrong deallocation hints: "
+
+
702 << number_wrong_hints << std::endl
+
703 <<
"--> Number of buffers that were marked as used upon "
+
+
705 << buffer_map.size() << std::endl
+
706 <<
"==> Recycle rate: "
+
+
708 <<
static_cast<float>(number_recycling) / number_allocation *
+
+
+
+
+
+
714 void clean_all_buffers(
void) {
+
715#ifdef CPPUDDLE_HAVE_COUNTERS
+
716 if (number_allocation == 0 && number_recycling == 0 &&
+
717 number_bad_alloc == 0 && number_creation == 0 &&
+
718 unused_buffer_list.empty() && buffer_map.empty()) {
+
+
+
+
722 for (
auto &buffer_tuple : unused_buffer_list) {
+
723 Host_Allocator alloc;
+
724 if (std::get<3>(buffer_tuple)) {
+
725 std::destroy_n(std::get<0>(buffer_tuple), std::get<1>(buffer_tuple));
+
+
727 alloc.deallocate(std::get<0>(buffer_tuple), std::get<1>(buffer_tuple));
+
+
729 for (
auto &map_tuple : buffer_map) {
+
730 auto buffer_tuple = map_tuple.second;
+
731 Host_Allocator alloc;
+
732 if (std::get<3>(buffer_tuple)) {
+
733 std::destroy_n(std::get<0>(buffer_tuple), std::get<1>(buffer_tuple));
+
+
735 alloc.deallocate(std::get<0>(buffer_tuple), std::get<1>(buffer_tuple));
+
+
737 unused_buffer_list.clear();
+
+
739#ifdef CPPUDDLE_HAVE_COUNTERS
+
740 number_allocation = 0;
+
741 number_recycling = 0;
+
742 number_bad_alloc = 0;
+
+
744 number_wrong_hints = 0;
+
+
+
+
+
+
+
+
+
+
+
+
756 buffer_manager<T, Host_Allocator>
const &other) =
delete;
+
+
758 buffer_manager<T, Host_Allocator> &&other) =
delete;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
773 static_assert(std::is_same_v<value_type, typename underlying_allocator_type::value_type>);
+
+
+
+
777#ifndef CPPUDDLE_HAVE_HPX_AWARE_ALLOCATORS
+
+
+
779 : dealloc_hint(std::nullopt), device_id(std::nullopt) {}
+
+
+
+
781 : dealloc_hint(std::nullopt), device_id(std::nullopt) {}
+
+
+
+
+
784 : dealloc_hint(std::nullopt), device_id(std::nullopt) {}
+
+
+
+
786 T *data = buffer_interface::get<T, Host_Allocator>(n);
+
+
+
+
+
+
790 buffer_interface::mark_unused<T, Host_Allocator>(p, n);
+
+
+
+
+
794 : dealloc_hint(
hpx::get_worker_thread_num() % number_instances), device_id(0) {}
+
795 explicit recycle_allocator(
const size_t device_id) noexcept
+
796 : dealloc_hint(hpx::get_worker_thread_num() % number_instances), device_id(device_id) {}
+
797 explicit recycle_allocator(
const size_t device_i,
const size_t location_id) noexcept
+
798 : dealloc_hint(location_id), device_id(device_id) {}
+
799 explicit recycle_allocator(
+
800 recycle_allocator<T, Host_Allocator>
const &other) noexcept
+
801 : dealloc_hint(other.dealloc_hint), device_id(other.device_id) {}
+
802 T *allocate(std::size_t n) {
+
803 T *data = buffer_interface::get<T, Host_Allocator>(
+
804 n,
false, hpx::get_worker_thread_num() % number_instances, device_id);
+
+
+
807 void deallocate(T *p, std::size_t n) {
+
808 buffer_interface::mark_unused<T, Host_Allocator>(p, n, dealloc_hint,
+
+
+
+
+
813 template <
typename... Args>
+
+
+
815 ::new (
static_cast<void *
>(p)) T(std::forward<Args>(args)...);
+
+
+
+
+
+
819template <
typename T,
typename U,
typename Host_Allocator>
+
+
+
+
+
823 if constexpr (std::is_same_v<T, U>)
+
+
+
+
+
+
828template <
typename T,
typename U,
typename Host_Allocator>
+
+
+
+
+
832 if constexpr (std::is_same_v<T, U>)
+
+
+
+
+
+
+
839template <
typename T,
typename Host_Allocator>
+
+
+
+
+
843 static_assert(std::is_same_v<value_type, typename underlying_allocator_type::value_type>);
+
+
+
+
847#ifndef CPPUDDLE_HAVE_HPX_AWARE_ALLOCATORS
+
+
+
849 : dealloc_hint(std::nullopt), device_id(std::nullopt) {}
+
+
+
+
851 : dealloc_hint(std::nullopt), device_id(std::nullopt) {}
+
+
+
+
+
854 : dealloc_hint(std::nullopt), device_id(std::nullopt) {}
+
+
+
+
856 T *data = buffer_interface::get<T, Host_Allocator>(
+
+
+
+
+
+
+
861 buffer_interface::mark_unused<T, Host_Allocator>(p, n);
+
+
+
+
+
865 : dealloc_hint(
hpx::get_worker_thread_num() % number_instances), device_id(0) {}
+
866 explicit aggressive_recycle_allocator(
const size_t device_id) noexcept
+
867 : dealloc_hint(hpx::get_worker_thread_num() % number_instances), device_id(device_id) {}
+
868 explicit aggressive_recycle_allocator(
const size_t device_id,
const size_t location_id) noexcept
+
869 : dealloc_hint(location_id), device_id(device_id) {}
+
870 explicit aggressive_recycle_allocator(
+
871 recycle_allocator<T, Host_Allocator>
const &other) noexcept
+
872 : dealloc_hint(other.dealloc_hint), device_id(other.device_id) {}
+
873 T *allocate(std::size_t n) {
+
874 T *data = buffer_interface::get<T, Host_Allocator>(
+
875 n,
true, dealloc_hint, device_id);
+
+
+
+
879 void deallocate(T *p, std::size_t n) {
+
880 buffer_interface::mark_unused<T, Host_Allocator>(p, n, dealloc_hint,
+
+
+
+
+
885#ifndef CPPUDDLE_DEACTIVATE_AGGRESSIVE_ALLOCATORS
+
886 template <
typename... Args>
+
+
+
+
+
+
897"Warning: Building without content reusage for aggressive allocators! \
+
898For better performance configure with CPPUDDLE_WITH_AGGRESSIVE_CONTENT_RECYCLING=ON !"
+
899 template <
typename... Args>
+
900 inline void construct(T *p, Args... args)
noexcept {
+
901 ::new (
static_cast<void *
>(p)) T(std::forward<Args>(args)...);
+
+
903 void destroy(T *p) { p->~T(); }
+
+
+
+
+
907template <
typename T,
typename U,
typename Host_Allocator>
+
+
+
+
+
911 if constexpr (std::is_same_v<T, U>)
+
+
+
+
+
+
916template <
typename T,
typename U,
typename Host_Allocator>
+
+
+
+
+
920 if constexpr (std::is_same_v<T, U>)
+
+
+
+
+
+
+
+
+
+
+
+
Singleton interface to all buffer_managers.
Definition buffer_management.hpp:68
+
static void print_performance_counters()
Definition buffer_management.hpp:165
~buffer_interface()=default
-
static void register_allocator_counters_with_hpx(void)
Register all CPPuddle counters as HPX performance counters.
Definition: buffer_management.hpp:129
+
static void register_allocator_counters_with_hpx(void)
Register all CPPuddle counters as HPX performance counters.
Definition buffer_management.hpp:129
+
buffer_interface & operator=(buffer_interface &&other)=delete
+
buffer_interface & operator=(buffer_interface const &other)=delete
buffer_interface(buffer_interface const &other)=delete
-
static T * get(size_t number_elements, bool manage_content_lifetime=false, std::optional< size_t > location_hint=std::nullopt, std::optional< size_t > device_id=std::nullopt)
Definition: buffer_management.hpp:97
-
static void clean_all()
Deallocate all buffers, no matter whether they are marked as used or not.
Definition: buffer_management.hpp:141
-
buffer_interface & operator=(buffer_interface &&other)=delete
-
buffer_interface & operator=(buffer_interface const &other)=delete
-
static void finalize()
Deallocate all buffers, no matter whether they are marked as used or not.
Definition: buffer_management.hpp:157
+
static void clean_all()
Deallocate all buffers, no matter whether they are marked as used or not.
Definition buffer_management.hpp:141
+
static T * get(size_t number_elements, bool manage_content_lifetime=false, std::optional< size_t > location_hint=std::nullopt, std::optional< size_t > device_id=std::nullopt)
Definition buffer_management.hpp:97
+
static void finalize()
Deallocate all buffers, no matter whether they are marked as used or not.
Definition buffer_management.hpp:157
buffer_interface(buffer_interface &&other)=delete
-
static void clean_unused_buffers()
Deallocated all currently unused buffer.
Definition: buffer_management.hpp:149
-
static void mark_unused(T *p, size_t number_elements, std::optional< size_t > location_hint=std::nullopt, std::optional< size_t > device_id=std::nullopt)
Definition: buffer_management.hpp:114
+
static void clean_unused_buffers()
Deallocated all currently unused buffer.
Definition buffer_management.hpp:149
+
static void mark_unused(T *p, size_t number_elements, std::optional< size_t > location_hint=std::nullopt, std::optional< size_t > device_id=std::nullopt)
Definition buffer_management.hpp:114
-
constexpr bool operator!=(recycle_allocator< T, Host_Allocator > const &, recycle_allocator< U, Host_Allocator > const &) noexcept
Definition: buffer_management.hpp:830
-
constexpr bool operator==(recycle_allocator< T, Host_Allocator > const &, recycle_allocator< U, Host_Allocator > const &) noexcept
Definition: buffer_management.hpp:821
-
Definition: config.hpp:31
-
constexpr size_t max_number_gpus
Definition: config.hpp:52
-
std::mutex mutex_t
Definition: config.hpp:36
-
constexpr size_t number_instances
Definition: config.hpp:50
-
Definition: aggregation_executors_and_allocators.hpp:1042
-
Recycles not only allocations but also the contents of a buffer.
Definition: buffer_management.hpp:840
-
const std::optional< size_t > dealloc_hint
Definition: buffer_management.hpp:843
-
aggressive_recycle_allocator(size_t hint) noexcept
Definition: buffer_management.hpp:850
-
void destroy(T *p)
Definition: buffer_management.hpp:890
-
void deallocate(T *p, std::size_t n)
Definition: buffer_management.hpp:860
-
T value_type
Definition: buffer_management.hpp:841
-
void construct(T *p, Args... args) noexcept
Definition: buffer_management.hpp:887
-
T * allocate(std::size_t n)
Definition: buffer_management.hpp:855
-
aggressive_recycle_allocator(aggressive_recycle_allocator< T, Host_Allocator > const &) noexcept
Definition: buffer_management.hpp:852
-
const std::optional< size_t > device_id
Definition: buffer_management.hpp:845
-
Host_Allocator underlying_allocator_type
Definition: buffer_management.hpp:842
-
aggressive_recycle_allocator() noexcept
Definition: buffer_management.hpp:848
-
Definition: buffer_management.hpp:770
-
T * allocate(std::size_t n)
Definition: buffer_management.hpp:785
-
T value_type
Definition: buffer_management.hpp:771
-
recycle_allocator(size_t hint) noexcept
Definition: buffer_management.hpp:780
-
void destroy(T *p)
Definition: buffer_management.hpp:817
-
recycle_allocator() noexcept
Definition: buffer_management.hpp:778
-
Host_Allocator underlying_allocator_type
Definition: buffer_management.hpp:772
-
void construct(T *p, Args... args) noexcept
Definition: buffer_management.hpp:814
-
void deallocate(T *p, std::size_t n)
Definition: buffer_management.hpp:789
-
const std::optional< size_t > device_id
Definition: buffer_management.hpp:775
-
recycle_allocator(recycle_allocator< T, Host_Allocator > const &other) noexcept
Definition: buffer_management.hpp:782
-
const std::optional< size_t > dealloc_hint
Definition: buffer_management.hpp:773
-
Default device selector - No MultGPU support.
Definition: buffer_management.hpp:55
-
void operator()(const size_t device_id)
Definition: buffer_management.hpp:56
+
constexpr bool operator!=(recycle_allocator< T, Host_Allocator > const &, recycle_allocator< U, Host_Allocator > const &) noexcept
Definition buffer_management.hpp:830
+
constexpr bool operator==(recycle_allocator< T, Host_Allocator > const &, recycle_allocator< U, Host_Allocator > const &) noexcept
Definition buffer_management.hpp:821
+
+
constexpr size_t max_number_gpus
Definition config.hpp:52
+
std::mutex mutex_t
Definition config.hpp:36
+
constexpr size_t number_instances
Definition config.hpp:50
+
Definition aggregation_executors_and_allocators.hpp:1042
+
Recycles not only allocations but also the contents of a buffer.
Definition buffer_management.hpp:840
+
const std::optional< size_t > dealloc_hint
Definition buffer_management.hpp:844
+
aggressive_recycle_allocator(size_t hint) noexcept
Definition buffer_management.hpp:850
+
void destroy(T *p)
Definition buffer_management.hpp:890
+
void deallocate(T *p, std::size_t n)
Definition buffer_management.hpp:860
+
T value_type
Definition buffer_management.hpp:841
+
void construct(T *p, Args... args) noexcept
Definition buffer_management.hpp:887
+
aggressive_recycle_allocator(aggressive_recycle_allocator< T, Host_Allocator > const &) noexcept
Definition buffer_management.hpp:852
+
T * allocate(std::size_t n)
Definition buffer_management.hpp:855
+
const std::optional< size_t > device_id
Definition buffer_management.hpp:845
+
Host_Allocator underlying_allocator_type
Definition buffer_management.hpp:842
+
aggressive_recycle_allocator() noexcept
Definition buffer_management.hpp:848
+
Definition buffer_management.hpp:770
+
T * allocate(std::size_t n)
Definition buffer_management.hpp:785
+
T value_type
Definition buffer_management.hpp:771
+
recycle_allocator(size_t hint) noexcept
Definition buffer_management.hpp:780
+
void destroy(T *p)
Definition buffer_management.hpp:817
+
recycle_allocator() noexcept
Definition buffer_management.hpp:778
+
Host_Allocator underlying_allocator_type
Definition buffer_management.hpp:772
+
void construct(T *p, Args... args) noexcept
Definition buffer_management.hpp:814
+
void deallocate(T *p, std::size_t n)
Definition buffer_management.hpp:789
+
const std::optional< size_t > device_id
Definition buffer_management.hpp:775
+
recycle_allocator(recycle_allocator< T, Host_Allocator > const &other) noexcept
Definition buffer_management.hpp:782
+
const std::optional< size_t > dealloc_hint
Definition buffer_management.hpp:774
+
Default device selector - No MultGPU support.
Definition buffer_management.hpp:55
+
void operator()(const size_t device_id)
Definition buffer_management.hpp:56
diff --git a/buffer__management__interface_8hpp.html b/buffer__management__interface_8hpp.html
index 6e0228a8..99ceb813 100644
--- a/buffer__management__interface_8hpp.html
+++ b/buffer__management__interface_8hpp.html
@@ -1,9 +1,9 @@
-
+
diff --git a/buffer__management__interface_8hpp__dep__incl.map b/buffer__management__interface_8hpp__dep__incl.map
index 395b8574..1287e851 100644
--- a/buffer__management__interface_8hpp__dep__incl.map
+++ b/buffer__management__interface_8hpp__dep__incl.map
@@ -1,27 +1,62 @@
diff --git a/buffer__management__interface_8hpp__dep__incl.md5 b/buffer__management__interface_8hpp__dep__incl.md5
index 0a72efed..9016a3a9 100644
--- a/buffer__management__interface_8hpp__dep__incl.md5
+++ b/buffer__management__interface_8hpp__dep__incl.md5
@@ -1 +1 @@
-a97e0446a7ea24e79eda87be7636f32e
\ No newline at end of file
+6d470b385f648f01641342aa7ab38259
\ No newline at end of file
diff --git a/buffer__management__interface_8hpp__dep__incl.svg b/buffer__management__interface_8hpp__dep__incl.svg
index a63ec266..10f945ef 100644
--- a/buffer__management__interface_8hpp__dep__incl.svg
+++ b/buffer__management__interface_8hpp__dep__incl.svg
@@ -4,563 +4,661 @@
-
+
+
diff --git a/buffer__management__interface_8hpp__dep__incl_org.svg b/buffer__management__interface_8hpp__dep__incl_org.svg
index d1eba507..30e31138 100644
--- a/buffer__management__interface_8hpp__dep__incl_org.svg
+++ b/buffer__management__interface_8hpp__dep__incl_org.svg
@@ -4,475 +4,579 @@
-