SIRIUS 7.5.0
Electronic structure library and applications
memory.hpp
Go to the documentation of this file.
1// Copyright (c) 2013-2019 Anton Kozhevnikov, Thomas Schulthess
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without modification, are permitted provided that
5// the following conditions are met:
6//
7// 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
8// following disclaimer.
9// 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions
10// and the following disclaimer in the documentation and/or other materials provided with the distribution.
11//
12// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
13// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
14// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
15// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
16// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
17// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
18// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
19
20/** \file memory.hpp
21 *
22 * \brief Memory management functions and classes.
23 */
24
25#ifndef __MEMORY_HPP__
26#define __MEMORY_HPP__
27
#include <algorithm>
#include <array>
#include <cassert>
#include <cctype>
#include <complex>
#include <csignal>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <functional>
#include <iostream>
#include <list>
#include <map>
#include <memory>
#include <new>
#include <sstream>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <utility>
38
39#ifdef SIRIUS_USE_MEMORY_POOL
40#include <umpire/ResourceManager.hpp>
41#include <umpire/Allocator.hpp>
42#include <umpire/util/wrap_allocator.hpp>
43#include <umpire/strategy/DynamicPoolList.hpp>
44#include <umpire/strategy/AlignedAllocator.hpp>
45#endif
46
47#include "core/acc/acc.hpp"
48
49namespace sirius {
50
51namespace sddk {
52
/// Check if the type is a complex number; by default it is not.
template <typename T>
struct is_complex
{
    constexpr static bool value{false};
};

/// Check if the type is a complex number: for std::complex<T> it is true.
template <typename T>
struct is_complex<std::complex<T>>
{
    constexpr static bool value{true};
};
66
/// Memory types where the code can store data.
/** All memory types can be divided into two (possibly overlapping) groups: accessible by the CPU and accessible by the
 *  device. The lowest bit marks host-accessible memory, the fourth bit marks device-accessible memory
 *  (see is_host_memory() / is_device_memory()). */
enum class memory_t : unsigned int
{
    /// Nothing.
    none = 0b0000,
    /// Host memory.
    host = 0b0001,
    /// Pinned host memory. This is host memory + extra bit flag.
    host_pinned = 0b0011,
    /// Device memory.
    device = 0b1000,
    /// Managed memory (accessible from both host and device).
    managed = 0b1101,
};
83
84/// Check if this is a valid host memory (memory, accessible by the host).
85inline bool
87{
88 return static_cast<unsigned int>(mem__) & 0b0001;
89}
90
91/// Check if this is a valid device memory (memory, accessible by the device).
92inline bool
94{
95 return static_cast<unsigned int>(mem__) & 0b1000;
96}
97
98/// Get a memory type from a string.
99inline memory_t
100get_memory_t(std::string name__)
101{
102 std::transform(name__.begin(), name__.end(), name__.begin(), ::tolower);
103 std::map<std::string, memory_t> const m = {{"none", memory_t::none},
104 {"host", memory_t::host},
105 {"host_pinned", memory_t::host_pinned},
106 {"managed", memory_t::managed},
107 {"device", memory_t::device}};
108
109 if (m.count(name__) == 0) {
110 std::stringstream s;
111 s << "get_memory_t(): wrong label of the memory_t enumerator: " << name__;
112 throw std::runtime_error(s.str());
113 }
114 return m.at(name__);
115}
116
/// Type of the main processing unit.
/** List the processing units on which the code can run. */
enum class device_t
{
    /// CPU device.
    CPU = 0,

    /// GPU device (with CUDA programming model).
    GPU = 1
};
127
128/// Get type of device by memory type.
129inline device_t
131{
132 switch (mem__) {
133 case memory_t::host:
134 case memory_t::host_pinned: {
135 return device_t::CPU;
136 }
137 case memory_t::device: {
138 return device_t::GPU;
139 }
140 default: {
141 throw std::runtime_error("get_device_t(): wrong memory type");
142 }
143 }
144 return device_t::CPU; // make compiler happy
145}
146
147/// Get device type from the string.
148inline device_t
149get_device_t(std::string name__)
150{
151 std::transform(name__.begin(), name__.end(), name__.begin(), ::tolower);
152 std::map<std::string, device_t> const m = {{"cpu", device_t::CPU}, {"gpu", device_t::GPU}};
153
154 if (m.count(name__) == 0) {
155 std::stringstream s;
156 s << "get_device_t(): wrong label of the device_t enumerator: " << name__;
157 throw std::runtime_error(s.str());
158 }
159 return m.at(name__);
160}
161
162/// Allocate n elements in a specified memory.
163/** Allocate a memory block of the memory_t type. Return a nullptr if this memory is not available, otherwise
164 * return a pointer to an allocated block. */
165template <typename T>
166inline T*
167allocate(size_t n__, memory_t M__)
168{
169 switch (M__) {
170 case memory_t::none: {
171 return nullptr;
172 }
173 case memory_t::host: {
174 return static_cast<T*>(std::malloc(n__ * sizeof(T)));
175 }
176 case memory_t::host_pinned: {
177#ifdef SIRIUS_GPU
178 return acc::allocate_host<T>(n__);
179#else
180 return nullptr;
181#endif
182 }
183 case memory_t::device: {
184#ifdef SIRIUS_GPU
185 return acc::allocate<T>(n__);
186#else
187 return nullptr;
188#endif
189 }
190 default: {
191 throw std::runtime_error("allocate(): unknown memory type");
192 }
193 }
194}
195
196/// Deallocate pointer of a given memory type.
197inline void
198deallocate(void* ptr__, memory_t M__)
199{
200 switch (M__) {
201 case memory_t::none: {
202 break;
203 }
204 case memory_t::host: {
205 std::free(ptr__);
206 break;
207 }
208 case memory_t::host_pinned: {
209#ifdef SIRIUS_GPU
211#endif
212 break;
213 }
214 case memory_t::device: {
215#ifdef SIRIUS_GPU
216 acc::deallocate(ptr__);
217#endif
218 break;
219 }
220 default: {
221 throw std::runtime_error("deallocate(): unknown memory type");
222 }
223 }
224}
225
226/// Copy between different memory types.
227template <typename T, typename F>
228inline void
229copy(memory_t from_mem__, T const* from_ptr__, memory_t to_mem__, F* to_ptr__, size_t n__)
230{
231 if (is_host_memory(to_mem__) && is_host_memory(from_mem__)) {
232 std::copy(from_ptr__, from_ptr__ + n__, to_ptr__);
233 return;
234 }
235#if defined(SIRIUS_GPU)
236 throw std::runtime_error("Copy mixed precision type not supported in device memory");
237 return;
238#endif
239}
240
241template <typename T>
242inline void
243copy(memory_t from_mem__, T const* from_ptr__, memory_t to_mem__, T* to_ptr__, size_t n__)
244{
245 if (is_host_memory(to_mem__) && is_host_memory(from_mem__)) {
246 std::copy(from_ptr__, from_ptr__ + n__, to_ptr__);
247 return;
248 }
249#if defined(SIRIUS_GPU)
250 if (is_device_memory(to_mem__) && is_device_memory(from_mem__)) {
251 acc::copy(to_ptr__, from_ptr__, n__);
252 return;
253 }
254 if (is_device_memory(to_mem__) && is_host_memory(from_mem__)) {
255 acc::copyin(to_ptr__, from_ptr__, n__);
256 return;
257 }
258 if (is_host_memory(to_mem__) && is_device_memory(from_mem__)) {
259 acc::copyout(to_ptr__, from_ptr__, n__);
260 return;
261 }
262#endif
263}
264
265/* forward declaration */
266class memory_pool;
267
/// Base class for smart pointer deleters.
/** Type-erases the concrete deallocation strategy: the callable interface required by std::unique_ptr
 *  forwards to a virtual implementation object. */
class memory_t_deleter_base
{
  protected:
    /// Abstract implementation: derived classes know how to free the pointer.
    class memory_t_deleter_base_impl
    {
      public:
        virtual void free(void* ptr__) = 0;
        virtual ~memory_t_deleter_base_impl()
        {
        }
    };
    std::unique_ptr<memory_t_deleter_base_impl> impl_;

  public:
    /// Deleter interface called by std::unique_ptr.
    void operator()(void* ptr__)
    {
        impl_->free(ptr__);
    }
};
288
289/// Deleter for the allocated memory pointer of a given type.
291{
292 protected:
294 {
295 protected:
296 memory_t M_{memory_t::none};
297
298 public:
300 : M_(M__)
301 {
302 }
303 inline void free(void* ptr__)
304 {
305 if (M_ != memory_t::none) {
306 sddk::deallocate(ptr__, M_);
307 }
308 }
309 };
310
311 public:
312 explicit memory_t_deleter(memory_t M__)
313 {
314 impl_ = std::unique_ptr<memory_t_deleter_base_impl>(new memory_t_deleter_impl(M__));
315 }
316};
317
318/// Deleter for the allocated memory pointer from a given memory pool.
320{
321 protected:
323 {
324 protected:
325 memory_pool* mp_{nullptr};
326
327 public:
329 : mp_(mp__)
330 {
331 }
332 inline void free(void* ptr__);
333 };
334
335 public:
336 explicit memory_pool_deleter(memory_pool* mp__)
337 {
338 impl_ = std::unique_ptr<memory_t_deleter_base_impl>(new memory_pool_deleter_impl(mp__));
339 }
340};
341
342/// Allocate n elements and return a unique pointer.
343template <typename T>
344inline std::unique_ptr<T, memory_t_deleter_base>
345get_unique_ptr(size_t n__, memory_t M__)
346{
347 return std::unique_ptr<T, memory_t_deleter_base>(allocate<T>(n__, M__), memory_t_deleter(M__));
348}
349
350//// Memory pool.
351/** This class stores list of allocated memory blocks. Each of the blocks can be divided into subblocks. When subblock
352 * is deallocated it is merged with previous or next free subblock in the memory block. If this was the last subblock
353 * in the block of memory, the (now) free block of memory is merged with the neighbours (if any are available).
354 */
356{
357 private:
358 /// Type of memory that is handeled by this pool.
360
361#ifdef SIRIUS_USE_MEMORY_POOL
362 /// handler to umpire allocator_
363 umpire::Allocator allocator_;
364 /// handler to umpire memory pool
365 umpire::Allocator memory_pool_allocator_;
366#endif
367 public:
368 /// Constructor
370 : M_(M__)
371 {
372 std::string mem_type;
373
374 // All examples in Umpire use upper case names.
375 switch (M__) {
376 case memory_t::host: {
377 mem_type = "HOST";
378 break;
379 }
380 case memory_t::host_pinned: {
381 mem_type = "PINNED";
382 break;
383 }
384 case memory_t::managed: {
385 mem_type = "MANAGED";
386 break;
387 }
388 case memory_t::device: {
389#ifdef SIRIUS_GPU
390 std::stringstream s;
391 s << "DEVICE::" << acc::get_device_id();
392 mem_type = s.str();
393#else
394 mem_type = "NONE";
395 M_ = memory_t::none;
396#endif
397 break;
398 }
399 case memory_t::none: {
400 mem_type = "NONE";
401 break;
402 }
403 default: {
404 break;
405 }
406 }
407#ifdef SIRIUS_USE_MEMORY_POOL
408 if (M_ != memory_t::none) {
409 auto& rm = umpire::ResourceManager::getInstance();
410 this->allocator_ = rm.getAllocator(mem_type);
411
412 if (M_ == memory_t::host) {
413 this->memory_pool_allocator_ =
414 rm.makeAllocator<umpire::strategy::AlignedAllocator>("aligned_allocator", this->allocator_, 256);
415 } else {
416 std::transform(mem_type.begin(), mem_type.end(), mem_type.begin(),
417 [](unsigned char c) { return std::tolower(c); });
418 this->memory_pool_allocator_ =
419 rm.makeAllocator<umpire::strategy::DynamicPoolList>(mem_type + "_dynamic_pool", allocator_);
420 }
421 }
422#endif
423 }
424
425 /// Return a pointer to a memory block for n elements of type T.
426 template <typename T>
427 T* allocate(size_t num_elements__)
428 {
429#if defined(SIRIUS_USE_MEMORY_POOL)
430 if (M_ == memory_t::none) {
431 return nullptr;
432 }
433
434 return static_cast<T*>(memory_pool_allocator_.allocate(num_elements__ * sizeof(T)));
435#else
436 return sddk::allocate<T>(num_elements__, M_);
437#endif
438 }
439
440 /// Delete a pointer and add its memory back to the pool.
441 void free(void* ptr__)
442 {
443#if defined(SIRIUS_USE_MEMORY_POOL)
444 if (M_ == memory_t::none) {
445 return;
446 }
447
448 memory_pool_allocator_.deallocate(ptr__);
449#else
450 sddk::deallocate(ptr__, M_);
451#endif
452 }
453
454 /// Return a unique pointer to the allocated memory.
455 template <typename T>
456 std::unique_ptr<T, memory_t_deleter_base> get_unique_ptr(size_t n__)
457 {
458#if defined(SIRIUS_USE_MEMORY_POOL)
459 return std::unique_ptr<T, memory_t_deleter_base>(this->allocate<T>(n__), memory_pool_deleter(this));
460#else
461 return sddk::get_unique_ptr<T>(n__, M_);
462#endif
463 }
464
465 /// Free all the allocated blocks. umpire does not support this
466 /** All pointers and smart pointers, allocated by the pool are invalidated. */
467 void reset()
468 {
469 }
470
471 /// shrink the memory pool and release all memory.
472 void clear()
473 {
474 if (M_ == memory_t::none) {
475 return;
476 }
477#if defined(SIRIUS_USE_MEMORY_POOL)
478 memory_pool_allocator_.release();
479#endif
480 }
481
482 /// Return the type of memory this pool is managing.
483 inline memory_t memory_type() const
484 {
485 return M_;
486 }
487
488 /// Return the total capacity of the memory pool.
489 size_t total_size() const
490 {
491#if defined(SIRIUS_USE_MEMORY_POOL)
492 if (M_ != memory_t::none) {
493 return memory_pool_allocator_.getActualSize();
494 }
495#endif
496 return 0;
497 }
498
499 /// Get the total free size of the memory pool.
500 size_t free_size() const
501 {
502#if defined(SIRIUS_USE_MEMORY_POOL)
503 if (M_ != memory_t::none) {
504 size_t s = memory_pool_allocator_.getActualSize() - memory_pool_allocator_.getCurrentSize();
505 return s;
506 }
507#endif
508 return 0;
509 }
510
511 /// Get the number of free memory blocks.
512 size_t num_blocks() const
513 {
514#if defined(SIRIUS_USE_MEMORY_POOL)
515 if (M_ != memory_t::none) {
516 auto dynamic_pool = umpire::util::unwrap_allocator<umpire::strategy::DynamicPoolList>(allocator_);
517 return dynamic_pool->getBlocksInPool();
518 }
519#endif
520 return 0;
521 }
522};
523void
524memory_pool_deleter::memory_pool_deleter_impl::free(void* ptr__)
525{
526 mp_->free(ptr__);
527}
528
529/// Return a memory pool.
530/** A memory pool is created when this function called for the first time. */
531sddk::memory_pool& get_memory_pool(sddk::memory_t M__);
532
/* Debug-only assertion for mdarray members: on failure, reports the failed condition, the array label and all
   dimension sizes, then throws std::runtime_error. Compiles to nothing when NDEBUG is defined. */
#ifdef NDEBUG
#define mdarray_assert(condition__)
#else
#define mdarray_assert(condition__)                                                                                    \
    {                                                                                                                  \
        if (!(condition__)) {                                                                                          \
            std::stringstream _s;                                                                                      \
            _s << "Assertion (" << #condition__ << ") failed "                                                         \
               << "at line " << __LINE__ << " of file " << __FILE__ << std::endl                                       \
               << "array label: " << label_ << std::endl;                                                              \
            for (int i = 0; i < N; i++) {                                                                              \
                _s << "dims[" << i << "].size = " << dims_[i].size() << std::endl;                                     \
            }                                                                                                          \
            throw std::runtime_error(_s.str());                                                                        \
        }                                                                                                              \
    }
#endif
551
/// Index descriptor of mdarray.
/** Describes one dimension of the array as an inclusive index range [begin, end] with size = end - begin + 1. */
class mdarray_index_descriptor
{
  public:
    using index_type = int64_t;

  private:
    /// Beginning of index.
    index_type begin_{0};

    /// End of index.
    index_type end_{-1};

    /// Size of index.
    size_t size_{0};

  public:
    /// Constructor of empty descriptor.
    mdarray_index_descriptor()
    {
    }

    /// Constructor for index range [0, size).
    mdarray_index_descriptor(size_t const size__)
        : end_(size__ - 1)
        , size_(size__)
    {
    }

    /// Constructor for index range [begin, end].
    mdarray_index_descriptor(index_type const begin__, index_type const end__)
        : begin_(begin__)
        , end_(end__)
        , size_(end_ - begin_ + 1)
    {
        assert(end_ >= begin_);
    };

    /// Constructor for index range [begin, end].
    mdarray_index_descriptor(std::pair<int, int> const range__)
        : begin_(range__.first)
        , end_(range__.second)
        , size_(end_ - begin_ + 1)
    {
        assert(end_ >= begin_);
    };

    /// Return first index value.
    inline index_type begin() const
    {
        return begin_;
    }

    /// Return last index value.
    inline index_type end() const
    {
        return end_;
    }

    /// Return index size.
    inline size_t size() const
    {
        return size_;
    }

    /// Check that the index is inside the [begin, end] range (always true in release builds).
    inline bool check_range([[maybe_unused]] index_type i__) const
    {
#ifdef NDEBUG
        return true;
#else
        if (i__ < begin_ || i__ > end_) {
            std::cout << "index " << i__ << " out of range [" << begin_ << ", " << end_ << "]" << std::endl;
            return false;
        } else {
            return true;
        }
#endif
    }
};
631
632/// Multidimensional array with the column-major (Fortran) order.
633/** The implementation supports two memory pointers: one is accessible by CPU and second is accessible by a device.
634 The following constructors are implemented:
635 \code{.cpp}
636 // wrap a host memory pointer and create 2D array 10 x 20.
637 mdarray<T, 2>(ptr, 10, 20);
638
639 // wrap a host and device pointers
640 mdarray<T, 2>(ptr, ptr_d, 10, 20);
641
642 // wrap a device pointers only
643 mdarray<T, 2>(nullptr, ptr_d, 10, 20);
644
645 // create 10 x 20 2D array in main memory
646 mdarray<T, 2>(10, 20);
647
648 // create 10 x 20 2D array in device memory
649 mdarray<T, 2>(10, 20, memory_t::device);
650
651 // create from the pool memory (pool of any memory type is allowed)
652 memory_pool mp(memory_t::host);
653 mdarray<T, 2>(mp, 10, 20);
654 \endcode
655 The pointers can be wrapped only in constructor. Memory allocation can be done by a separate call to .allocate()
656 method.
657 */
658template <typename T, int N>
660{
661 public:
662 using index_type = mdarray_index_descriptor::index_type;
663
664 private:
665 /// Optional array label.
666 std::string label_;
667
668 /// Unique pointer to the allocated memory.
669 std::unique_ptr<T, memory_t_deleter_base> unique_ptr_{nullptr};
670
671 /// Raw pointer.
672 T* raw_ptr_{nullptr};
673#ifdef SIRIUS_GPU
674 /// Unique pointer to the allocated GPU memory.
675 std::unique_ptr<T, memory_t_deleter_base> unique_ptr_device_{nullptr};
676
677 /// Raw pointer to GPU memory
678 T* raw_ptr_device_{nullptr};
679#endif
680 /// Array dimensions.
681 std::array<mdarray_index_descriptor, N> dims_;
682
683 /// List of offsets to compute the element location by dimension indices.
684 std::array<index_type, N> offsets_;
685
686 /// Initialize the offsets used to compute the index of the elements.
687 void init_dimensions(std::array<mdarray_index_descriptor, N> const dims__)
688 {
689 dims_ = dims__;
690
691 offsets_[0] = -dims_[0].begin();
692 size_t ld{1};
693 for (int i = 1; i < N; i++) {
694 ld *= dims_[i - 1].size();
695 offsets_[i] = ld;
696 offsets_[0] -= ld * dims_[i].begin();
697 }
698 }
699
700 /// Return linear index in the range [0, size) by the N-dimensional indices (i0, i1, ...)
701 template <typename... Args>
702 inline index_type idx(Args... args) const
703 {
704 static_assert(N == sizeof...(args), "wrong number of dimensions");
705 std::array<index_type, N> i = {args...};
706
707 for (int j = 0; j < N; j++) {
708 // mdarray_assert(dims_[j].check_range(i[j]) && i[j] >= dims_[j].begin() && i[j] <= dims_[j].end());
709 mdarray_assert(dims_[j].check_range(i[j]));
710 }
711
712 size_t idx = offsets_[0] + i[0];
713 for (int j = 1; j < N; j++) {
714 idx += i[j] * offsets_[j];
715 }
716 mdarray_assert(idx >= 0 && idx < size());
717 return idx;
718 }
719
720 /// Return cosnt pointer to an element at a given index.
721 template <bool check_assert = true>
722 inline T const* at_idx(memory_t mem__, index_type const idx__) const
723 {
724 switch (mem__) {
725 case memory_t::host:
726 case memory_t::host_pinned: {
727 if (check_assert) {
728 mdarray_assert(raw_ptr_ != nullptr);
729 }
730 return &raw_ptr_[idx__];
731 }
732 case memory_t::device: {
733#ifdef SIRIUS_GPU
734 if (check_assert) {
735 mdarray_assert(raw_ptr_device_ != nullptr);
736 }
737 return &raw_ptr_device_[idx__];
738#else
739 std::printf("error at line %i of file %s: not compiled with GPU support\n", __LINE__, __FILE__);
740 throw std::runtime_error("");
741#endif
742 }
743 default: {
744 throw std::runtime_error("mdarray::at_idx(): wrong memory type");
745 }
746 }
747 return nullptr; // make compiler happy;
748 }
749
750 /// Return pointer to an element at a given index.
751 template <bool check_assert = true>
752 inline T* at_idx(memory_t mem__, index_type const idx__)
753 {
754 return const_cast<T*>(static_cast<mdarray<T, N> const&>(*this).at_idx<check_assert>(mem__, idx__));
755 }
756
757 // Call constructor on non-trivial data. Complex numbers are treated as trivial.
758 inline void call_constructor()
759 {
760 if (!(std::is_trivial<T>::value || is_complex<T>::value)) {
761 for (size_t i = 0; i < size(); i++) {
762 new (raw_ptr_ + i) T();
763 }
764 }
765 }
766
767 // Call destructor on non-trivial data. Complex numbers are treated as trivial.
768 inline void call_destructor()
769 {
770 if (!(std::is_trivial<T>::value || is_complex<T>::value)) {
771 for (size_t i = 0; i < this->size(); i++) {
772 (raw_ptr_ + i)->~T();
773 }
774 }
775 }
776
777 /// Copy constructor is forbidden
778 mdarray(mdarray<T, N> const& src) = delete;
779
780 /// Assignment operator is forbidden
781 mdarray<T, N>& operator=(mdarray<T, N> const& src) = delete;
782
783 public:
784 /// Default constructor.
786 {
787 }
788
789 /// Destructor.
791 {
792 deallocate(memory_t::host);
793 deallocate(memory_t::device);
794 }
795
796 /// N-dimensional array with index bounds.
797 mdarray(std::array<mdarray_index_descriptor, N> const dims__, memory_t memory__ = memory_t::host,
798 std::string label__ = "")
799 : label_(label__)
800 {
801 this->init_dimensions(dims__);
802 this->allocate(memory__);
803 }
804
805 /*
806 * 1D array constructors
807 *
808 */
809
810 /// 1D array with memory allocation.
811 mdarray(mdarray_index_descriptor const& d0, memory_t memory__ = memory_t::host, std::string label__ = "")
812 {
813 static_assert(N == 1, "wrong number of dimensions");
814
815 this->label_ = label__;
816 this->init_dimensions({d0});
817 this->allocate(memory__);
818 }
819
820 /// 1D array with memory pool allocation.
821 mdarray(mdarray_index_descriptor const& d0, memory_pool& mp__, std::string label__ = "")
822 {
823 static_assert(N == 1, "wrong number of dimensions");
824
825 this->label_ = label__;
826 this->init_dimensions({d0});
827 this->allocate(mp__);
828 }
829
830 /// 1D array with host pointer wrapper.
831 mdarray(T* ptr__, mdarray_index_descriptor const& d0, std::string label__ = "")
832 {
833 static_assert(N == 1, "wrong number of dimensions");
834
835 this->label_ = label__;
836 this->init_dimensions({d0});
837 this->raw_ptr_ = ptr__;
838 }
839
840 /// 1D array with host and device pointer wrapper.
841 mdarray(T* ptr__, T* ptr_device__, mdarray_index_descriptor const& d0, std::string label__ = "")
842 {
843 static_assert(N == 1, "wrong number of dimensions");
844
845 this->label_ = label__;
846 this->init_dimensions({d0});
847 this->raw_ptr_ = ptr__;
848#ifdef SIRIUS_GPU
849 this->raw_ptr_device_ = ptr_device__;
850#endif
851 }
852
853 /*
854 * 2D array constructors
855 *
856 */
857
858 /// 2D array with memory allocation.
859 mdarray(mdarray_index_descriptor const& d0, mdarray_index_descriptor const& d1, memory_t memory__ = memory_t::host,
860 std::string label__ = "")
861 {
862 static_assert(N == 2, "wrong number of dimensions");
863
864 this->label_ = label__;
865 this->init_dimensions({d0, d1});
866 this->allocate(memory__);
867 }
868
869 /// 3D array with memory allocation.
871 memory_t memory__ = memory_t::host, std::string label__ = "")
872 {
873 static_assert(N == 3, "wrong number of dimensions");
874
875 this->label_ = label__;
876 this->init_dimensions({d0, d1, d2});
877 this->allocate(memory__);
878 }
879
880 /// 4D array with memory allocation.
882 mdarray_index_descriptor const& d3, memory_t memory__ = memory_t::host, std::string label__ = "")
883 {
884 static_assert(N == 4, "wrong number of dimensions");
885
886 this->label_ = label__;
887 this->init_dimensions({d0, d1, d2, d3});
888 this->allocate(memory__);
889 }
890
891 /// 5D array with memory allocation.
893 mdarray_index_descriptor const& d3, mdarray_index_descriptor const& d4, memory_t memory__ = memory_t::host,
894 std::string label__ = "")
895 {
896 static_assert(N == 5, "wrong number of dimensions");
897
898 this->label_ = label__;
899 this->init_dimensions({d0, d1, d2, d3, d4});
900 this->allocate(memory__);
901 }
902
903 /// 6D array with memory allocation.
906 memory_t memory__ = memory_t::host, std::string label__ = "")
907 {
908 static_assert(N == 6, "wrong number of dimensions");
909
910 this->label_ = label__;
911 this->init_dimensions({d0, d1, d2, d3, d4, d5});
912 this->allocate(memory__);
913 }
914
915 /// Wrap a pointer into 2D array.
916 mdarray(T* ptr__, mdarray_index_descriptor const& d0, mdarray_index_descriptor const& d1, std::string label__ = "")
917 {
918 static_assert(N == 2, "wrong number of dimensions");
919
920 this->label_ = label__;
921 this->init_dimensions({d0, d1});
922 this->raw_ptr_ = ptr__;
923 }
924
925 mdarray(T* ptr__, T* ptr_device__, mdarray_index_descriptor const& d0, mdarray_index_descriptor const& d1,
926 std::string label__ = "")
927 {
928 static_assert(N == 2, "wrong number of dimensions");
929
930 this->label_ = label__;
931 this->init_dimensions({d0, d1});
932 this->raw_ptr_ = ptr__;
933#ifdef SIRIUS_GPU
934 this->raw_ptr_device_ = ptr_device__;
935#endif
936 }
937
938 /// 2D array with memory pool allocation.
940 std::string label__ = "")
941 {
942 static_assert(N == 2, "wrong number of dimensions");
943
944 this->label_ = label__;
945 this->init_dimensions({d0, d1});
946 this->allocate(mp__);
947 }
948
949 mdarray(T* ptr__, mdarray_index_descriptor const& d0, mdarray_index_descriptor const& d1,
950 mdarray_index_descriptor const& d2, std::string label__ = "")
951 {
952 static_assert(N == 3, "wrong number of dimensions");
953
954 this->label_ = label__;
955 this->init_dimensions({d0, d1, d2});
956 this->raw_ptr_ = ptr__;
957 }
958
959 mdarray(T* ptr__, T* ptr_device__, mdarray_index_descriptor const& d0, mdarray_index_descriptor const& d1,
960 mdarray_index_descriptor const& d2, std::string label__ = "")
961 {
962 static_assert(N == 3, "wrong number of dimensions");
963
964 this->label_ = label__;
965 this->init_dimensions({d0, d1, d2});
966 this->raw_ptr_ = ptr__;
967#ifdef SIRIUS_GPU
968 this->raw_ptr_device_ = ptr_device__;
969#endif
970 }
971
972 /// 3D array with memory pool allocation.
974 memory_pool& mp__, std::string label__ = "")
975 {
976 static_assert(N == 3, "wrong number of dimensions");
977
978 this->label_ = label__;
979 this->init_dimensions({d0, d1, d2});
980 this->allocate(mp__);
981 }
982
983 mdarray(T* ptr__, mdarray_index_descriptor const& d0, mdarray_index_descriptor const& d1,
984 mdarray_index_descriptor const& d2, mdarray_index_descriptor const& d3, std::string label__ = "")
985 {
986 static_assert(N == 4, "wrong number of dimensions");
987
988 this->label_ = label__;
989 this->init_dimensions({d0, d1, d2, d3});
990 this->raw_ptr_ = ptr__;
991 }
992
993 mdarray(T* ptr__, mdarray_index_descriptor const& d0, mdarray_index_descriptor const& d1,
994 mdarray_index_descriptor const& d2, mdarray_index_descriptor const& d3, mdarray_index_descriptor const& d4,
995 std::string label__ = "")
996 {
997 static_assert(N == 5, "wrong number of dimensions");
998
999 this->label_ = label__;
1000 this->init_dimensions({d0, d1, d2, d3, d4});
1001 this->raw_ptr_ = ptr__;
1002 }
1003
1004 mdarray(T* ptr__, mdarray_index_descriptor const& d0, mdarray_index_descriptor const& d1,
1005 mdarray_index_descriptor const& d2, mdarray_index_descriptor const& d3, mdarray_index_descriptor const& d4,
1006 mdarray_index_descriptor const& d5, std::string label__ = "")
1007 {
1008 static_assert(N == 6, "wrong number of dimensions");
1009
1010 this->label_ = label__;
1011 this->init_dimensions({d0, d1, d2, d3, d4, d5});
1012 this->raw_ptr_ = ptr__;
1013 }
1014
1015 /// Move constructor
1017 : label_(src.label_)
1018 , unique_ptr_(std::move(src.unique_ptr_))
1019 , raw_ptr_(src.raw_ptr_)
1020#ifdef SIRIUS_GPU
1021 , unique_ptr_device_(std::move(src.unique_ptr_device_))
1022 , raw_ptr_device_(src.raw_ptr_device_)
1023#endif
1024 {
1025 for (int i = 0; i < N; i++) {
1026 dims_[i] = src.dims_[i];
1027 offsets_[i] = src.offsets_[i];
1028 }
1029 src.raw_ptr_ = nullptr;
1030#ifdef SIRIUS_GPU
1031 src.raw_ptr_device_ = nullptr;
1032#endif
1033 }
1034
1035 /// Move assignment operator
1037 {
1038 if (this != &src) {
1039 label_ = src.label_;
1040 unique_ptr_ = std::move(src.unique_ptr_);
1041 raw_ptr_ = src.raw_ptr_;
1042 src.raw_ptr_ = nullptr;
1043#ifdef SIRIUS_GPU
1044 unique_ptr_device_ = std::move(src.unique_ptr_device_);
1045 raw_ptr_device_ = src.raw_ptr_device_;
1046 src.raw_ptr_device_ = nullptr;
1047#endif
1048 for (int i = 0; i < N; i++) {
1049 dims_[i] = src.dims_[i];
1050 offsets_[i] = src.offsets_[i];
1051 }
1052 }
1053 return *this;
1054 }
1055
1056 /// Allocate memory for array.
1058 {
1059 /* do nothing for zero-sized array */
1060 if (!this->size()) {
1061 return *this;
1062 }
1063
1064 /* host allocation */
1065 if (is_host_memory(memory__)) {
1066 unique_ptr_ = get_unique_ptr<T>(this->size(), memory__);
1067 raw_ptr_ = unique_ptr_.get();
1068 call_constructor();
1069 }
1070#ifdef SIRIUS_GPU
1071 /* device allocation */
1072 if (is_device_memory(memory__)) {
1073 unique_ptr_device_ = get_unique_ptr<T>(this->size(), memory__);
1074 raw_ptr_device_ = unique_ptr_device_.get();
1075 }
1076#endif
1077 return *this;
1078 }
1079
1080 /// Allocate memory from the pool.
1082 {
1083 /* do nothing for zero-sized array */
1084 if (!this->size()) {
1085 return *this;
1086 }
1087 /* host allocation */
1088 if (is_host_memory(mp__.memory_type())) {
1089 unique_ptr_ = mp__.get_unique_ptr<T>(this->size());
1090 raw_ptr_ = unique_ptr_.get();
1091 call_constructor();
1092 }
1093#ifdef SIRIUS_GPU
1094 /* device allocation */
1095 if (is_device_memory(mp__.memory_type())) {
1096 unique_ptr_device_ = mp__.get_unique_ptr<T>(this->size());
1097 raw_ptr_device_ = unique_ptr_device_.get();
1098 }
1099#endif
1100 return *this;
1101 }
1102
1103 /// Deallocate host or device memory.
1104 inline void deallocate(memory_t memory__)
1105 {
1106 if (is_host_memory(memory__)) {
1107 /* call destructor for non-primitive objects */
1108 if (unique_ptr_) {
1109 call_destructor();
1110 }
1111 unique_ptr_.reset(nullptr);
1112 raw_ptr_ = nullptr;
1113 }
1114#ifdef SIRIUS_GPU
1115 if (is_device_memory(memory__)) {
1116 unique_ptr_device_.reset(nullptr);
1117 raw_ptr_device_ = nullptr;
1118 }
1119#endif
1120 }
1121
1122 /// Access operator() for the elements of multidimensional array.
1123 template <typename... Args>
1124 inline T const& operator()(Args... args) const
1125 {
1126 mdarray_assert(raw_ptr_ != nullptr);
1127 return raw_ptr_[idx(args...)];
1128 }
1129
1130 /// Access operator() for the elements of multidimensional array.
1131 template <typename... Args>
1132 inline T& operator()(Args... args)
1133 {
1134 return const_cast<T&>(static_cast<mdarray<T, N> const&>(*this)(args...));
1135 }
1136
1137 /// Access operator[] for the elements of multidimensional array using a linear index in the range [0, size).
1138 inline T const& operator[](size_t const idx__) const
1139 {
1140 mdarray_assert(idx__ >= 0 && idx__ < size());
1141 return raw_ptr_[idx__];
1142 }
1143
1144 /// Access operator[] for the elements of multidimensional array using a linear index in the range [0, size).
1145 inline T& operator[](size_t const idx__)
1146 {
1147 return const_cast<T&>(static_cast<mdarray<T, N> const&>(*this)[idx__]);
1148 }
1149
1150 template <typename... Args>
1151 inline T const* at(memory_t mem__, Args... args) const
1152 {
1153 return at_idx(mem__, idx(args...));
1154 }
1155
1156 template <typename... Args>
1157 inline T* at(memory_t mem__, Args... args)
1158 {
1159 return const_cast<T*>(static_cast<mdarray<T, N> const&>(*this).at(mem__, args...));
1160 }
1161
1162 /// Return pointer to the beginning of array.
1163 inline T const* at(memory_t mem__) const
1164 {
1165 return at_idx<false>(mem__, 0);
1166 }
1167
1168 /// Return pointer to the beginning of array.
1169 inline T* at(memory_t mem__)
1170 {
1171 return const_cast<T*>(static_cast<mdarray<T, N> const&>(*this).at(mem__));
1172 }
1173
    /// Return the raw host pointer; the host buffer must be allocated.
    T* host_data()
    {
        assert(raw_ptr_ != nullptr);
        return raw_ptr_;
    }
1179
    /// Return the raw host pointer (const); the host buffer must be allocated.
    const T* host_data() const
    {
        assert(raw_ptr_ != nullptr);
        return raw_ptr_;
    }
1185
    /// Return the raw device pointer.
    /** \throws std::runtime_error when the library is built without GPU support */
    T* device_data()
    {
#if defined(SIRIUS_GPU)
        assert(raw_ptr_device_ != nullptr);
        return raw_ptr_device_;
#else
        /* no device buffer can exist in a CPU-only build */
        throw std::runtime_error("not compiled with GPU support");
#endif
    }
1195
    /// Return the raw device pointer (const).
    /** \throws std::runtime_error when the library is built without GPU support */
    const T* device_data() const
    {
#if defined(SIRIUS_GPU)
        assert(raw_ptr_device_ != nullptr);
        return raw_ptr_device_;
#else
        /* no device buffer can exist in a CPU-only build */
        throw std::runtime_error("not compiled with GPU support");
#endif
    }
1205
1206 /// Return total size (number of elements) of the array.
1207 inline size_t size() const
1208 {
1209 size_t size_{1};
1210
1211 for (int i = 0; i < N; i++) {
1212 size_ *= dims_[i].size();
1213 }
1214
1215 return size_;
1216 }
1217
1218 /// Return size of particular dimension.
1219 inline size_t size(int i) const
1220 {
1221 mdarray_assert(i < N);
1222 return dims_[i].size();
1223 }
1224
1225 /// Return a descriptor of a dimension.
1226 inline mdarray_index_descriptor dim(int i) const
1227 {
1228 mdarray_assert(i < N);
1229 return dims_[i];
1230 }
1231
1232 /// Return leading dimension size.
1233 inline uint32_t ld() const
1234 {
1235 mdarray_assert(dims_[0].size() < size_t(1 << 31));
1236
1237 return (int32_t)dims_[0].size();
1238 }
1239
1240 /// Compute hash of the array
1241 /** Example: std::printf("hash(h) : %16llX\n", h.hash()); */
1242 inline uint64_t hash(uint64_t h__ = 5381) const
1243 {
1244 for (size_t i = 0; i < size() * sizeof(T); i++) {
1245 h__ = ((h__ << 5) + h__) + ((unsigned char*)raw_ptr_)[i];
1246 }
1247
1248 return h__;
1249 }
1250
1251 /// Compute weighted checksum.
1252 inline T checksum_w(size_t idx0__, size_t size__) const
1253 {
1254 T cs{0};
1255 for (size_t i = 0; i < size__; i++) {
1256 cs += raw_ptr_[idx0__ + i] * static_cast<double>((i & 0xF) - 8);
1257 }
1258 return cs;
1259 }
1260
1261 /// Compute checksum.
1262 inline T checksum(size_t idx0__, size_t size__) const
1263 {
1264 T cs{0};
1265 for (size_t i = 0; i < size__; i++) {
1266 cs += raw_ptr_[idx0__ + i];
1267 }
1268 return cs;
1269 }
1270
1271 inline T checksum() const
1272 {
1273 return checksum(0, size());
1274 }
1275
1276 inline T* begin()
1277 {
1278 return this->at(memory_t::host);
1279 }
1280
1281 inline T const* begin() const
1282 {
1283 return this->at(memory_t::host);
1284 }
1285
1286 inline T* end()
1287 {
1288 return this->at(memory_t::host) + this->size();
1289 }
1290
1291 inline T const* end() const
1292 {
1293 return this->at(memory_t::host) + this->size();
1294 }
1295
1296 //== template <device_t pu>
1297 //== inline T checksum() const
1298 //== {
1299 //== switch (pu) {
1300 //== case CPU: {
1301 //== return checksum();
1302 //== }
1303 //== case GPU: {
1304 //== auto cs = acc::allocate<T>(1);
1305 //== acc::zero(cs, 1);
1306 //== add_checksum_gpu(raw_ptr_device_, (int)size(), 1, cs);
1307 //== T cs1;
1308 //== acc::copyout(&cs1, cs, 1);
1309 //== acc::deallocate(cs);
1310 //== return cs1;
1311 //== }
1312 //== }
1313 //== }
1314
    /// Zero n elements starting from idx0.
    /** \param mem__  memory space(s) to clear; host and device parts are handled independently
        \param idx0__ linear index of the first element to clear
        \param n__    number of elements to clear
        Host memory is cleared byte-wise with memset (assumes T is trivially
        zero-fillable -- TODO confirm); device memory (when present) with acc::zero. */
    inline void zero(memory_t mem__, size_t idx0__, size_t n__)
    {
        mdarray_assert(idx0__ + n__ <= size());
        if (n__ && is_host_memory(mem__)) {
            mdarray_assert(raw_ptr_ != nullptr);
            // std::fill(raw_ptr_ + idx0__, raw_ptr_ + idx0__ + n__, 0);
            std::memset((void*)&raw_ptr_[idx0__], 0, n__ * sizeof(T));
        }
#ifdef SIRIUS_GPU
        /* clear the device copy only when a device buffer actually exists */
        if (n__ && on_device() && is_device_memory(mem__)) {
            mdarray_assert(raw_ptr_device_ != nullptr);
            acc::zero(&raw_ptr_device_[idx0__], n__);
        }
#endif
    }
1331
1332 /// Zero the entire array.
1333 inline void zero(memory_t mem__ = memory_t::host)
1334 {
1335 this->zero(mem__, 0, size());
1336 }
1337
    /// Copy n elements starting from idx0 from one memory type to another.
    /** \param mem__  destination memory space: a device memory type triggers a
                      host-to-device copy-in, a host memory type a device-to-host copy-out
        \param idx0__ linear index of the first element to copy
        \param n__    number of elements to copy
        \param sid    stream id; the default (-1) selects a synchronous (blocking) transfer
        Without GPU support this function is a no-op. */
    inline void copy_to(memory_t mem__, size_t idx0__, size_t n__, acc::stream_id sid = acc::stream_id(-1))
    {
        if (n__ == 0) {
            return;
        }
#ifdef SIRIUS_GPU
        mdarray_assert(raw_ptr_ != nullptr);
        mdarray_assert(raw_ptr_device_ != nullptr);
        mdarray_assert(idx0__ + n__ <= size());
        /* a memory type carrying both host and device flags (e.g. managed) is
           ambiguous for a directional copy */
        if (is_host_memory(mem__) && is_device_memory(mem__)) {
            throw std::runtime_error(
                "mdarray::copy_to(): memory is both host and device, check what to do with this case");
        }
        /* copy to device memory */
        if (is_device_memory(mem__)) {
            if (sid() == -1) {
                /* synchronous (blocking) copy */
                acc::copyin(&raw_ptr_device_[idx0__], &raw_ptr_[idx0__], n__);
            } else {
                /* asynchronous (non-blocking) copy */
                acc::copyin(&raw_ptr_device_[idx0__], &raw_ptr_[idx0__], n__, sid);
            }
        }
        /* copy back from device to host */
        if (is_host_memory(mem__)) {
            if (sid() == -1) {
                /* synchronous (blocking) copy */
                acc::copyout(&raw_ptr_[idx0__], &raw_ptr_device_[idx0__], n__);
            } else {
                /* asynchronous (non-blocking) copy */
                acc::copyout(&raw_ptr_[idx0__], &raw_ptr_device_[idx0__], n__, sid);
            }
        }
#endif
    }
1374
1375 /// Copy entire array from one memory type to another.
1376 inline void copy_to(memory_t mem__, acc::stream_id sid = acc::stream_id(-1))
1377 {
1378 this->copy_to(mem__, 0, size(), sid);
1379 }
1380
    /// Check if device pointer is available.
    /** \return true when a device buffer has been allocated or wrapped */
    inline bool on_device() const
    {
#ifdef SIRIUS_GPU
        return (raw_ptr_device_ != nullptr);
#else
        /* a CPU-only build can never hold a device buffer */
        return false;
#endif
    }
1390
    /// Return the optional array label (by value).
    auto label() const
    {
        return label_;
    }
1395
1396 inline bool on_host() const
1397 {
1398 return (raw_ptr_ != nullptr);
1399 }
1400
1401 mdarray<T, N>& operator=(std::function<T(void)> f__)
1402 {
1403 for (size_t i = 0; i < this->size(); i++) {
1404 (*this)[i] = f__();
1405 }
1406 return *this;
1407 }
1408
1409 mdarray<T, N>& operator=(std::function<T(index_type)> f__)
1410 {
1411 static_assert(N == 1, "wrong number of dimensions");
1412
1413 for (index_type i0 = this->dims_[0].begin(); i0 <= this->dims_[0].end(); i0++) {
1414 (*this)(i0) = f__(i0);
1415 }
1416 return *this;
1417 }
1418
1419 mdarray<T, N>& operator=(std::function<T(index_type, index_type)> f__)
1420 {
1421 static_assert(N == 2, "wrong number of dimensions");
1422
1423 for (index_type i1 = this->dims_[1].begin(); i1 <= this->dims_[1].end(); i1++) {
1424 for (index_type i0 = this->dims_[0].begin(); i0 <= this->dims_[0].end(); i0++) {
1425 (*this)(i0, i1) = f__(i0, i1);
1426 }
1427 }
1428 return *this;
1429 }
1430};
1431
1432// Alias for matrix
1433template <typename T>
1434using matrix = mdarray<T, 2>;
1435
1436/// Serialize to std::ostream
1437template <typename T, int N>
1438std::ostream&
1439operator<<(std::ostream& out, mdarray<T, N> const& v)
1440{
1441 if (v.size()) {
1442 out << v[0];
1443 for (size_t i = 1; i < v.size(); i++) {
1444 out << std::string(" ") << v[i];
1445 }
1446 }
1447 return out;
1448}
1449
/// Copy content of the array to another array of identical size but different precision.
/** \param src__  source array of element type F
    \param dest__ destination array of element type T
    \throws std::runtime_error when any index range differs between the arrays
    Always prints a warning to stdout, since the element-wise conversion may lose precision.
    NOTE(review): relies on <sstream> being pulled in transitively -- confirm the include list. */
template <typename T, typename F, int N>
inline void
copy(mdarray<F, N> const& src__, mdarray<T, N>& dest__)
{
    if (src__.size() == 0) {
        return;
    }
    /* all index ranges must match exactly, not just the total size */
    for (int i = 0; i < N; i++) {
        if (dest__.dim(i).begin() != src__.dim(i).begin() || dest__.dim(i).end() != src__.dim(i).end()) {
            std::stringstream s;
            s << "error at line " << __LINE__ << " of file " << __FILE__ << " : array dimensions don't match";
            throw std::runtime_error(s.str());
        }
    }
    std::cout << "=== WARNING at line " << __LINE__ << " of file " << __FILE__ << " ===" << std::endl;
    std::cout << "    Copying matrix element with different type, possible loss of data precision" << std::endl;
    std::copy(&src__.at(memory_t::host)[0], &src__.at(memory_t::host)[0] + src__.size(), &dest__.at(memory_t::host)[0]);
}
1469
1470/// Copy content of the array to another array of identical size.
1471/** For example:
1472 \code{.cpp}
1473 mdarray<double, 2> src(10, 20);
1474 mdarray<double, 2> dest(10, 20);
1475 copy(src, dest);
1476 \endcode
1477 */
1478template <typename T, int N>
1479inline void
1480copy(mdarray<T, N> const& src__, mdarray<T, N>& dest__)
1481{
1482 if (src__.size() == 0) {
1483 return;
1484 }
1485 for (int i = 0; i < N; i++) {
1486 if (dest__.dim(i).begin() != src__.dim(i).begin() || dest__.dim(i).end() != src__.dim(i).end()) {
1487 std::stringstream s;
1488 s << "error at line " << __LINE__ << " of file " << __FILE__ << " : array dimensions don't match";
1489 throw std::runtime_error(s.str());
1490 }
1491 }
1492 std::copy(&src__[0], &src__[0] + src__.size(), &dest__[0]);
1493}
1494
1495/// Copy all memory present on destination.
1496template <typename T, int N>
1497void
1499{
1500
1501 assert(dst.size() == src.size());
1502 // TODO: make sure dst and src don't overlap
1503
1504 if (dst.on_device()) {
1505 acc::copy(dst.device_data(), src.device_data(), src.size());
1506 }
1507
1508 if (dst.on_host()) {
1509 std::copy(src.host_data(), src.host_data() + dst.size(), dst.host_data());
1510 }
1511}
1512
1513/// Copy memory specified by device from src to dst.
1514template <typename T, int N>
1515void
1517{
1518 // TODO also compare shapes
1519 if (src.size() == 0) {
1520 // nothing TODO
1521 return;
1522 }
1523
1524 assert(src.size() == dst.size());
1525 if (device == device_t::GPU) {
1526 assert(src.on_device() && dst.on_device());
1527 acc::copy(dst.device_data(), src.device_data(), dst.size());
1528 } else if (device == device_t::CPU) {
1529 assert(src.on_host() && dst.on_host());
1530 std::copy(src.host_data(), src.host_data() + dst.size(), dst.host_data());
1531 }
1532}
1533
1534template <class numeric_t, std::size_t... Ts>
1535auto
1536_empty_like_inner(std::index_sequence<Ts...>& seq [[maybe_unused]], std::size_t (&dims)[sizeof...(Ts)],
1537 memory_pool* mempool)
1538{
1539 if (mempool == nullptr) {
1540 return mdarray<numeric_t, sizeof...(Ts)>{dims[Ts]...};
1541 } else {
1542 mdarray<numeric_t, sizeof...(Ts)> out{dims[Ts]...};
1543 out.allocate(*mempool);
1544 return out;
1545 }
1546}
1547
1548template <typename T, int N>
1549auto
1550empty_like(const mdarray<T, N>& src)
1551{
1552 auto I = std::make_index_sequence<N>{};
1553 std::size_t dims[N];
1554 for (int i = 0; i < N; ++i) {
1555 dims[i] = src.size(i);
1556 }
1557 return _empty_like_inner<T>(I, dims, nullptr);
1558}
1559
1560template <typename T, int N>
1561auto
1562empty_like(const mdarray<T, N>& src, memory_pool& mempool)
1563{
1564 auto I = std::make_index_sequence<N>{};
1565 std::size_t dims[N];
1566 for (int i = 0; i < N; ++i) {
1567 dims[i] = src.size(i);
1568 }
1569 return _empty_like_inner<T>(I, dims, &mempool);
1570}
1571
1572} // namespace sddk
1573
1574} // namespace sirius
1575
1576#endif // __MEMORY_HPP__
Interface to accelerators API.
Helper class to wrap stream id (integer number).
Definition: acc.hpp:132
Index descriptor of mdarray.
Definition: memory.hpp:554
index_type end() const
Return last index value.
Definition: memory.hpp:606
mdarray_index_descriptor(std::pair< int, int > const range__)
Constructor for index range [begin, end].
Definition: memory.hpp:591
mdarray_index_descriptor()
Constructor of empty descriptor.
Definition: memory.hpp:570
mdarray_index_descriptor(size_t const size__)
Constructor for index range [0, size).
Definition: memory.hpp:575
size_t size() const
Return index size.
Definition: memory.hpp:612
index_type begin() const
Return first index value.
Definition: memory.hpp:600
mdarray_index_descriptor(index_type const begin__, index_type const end__)
Constructor for index range [begin, end].
Definition: memory.hpp:582
Multidimensional array with the column-major (Fortran) order.
Definition: memory.hpp:660
T const * at_idx(memory_t mem__, index_type const idx__) const
Return const pointer to an element at a given index.
Definition: memory.hpp:722
mdarray< T, N > & operator=(mdarray< T, N > const &src)=delete
Assignment operator is forbidden.
mdarray(mdarray_index_descriptor const &d0, mdarray_index_descriptor const &d1, mdarray_index_descriptor const &d2, memory_pool &mp__, std::string label__="")
3D array with memory pool allocation.
Definition: memory.hpp:973
mdarray(mdarray_index_descriptor const &d0, mdarray_index_descriptor const &d1, mdarray_index_descriptor const &d2, mdarray_index_descriptor const &d3, mdarray_index_descriptor const &d4, memory_t memory__=memory_t::host, std::string label__="")
5D array with memory allocation.
Definition: memory.hpp:892
mdarray< T, N > & allocate(memory_pool &mp__)
Allocate memory from the pool.
Definition: memory.hpp:1081
mdarray_index_descriptor dim(int i) const
Return a descriptor of a dimension.
Definition: memory.hpp:1226
mdarray(mdarray_index_descriptor const &d0, mdarray_index_descriptor const &d1, mdarray_index_descriptor const &d2, memory_t memory__=memory_t::host, std::string label__="")
3D array with memory allocation.
Definition: memory.hpp:870
T * at_idx(memory_t mem__, index_type const idx__)
Return pointer to an element at a given index.
Definition: memory.hpp:752
index_type idx(Args... args) const
Return linear index in the range [0, size) by the N-dimensional indices (i0, i1, ....
Definition: memory.hpp:702
void copy_to(memory_t mem__, acc::stream_id sid=acc::stream_id(-1))
Copy entire array from one memory type to another.
Definition: memory.hpp:1376
mdarray(T *ptr__, mdarray_index_descriptor const &d0, std::string label__="")
1D array with host pointer wrapper.
Definition: memory.hpp:831
uint64_t hash(uint64_t h__=5381) const
Compute hash of the array.
Definition: memory.hpp:1242
T & operator[](size_t const idx__)
Access operator[] for the elements of multidimensional array using a linear index in the range [0,...
Definition: memory.hpp:1145
void copy_to(memory_t mem__, size_t idx0__, size_t n__, acc::stream_id sid=acc::stream_id(-1))
Copy n elements starting from idx0 from one memory type to another.
Definition: memory.hpp:1339
mdarray(mdarray_index_descriptor const &d0, memory_t memory__=memory_t::host, std::string label__="")
1D array with memory allocation.
Definition: memory.hpp:811
T const & operator()(Args... args) const
Access operator() for the elements of multidimensional array.
Definition: memory.hpp:1124
mdarray(mdarray_index_descriptor const &d0, memory_pool &mp__, std::string label__="")
1D array with memory pool allocation.
Definition: memory.hpp:821
std::array< index_type, N > offsets_
List of offsets to compute the element location by dimension indices.
Definition: memory.hpp:684
void zero(memory_t mem__, size_t idx0__, size_t n__)
Zero n elements starting from idx0.
Definition: memory.hpp:1316
mdarray()
Default constructor.
Definition: memory.hpp:785
mdarray(mdarray< T, N > const &src)=delete
Copy constructor is forbidden.
uint32_t ld() const
Return leading dimension size.
Definition: memory.hpp:1233
mdarray< T, N > & operator=(mdarray< T, N > &&src)
Move assignment operator.
Definition: memory.hpp:1036
T const & operator[](size_t const idx__) const
Access operator[] for the elements of multidimensional array using a linear index in the range [0,...
Definition: memory.hpp:1138
mdarray(mdarray< T, N > &&src)
Move constructor.
Definition: memory.hpp:1016
mdarray(T *ptr__, T *ptr_device__, mdarray_index_descriptor const &d0, std::string label__="")
1D array with host and device pointer wrapper.
Definition: memory.hpp:841
void zero(memory_t mem__=memory_t::host)
Zero the entire array.
Definition: memory.hpp:1333
mdarray(std::array< mdarray_index_descriptor, N > const dims__, memory_t memory__=memory_t::host, std::string label__="")
N-dimensional array with index bounds.
Definition: memory.hpp:797
std::string label_
Optional array label.
Definition: memory.hpp:666
mdarray(mdarray_index_descriptor const &d0, mdarray_index_descriptor const &d1, mdarray_index_descriptor const &d2, mdarray_index_descriptor const &d3, mdarray_index_descriptor const &d4, mdarray_index_descriptor const &d5, memory_t memory__=memory_t::host, std::string label__="")
6D array with memory allocation.
Definition: memory.hpp:904
~mdarray()
Destructor.
Definition: memory.hpp:790
T * at(memory_t mem__)
Return pointer to the beginning of array.
Definition: memory.hpp:1169
T & operator()(Args... args)
Access operator() for the elements of multidimensional array.
Definition: memory.hpp:1132
std::array< mdarray_index_descriptor, N > dims_
Array dimensions.
Definition: memory.hpp:681
T const * at(memory_t mem__) const
Return pointer to the beginning of array.
Definition: memory.hpp:1163
mdarray(mdarray_index_descriptor const &d0, mdarray_index_descriptor const &d1, memory_t memory__=memory_t::host, std::string label__="")
2D array with memory allocation.
Definition: memory.hpp:859
void init_dimensions(std::array< mdarray_index_descriptor, N > const dims__)
Initialize the offsets used to compute the index of the elements.
Definition: memory.hpp:687
void deallocate(memory_t memory__)
Deallocate host or device memory.
Definition: memory.hpp:1104
mdarray< T, N > & allocate(memory_t memory__)
Allocate memory for array.
Definition: memory.hpp:1057
size_t size(int i) const
Return size of particular dimension.
Definition: memory.hpp:1219
mdarray(mdarray_index_descriptor const &d0, mdarray_index_descriptor const &d1, mdarray_index_descriptor const &d2, mdarray_index_descriptor const &d3, memory_t memory__=memory_t::host, std::string label__="")
4D array with memory allocation.
Definition: memory.hpp:881
mdarray(T *ptr__, mdarray_index_descriptor const &d0, mdarray_index_descriptor const &d1, std::string label__="")
Wrap a pointer into 2D array.
Definition: memory.hpp:916
T checksum(size_t idx0__, size_t size__) const
Compute checksum.
Definition: memory.hpp:1262
bool on_device() const
Check if device pointer is available.
Definition: memory.hpp:1382
T checksum_w(size_t idx0__, size_t size__) const
Compute weighted checksum.
Definition: memory.hpp:1252
size_t size() const
Return total size (number of elements) of the array.
Definition: memory.hpp:1207
mdarray(mdarray_index_descriptor const &d0, mdarray_index_descriptor const &d1, memory_pool &mp__, std::string label__="")
2D array with memory pool allocation.
Definition: memory.hpp:939
Deleter for the allocated memory pointer from a given memory pool.
Definition: memory.hpp:320
memory_t M_
Type of memory that is handled by this pool.
Definition: memory.hpp:359
size_t total_size() const
Return the total capacity of the memory pool.
Definition: memory.hpp:489
size_t free_size() const
Get the total free size of the memory pool.
Definition: memory.hpp:500
memory_t memory_type() const
Return the type of memory this pool is managing.
Definition: memory.hpp:483
memory_pool(memory_t M__)
Constructor.
Definition: memory.hpp:369
size_t num_blocks() const
Get the number of free memory blocks.
Definition: memory.hpp:512
std::unique_ptr< T, memory_t_deleter_base > get_unique_ptr(size_t n__)
Return a unique pointer to the allocated memory.
Definition: memory.hpp:456
void free(void *ptr__)
Delete a pointer and add its memory back to the pool.
Definition: memory.hpp:441
void reset()
Free all the allocated blocks. umpire does not support this.
Definition: memory.hpp:467
T * allocate(size_t num_elements__)
Return a pointer to a memory block for n elements of type T.
Definition: memory.hpp:427
void clear()
shrink the memory pool and release all memory.
Definition: memory.hpp:472
Base class for smart pointer deleters.
Definition: memory.hpp:270
Deleter for the allocated memory pointer of a given type.
Definition: memory.hpp:291
device_t get_device_t(memory_t mem__)
Get type of device by memory type.
Definition: memory.hpp:130
device_t
Type of the main processing unit.
Definition: memory.hpp:120
@ GPU
GPU device (with CUDA programming model).
bool is_device_memory(memory_t mem__)
Check if this is a valid device memory (memory, accessible by the device).
Definition: memory.hpp:93
memory_t
Memory types where the code can store data.
Definition: memory.hpp:71
@ host_pinned
Pinned host memory. This is host memory + extra bit flag.
@ managed
Managed memory (accessible from both host and device).
std::unique_ptr< T, memory_t_deleter_base > get_unique_ptr(size_t n__, memory_t M__)
Allocate n elements and return a unique pointer.
Definition: memory.hpp:345
memory_t get_memory_t(std::string name__)
Get a memory type from a string.
Definition: memory.hpp:100
void auto_copy(mdarray< T, N > &dst, const mdarray< T, N > &src)
Copy all memory present on destination.
Definition: memory.hpp:1498
bool is_host_memory(memory_t mem__)
Check if this is a valid host memory (memory, accessible by the host).
Definition: memory.hpp:86
void deallocate(void *ptr__)
Deallocate GPU memory.
Definition: acc.hpp:435
void deallocate_host(void *ptr__)
Deallocate host memory.
Definition: acc.hpp:454
int get_device_id()
Get current device ID.
Definition: acc.hpp:191
void copyout(T *target__, T const *source__, size_t n__)
Copy memory from device to host.
Definition: acc.hpp:367
void copyin(T *target__, T const *source__, size_t n__)
Copy memory from host to device.
Definition: acc.hpp:337
void copy(T *target__, T const *source__, size_t n__)
Copy memory inside a device.
Definition: acc.hpp:320
void zero(T *ptr__, size_t n__)
Zero the device memory.
Definition: acc.hpp:397
std::enable_if_t< std::is_same< T, real_type< F > >::value, void > transform(::spla::Context &spla_ctx__, sddk::memory_t mem__, la::dmatrix< F > const &M__, int irow0__, int jcol0__, real_type< F > alpha__, Wave_functions< T > const &wf_in__, spin_index s_in__, band_range br_in__, real_type< F > beta__, Wave_functions< T > &wf_out__, spin_index s_out__, band_range br_out__)
Apply linear transformation to the wave-functions.
Namespace of the SIRIUS library.
Definition: sirius.f90:5
std::ostream & operator<<(std::ostream &out, hbar &&b)
Inject a horizontal bar into the ostream.
Check is the type is a complex number; by default it is not.
Definition: memory.hpp:56