39#ifdef SIRIUS_USE_MEMORY_POOL
40#include <umpire/ResourceManager.hpp>
41#include <umpire/Allocator.hpp>
42#include <umpire/util/wrap_allocator.hpp>
43#include <umpire/strategy/DynamicPoolList.hpp>
44#include <umpire/strategy/AlignedAllocator.hpp>
57 constexpr static bool value{
false};
64 constexpr static bool value{
true};
88 return static_cast<unsigned int>(mem__) & 0b0001;
95 return static_cast<unsigned int>(mem__) & 0b1000;
102 std::transform(name__.begin(), name__.end(), name__.begin(), ::tolower);
103 std::map<std::string, memory_t>
const m = {{
"none", memory_t::none},
104 {
"host", memory_t::host},
105 {
"host_pinned", memory_t::host_pinned},
106 {
"managed", memory_t::managed},
107 {
"device", memory_t::device}};
109 if (m.count(name__) == 0) {
111 s <<
"get_memory_t(): wrong label of the memory_t enumerator: " << name__;
112 throw std::runtime_error(s.str());
134 case memory_t::host_pinned: {
135 return device_t::CPU;
137 case memory_t::device: {
138 return device_t::GPU;
141 throw std::runtime_error(
"get_device_t(): wrong memory type");
144 return device_t::CPU;
151 std::transform(name__.begin(), name__.end(), name__.begin(), ::tolower);
152 std::map<std::string, device_t>
const m = {{
"cpu", device_t::CPU}, {
"gpu", device_t::GPU}};
154 if (m.count(name__) == 0) {
156 s <<
"get_device_t(): wrong label of the device_t enumerator: " << name__;
157 throw std::runtime_error(s.str());
170 case memory_t::none: {
173 case memory_t::host: {
174 return static_cast<T*
>(std::malloc(n__ *
sizeof(T)));
176 case memory_t::host_pinned: {
178 return acc::allocate_host<T>(n__);
183 case memory_t::device: {
185 return acc::allocate<T>(n__);
191 throw std::runtime_error(
"allocate(): unknown memory type");
201 case memory_t::none: {
204 case memory_t::host: {
208 case memory_t::host_pinned: {
214 case memory_t::device: {
221 throw std::runtime_error(
"deallocate(): unknown memory type");
227template <
typename T,
typename F>
232 std::copy(from_ptr__, from_ptr__ + n__, to_ptr__);
235#if defined(SIRIUS_GPU)
236 throw std::runtime_error(
"Copy mixed precision type not supported in device memory");
243copy(
memory_t from_mem__, T
const* from_ptr__,
memory_t to_mem__, T* to_ptr__,
size_t n__)
246 std::copy(from_ptr__, from_ptr__ + n__, to_ptr__);
249#if defined(SIRIUS_GPU)
250 if (is_device_memory(to_mem__) && is_device_memory(from_mem__)) {
275 virtual void free(
void* ptr__) = 0;
280 std::unique_ptr<memory_t_deleter_base_impl> impl_;
283 void operator()(
void* ptr__)
303 inline void free(
void* ptr__)
305 if (M_ != memory_t::none) {
306 sddk::deallocate(ptr__, M_);
332 inline void free(
void* ptr__);
344inline std::unique_ptr<T, memory_t_deleter_base>
347 return std::unique_ptr<T, memory_t_deleter_base>(allocate<T>(n__, M__),
memory_t_deleter(M__));
361#ifdef SIRIUS_USE_MEMORY_POOL
363 umpire::Allocator allocator_;
365 umpire::Allocator memory_pool_allocator_;
372 std::string mem_type;
376 case memory_t::host: {
380 case memory_t::host_pinned: {
384 case memory_t::managed: {
385 mem_type =
"MANAGED";
388 case memory_t::device: {
399 case memory_t::none: {
407#ifdef SIRIUS_USE_MEMORY_POOL
408 if (
M_ != memory_t::none) {
409 auto& rm = umpire::ResourceManager::getInstance();
410 this->allocator_ = rm.getAllocator(mem_type);
412 if (
M_ == memory_t::host) {
413 this->memory_pool_allocator_ =
414 rm.makeAllocator<umpire::strategy::AlignedAllocator>(
"aligned_allocator", this->allocator_, 256);
416 std::transform(mem_type.begin(), mem_type.end(), mem_type.begin(),
417 [](
unsigned char c) { return std::tolower(c); });
418 this->memory_pool_allocator_ =
419 rm.makeAllocator<umpire::strategy::DynamicPoolList>(mem_type +
"_dynamic_pool", allocator_);
426 template <
typename T>
429#if defined(SIRIUS_USE_MEMORY_POOL)
430 if (
M_ == memory_t::none) {
434 return static_cast<T*
>(memory_pool_allocator_.allocate(num_elements__ *
sizeof(T)));
436 return sddk::allocate<T>(num_elements__,
M_);
443#if defined(SIRIUS_USE_MEMORY_POOL)
444 if (
M_ == memory_t::none) {
448 memory_pool_allocator_.deallocate(ptr__);
450 sddk::deallocate(ptr__,
M_);
455 template <
typename T>
458#if defined(SIRIUS_USE_MEMORY_POOL)
459 return std::unique_ptr<T, memory_t_deleter_base>(this->allocate<T>(n__),
memory_pool_deleter(
this));
461 return sddk::get_unique_ptr<T>(n__,
M_);
474 if (
M_ == memory_t::none) {
477#if defined(SIRIUS_USE_MEMORY_POOL)
478 memory_pool_allocator_.release();
491#if defined(SIRIUS_USE_MEMORY_POOL)
492 if (
M_ != memory_t::none) {
493 return memory_pool_allocator_.getActualSize();
502#if defined(SIRIUS_USE_MEMORY_POOL)
503 if (
M_ != memory_t::none) {
504 size_t s = memory_pool_allocator_.getActualSize() - memory_pool_allocator_.getCurrentSize();
514#if defined(SIRIUS_USE_MEMORY_POOL)
515 if (
M_ != memory_t::none) {
516 auto dynamic_pool = umpire::util::unwrap_allocator<umpire::strategy::DynamicPoolList>(allocator_);
517 return dynamic_pool->getBlocksInPool();
524memory_pool_deleter::memory_pool_deleter_impl::free(
void* ptr__)
531sddk::memory_pool& get_memory_pool(sddk::memory_t M__);
534#define mdarray_assert(condition__)
536#define mdarray_assert(condition__) \
538 if (!(condition__)) { \
539 std::stringstream _s; \
540 _s << "Assertion (" << #condition__ << ") failed " \
541 << "at line " << __LINE__ << " of file " << __FILE__ << std::endl \
542 << "array label: " << label_ << std::endl; \
543 for (int i = 0; i < N; i++) { \
544 _s << "dims[" << i << "].size = " << dims_[i].size() << std::endl; \
546 throw std::runtime_error(_s.str()); \
556 using index_type = int64_t;
560 index_type begin_{0};
585 , size_(end_ - begin_ + 1)
587 assert(end_ >= begin_);
592 : begin_(range__.first)
593 , end_(range__.second)
594 , size_(end_ - begin_ + 1)
596 assert(end_ >= begin_);
606 inline index_type
end()
const
617 inline bool check_range([[maybe_unused]] index_type i__)
const
622 if (i__ < begin_ || i__ > end_) {
623 std::cout <<
"index " << i__ <<
" out of range [" << begin_ <<
", " << end_ <<
"]" << std::endl;
658template <
typename T,
int N>
662 using index_type = mdarray_index_descriptor::index_type;
669 std::unique_ptr<T, memory_t_deleter_base> unique_ptr_{
nullptr};
672 T* raw_ptr_{
nullptr};
675 std::unique_ptr<T, memory_t_deleter_base> unique_ptr_device_{
nullptr};
678 T* raw_ptr_device_{
nullptr};
681 std::array<mdarray_index_descriptor, N>
dims_;
691 offsets_[0] = -dims_[0].begin();
693 for (
int i = 1; i < N; i++) {
694 ld *= dims_[i - 1].size();
696 offsets_[0] -= ld * dims_[i].begin();
701 template <
typename... Args>
702 inline index_type
idx(Args... args)
const
704 static_assert(N ==
sizeof...(args),
"wrong number of dimensions");
705 std::array<index_type, N> i = {args...};
707 for (
int j = 0; j < N; j++) {
709 mdarray_assert(dims_[j].check_range(i[j]));
712 size_t idx = offsets_[0] + i[0];
713 for (
int j = 1; j < N; j++) {
714 idx += i[j] * offsets_[j];
716 mdarray_assert(idx >= 0 && idx < size());
721 template <
bool check_assert = true>
726 case memory_t::host_pinned: {
728 mdarray_assert(raw_ptr_ !=
nullptr);
730 return &raw_ptr_[idx__];
732 case memory_t::device: {
735 mdarray_assert(raw_ptr_device_ !=
nullptr);
737 return &raw_ptr_device_[idx__];
739 std::printf(
"error at line %i of file %s: not compiled with GPU support\n", __LINE__, __FILE__);
740 throw std::runtime_error(
"");
744 throw std::runtime_error(
"mdarray::at_idx(): wrong memory type");
751 template <
bool check_assert = true>
754 return const_cast<T*
>(
static_cast<mdarray<T, N> const&
>(*this).
at_idx<check_assert>(mem__, idx__));
758 inline void call_constructor()
761 for (
size_t i = 0; i < size(); i++) {
762 new (raw_ptr_ + i) T();
768 inline void call_destructor()
770 if (!(std::is_trivial<T>::value || is_complex<T>::value)) {
771 for (
size_t i = 0; i < this->size(); i++) {
772 (raw_ptr_ + i)->~T();
792 deallocate(memory_t::host);
793 deallocate(memory_t::device);
797 mdarray(std::array<mdarray_index_descriptor, N>
const dims__,
memory_t memory__ = memory_t::host,
798 std::string label__ =
"")
801 this->init_dimensions(dims__);
802 this->allocate(memory__);
813 static_assert(N == 1,
"wrong number of dimensions");
815 this->label_ = label__;
816 this->init_dimensions({d0});
817 this->allocate(memory__);
823 static_assert(N == 1,
"wrong number of dimensions");
825 this->label_ = label__;
826 this->init_dimensions({d0});
827 this->allocate(mp__);
833 static_assert(N == 1,
"wrong number of dimensions");
835 this->label_ = label__;
836 this->init_dimensions({d0});
837 this->raw_ptr_ = ptr__;
843 static_assert(N == 1,
"wrong number of dimensions");
845 this->label_ = label__;
846 this->init_dimensions({d0});
847 this->raw_ptr_ = ptr__;
849 this->raw_ptr_device_ = ptr_device__;
860 std::string label__ =
"")
862 static_assert(N == 2,
"wrong number of dimensions");
864 this->label_ = label__;
865 this->init_dimensions({d0, d1});
866 this->allocate(memory__);
871 memory_t memory__ = memory_t::host, std::string label__ =
"")
873 static_assert(N == 3,
"wrong number of dimensions");
875 this->label_ = label__;
876 this->init_dimensions({d0, d1, d2});
877 this->allocate(memory__);
884 static_assert(N == 4,
"wrong number of dimensions");
886 this->label_ = label__;
887 this->init_dimensions({d0, d1, d2, d3});
888 this->allocate(memory__);
894 std::string label__ =
"")
896 static_assert(N == 5,
"wrong number of dimensions");
898 this->label_ = label__;
899 this->init_dimensions({d0, d1, d2, d3, d4});
900 this->allocate(memory__);
906 memory_t memory__ = memory_t::host, std::string label__ =
"")
908 static_assert(N == 6,
"wrong number of dimensions");
910 this->label_ = label__;
911 this->init_dimensions({d0, d1, d2, d3, d4, d5});
912 this->allocate(memory__);
918 static_assert(N == 2,
"wrong number of dimensions");
920 this->label_ = label__;
921 this->init_dimensions({d0, d1});
922 this->raw_ptr_ = ptr__;
926 std::string label__ =
"")
928 static_assert(N == 2,
"wrong number of dimensions");
930 this->label_ = label__;
931 this->init_dimensions({d0, d1});
932 this->raw_ptr_ = ptr__;
934 this->raw_ptr_device_ = ptr_device__;
940 std::string label__ =
"")
942 static_assert(N == 2,
"wrong number of dimensions");
944 this->label_ = label__;
945 this->init_dimensions({d0, d1});
946 this->allocate(mp__);
952 static_assert(N == 3,
"wrong number of dimensions");
954 this->label_ = label__;
955 this->init_dimensions({d0, d1, d2});
956 this->raw_ptr_ = ptr__;
959 mdarray(T* ptr__, T* ptr_device__, mdarray_index_descriptor
const& d0, mdarray_index_descriptor
const& d1,
960 mdarray_index_descriptor
const& d2, std::string label__ =
"")
962 static_assert(N == 3,
"wrong number of dimensions");
964 this->label_ = label__;
965 this->init_dimensions({d0, d1, d2});
966 this->raw_ptr_ = ptr__;
968 this->raw_ptr_device_ = ptr_device__;
976 static_assert(N == 3,
"wrong number of dimensions");
978 this->label_ = label__;
979 this->init_dimensions({d0, d1, d2});
980 this->allocate(mp__);
986 static_assert(N == 4,
"wrong number of dimensions");
988 this->label_ = label__;
989 this->init_dimensions({d0, d1, d2, d3});
990 this->raw_ptr_ = ptr__;
993 mdarray(T* ptr__, mdarray_index_descriptor
const& d0, mdarray_index_descriptor
const& d1,
994 mdarray_index_descriptor
const& d2, mdarray_index_descriptor
const& d3, mdarray_index_descriptor
const& d4,
995 std::string label__ =
"")
997 static_assert(N == 5,
"wrong number of dimensions");
999 this->label_ = label__;
1000 this->init_dimensions({d0, d1, d2, d3, d4});
1001 this->raw_ptr_ = ptr__;
1004 mdarray(T* ptr__, mdarray_index_descriptor
const& d0, mdarray_index_descriptor
const& d1,
1005 mdarray_index_descriptor
const& d2, mdarray_index_descriptor
const& d3, mdarray_index_descriptor
const& d4,
1006 mdarray_index_descriptor
const& d5, std::string label__ =
"")
1008 static_assert(N == 6,
"wrong number of dimensions");
1010 this->label_ = label__;
1011 this->init_dimensions({d0, d1, d2, d3, d4, d5});
1012 this->raw_ptr_ = ptr__;
1017 : label_(src.label_)
1018 , unique_ptr_(std::move(src.unique_ptr_))
1019 , raw_ptr_(src.raw_ptr_)
1021 , unique_ptr_device_(std::move(src.unique_ptr_device_))
1022 , raw_ptr_device_(src.raw_ptr_device_)
1025 for (
int i = 0; i < N; i++) {
1026 dims_[i] = src.dims_[i];
1027 offsets_[i] = src.offsets_[i];
1029 src.raw_ptr_ =
nullptr;
1031 src.raw_ptr_device_ =
nullptr;
1039 label_ = src.label_;
1040 unique_ptr_ = std::move(src.unique_ptr_);
1041 raw_ptr_ = src.raw_ptr_;
1042 src.raw_ptr_ =
nullptr;
1044 unique_ptr_device_ = std::move(src.unique_ptr_device_);
1045 raw_ptr_device_ = src.raw_ptr_device_;
1046 src.raw_ptr_device_ =
nullptr;
1048 for (
int i = 0; i < N; i++) {
1049 dims_[i] = src.dims_[i];
1050 offsets_[i] = src.offsets_[i];
1060 if (!this->size()) {
1066 unique_ptr_ = get_unique_ptr<T>(this->size(), memory__);
1067 raw_ptr_ = unique_ptr_.get();
1073 unique_ptr_device_ = get_unique_ptr<T>(this->size(), memory__);
1074 raw_ptr_device_ = unique_ptr_device_.get();
1084 if (!this->size()) {
1090 raw_ptr_ = unique_ptr_.get();
1097 raw_ptr_device_ = unique_ptr_device_.get();
1111 unique_ptr_.reset(
nullptr);
1116 unique_ptr_device_.reset(
nullptr);
1117 raw_ptr_device_ =
nullptr;
1123 template <
typename... Args>
1126 mdarray_assert(raw_ptr_ !=
nullptr);
1127 return raw_ptr_[idx(args...)];
1131 template <
typename... Args>
1134 return const_cast<T&
>(
static_cast<mdarray<T, N> const&
>(*this)(args...));
1140 mdarray_assert(idx__ >= 0 && idx__ < size());
1141 return raw_ptr_[idx__];
1147 return const_cast<T&
>(
static_cast<mdarray<T, N> const&
>(*this)[idx__]);
1150 template <
typename... Args>
1151 inline T
const* at(
memory_t mem__, Args... args)
const
1153 return at_idx(mem__, idx(args...));
1156 template <
typename... Args>
1157 inline T* at(
memory_t mem__, Args... args)
1159 return const_cast<T*
>(
static_cast<mdarray<T, N> const&
>(*this).at(mem__, args...));
1165 return at_idx<false>(mem__, 0);
1171 return const_cast<T*
>(
static_cast<mdarray<T, N> const&
>(*this).at(mem__));
1176 assert(raw_ptr_ !=
nullptr);
1180 const T* host_data()
const
1182 assert(raw_ptr_ !=
nullptr);
1188#if defined(SIRIUS_GPU)
1189 assert(raw_ptr_device_ !=
nullptr);
1190 return raw_ptr_device_;
1192 throw std::runtime_error(
"not compiled with GPU support");
1196 const T* device_data()
const
1198#if defined(SIRIUS_GPU)
1199 assert(raw_ptr_device_ !=
nullptr);
1200 return raw_ptr_device_;
1202 throw std::runtime_error(
"not compiled with GPU support");
1211 for (
int i = 0; i < N; i++) {
1212 size_ *= dims_[i].size();
1221 mdarray_assert(i < N);
1222 return dims_[i].size();
1228 mdarray_assert(i < N);
1233 inline uint32_t
ld()
const
1235 mdarray_assert(dims_[0].size() <
size_t(1 << 31));
1237 return (int32_t)dims_[0].size();
1242 inline uint64_t
hash(uint64_t h__ = 5381)
const
1244 for (
size_t i = 0; i < size() *
sizeof(T); i++) {
1245 h__ = ((h__ << 5) + h__) + ((
unsigned char*)raw_ptr_)[i];
1255 for (
size_t i = 0; i < size__; i++) {
1256 cs += raw_ptr_[idx0__ + i] *
static_cast<double>((i & 0xF) - 8);
1265 for (
size_t i = 0; i < size__; i++) {
1266 cs += raw_ptr_[idx0__ + i];
1271 inline T checksum()
const
1273 return checksum(0, size());
1278 return this->at(memory_t::host);
1281 inline T
const* begin()
const
1283 return this->at(memory_t::host);
1288 return this->at(memory_t::host) + this->size();
1291 inline T
const* end()
const
1293 return this->at(memory_t::host) + this->size();
1318 mdarray_assert(idx0__ + n__ <= size());
1320 mdarray_assert(raw_ptr_ !=
nullptr);
1322 std::memset((
void*)&raw_ptr_[idx0__], 0, n__ *
sizeof(T));
1326 mdarray_assert(raw_ptr_device_ !=
nullptr);
1327 acc::zero(&raw_ptr_device_[idx0__], n__);
1335 this->
zero(mem__, 0, size());
1345 mdarray_assert(raw_ptr_ !=
nullptr);
1346 mdarray_assert(raw_ptr_device_ !=
nullptr);
1347 mdarray_assert(idx0__ + n__ <= size());
1349 throw std::runtime_error(
1350 "mdarray::copy_to(): memory is both host and device, check what to do with this case");
1356 acc::copyin(&raw_ptr_device_[idx0__], &raw_ptr_[idx0__], n__);
1359 acc::copyin(&raw_ptr_device_[idx0__], &raw_ptr_[idx0__], n__, sid);
1366 acc::copyout(&raw_ptr_[idx0__], &raw_ptr_device_[idx0__], n__);
1369 acc::copyout(&raw_ptr_[idx0__], &raw_ptr_device_[idx0__], n__, sid);
1378 this->copy_to(mem__, 0, size(), sid);
1385 return (raw_ptr_device_ !=
nullptr);
1396 inline bool on_host()
const
1398 return (raw_ptr_ !=
nullptr);
1401 mdarray<T, N>& operator=(std::function<T(
void)> f__)
1403 for (
size_t i = 0; i < this->size(); i++) {
1409 mdarray<T, N>& operator=(std::function<T(index_type)> f__)
1411 static_assert(N == 1,
"wrong number of dimensions");
1413 for (index_type i0 = this->dims_[0].begin(); i0 <= this->dims_[0].end(); i0++) {
1414 (*this)(i0) = f__(i0);
1419 mdarray<T, N>& operator=(std::function<T(index_type, index_type)> f__)
1421 static_assert(N == 2,
"wrong number of dimensions");
1423 for (index_type i1 = this->dims_[1].begin(); i1 <= this->dims_[1].end(); i1++) {
1424 for (index_type i0 = this->dims_[0].begin(); i0 <= this->dims_[0].end(); i0++) {
1425 (*this)(i0, i1) = f__(i0, i1);
1433template <
typename T>
1434using matrix = mdarray<T, 2>;
1437template <
typename T,
int N>
1443 for (
size_t i = 1; i < v.size(); i++) {
1444 out << std::string(
" ") << v[i];
1451template <
typename T,
typename F,
int N>
1455 if (src__.
size() == 0) {
1458 for (
int i = 0; i < N; i++) {
1460 std::stringstream s;
1461 s <<
"error at line " << __LINE__ <<
" of file " << __FILE__ <<
" : array dimensions don't match";
1462 throw std::runtime_error(s.str());
1465 std::cout <<
"=== WARNING at line " << __LINE__ <<
" of file " << __FILE__ <<
" ===" << std::endl;
1466 std::cout <<
" Copying matrix element with different type, possible loss of data precision" << std::endl;
1467 std::copy(&src__.at(memory_t::host)[0], &src__.at(memory_t::host)[0] + src__.
size(), &dest__.at(memory_t::host)[0]);
1478template <
typename T,
int N>
1482 if (src__.
size() == 0) {
1485 for (
int i = 0; i < N; i++) {
1487 std::stringstream s;
1488 s <<
"error at line " << __LINE__ <<
" of file " << __FILE__ <<
" : array dimensions don't match";
1489 throw std::runtime_error(s.str());
1492 std::copy(&src__[0], &src__[0] + src__.
size(), &dest__[0]);
1496template <
typename T,
int N>
1505 acc::copy(dst.device_data(), src.device_data(), src.
size());
1508 if (dst.on_host()) {
1509 std::copy(src.host_data(), src.host_data() + dst.
size(), dst.host_data());
1514template <
typename T,
int N>
1519 if (src.
size() == 0) {
1525 if (device == device_t::GPU) {
1527 acc::copy(dst.device_data(), src.device_data(), dst.
size());
1528 }
else if (device == device_t::CPU) {
1529 assert(src.on_host() && dst.on_host());
1530 std::copy(src.host_data(), src.host_data() + dst.
size(), dst.host_data());
1534template <
class numeric_t, std::size_t... Ts>
1536_empty_like_inner(std::index_sequence<Ts...>& seq [[maybe_unused]], std::size_t (&dims)[
sizeof...(Ts)],
1539 if (mempool ==
nullptr) {
1540 return mdarray<numeric_t,
sizeof...(Ts)>{dims[Ts]...};
1542 mdarray<numeric_t,
sizeof...(Ts)> out{dims[Ts]...};
1548template <
typename T,
int N>
1550empty_like(
const mdarray<T, N>& src)
1552 auto I = std::make_index_sequence<N>{};
1553 std::size_t dims[N];
1554 for (
int i = 0; i < N; ++i) {
1555 dims[i] = src.size(i);
1557 return _empty_like_inner<T>(I, dims,
nullptr);
1560template <
typename T,
int N>
1562empty_like(
const mdarray<T, N>& src, memory_pool& mempool)
1564 auto I = std::make_index_sequence<N>{};
1565 std::size_t dims[N];
1566 for (
int i = 0; i < N; ++i) {
1567 dims[i] = src.size(i);
1569 return _empty_like_inner<T>(I, dims, &mempool);
Interface to accelerators API.
Helper class to wrap stream id (integer number).
Index descriptor of mdarray.
index_type end() const
Return last index value.
mdarray_index_descriptor(std::pair< int, int > const range__)
Constructor for index range [begin, end].
mdarray_index_descriptor()
Constructor of empty descriptor.
mdarray_index_descriptor(size_t const size__)
Constructor for index range [0, size).
size_t size() const
Return index size.
index_type begin() const
Return first index value.
mdarray_index_descriptor(index_type const begin__, index_type const end__)
Constructor for index range [begin, end].
Multidimensional array with the column-major (Fortran) order.
T const * at_idx(memory_t mem__, index_type const idx__) const
Return const pointer to an element at a given index.
mdarray< T, N > & operator=(mdarray< T, N > const &src)=delete
Assignment operator is forbidden.
mdarray(mdarray_index_descriptor const &d0, mdarray_index_descriptor const &d1, mdarray_index_descriptor const &d2, memory_pool &mp__, std::string label__="")
3D array with memory pool allocation.
mdarray(mdarray_index_descriptor const &d0, mdarray_index_descriptor const &d1, mdarray_index_descriptor const &d2, mdarray_index_descriptor const &d3, mdarray_index_descriptor const &d4, memory_t memory__=memory_t::host, std::string label__="")
5D array with memory allocation.
mdarray< T, N > & allocate(memory_pool &mp__)
Allocate memory from the pool.
mdarray_index_descriptor dim(int i) const
Return a descriptor of a dimension.
mdarray(mdarray_index_descriptor const &d0, mdarray_index_descriptor const &d1, mdarray_index_descriptor const &d2, memory_t memory__=memory_t::host, std::string label__="")
3D array with memory allocation.
T * at_idx(memory_t mem__, index_type const idx__)
Return pointer to an element at a given index.
index_type idx(Args... args) const
Return linear index in the range [0, size) by the N-dimensional indices (i0, i1, ...).
void copy_to(memory_t mem__, acc::stream_id sid=acc::stream_id(-1))
Copy entire array from one memory type to another.
mdarray(T *ptr__, mdarray_index_descriptor const &d0, std::string label__="")
1D array with host pointer wrapper.
uint64_t hash(uint64_t h__=5381) const
Compute hash of the array.
T & operator[](size_t const idx__)
Access operator[] for the elements of multidimensional array using a linear index in the range [0, size).
void copy_to(memory_t mem__, size_t idx0__, size_t n__, acc::stream_id sid=acc::stream_id(-1))
Copy n elements starting from idx0 from one memory type to another.
mdarray(mdarray_index_descriptor const &d0, memory_t memory__=memory_t::host, std::string label__="")
1D array with memory allocation.
T const & operator()(Args... args) const
Access operator() for the elements of multidimensional array.
mdarray(mdarray_index_descriptor const &d0, memory_pool &mp__, std::string label__="")
1D array with memory pool allocation.
std::array< index_type, N > offsets_
List of offsets to compute the element location by dimension indices.
void zero(memory_t mem__, size_t idx0__, size_t n__)
Zero n elements starting from idx0.
mdarray()
Default constructor.
mdarray(mdarray< T, N > const &src)=delete
Copy constructor is forbidden.
uint32_t ld() const
Return leading dimension size.
mdarray< T, N > & operator=(mdarray< T, N > &&src)
Move assignment operator.
T const & operator[](size_t const idx__) const
Access operator[] for the elements of multidimensional array using a linear index in the range [0, size).
mdarray(mdarray< T, N > &&src)
Move constructor.
mdarray(T *ptr__, T *ptr_device__, mdarray_index_descriptor const &d0, std::string label__="")
1D array with host and device pointer wrapper.
void zero(memory_t mem__=memory_t::host)
Zero the entire array.
mdarray(std::array< mdarray_index_descriptor, N > const dims__, memory_t memory__=memory_t::host, std::string label__="")
N-dimensional array with index bounds.
std::string label_
Optional array label.
mdarray(mdarray_index_descriptor const &d0, mdarray_index_descriptor const &d1, mdarray_index_descriptor const &d2, mdarray_index_descriptor const &d3, mdarray_index_descriptor const &d4, mdarray_index_descriptor const &d5, memory_t memory__=memory_t::host, std::string label__="")
6D array with memory allocation.
T * at(memory_t mem__)
Return pointer to the beginning of array.
T & operator()(Args... args)
Access operator() for the elements of multidimensional array.
std::array< mdarray_index_descriptor, N > dims_
Array dimensions.
T const * at(memory_t mem__) const
Return pointer to the beginning of array.
mdarray(mdarray_index_descriptor const &d0, mdarray_index_descriptor const &d1, memory_t memory__=memory_t::host, std::string label__="")
2D array with memory allocation.
void init_dimensions(std::array< mdarray_index_descriptor, N > const dims__)
Initialize the offsets used to compute the index of the elements.
void deallocate(memory_t memory__)
Deallocate host or device memory.
mdarray< T, N > & allocate(memory_t memory__)
Allocate memory for array.
size_t size(int i) const
Return size of particular dimension.
mdarray(mdarray_index_descriptor const &d0, mdarray_index_descriptor const &d1, mdarray_index_descriptor const &d2, mdarray_index_descriptor const &d3, memory_t memory__=memory_t::host, std::string label__="")
4D array with memory allocation.
mdarray(T *ptr__, mdarray_index_descriptor const &d0, mdarray_index_descriptor const &d1, std::string label__="")
Wrap a pointer into 2D array.
T checksum(size_t idx0__, size_t size__) const
Compute checksum.
bool on_device() const
Check if device pointer is available.
T checksum_w(size_t idx0__, size_t size__) const
Compute weighted checksum.
size_t size() const
Return total size (number of elements) of the array.
mdarray(mdarray_index_descriptor const &d0, mdarray_index_descriptor const &d1, memory_pool &mp__, std::string label__="")
2D array with memory pool allocation.
Deleter for the allocated memory pointer from a given memory pool.
memory_t M_
Type of memory that is handled by this pool.
size_t total_size() const
Return the total capacity of the memory pool.
size_t free_size() const
Get the total free size of the memory pool.
memory_t memory_type() const
Return the type of memory this pool is managing.
memory_pool(memory_t M__)
Constructor.
size_t num_blocks() const
Get the number of free memory blocks.
std::unique_ptr< T, memory_t_deleter_base > get_unique_ptr(size_t n__)
Return a unique pointer to the allocated memory.
void free(void *ptr__)
Delete a pointer and add its memory back to the pool.
void reset()
Free all the allocated blocks. umpire does not support this.
T * allocate(size_t num_elements__)
Return a pointer to a memory block for n elements of type T.
void clear()
Shrink the memory pool and release all memory.
Base class for smart pointer deleters.
Deleter for the allocated memory pointer of a given type.
device_t get_device_t(memory_t mem__)
Get type of device by memory type.
device_t
Type of the main processing unit.
@ GPU
GPU device (with CUDA programming model).
bool is_device_memory(memory_t mem__)
Check if this is a valid device memory (memory, accessible by the device).
memory_t
Memory types where the code can store data.
@ host_pinned
Pinned host memory. This is host memory + extra bit flag.
@ managed
Managed memory (accessible from both host and device).
std::unique_ptr< T, memory_t_deleter_base > get_unique_ptr(size_t n__, memory_t M__)
Allocate n elements and return a unique pointer.
memory_t get_memory_t(std::string name__)
Get a memory type from a string.
void auto_copy(mdarray< T, N > &dst, const mdarray< T, N > &src)
Copy all memory present on destination.
bool is_host_memory(memory_t mem__)
Check if this is a valid host memory (memory, accessible by the host).
void deallocate(void *ptr__)
Deallocate GPU memory.
void deallocate_host(void *ptr__)
Deallocate host memory.
int get_device_id()
Get current device ID.
void copyout(T *target__, T const *source__, size_t n__)
Copy memory from device to host.
void copyin(T *target__, T const *source__, size_t n__)
Copy memory from host to device.
void copy(T *target__, T const *source__, size_t n__)
Copy memory inside a device.
void zero(T *ptr__, size_t n__)
Zero the device memory.
std::enable_if_t< std::is_same< T, real_type< F > >::value, void > transform(::spla::Context &spla_ctx__, sddk::memory_t mem__, la::dmatrix< F > const &M__, int irow0__, int jcol0__, real_type< F > alpha__, Wave_functions< T > const &wf_in__, spin_index s_in__, band_range br_in__, real_type< F > beta__, Wave_functions< T > &wf_out__, spin_index s_out__, band_range br_out__)
Apply linear transformation to the wave-functions.
Namespace of the SIRIUS library.
std::ostream & operator<<(std::ostream &out, hbar &&b)
Inject horizontal bar into ostream.
Check if the type is a complex number; by default it is not.