MAIA bb96820c
Multiphysics at AIA
Loading...
Searching...
No Matches
parallelfor.h
Go to the documentation of this file.
1// Copyright (C) 2024 The m-AIA AUTHORS
2//
3// This file is part of m-AIA (https://git.rwth-aachen.de/aia/m-AIA/m-AIA)
4//
5// SPDX-License-Identifier: LGPL-3.0-only
6
7#ifndef PARALLELFOR
8#define PARALLELFOR
9
10#include "INCLUDE/maiatypes.h"
11#include "compiler_config.h"
12
13#ifdef MAIA_PSTL
14#include <algorithm>
15#include <execution>
16#ifdef MAIA_NVHPC_COMPILER
17// WAR: https://nvbugs/3285841
18#include <thrust/iterator/counting_iterator.h>
19#else
20#include <cstddef>
21#include <iterator>
22#endif
23#endif
24
25#if !defined(CHUNK_SIZE)
26#define CHUNK_SIZE (4096)
27#endif
28
29namespace maia {
30
31#if defined(MAIA_PSTL) && !defined(MAIA_NVHPC_COMPILER)
32
// Member-type aliases: the iterator advertises the forward-iterator category, uses
// std::ptrdiff_t for distances, and exposes MInt* / MInt& as pointer / reference types.
46 using iterator_category = std::forward_iterator_tag;
47 using difference_type = std::ptrdiff_t;
49 using pointer = MInt*;
50 using reference = MInt&;
51
// Non-explicit constructor storing the given value in m_value; because it is not
// explicit, a value_type is implicitly convertible to a RangeIterator.
52 RangeIterator(value_type value) : m_value(value) {}
53
// Dereference: returns the object that p points to.
// NOTE(review): the declaration of p is not visible in this listing; judging by the
// FIXME it presumably results from casting away the constness of &m_value -- confirm.
54 // FIXME: This might be critical as (pointer const ..) is cast to (pointer ..)
57 return *p;
58 }
// Member access: yields the address of the stored value.
59 pointer operator->() { return &m_value; }
60
61 // Pre-/Postfix increment
// Prefix form: increases the stored value by one and returns this iterator.
63 m_value++;
64 return *this;
65 }
// Postfix form: copies the current state, advances this iterator through the prefix
// form, and returns the unmodified copy.
67 RangeIterator tmp = *this;
68 ++(*this);
69 return tmp;
70 }
71 // Pre-/Postfix decrement
// Prefix form: decreases the stored value by one and returns this iterator.
73 m_value--;
74 return *this;
75 }
// Postfix form: copies the current state, steps this iterator back through the prefix
// form, and returns the unmodified copy.
77 RangeIterator tmp = *this;
78 --(*this);
79 return tmp;
80 }
81
82 // Comparison operators: each one compares the stored values of the two iterators.
// NOTE(review): a forward iterator does not normally need decrement or operator>;
// presumably they exist so the type can also act as a counter -- confirm with callers.
83 friend MBool operator==(const RangeIterator& a, const RangeIterator& b) { return a.m_value == b.m_value; };
84 friend MBool operator!=(const RangeIterator& a, const RangeIterator& b) { return a.m_value != b.m_value; };
85 friend MBool operator>(const RangeIterator& a, const RangeIterator& b) { return a.m_value > b.m_value; };
86
// NOTE(review): the declaration following this access specifier (presumably the
// m_value member) is not visible in this listing.
87 private:
89};
90
91#endif // defined(MAIA_PSTL) && !defined(MAIA_NVHPC_COMPILER)
92
98template <class UnaryFunction>
99inline void parallelFor_base(MInt begin, MInt end, UnaryFunction&& f) {
100#if defined(_OPENMP)
101#pragma omp parallel for schedule(static, CHUNK_SIZE) default(none) shared(begin, end, f)
102#endif
103 for(MInt i = begin; i < end; i++) {
104 f(i);
105 }
106}
107
113template <class UnaryFunction>
114inline void parallelFor_pstl(MInt begin, MInt end, UnaryFunction&& f) {
115#if defined(MAIA_PSTL)
116#if defined(MAIA_NVHPC_COMPILER)
117 // WAR: https://nvbugs/3285841
118 // TODO labels:gpu So far this only works with begin == 0. The
119 // RangeIterator is not working for nvhpc's pstl implementation, yet.
120 auto begin_ = thrust::counting_iterator(MInt{begin});
121 auto end_ = end;
122#else
123 auto begin_ = RangeIterator(begin);
124 auto end_ = RangeIterator(end);
125#endif
126 // TODO miro: GCC: How to trigger the usage of more threads? Currently in my
127 // case it using only 1 thread. Hence, OpenMP is performing better
128 std::for_each_n(std::execution::par_unseq, begin_, end_, f);
129#else /* defined(MAIA_PSTL) */
130 parallelFor_base(begin, end, f);
131#endif
132}
133
146template <MBool portedToGpu = false, class UnaryFunction>
147inline void parallelFor(MInt begin, MInt end, UnaryFunction&& f) {
148 if constexpr(portedToGpu) {
149 parallelFor_pstl(begin, end, f);
150 } else {
151 parallelFor_base(begin, end, f);
152 }
153}
154
160template <class UnaryFunction, class T>
161inline void parallelFor_base(const std::vector<T>& container, UnaryFunction&& f) {
162 const MInt end = container.size();
163#if defined(_OPENMP)
164#pragma omp parallel for schedule(static, CHUNK_SIZE) default(none) shared(end, f, container)
165#endif
166 for(MInt i = 0; i < end; i++) {
167 f(container[i]);
168 }
169}
170
/// \brief Runs a callable once for every element of a vector (PSTL).
///
/// With MAIA_PSTL defined the elements are visited through std::for_each using the
/// par_unseq execution policy; without it the call is forwarded to parallelFor_base.
///
/// \param[in] container Vector whose elements are handed to the callable.
/// \param[in] f         Callable invoked as f(element).
template <class UnaryFunction, class T>
inline void parallelFor_pstl(const std::vector<T>& container, UnaryFunction&& f) {
#if defined(MAIA_PSTL)
  // An iterator pair describes the range here, so std::for_each is the matching
  // algorithm; std::for_each_n would need an element count as its second argument,
  // not the end iterator.
  std::for_each(std::execution::par_unseq, container.begin(), container.end(), f);
#else
  parallelFor_base(container, f);
#endif
}
184
196template <MBool portedToGpu = false, class UnaryFunction, class T>
197inline void parallelFor(const std::vector<T>& container, UnaryFunction&& f) {
198 if constexpr(portedToGpu) {
199 parallelFor_pstl(container, f);
200 } else {
201 parallelFor_base(container, f);
202 }
203}
204
205
211template <MInt nDim, class UnaryFunction>
212inline void parallelFor_base(std::array<MInt, nDim> begin, std::array<MInt, nDim> end, UnaryFunction&& f) {
213 if constexpr(nDim == 3) {
214#if defined(_OPENMP)
215#pragma omp parallel for schedule(static, CHUNK_SIZE) default(none) shared(begin, end, f) collapse(3)
216#endif
217 for(MInt k = begin[2]; k < end[2]; k++) {
218 for(MInt j = begin[1]; j < end[1]; j++) {
219 for(MInt i = begin[0]; i < end[0]; i++) {
220 f(i, j, k);
221 }
222 }
223 }
224 } else if constexpr(nDim == 2) {
225#if defined(_OPENMP)
226#pragma omp parallel for schedule(static, CHUNK_SIZE) default(none) shared(begin, end, f) collapse(2)
227#endif
228 for(MInt j = begin[1]; j < end[1]; j++) {
229 for(MInt i = begin[0]; i < end[0]; i++) {
230 f(i, j);
231 }
232 }
233 } else {
234 mTerm(1, AT_, "Only nDim==2 and nDim==3 supported");
235 }
236}
237
243template <MInt nDim, class UnaryFunction>
244inline void parallelFor_pstl(std::array<MInt, nDim> begin, std::array<MInt, nDim> end, UnaryFunction&& f) {
245#if defined(MAIA_PSTL)
246 std::array<MInt, nDim> size{};
247 const MInt beginI = 0;
248 MInt endI = 1;
249 for(MInt dim = 0; dim < nDim; ++dim) {
250 size[dim] = end[dim] - begin[dim];
251 endI *= size[dim];
252 }
253#if defined(MAIA_NVHPC_COMPILER)
254 // WAR: https://nvbugs/3285841
255 // TODO labels:gpu So far this only works with begin == 0. The
256 // RangeIterator is not working for nvhpc's pstl implementation, yet.
257 auto begin_ = thrust::counting_iterator(MInt{beginI});
258 auto end_ = endI;
259#else
260 auto begin_ = RangeIterator(beginI);
261 auto end_ = RangeIterator(endI);
262#endif
263
264 if constexpr(nDim == 3) {
265 std::for_each_n(std::execution::par_unseq, begin_, end_, [=](auto& I) {
266 const MInt k = (I / (size[0] * size[1])) + begin[2];
267 const MInt j = ((I - k * size[0] * size[1]) / size[0]) + begin[1];
268 const MInt i = (I % size[0]) + begin[0];
269
270 f(i, j, k);
271 });
272 } else if constexpr(nDim == 2) {
273 std::for_each_n(std::execution::par_unseq, begin_, end_, [=](auto& I) {
274 const MInt j = (I / size[0]) + begin[1];
275 const MInt i = (I % size[0]) + begin[0];
276
277 f(i, j);
278 });
279 }
280#else /* defined(MAIA_PSTL) */
281 parallelFor_base<nDim>(begin, end, f);
282#endif
283}
284
285
305template <MBool portedToGpu = false, MInt nDim, class UnaryFunction>
306inline void parallelFor(std::array<MInt, nDim> begin, std::array<MInt, nDim> end, UnaryFunction&& f) {
307 if constexpr(portedToGpu) {
308 parallelFor_pstl<nDim>(begin, end, f);
309 } else {
310 parallelFor_base<nDim>(begin, end, f);
311 }
312}
313
314} // namespace maia
315#endif /* PARALLELFOR */
void mTerm(const MInt errorCode, const MString &location, const MString &message)
Definition: functions.cpp:29
int32_t MInt
Definition: maiatypes.h:62
bool MBool
Definition: maiatypes.h:58
Namespace for auxiliary functions/classes.
void parallelFor_base(MInt begin, MInt end, UnaryFunction &&f)
Wrapper function for parallel for loop (no PSTL)
Definition: parallelfor.h:99
void parallelFor_pstl(MInt begin, MInt end, UnaryFunction &&f)
Wrapper function for parallel for loop (PSTL)
Definition: parallelfor.h:114
void parallelFor(MInt begin, MInt end, UnaryFunction &&f)
Wrapper function for parallel for loop.
Definition: parallelfor.h:147
Definition: contexttypes.h:19
Dummy iterator class.
Definition: parallelfor.h:45
std::ptrdiff_t difference_type
Definition: parallelfor.h:47
RangeIterator operator++(MInt)
Definition: parallelfor.h:66
RangeIterator(value_type value)
Definition: parallelfor.h:52
RangeIterator & operator--()
Definition: parallelfor.h:72
friend MBool operator!=(const RangeIterator &a, const RangeIterator &b)
Definition: parallelfor.h:84
reference operator*() const
Definition: parallelfor.h:55
RangeIterator operator--(MInt)
Definition: parallelfor.h:76
pointer operator->()
Definition: parallelfor.h:59
RangeIterator & operator++()
Definition: parallelfor.h:62
friend MBool operator==(const RangeIterator &a, const RangeIterator &b)
Definition: parallelfor.h:83
friend MBool operator>(const RangeIterator &a, const RangeIterator &b)
Definition: parallelfor.h:85
std::forward_iterator_tag iterator_category
Definition: parallelfor.h:46