MAIA bb96820c
Multiphysics at AIA
maia::grid Namespace Reference

Namespaces

namespace  cell
 
namespace  hilbert
 
namespace  tree
 

Classes

class  Controller
 Grid controller manages adaptation and load balancing in a multi-solver environment. More...
 
class  IO
 
class  Proxy
 
struct  SolverTimers
 
struct  Timers
 

Functions

template<typename IdType >
void partitionCellToGlobalOffsets (const IdType *const partitionCellOffsets, const IdType *const partitionCellToGlobalIds, const IdType noDomains, IdType *const globalIdOffsets)
 Translates a list of partition cell offsets to global offsets. More...
 
template<typename WeightType , typename IdType >
WeightType optimalPartitioningSerial (const WeightType *const weights, const IdType noWeights, const IdType noPartitions, IdType *const offsets)
 Serial algorithm to find the optimal (not necessarily ideal) partitioning for given workloads, based on a probing algorithm with bisection. More...
 
template<typename WeightType , typename IdType >
void heuristicPartitioningParallel (const WeightType *const localWeights, const IdType noLocalWeights, const MLong localOffset, const MLong noGlobalWeights, const WeightType totalWorkload, const IdType noGlobalPartitions, const IdType globalPartitionId, const IdType noGroups, const MPI_Comm comm, MPI_Comm &commGroup, MPI_Comm &commMasters, IdType &groupId, IdType &groupLocalId, MLong *const groupOffsets)
 Fully parallel partitioning into <noGroups> partitions based on a heuristic approach. More...
 
template<typename WeightType , typename IdType >
void partitionParallelSplit (const WeightType *const localWeights, const IdType noLocalWeights, const IdType localWeightOffset, const IdType noDomains, const IdType domainId, const MPI_Comm mpiComm, IdType *const offsets)
 

Function Documentation

◆ heuristicPartitioningParallel()

template<typename WeightType , typename IdType >
void maia::grid::heuristicPartitioningParallel ( const WeightType *const  localWeights,
const IdType  noLocalWeights,
const MLong  localOffset,
const MLong  noGlobalWeights,
const WeightType  totalWorkload,
const IdType  noGlobalPartitions,
const IdType  globalPartitionId,
const IdType  noGroups,
const MPI_Comm  comm,
MPI_Comm &  commGroup,
MPI_Comm &  commMasters,
IdType &  groupId,
IdType &  groupLocalId,
MLong *const  groupOffsets 
)
Author
Lennart Schneiders
Date
07.2017

Creates a rough estimate of the domain boundaries based on heuristics. This is used to create a coarse partitioning into several groups. Within each group, a more accurate partitioning algorithm is executed at a later stage. Note that for noGroups=1 one obtains a domain decomposition similar to the previous approach. For details on the overall scheme see 'Scalable high-quality 1D partitioning' by M. Lieber and W.E. Nagel (HPCS 2014).
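The heuristic reduces to a single sweep over the weights: a boundary for group g is placed at the element where the normalized workload prefix sum crosses the target fraction of group g, choosing whichever side of the crossing element lies closer to that target. The following serial sketch illustrates this idea; it is a simplified illustration, not the library routine (plain std types instead of MLong/ScratchSpace, no MPI, and the target fraction is taken as g/noGroups, i.e. equal rank counts per group are assumed).

#include <cmath>
#include <cstddef>
#include <vector>

// Serial illustration: estimate group offsets from a weight array such that
// group g starts roughly at the fraction g/noGroups of the total workload.
std::vector<long> estimateGroupOffsets(const std::vector<double>& weights, const int noGroups) {
  double totalWorkload = 0.0;
  for(const double w : weights) totalWorkload += w;

  std::vector<long> groupOffsets(noGroups + 1, 0);
  groupOffsets[noGroups] = static_cast<long>(weights.size());

  double prefix = 0.0;
  for(std::size_t i = 0; i < weights.size(); i++) {
    const double f0 = prefix / totalWorkload;                 // workload fraction before element i
    const double f1 = (prefix + weights[i]) / totalWorkload;  // workload fraction after element i
    for(int g = 1; g < noGroups; g++) {
      const double target = static_cast<double>(g) / noGroups;
      if(f0 <= target && f1 > target) {
        // Place the boundary on whichever side of element i is closer to the target.
        groupOffsets[g] = (std::fabs(target - f0) < std::fabs(f1 - target)) ? static_cast<long>(i)
                                                                            : static_cast<long>(i) + 1;
      }
    }
    prefix += weights[i];
  }
  return groupOffsets;
}

In the parallel routine below, the same crossing test runs on each rank's local weight range using the MPI_Exscan workload prefix; the detected boundaries are collected on rank 0 and broadcast as groupOffsets.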

Definition at line 225 of file partition.h.

230 {
231 // create some sub-communicators for later local data processing
232 const IdType noRanksPerGroup = noGlobalPartitions / noGroups;
233 groupId = (globalPartitionId >= noGroups * noRanksPerGroup) ? noGroups - 1 : globalPartitionId / noRanksPerGroup;
234 groupLocalId = globalPartitionId - groupId * noRanksPerGroup;
235 MBool isGroupMaster = (groupLocalId == 0);
236 IdType masterTag = isGroupMaster ? 0 : MPI_UNDEFINED;
237
238 MPI_Comm_split(comm, groupId, globalPartitionId, &commGroup, AT_, "commGroup");
239 MPI_Comm_split(comm, masterTag, globalPartitionId, &commMasters, AT_, "commMaster");
240
241 // compute global workload offsets/prefixes and the weights of the various groups according to their number of ranks
242 WeightType localOffset0 = F0;
243 WeightType localWorkload = F0;
244 for(IdType i = 0; i < noLocalWeights; i++) {
245 localWorkload += localWeights[i];
246 }
247 MPI_Exscan(&localWorkload, &localOffset0, 1, MPI_DOUBLE, MPI_SUM, comm, AT_, "localWorkload", "localOffset0");
248 ScratchSpace<WeightType> groupWeight(noGroups, AT_, "groupWeight");
249 for(IdType i = 0; i < noGroups; i++)
250 groupWeight[i] = ((WeightType)(i * noRanksPerGroup)) / ((WeightType)noGlobalPartitions);
251
252 // check heuristically for partition boundaries within my local data range and store it
253 std::vector<std::pair<IdType, MLong>> localPartitionCellOffsets;
254 localWorkload = localOffset0;
255 for(IdType i = 0; i < noLocalWeights; i++) {
256 WeightType f0 = localWorkload / totalWorkload;
257 WeightType f1 = (localWorkload + localWeights[i]) / totalWorkload;
258 for(IdType g = 1; g < noGroups; g++) {
259 if(f0 <= groupWeight(g) && f1 > groupWeight(g)) {
260 // localPartitionCellOffsets.push_back( make_pair(g,localOffset+i+1) );
261 if(fabs(groupWeight(g) - f0) < fabs(f1 - groupWeight(g)))
262 localPartitionCellOffsets.push_back(std::make_pair(g, localOffset + i));
263 else
264 localPartitionCellOffsets.push_back(std::make_pair(g, localOffset + i + 1));
265 }
266 }
267 localWorkload += localWeights[i];
268 }
269
270 // send all local partition offsets to rank 0
271 IdType recvCount = 0;
272 IdType noOffsetsFound = (signed)localPartitionCellOffsets.size();
273 if(noOffsetsFound > 0) {
274 MLongScratchSpace sendBuf(noOffsetsFound, 2, AT_, "sendBuf");
275 for(IdType i = 0; i < noOffsetsFound; i++) {
276 sendBuf(i, 0) = localPartitionCellOffsets[i].first;
277 sendBuf(i, 1) = localPartitionCellOffsets[i].second;
278 }
279 if(globalPartitionId == 0) {
280 for(IdType i = 0; i < noOffsetsFound; i++) {
281 groupOffsets[sendBuf(i, 0)] = sendBuf(i, 1);
282 recvCount++;
283 }
284 } else {
285 MPI_Send(&sendBuf[0], 2 * noOffsetsFound, MPI_LONG, 0, 100, comm, AT_, "sendBuf[0]");
286 }
287 }
288
289 // rank 0 receives all global offsets
290 if(globalPartitionId == 0) {
291 const WeightType time0 = MPI_Wtime();
292 WeightType time1 = MPI_Wtime();
293 groupOffsets[0] = 0;
294 groupOffsets[noGroups] = noGlobalWeights;
295 while(recvCount < noGroups - 1) {
296 MPI_Status status;
297 MInt recvSize;
298 MInt flag;
299 MPI_Iprobe(MPI_ANY_SOURCE, 100, comm, &flag, &status);
300 if(flag) {
301 MPI_Get_count(&status, MPI_LONG, &recvSize, AT_);
302 MInt source = status.MPI_SOURCE;
303 MInt recvOffsets = recvSize / 2;
304 MLongScratchSpace recvBuf(recvOffsets, 2, AT_, "recvBuf");
305 MPI_Recv(&recvBuf[0], recvSize, MPI_LONG, source, 100, comm, &status, AT_, "recvBuf[0]");
306 for(IdType i = 0; i < recvOffsets; i++) {
307 groupOffsets[recvBuf(i, 0)] = recvBuf(i, 1);
308 }
309 recvCount += recvOffsets;
310 }
311 if((MInt)((MPI_Wtime() - time0) / 10) > (MInt)((time1 - time0) / 10)) {
312 std::cerr << "Rank " << globalPartitionId << " already waiting " << MPI_Wtime() - time0
313 << " seconds for incoming group offsets..." << std::endl;
314 }
315 time1 = MPI_Wtime();
316 }
317 }
318
319 // distribute the offsets
320 MPI_Bcast(&groupOffsets[0], noGroups + 1, MPI_LONG, 0, comm, AT_, "groupOffsets[0]");
321}

◆ optimalPartitioningSerial()

template<typename WeightType , typename IdType >
WeightType maia::grid::optimalPartitioningSerial ( const WeightType *const  weights,
const IdType  noWeights,
const IdType  noPartitions,
IdType *const  offsets 
)
Author
Lennart Schneiders
Date
07.2017

The algorithm is guaranteed to find the optimal workload distribution. To speed up the computation, the termination criterion <eps> stops the bisection iterations once the partitioning has converged to a workload imbalance no more than 0.01% above its optimal value. For details on the overall scheme see 'Scalable high-quality 1D partitioning' by M. Lieber and W.E. Nagel (HPCS 2014).
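The probing step behind the bisection can be read as a feasibility test: for a candidate maximum workload, fill each partition greedily until adding the next weight would exceed the candidate; if all weights fit into at most noPartitions contiguous chunks, the candidate is feasible and the upper bound is lowered, otherwise the lower bound is raised. A minimal sketch of this idea, without the offset output, the fine tuning of small domains, or the redistribution of empty ranks performed by the library routine:

#include <algorithm>
#include <vector>

// Returns true if the weights fit into at most noPartitions contiguous chunks,
// each with total weight <= probe (probe is assumed to be >= the largest single weight).
static bool feasible(const std::vector<double>& weights, const int noPartitions, const double probe) {
  int parts = 1;
  double current = 0.0;
  for(const double w : weights) {
    if(current + w > probe) {
      if(++parts > noPartitions) return false;
      current = w;
    } else {
      current += w;
    }
  }
  return true;
}

// Bisect on the maximum partition workload until the search interval is below eps.
double optimalMaxWorkload(const std::vector<double>& weights, const int noPartitions) {
  double total = 0.0, maxWeight = 0.0;
  for(const double w : weights) {
    total += w;
    maxWeight = std::max(maxWeight, w);
  }
  double lower = std::max(maxWeight, total / noPartitions);  // the optimum cannot be smaller
  double upper = total;                                      // always feasible
  const double eps = 0.0001 * (total / noPartitions);        // 0.01% of the ideal workload
  while(upper - lower > eps) {
    const double probe = 0.5 * (lower + upper);
    if(feasible(weights, noPartitions, probe)) upper = probe;
    else lower = probe;
  }
  return upper;
}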

Definition at line 61 of file partition.h.

62 {
63 WeightType totalWorkload = F0;
64 WeightType maxWorkload = F0;
65 ScratchSpace<WeightType> prefixSum(noWeights + 1, AT_, "prefixSum");
66 prefixSum(0) = F0;
67 for(IdType i = 0; i < noWeights; i++) {
68 prefixSum(i + 1) = prefixSum(i) + weights[i];
69 maxWorkload = mMax(maxWorkload, weights[i]);
70 }
71 totalWorkload = prefixSum(noWeights);
72 WeightType optimalWorkload = totalWorkload / ((WeightType)noPartitions);
73 ScratchSpace<IdType> offsetTmp(noPartitions + 1, AT_, "offsetTmp");
74 WeightType eps = mMin(0.0001 * optimalWorkload, 0.5 * maxWorkload / optimalWorkload);
75 // WeightType eps = 0.0001*optimalWorkload;
76 IdType noPartitionsOpt = 0;
77 WeightType maxPartitionWorkload = F0;
78 for(IdType i = 0; i <= noPartitions; i++) {
79 offsets[i] = -1;
80 }
81 WeightType lowerVal = mMax(maxWorkload, optimalWorkload);
82 WeightType upperVal = lowerVal + mMax(maxWorkload, F2 * eps);
83 WeightType probeWorkload = lowerVal; // try the optimal workload first
84 WeightType workloadUsed = -F1;
85 MInt itCnt = 0;
86 if(noWeights <= noPartitions) {
87 std::cerr << "Warning: less or equal number of weights than number of partitions!" << std::endl;
88 for(IdType i = 0; i <= noPartitions; i++) {
89 offsets[i] = mMin(noPartitions, i);
90 }
91 for(IdType i = 0; i < noPartitions; i++) {
92 maxPartitionWorkload = mMax(maxPartitionWorkload, weights[i]);
93 }
94 return maxPartitionWorkload;
95 }
96 while(upperVal - lowerVal > eps) {
97 offsetTmp.fill(-1);
98 offsetTmp(0) = 0;
99 IdType id = noWeights / noPartitions; // initial offset guess
100 IdType lastCpu = 1;
101 for(IdType cpu = 1; cpu < noPartitions; cpu++) {
102 MBool found = false;
103 while(!found) {
104 if(prefixSum(id) - prefixSum(offsetTmp(cpu - 1)) > probeWorkload)
105 id--;
106 else if(!(prefixSum(id + 1) - prefixSum(offsetTmp(cpu - 1)) > probeWorkload))
107 id++;
108 else
109 found = true;
110 if(id >= noWeights) {
111 cpu = noPartitions;
112 id = noWeights;
113 found = true;
114 }
115 }
116 if(cpu < noPartitions) {
117 while((noWeights - id) < (noPartitions - cpu))
118 id--;
119 id = mMax(offsetTmp[cpu - 1] + 1, id);
120 offsetTmp(cpu) = id;
121 lastCpu = cpu;
122 id = mMin(noWeights - 1, 2 * offsetTmp(cpu) - offsetTmp(cpu - 1)); // initial offset guess
123 }
124 }
125 if(prefixSum(noWeights) - prefixSum(offsetTmp(lastCpu)) > probeWorkload) {
126 lowerVal = probeWorkload;
127 } else {
128 std::copy(offsetTmp.begin(), offsetTmp.begin() + noPartitions + 1, offsets);
129 workloadUsed = probeWorkload;
130 noPartitionsOpt = lastCpu + 1;
131 upperVal = probeWorkload;
132 }
133 if(itCnt > 0 && offsets[0] == -1) {
134 upperVal += F2 * maxWorkload; // enlarge the interval if no valid distribution was found previously
135 }
136 probeWorkload = F1B2 * (lowerVal + upperVal);
137 itCnt++;
138 }
139 probeWorkload = workloadUsed;
140
141 //--
142 for(IdType i = noPartitionsOpt; i <= noPartitions; i++) {
143 offsets[i] = noWeights;
144 }
145
146 // fine tuning, avoid domains with tiny workload, however, this is not affecting the overall imbalance
147 for(IdType i = 1; i < noPartitionsOpt; i++) {
148 while((prefixSum[offsets[i + 1]] - prefixSum[offsets[i]] + weights[offsets[i] - 1])
149 < (prefixSum[offsets[i]] - prefixSum[offsets[i - 1]] - weights[offsets[i] - 1])
150 && offsets[i] > offsets[i - 1] + 1) {
151 offsets[i]--;
152 }
153 }
154 // This may occur for strong local refinement and oddly leaves one or more last ranks empty, i.e., they are not even
155 // required for the optimal partitioning. Note, however, that the optimal partitioning is usually far away from the
156 // ideal partitioning (totalWorkload/noRanks) for these cases, in the first place! In this case, try to redistribute
157 // the workload with a certain penalty, so they are not idling.
158 if(noPartitionsOpt < noPartitions) {
159 // try to split existing partitions approximately in half
160 WeightType probeSplit = 0.5;
161 IdType diff = noPartitions - noPartitionsOpt;
162 while(diff > 0) {
163 probeSplit -= 0.05;
164 const IdType diff0 = diff;
165 for(IdType k = 0; k < diff0; k++) {
166 for(IdType i = 0; i < noPartitionsOpt; i++) {
167 if(prefixSum[offsets[i + 1]] - prefixSum[offsets[i]] > F2 * probeSplit) {
168 IdType id = (offsets[i + 1] + offsets[i]) / 2;
169 MBool found = false;
170 while(!found && id > offsets[i] && id < offsets[i + 1]) {
171 if(prefixSum(id) - prefixSum(offsets[i]) < probeSplit * probeWorkload)
172 id++;
173 else if(!(prefixSum(id - 1) - prefixSum(offsets[i]) < probeSplit * probeWorkload))
174 id--;
175 else
176 found = true;
177 }
178 // std::cout << offsets[i] << ":" << prefixSum(offsets[i]) << " " << id << ":"
179 // << prefixSum(id) << " " << offsets[i + 1] << ":" << prefixSum(offsets[i + 1])
180 // << " " << probeSplit << " " << probeWorkload << std::endl;
181 if(id > offsets[i] && id < offsets[i + 1]
182 && prefixSum(id) - prefixSum(offsets[i]) > probeSplit * probeWorkload
183 && prefixSum(offsets[i + 1]) - prefixSum(id) > probeSplit * probeWorkload) {
184 // std::cout << "DEBUG1" << std::endl;
185 for(IdType j = noPartitions; j > i + 1; j--) {
186 offsets[j] = offsets[j - 1]; // shift right
187 }
188 offsets[i + 1] = id; // set intermediate offset
189 i = noPartitionsOpt;
190 diff--;
191 }
192 }
193 }
194 }
195 if(probeSplit <= 0) {
196 std::cout << "DIFF " << diff << std::endl;
197 mTerm(1, AT_, "partition algorithm failed due to redistribution error");
198 }
199 }
200 }
201
202 offsets[noPartitions] = noWeights;
203
204 // Return the maximum local workload such that the quality of the final partitioning can be determined later on.
205 maxPartitionWorkload = F0;
206 for(IdType i = 0; i < noPartitions; i++) {
207 maxPartitionWorkload = mMax(maxPartitionWorkload, (prefixSum[offsets[i + 1]] - prefixSum[offsets[i]]));
208 }
209 return maxPartitionWorkload;
210}

◆ partitionCellToGlobalOffsets()

template<typename IdType >
void maia::grid::partitionCellToGlobalOffsets ( const IdType *const  partitionCellOffsets,
const IdType *const  partitionCellToGlobalIds,
const IdType  noDomains,
IdType *const  globalIdOffsets 
)
Author
Michael Schlottke (mic) mic@aia.rwth-aachen.de, Jerry Grimmen
Note
Changed by Lennart, since the previous version was unnecessarily complicated.
Date
2014-12-02
Template Parameters
IdType: Integer type to store counters and size information.
Parameters
[in] partitionCellOffsets: List of partition cell offsets (access by domain id).
[in] partitionCellToGlobalIds: List of global ids for each partition cell (access by partition cell id).
[in] noDomains: Number of domains to which the cells are distributed.
[out] globalIdOffsets: Pointer to a list that will hold the global id of the first cell that goes into the respective domain. Must hold at least noDomains elements.

This algorithm is only useful for partition cells and is generally not applicable to other problems.
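A small usage sketch with hypothetical data (6 partition cells distributed to 3 domains, using plain int64_t for IdType; the include path is assumed):

#include <cstdint>
#include "partition.h"  // assumed include path for maia::grid::partitionCellToGlobalOffsets

void example() {
  // Partition cell offsets per domain: domain 0 owns partition cells [0,2),
  // domain 1 owns [2,4), domain 2 owns [4,6).
  const int64_t partitionCellOffsets[] = {0, 2, 4, 6};
  // Global cell id of each partition cell.
  const int64_t partitionCellToGlobalIds[] = {0, 7, 15, 22, 31, 40};
  const int64_t noDomains = 3;
  int64_t globalIdOffsets[3];

  maia::grid::partitionCellToGlobalOffsets(partitionCellOffsets, partitionCellToGlobalIds, noDomains,
                                           globalIdOffsets);
  // globalIdOffsets now holds {0, 15, 31}: the global id of the first cell of each domain.
}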

Definition at line 40 of file partition.h.

43 {
44 // Set global offset to zero for first domain
45 globalIdOffsets[0] = 0;
46
47 // Calculate global offsets (domain 0 is skipped since it is already zero)
48 for(IdType domainId = 1; domainId < noDomains; domainId++) {
49 globalIdOffsets[domainId] = partitionCellToGlobalIds[partitionCellOffsets[domainId]];
50 }
51}

◆ partitionParallelSplit()

template<typename WeightType , typename IdType >
void maia::grid::partitionParallelSplit ( const WeightType *const  localWeights,
const IdType  noLocalWeights,
const IdType  localWeightOffset,
const IdType  noDomains,
const IdType  domainId,
const MPI_Comm  mpiComm,
IdType *const  offsets 
)

Definition at line 325 of file partition.h.

331 {
332 // Compute sum of local weights
333 WeightType localTotalWeight = WeightType();
334 for(IdType i = 0; i < noLocalWeights; i++) {
335 localTotalWeight += localWeights[i];
336 }
337
338 // Compute global prefix sum offsets
339 WeightType prefixSumOffset = WeightType();
340 MPI_Exscan(&localTotalWeight, &prefixSumOffset, 1, type_traits<WeightType>::mpiType(), MPI_SUM, mpiComm, AT_,
341 "localTotalWeight", "prefixSumOffset");
342
343 // Exchange total global weight from last process
344 WeightType globalWeight = prefixSumOffset + localTotalWeight;
345 MPI_Bcast(&globalWeight, 1, type_traits<WeightType>::mpiType(), noDomains - 1, mpiComm, AT_, "globalWeight");
346
347 // 'Optimal' weight on a single domain
348 const WeightType optimalWeight = globalWeight / static_cast<WeightType>(noDomains);
349
350 // Set all offsets to zero
351 std::fill_n(offsets, noDomains, IdType());
352
353 // Compute the current domain
354 IdType currentDomainId = std::floor(prefixSumOffset / optimalWeight);
355
356 ScratchSpace<IdType> foundDomain(noDomains, AT_, "foundDomain");
357 std::fill(foundDomain.begin(), foundDomain.end(), IdType());
358
359 // Last prefix sum value, start with global offset
360 WeightType lastPrefixSum = prefixSumOffset;
361 for(IdType i = 1; i < noLocalWeights + 1; i++) {
362 // Increase prefix sum by last weight (exclusive prefix sum)
363 const WeightType prefixSum = localWeights[i - 1] + lastPrefixSum;
364 // Determine optimal splitter, i.e. the next domain offset
365 const WeightType splitter = (currentDomainId + 1) * optimalWeight;
366 // Check if splitter position reached
367 if(!(prefixSum < splitter)) {
368 // Set offset of next domain
369 currentDomainId++;
370 offsets[currentDomainId] = localWeightOffset + i;
371
372 // Compare distances between splitter and prefix sum, and splitter and the last prefix sum
373 const WeightType distance = ABS(prefixSum - splitter);
374 const WeightType lastDistance = ABS(lastPrefixSum - splitter);
375 // If the previous distance is closer to the optimal splitter decrease the offset by one
376 // This limits the maximum domain weight deviation by w_max/w_opt, with w_max the maximum
377 // weight of a single element/object and w_opt the optimal weight for a domain.
378 // However, only do this if the previous domain is not empty afterwards, i.e. has the same
379 // offset.
380 if(lastDistance < distance && offsets[currentDomainId] > offsets[currentDomainId - 1] + 1) {
381 offsets[currentDomainId]--;
382 }
383 // Store the domain id on which the offset was found
384 foundDomain[currentDomainId] = domainId;
385 }
386 lastPrefixSum = prefixSum;
387
388 if(currentDomainId == noDomains - 1) {
389 break;
390 }
391 }
392
393 if(lastPrefixSum + 0.0001 * optimalWeight > (currentDomainId + 1) * optimalWeight) {
394 offsets[currentDomainId + 1] = localWeightOffset + noLocalWeights;
395 foundDomain[currentDomainId + 1] = domainId;
396 std::cerr << "DEBUG partition: set offset " << currentDomainId + 1 << " on domain " << domainId
397 << " with last local partition cell " << localWeightOffset + noLocalWeights << " " << lastPrefixSum << " "
398 << (currentDomainId + 1) * optimalWeight << std::endl;
399 }
400
401 // Take max of domains in case an offset is found twice which can happen in special situations
402 MPI_Allreduce(MPI_IN_PLACE, &foundDomain[0], noDomains, type_traits<IdType>::mpiType(), MPI_MAX, mpiComm, AT_,
403 "MPI_IN_PLACE", "foundDomain");
404 for(IdType i = 0; i < noDomains; i++) {
405 if(offsets[i] > 0 && foundDomain[i] != domainId) {
406 std::cerr << "DEBUG partition: reset offset " << i << " on domain " << domainId << ", also found on domain "
407 << foundDomain[i] << std::endl;
408 offsets[i] = 0;
409 }
410 }
411
412 // Combine offsets of all processes
413 MPI_Allreduce(MPI_IN_PLACE, &offsets[0], noDomains, type_traits<IdType>::mpiType(), MPI_MAX, mpiComm, AT_,
414 "MPI_IN_PLACE", "offsets");
415
416 // Check for invalid offsets!
417 for(IdType i = 1; i < noDomains; i++) {
418 if(offsets[i] <= offsets[i - 1]) {
419 m_log << "Partition: invalid offsets " << i - 1 << " " << offsets[i - 1] << " and " << i << " " << offsets[i]
420 << "; setting offset " << i << " to " << offsets[i - 1] + 1 << std::endl;
421 offsets[i] = offsets[i - 1] + 1;
422 }
423 }
424
425 // Storage for total domain weights
426 ScratchSpace<WeightType> domainWeights(noDomains, AT_, "domainWeights");
427 std::fill(domainWeights.begin(), domainWeights.end(), WeightType());
428
429 // Find local weight offset in determined domain offsets
430 IdType* domainIt = std::lower_bound(&offsets[0], &offsets[0] + noDomains, localWeightOffset);
431 // Target domain of first local weight on this domain
432 IdType targetDomain = (*domainIt == localWeightOffset) ? std::distance(&offsets[0], domainIt)
433 : std::distance(&offsets[0], domainIt) - 1;
434
435 // Add weights to corresponding domains
436 for(IdType i = 0; i < noLocalWeights; i++) {
437 domainWeights[targetDomain] += localWeights[i];
438
439 // Increase domain id if the next global weight index corresponds to the next domain offset
440 if(targetDomain < noDomains - 1 && localWeightOffset + i + 1 == offsets[targetDomain + 1]) {
441 targetDomain++;
442 }
443 }
444
445 // Combine locally determined domain weights
446 MPI_Allreduce(MPI_IN_PLACE, &domainWeights[0], noDomains, type_traits<WeightType>::mpiType(), MPI_SUM, mpiComm, AT_,
447 "MPI_IN_PLACE", "domainWeights");
448
449 WeightType maxWeight = WeightType();
450 WeightType minWeight = domainWeights[0];
451
452 // DEBUG output
453 for(IdType i = 0; i < noDomains; i++) {
454 /* m_log << "domain weight " << i << " " << domainWeights[i] << endl; */
455 maxWeight = std::max(maxWeight, domainWeights[i]);
456 minWeight = std::min(minWeight, domainWeights[i]);
457 }
458 m_log << "maximum domain weight " << maxWeight << " (max/opt = " << maxWeight / optimalWeight << ")" << std::endl;
459 m_log << "minimum domain weight " << minWeight << " (min/opt = " << minWeight / optimalWeight << ")" << std::endl;
460}