MAIA bb96820c
Multiphysics at AIA
Loading...
Searching...
No Matches
cartesiangridio.h
Go to the documentation of this file.
1// Copyright (C) 2024 The m-AIA AUTHORS
2//
3// This file is part of m-AIA (https://git.rwth-aachen.de/aia/m-AIA/m-AIA)
4//
5// SPDX-License-Identifier: LGPL-3.0-only
6
7#ifndef CARTESIANGRIDIO_H
8#define CARTESIANGRIDIO_H
9
10// needed by pv-plugin
11#include "INCLUDE/maiamacro.h"
12// needed by pv-plugin
13#include "COMM/mpiexchange.h"
14#include "COMM/mpioverride.h"
15#include "GEOM/geometry.h"
17#include "IO/context.h"
18#include "MEMORY/alloc.h"
19#include "MEMORY/scratch.h"
20#include "UTIL/hilbert.h"
21#include "cartesiangridtree.h"
22#include "partition.h"
23
24namespace maia {
25namespace grid {
26
27template <typename Grid>
28class IO {
29 Grid& grid_;
30
31 using MInt_dyn_array = typename Grid::MInt_dyn_array;
32 using Tree = typename Grid::Tree;
33 using Cell = typename Tree::Cell;
34
35 // constexpr variables:
36 static constexpr MInt nDim = Grid::nDim_();
37 static constexpr MInt m_noDirs = 2 * nDim;
38 static constexpr MInt m_maxNoChilds = IPOW2(nDim);
39
40 // references:
49 std::vector<MInt>& m_nghbrDomains;
63 std::map<MLong, MInt>& m_globalToLocalId;
71 std::vector<MInt>& m_partitionLevelAncestorIds;
74 std::vector<std::vector<MInt>>& m_partitionLevelAncestorHaloCells;
75 std::vector<std::vector<MInt>>& m_partitionLevelAncestorWindowCells;
76
77 // methods:
78 MInt noDomains() const { return grid_.noDomains(); }
79 MInt noNeighborDomains() const { return grid_.noNeighborDomains(); }
80 MInt noAzimuthalNeighborDomains() const { return grid_.noAzimuthalNeighborDomains(); }
81 MPI_Comm mpiComm() const { return grid_.mpiComm(); }
82 MInt domainId() const { return grid_.domainId(); }
83 MInt solverId() const { return grid_.solverId(); }
84 void loadDonorGridPartition(const MLong* const partitionCellsId, const MInt noPartitionCells) {
85 grid_.loadDonorGridPartition(partitionCellsId, noPartitionCells);
86 }
87 MLong generateHilbertIndex(const MInt cellId, const MInt minLevel) {
88 return grid_.generateHilbertIndex(cellId, minLevel);
89 }
90 void resetCell(const MInt& cellId) { grid_.resetCell(cellId); }
91 MLong& a_globalId(const MInt cellId) { return grid_.a_globalId(cellId); }
92 MLong& a_parentId(const MInt cellId) { return grid_.a_parentId(cellId); }
93 MInt& a_level(const MInt cellId) { return grid_.a_level(cellId); }
94
95 MInt a_hasNeighbor(const MInt cellId, const MInt dir) const { return grid_.a_hasNeighbor(cellId, dir); }
96 MLong& a_neighborId(const MInt cellId, const MInt dir) { return grid_.a_neighborId(cellId, dir); }
97 MFloat& a_coordinate(const MInt cellId, const MInt dir) { return grid_.a_coordinate(cellId, dir); }
98 maia::grid::cell::BitsetType::reference a_hasProperty(const MInt cellId, const Cell p) {
99 return grid_.a_hasProperty(cellId, p);
100 }
101 MLong& a_childId(const MInt cellId, const MInt pos) { return grid_.a_childId(cellId, pos); }
102 MFloat cellLengthAtLevel(const MInt level) const { return grid_.cellLengthAtLevel(level); }
103 MFloat cellLengthAtCell(const MInt cellId) const { return grid_.cellLengthAtCell(cellId); }
104 MInt a_noChildren(const MInt cellId) { return grid_.a_noChildren(cellId); }
105 MInt& a_noOffsprings(const MInt cellId) { return grid_.a_noOffsprings(cellId); }
106 template <class U>
107 maia::grid::cell::BitsetType::reference a_hasProperty(Collector<U>* NotUsed(cells_), const MInt cellId,
108 const Cell p) {
109 return grid_.a_hasProperty(cellId, p);
110 }
111
112 template <class U>
113 MInt& a_noOffsprings(Collector<U>* NotUsed(cells_), const MInt cellId) {
114 return grid_.a_noOffsprings(cellId);
115 }
116 template <class U>
117 MFloat& a_workload(Collector<U>* NotUsed(cells_), const MInt cellId) {
118 return grid_.a_workload(cellId);
119 }
120 MFloat a_weight(const MInt cellId) { return grid_.a_weight(cellId); }
121 template <class U>
122 MInt a_noCells(Collector<U>* NotUsed(cells_)) {
123 return m_tree.size();
124 }
125 // solvers not having cells in the old collector need their own accessor
126
127 IO(Grid& g)
128 : grid_(g),
164
165 public:
166 static void load(Grid& g, MString const& fileName) { IO(g).loadGrid(fileName); }
167 template <class CELLTYPE>
168 static void save(Grid& g, const MChar* fileName, Collector<CELLTYPE>* cpu_cells,
169 const std::vector<std::vector<MInt>>& cpu_haloCells,
170 const std::vector<std::vector<MInt>>& cpu_windowCells,
171 const std::vector<std::vector<MInt>>& cpu_azimuthalHaloCells,
172 const std::vector<MInt>& cpu_azimuthalUnmappedHaloCells,
173 const std::vector<std::vector<MInt>>& cpu_azimuthalWindowCells, MInt* const recalcIdTree) {
174 IO(g).saveGrid(fileName, cpu_cells, cpu_haloCells, cpu_windowCells, cpu_azimuthalHaloCells,
175 cpu_azimuthalUnmappedHaloCells, cpu_azimuthalWindowCells, recalcIdTree);
176 }
177 template <class CELLTYPE>
178 static void calculateNoOffspringsAndWorkload(Grid& g, Collector<CELLTYPE>* cpu_cells, MInt cpu_noCells,
179 const std::vector<std::vector<MInt>>& cpu_haloCells,
180 const std::vector<std::vector<MInt>>& cpu_windowCells,
181 const std::vector<std::vector<MInt>>& partitionLevelAncestorWindowCells,
182 const std::vector<std::vector<MInt>>& partitionLevelAncestorHaloCells) {
183 IO(g).calculateNoOffspringsAndWorkload(cpu_cells, cpu_noCells, cpu_haloCells, cpu_windowCells,
184 partitionLevelAncestorWindowCells, partitionLevelAncestorHaloCells);
185 }
186 static void partitionParallel(Grid& g, const MInt tmpCount, const MLong tmpOffset,
187 const MFloat* const partitionCellsWorkload, const MLong* const partitionCellsGlobalId,
188 const MFloat totalWorkload, MLong* const partitionCellOffsets,
189 MLong* const globalIdOffsets, const MBool computeOnlyPartition = false) {
190 IO(g).domainPartitioningParallel(tmpCount, tmpOffset, partitionCellsWorkload, partitionCellsGlobalId, totalWorkload,
191 partitionCellOffsets, globalIdOffsets, computeOnlyPartition);
192 }
193
194 static void communicatePartitionLevelAncestorData(Grid& g, const MInt noLocalMinLevelCells,
195 const MInt* const localMinLevelCells,
196 const MInt* const localMinLevelId,
197 const MLong* const minLevelCellsTreeId,
198 const MUchar* const cellInfo) {
199 IO(g).communicatePartitionLevelAncestorData(noLocalMinLevelCells, localMinLevelCells, localMinLevelId,
200 minLevelCellsTreeId, cellInfo);
201 }
203
204 private:
227 void domainPartitioningParallel(const MInt tmpCount, const MLong tmpOffset,
228 const MFloat* const partitionCellsWorkload, const MLong* const partitionCellsGlobalId,
229 const MFloat totalWorkload, MLong* const partitionCellOffsets,
230 MLong* const globalIdOffsets, const MBool computeOnlyPartition = false) {
231 TRACE();
232
233 using namespace std;
234
235 const MFloat safetyFac = 1.5;
236 const MInt maxLocalCells =
237 (MInt)(((MFloat)Scratch::getAvailableMemory()) / (safetyFac * ((MFloat)(sizeof(MInt) + sizeof(MFloat)))));
238 const MInt maxNoPartitionCellsPerGroup =
239 mMin(maxLocalCells, (MInt)IPOW2(16)); // this is a rough estimate, due to unbalanced workloads the actual
240 // distribution can vary significantly
241 const MInt intermediateNoGroups_ = 1 + (m_noPartitionCellsGlobal - 1) / maxNoPartitionCellsPerGroup;
242 const MInt noGroups =
243 mMax(1, mMin(noDomains() / 24,
244 intermediateNoGroups_)); // make sure there are at least twentyfour ranks per group
245 const MInt noRanksPerGroup = noDomains() / noGroups;
246
247 // 1. The partition cell globalIds and workloads are read in parallel, equally distributed, by all domains
248 MLongScratchSpace groupOffsets(noGroups + 1, AT_, "groupOffsets");
249 const MFloat idealWorkload = totalWorkload / ((MFloat)noDomains());
250 MInt groupId = 0;
251 MInt groupLocalId = domainId();
252 groupOffsets(0) = 0;
253 groupOffsets(noGroups) = m_noPartitionCellsGlobal;
254 MPI_Comm mpiCommGroup = mpiComm();
255 MPI_Comm mpiCommMasters = MPI_COMM_SELF;
256 if(noGroups > 1) {
257 // 2. A coarse partitioning into noGroups groups is carried out based on a heuristic algorithm (fully parallel)
258 // and returns the groupOffsets
260 &partitionCellsWorkload[0], tmpCount, tmpOffset, m_noPartitionCellsGlobal, totalWorkload, noDomains(),
261 domainId(), noGroups, mpiComm(), mpiCommGroup, mpiCommMasters, groupId, groupLocalId, &groupOffsets[0]);
262 }
263 MLong groupPartitionOffset0 = groupOffsets[groupId];
264 MLong groupPartitionOffset1 = groupOffsets[groupId + 1];
265 MBool isGroupMaster = (groupLocalId == 0);
266 MInt noGroupRanks = 0;
267 MPI_Comm_size(mpiCommGroup, &noGroupRanks);
268
269 MInt groupPartitionCellsCnt = isGroupMaster ? groupPartitionOffset1 - groupPartitionOffset0 : 0;
270 ASSERT(groupPartitionCellsCnt >= 0, "");
271 MFloatScratchSpace partitionCellsWorkloadLocal(mMax(1, groupPartitionCellsCnt), AT_, "partitionCellsWorkloadLocal");
272 MLongScratchSpace partitionCellsIdLocal(mMax(1, groupPartitionCellsCnt), AT_, "partitionCellsIdLocal");
273 // 3. The local partition cell data is redistributed from the individual domains to the group masters, as indicated
274 // by the groupOffsets
275 collectDistributedGroupData(tmpCount, tmpOffset, &partitionCellsGlobalId[0], &partitionCellsWorkload[0], noGroups,
276 &groupOffsets[0], noRanksPerGroup, m_noPartitionCellsGlobal, groupPartitionCellsCnt,
277 &partitionCellsIdLocal[0], &partitionCellsWorkloadLocal[0], mpiComm(), domainId());
278
279 MLongScratchSpace partitionCellOffsetsLocal(noGroupRanks + 1, AT_, "partitionCellOffsetsLocal");
280 MLongScratchSpace globalIdOffsetsLocal(noGroupRanks + 1, AT_, "globalIdOffsetsLocal");
281 if(isGroupMaster) {
282 // 4. The master performs a serial algorithm to find the (locally) optimal partitioning
284 &partitionCellsWorkloadLocal[0], static_cast<MLong>(groupPartitionCellsCnt), static_cast<MLong>(noGroupRanks),
285 &partitionCellOffsetsLocal[0]);
286 MFloat maxDeviation = maxWorkloadGroup / idealWorkload;
287 MPI_Allreduce(MPI_IN_PLACE, &maxDeviation, 1, MPI_DOUBLE, MPI_MAX, mpiCommMasters, AT_, "MPI_IN_PLACE",
288 "maxDeviation");
289 if(noDomains() > 1 && groupId == 0) {
290 std::cerr << "global workload imbalance is " << (maxDeviation - 1.0) * 100.0 << "% ";
291 m_log << "global workload imbalance is " << (maxDeviation - 1.0) * 100.0 << "% (using " << noGroups
292 << " coarse partition groups, each with at least " << noRanksPerGroup << " ranks);"
293 << " ideal workload is " << idealWorkload << " per rank." << std::endl;
294 }
295
296 MIntScratchSpace recvCnt(noGroups, AT_, "recvCnt");
297 MIntScratchSpace recvDispl(noGroups, AT_, "recvDispl");
298 for(MLong i = 0; i < noGroupRanks; i++) {
299 globalIdOffsetsLocal[i] = partitionCellsIdLocal[partitionCellOffsetsLocal[i]];
300 partitionCellOffsetsLocal[i] += groupPartitionOffset0;
301 }
302 for(MLong i = 0; i < noGroups; i++) {
303 recvDispl[i] = i * noRanksPerGroup;
304 recvCnt[i] = (i == noGroups - 1) ? noDomains() - i * noRanksPerGroup : noRanksPerGroup;
305 }
306 // 5. Group masters assemble the offsets
307 MPI_Allgatherv(&globalIdOffsetsLocal[0], noGroupRanks, MPI_LONG, &globalIdOffsets[0], &recvCnt[0], &recvDispl[0],
308 MPI_LONG, mpiCommMasters, AT_, "globalIdOffsetsLocal[0]", "globalIdOffsets[0]");
309 MPI_Allgatherv(&partitionCellOffsetsLocal[0], noGroupRanks, MPI_LONG, &partitionCellOffsets[0], &recvCnt[0],
310 &recvDispl[0], MPI_LONG, mpiCommMasters, AT_, "partitionCellOffsetsLocal[0]",
311 "partitionCellOffsets[0]");
312 globalIdOffsets[noDomains()] = m_noCellsGlobal + m_32BitOffset;
313 partitionCellOffsets[noDomains()] = m_noPartitionCellsGlobal;
314 for(MInt i = 0; i < noGroupRanks; i++) {
315 partitionCellOffsetsLocal[i] -= groupPartitionOffset0;
316 }
317 }
318
319 // 6. Group masters send the final offsets to their slaves
320 globalIdOffsets[0] = m_32BitOffset;
321 partitionCellOffsets[0] = 0;
322 MPI_Bcast(&globalIdOffsets[0], noDomains() + 1, MPI_LONG, 0, mpiCommGroup, AT_, "globalIdOffsets[0]");
323 MPI_Bcast(&partitionCellOffsets[0], noDomains() + 1, MPI_LONG, 0, mpiCommGroup, AT_, "partitionCellOffsets[0]");
324
325 // Grid variables are changes hereafter, skip if enabled and just return a new partitioning in
326 // the passed data arrays to be used e.g. for load balancing without enforcing it directly.
327 if(computeOnlyPartition) {
328 return;
329 }
330
331 // 7. Group masters send the local partition cell global ids to their slaves
332 m_noPartitionCells = partitionCellOffsets[domainId() + 1] - partitionCellOffsets[domainId()];
333 if(m_noPartitionCells <= 0) {
334 mTerm(1, AT_, "Cannot allocate array with " + std::to_string(m_noPartitionCells) + " elements.");
335 }
336 mAlloc(m_localPartitionCellGlobalIds, m_noPartitionCells, "m_localPartitionCellGlobalIds", static_cast<MLong>(-1),
337 AT_);
338 mAlloc(m_localPartitionCellLocalIds, m_noPartitionCells, "m_localPartitionCellLocalIds", -1, AT_);
339
340 if(isGroupMaster) {
341 for(MInt i = 1; i < noGroupRanks; i++) {
342 MLong sendOffset = partitionCellOffsetsLocal[i];
343 MLong sendCount = partitionCellOffsetsLocal[i + 1] - partitionCellOffsetsLocal[i];
344 MPI_Send(&partitionCellsIdLocal[sendOffset], sendCount, MPI_LONG, i, 66, mpiCommGroup, AT_,
345 "partitionCellsIdLocal[sendOffset]");
346 }
347 MInt sendCount = partitionCellOffsetsLocal[1] - partitionCellOffsetsLocal[0];
348 std::copy(&partitionCellsIdLocal[0], &partitionCellsIdLocal[0] + sendCount, &m_localPartitionCellGlobalIds[0]);
349 } else {
350 MPI_Recv(&m_localPartitionCellGlobalIds[0], m_noPartitionCells, MPI_LONG, 0, 66, mpiCommGroup, MPI_STATUS_IGNORE,
351 AT_, "m_localPartitionCellGlobalIds[0]");
352 }
353
354 // save the offsets of the local partitionCells
355 m_noPartitionCells = partitionCellOffsets[domainId() + 1] - partitionCellOffsets[domainId()];
357 partitionCellOffsets[domainId()]; // begin of the local partitionCells in the gobal partitionCell array
358 m_localPartitionCellOffsets[1] = partitionCellOffsets[domainId() + 1]; // end index of the gobal partitionCell array
360 m_noPartitionCellsGlobal; // end of the local partitionCells in the gobal partitionCell array
361 MInt noCells = globalIdOffsets[domainId() + 1] - globalIdOffsets[domainId()];
362 m_noInternalCells = noCells;
363
364 std::copy(&globalIdOffsets[0], &globalIdOffsets[0] + noDomains() + 1, &m_domainOffsets[0]);
365 for(MInt i = 0; i <= noDomains(); ++i) {
368 mTerm(1, AT_, "Invalid domain offsets A.");
369 if(i < noDomains() && m_domainOffsets[i] >= m_domainOffsets[i + 1]) {
370 cout << "m_domainOffsets[i]: " << m_domainOffsets[i] << std::endl;
371 cout << "m_domainOffsets[i+1]: " << m_domainOffsets[i + 1] << std::endl;
372 mTerm(1, AT_, "Invalid domain offsets B.");
373 }
374 }
375 }
376
377
378 //-----------------------------------------------------------------------
379
380
385 void loadGrid(const MString& fileName) {
386 TRACE();
387
388 using namespace std;
389
390 auto logDuration = [this](const MFloat timeStart, const MString comment) {
391 logDuration_(timeStart, "GRID", comment, mpiComm(), domainId(), noDomains());
392 };
393 const MFloat gridTimeStart = wallTime();
394
395 // Log grid information
396 auto logGridInfo = [](const MInt minLevel, const MFloat lengthLevel0, const MFloat* const boundingBox,
397 const MFloat* const centerOfGravity, const MString message) {
398 stringstream msg;
399 msg << endl
400 << " * " << message << ":" << endl
401 << " - minLevel = " << minLevel << endl
402 << " - boundingBox = ";
403 for(MInt i = 0; i < nDim; i++) {
404 msg << setprecision(15) << "[" << boundingBox[i] << ", " << boundingBox[nDim + i] << "]";
405 if(i < nDim - 1) {
406 msg << " x ";
407 }
408 }
409 msg << endl << " - centerOfGravity = [";
410 for(MInt i = 0; i < nDim; i++) {
411 msg << setprecision(15) << centerOfGravity[i];
412 if(i < nDim - 1) {
413 msg << ", ";
414 }
415 }
416 msg << "]" << endl << " - lengthLevel0 = " << setprecision(15) << lengthLevel0;
417 m_log << msg.str() << std::endl;
418 };
419
420
421 // 0. Open the file.
422 const MFloat openGridTimeStart = wallTime();
423 if(domainId() == 0) std::cerr << "Loading grid file " << fileName << std::endl;
424 m_log << "Loading grid file..." << std::endl;
425 using namespace maia::parallel_io;
426 ParallelIo grid(fileName, PIO_READ, mpiComm());
427 logDuration(openGridTimeStart, "Open grid file");
428
429
430 // 1. Read global attributes
431 const MFloat attTimeStart = wallTime();
432 MFloat totalWorkload = F0;
433 MInt gridDim = -1;
434 grid.getAttribute(&gridDim, "nDim");
435 grid.getAttribute(&m_noCellsGlobal, "noCells");
436 grid.getAttribute(&m_noPartitionCellsGlobal, "noPartitionCells");
437 grid.getAttribute(&m_noMinLevelCellsGlobal, "noMinLevelCells");
438 grid.getAttribute(&m_minLevel, "minLevel");
439 grid.getAttribute(&m_maxLevel, "maxLevel");
440 if(grid.hasAttribute("maxUniformRefinementLevel"))
441 grid.getAttribute(&m_maxUniformRefinementLevel, "maxUniformRefinementLevel", 1);
442 grid.getAttribute(&totalWorkload, "totalWorkload");
443 grid.getAttribute(&m_noPartitionLevelAncestorsGlobal, "noPartitionLevelAncestors");
444 grid.getAttribute(&m_maxPartitionLevelShift, "maxPartitionLevelShift");
445 grid.getAttribute(&m_lengthLevel0, "lengthLevel0");
446 grid.getAttribute(&m_centerOfGravity[0], "centerOfGravity", nDim);
447 // grid.getAttribute(&m_boundingBox[0], "boundingBox", 2*nDim);
448 // grid.getAttribute(&m_reductionFactor, "reductionFactor", 1);
449 // grid.getAttribute(&m_decisiveDirection, "decisiveDirection", 1);
450 if(grid.hasAttribute("boundingBox")) grid.getAttribute(&m_boundingBox[0], "boundingBox", 2 * nDim);
451 if(grid.hasAttribute("reductionFactor")) grid.getAttribute(&m_reductionFactor, "reductionFactor", 1);
452 if(grid.hasAttribute("decisiveDirection")) grid.getAttribute(&m_decisiveDirection, "decisiveDirection", 1);
453 if(nDim != gridDim) mTerm(1, AT_, "Number of dimensions mismatch with grid file.");
455 if(grid_.paraViewPlugin()) {
456 // No property file read for plugin, thus maxRfnmntLvl is not set
458 } else {
459 TERMM(1, "Error: specified default property maxRfnmntLvl < maxLevel! (" + std::to_string(m_maxRfnmntLvl) + " < "
460 + std::to_string(m_maxLevel) + ")");
461 }
462 }
463 if(m_maxUniformRefinementLevel < m_minLevel || m_maxUniformRefinementLevel > m_maxLevel) {
464 mTerm(1, AT_, "Warning: maxUniformRefinementLevel invalid.");
465 }
466
467 logGridInfo(m_minLevel, m_lengthLevel0, m_boundingBox, m_centerOfGravity, "grid information read from grid file");
468
469 grid_.m_hasMultiSolverBoundingBox = grid.hasAttribute("multiSolverBoundingBox");
470 // Read multisolver grid information if given in the grid file
471 if(grid_.m_hasMultiSolverBoundingBox) {
472 std::vector<MFloat> multiSolverBoundingBox(2 * nDim);
473 grid.getAttribute(&multiSolverBoundingBox[0], "multiSolverBoundingBox", 2 * nDim);
474 std::copy_n(&multiSolverBoundingBox[0], 2 * nDim, &(grid_.m_targetGridBoundingBox[0]));
475
476 std::vector<MFloat> multiSolverCoG(nDim);
477 grid.getAttribute(&multiSolverCoG[0], "multiSolverCenterOfGravity", nDim);
478
479 MInt multiSolverMinLevel = -1;
480 grid.getAttribute(&multiSolverMinLevel, "multiSolverMinLevel");
481 TERMM_IF_COND(multiSolverMinLevel < 0,
482 "ERROR: invalid multiSolverMinLevel " + std::to_string(multiSolverMinLevel));
483
484 MFloat multiSolverLength0 = -1.0;
485 grid.getAttribute(&multiSolverLength0, "multiSolverLengthLevel0");
486
487 // Determine center of gravity and length level-0 from the given multisolver bounding box
488 MFloat lengthLevel0 = 0.0;
489 for(MInt i = 0; i < nDim; i++) {
490 grid_.m_targetGridCenterOfGravity[i] = 0.5 * (multiSolverBoundingBox[nDim + i] + multiSolverBoundingBox[i]);
491 TERMM_IF_NOT_COND(approx(grid_.m_targetGridCenterOfGravity[i], multiSolverCoG[i], MFloatEps),
492 "center of gravity mismatch");
493
494 // Note: same as during grid generation
495 const MFloat dist =
496 (F1 + F1 / FPOW2(30)) * std::fabs(multiSolverBoundingBox[nDim + i] - multiSolverBoundingBox[i]);
497 // const MFloat dist = std::fabs(multiSolverBoundingBox[nDim + i] -
498 // multiSolverBoundingBox[i]);
499 lengthLevel0 = std::max(lengthLevel0, dist);
500 }
501 // TODO labels:GRID,IO slice grid multisolver bounding box can be smaller than lengthLevel0!
502 TERMM_IF_NOT_COND(approx(lengthLevel0, multiSolverLength0, MFloatEps),
503 "length level 0 mismatch: " + to_string(lengthLevel0) + " != " + to_string(multiSolverLength0));
504
505 grid_.m_targetGridLengthLevel0 = lengthLevel0;
506 grid_.m_targetGridMinLevel = multiSolverMinLevel;
507
508 // Perform sanity checks to ensure min-level cells match and the same Hilbert order is
509 // obtained
511 &grid_.m_targetGridCenterOfGravity[0], grid_.m_targetGridLengthLevel0,
512 grid_.m_targetGridMinLevel);
513
514 logGridInfo(grid_.m_targetGridMinLevel, grid_.m_targetGridLengthLevel0, &multiSolverBoundingBox[0],
515 &(grid_.m_targetGridCenterOfGravity[0]), "multisolver grid information from grid file");
516 } else {
517 // Note: for converted old grid files with bad bounding box/center of gravity information, use
518 // the center of gravity read from the grid file for treeIdToCoordinates, it is overwritten
519 // later with the values computed from the geometry bounding box
520 copy(m_centerOfGravity, m_centerOfGravity + nDim, grid_.m_targetGridCenterOfGravity);
521
522 if(grid.hasAttribute("boundingBox")) {
523 copy_n(&m_boundingBox[0], 2 * nDim, &(grid_.m_targetGridBoundingBox[0]));
524 }
525
526 grid_.m_targetGridLengthLevel0 = m_lengthLevel0;
527 grid_.m_targetGridMinLevel = m_minLevel;
528 }
529
530 // Ensure that number of solver in grid file matches number of solvers in grid class
531 MInt noSolvers = -1;
532 grid.getAttribute(&noSolvers, "noSolvers");
533
534 if(noSolvers < m_tree.noSolvers()) {
535 ASSERT(grid_.m_addSolverToGrid, "");
536 }
537
538 MLong _32BitOffsetBuffer = m_32BitOffset;
539 if(grid.hasAttribute("bitOffset")) {
540 grid.getAttribute(&m_32BitOffset, "bitOffset");
541 } else {
542 m_32BitOffset = 0;
543 }
544 // If no restartGrid is loaded, all globalIds in the gridFile start with 0. The offset is therefore introduced in
545 // the solver as soon as the globalIds assigned for the first time. If a restartGird is loaded, all globalIds start
546 // with the 32BitOffset and partitioning takes place with offset.
547
548 logDuration(attTimeStart, "Read attributes etc");
549
550
551 // 2. partition the grid
552 const MFloat partitionTimeStart = wallTime();
553 if(domainId() == 0) std::cerr << " * partition grid on " << noDomains() << " domains... ";
554 m_log << " * partition grid on " << noDomains() << " domains; ";
555 mAlloc(m_domainOffsets, noDomains() + 1, "m_domainOffsets", static_cast<MLong>(0), AT_);
556 if(noDomains() == 1) { // Serial run
558 mAlloc(m_localPartitionCellGlobalIds, m_noPartitionCells, "m_localPartitionCellGlobalIds", static_cast<MLong>(-1),
559 AT_);
560 mAlloc(m_localPartitionCellLocalIds, m_noPartitionCells, "m_localPartitionCellLocalIds", -1, AT_);
561 grid.setOffset(m_noPartitionCellsGlobal, 0);
562 grid.readArray(m_localPartitionCellGlobalIds, "partitionCellsGlobalId");
564 m_domainOffsets[0] = 0;
568
572 } else if(m_loadGridPartition) { // Partition partition cells according to target grid
573 MLong noDataToRead = (domainId() == 0) ? m_noPartitionCellsGlobal : 0;
574 MLongScratchSpace partitionCellsGlobalId(mMax(1L, noDataToRead), AT_, "partitionCellsGlobalId");
575 grid.setOffset(noDataToRead, 0);
576 grid.readArray(partitionCellsGlobalId.data(), "partitionCellsGlobalId");
577 loadDonorGridPartition(&partitionCellsGlobalId[0], m_noPartitionCellsGlobal);
578 mAlloc(m_localPartitionCellGlobalIds, m_noPartitionCells, "m_localPartitionCellGlobalIds", static_cast<MLong>(-1),
579 AT_);
580 mAlloc(m_localPartitionCellLocalIds, m_noPartitionCells, "m_localPartitionCellLocalIds", -1, AT_);
582 grid.readArray(m_localPartitionCellGlobalIds, "partitionCellsGlobalId");
583 } else if(grid_.m_loadPartition) {
584 // Load partition from file
585 MString gridPartitionFileName = "";
586 gridPartitionFileName = Context::getBasicProperty<MString>("partitionFileName", AT_);
587
588 MLongScratchSpace partitionCellOffsets(noDomains() + 1, FUN_, "partitionCellOffsets");
589 grid_.loadPartitionFile(gridPartitionFileName, &partitionCellOffsets[0]);
590 partitionCellOffsets[noDomains()] = m_noPartitionCellsGlobal;
591
592 m_noPartitionCells = partitionCellOffsets[domainId() + 1] - partitionCellOffsets[domainId()];
593 ASSERT(m_noPartitionCells > 0, "Number of local partition cells needs to be > 0");
594
595 m_localPartitionCellOffsets[0] = partitionCellOffsets[domainId()];
596 m_localPartitionCellOffsets[1] = partitionCellOffsets[domainId() + 1];
598
599 mAlloc(m_localPartitionCellGlobalIds, m_noPartitionCells, "m_localPartitionCellGlobalIds", static_cast<MLong>(-1),
600 FUN_);
601 mAlloc(m_localPartitionCellLocalIds, m_noPartitionCells, "m_localPartitionCellLocalIds", -1, FUN_);
602
604 grid.readArray(m_localPartitionCellGlobalIds, "partitionCellsGlobalId");
605
606 const MLong globalIdOffsetsLocal = (domainId() > 0) ? m_localPartitionCellGlobalIds[0] : 0;
607 MPI_Allgather(&globalIdOffsetsLocal, 1, MPI_LONG, m_domainOffsets, 1, MPI_LONG, mpiComm(), AT_,
608 "globalIdOffsetsLocal", "m_domainOffsets");
610
612 } else if(grid_.m_partitionParallelSplit) {
613 // Evenly distribute all partition-cells among domains for partitioning the grid
614 MLong noLocalPartitionCells = std::floor(m_noPartitionCellsGlobal / noDomains());
615 MLong offsetPartitionCells = domainId() * noLocalPartitionCells;
616 const MLong missingPartitionCells = m_noPartitionCellsGlobal - noLocalPartitionCells * noDomains();
617 if(domainId() < missingPartitionCells) {
618 noLocalPartitionCells++;
619 offsetPartitionCells += domainId();
620 } else {
621 offsetPartitionCells += missingPartitionCells;
622 }
623
624 // Storage for local partition-cell workloads
625 MFloatScratchSpace partitionCellsWorkLoad(noLocalPartitionCells, AT_, "partitionCellsWorkload");
626
627 // Read local part of min-cell workloads from grid
628 grid.setOffset(noLocalPartitionCells, offsetPartitionCells);
629 grid.readArray(&partitionCellsWorkLoad[0], "partitionCellsWorkload");
630
631 MLongScratchSpace partitionCellOffsets(noDomains() + 1, FUN_, "partitionCellOffsets");
632 // Partition min-cells according to workload -> partition cell offsets
633 maia::grid::partitionParallelSplit(&partitionCellsWorkLoad[0], noLocalPartitionCells, offsetPartitionCells,
634 static_cast<MLong>(noDomains()), static_cast<MLong>(domainId()), mpiComm(),
635 &partitionCellOffsets[0]);
636 partitionCellOffsets[noDomains()] = m_noPartitionCellsGlobal;
637
638 m_noPartitionCells = partitionCellOffsets[domainId() + 1] - partitionCellOffsets[domainId()];
639
640 m_localPartitionCellOffsets[0] = partitionCellOffsets[domainId()];
641 m_localPartitionCellOffsets[1] = partitionCellOffsets[domainId() + 1];
643
644 mAlloc(m_localPartitionCellGlobalIds, m_noPartitionCells, "m_localPartitionCellGlobalIds", static_cast<MLong>(-1),
645 FUN_);
646 mAlloc(m_localPartitionCellLocalIds, m_noPartitionCells, "m_localPartitionCellLocalIds", -1, FUN_);
647
649 grid.readArray(m_localPartitionCellGlobalIds, "partitionCellsGlobalId");
650
651 const MLong globalIdOffsetsLocal = (domainId() > 0) ? m_localPartitionCellGlobalIds[0] : 0;
652 MPI_Allgather(&globalIdOffsetsLocal, 1, MPI_LONG, m_domainOffsets, 1, MPI_LONG, mpiComm(), AT_,
653 "globalIdOffsetsLocal", "m_domainOffsets");
655
657 } else { // Parallel partitioning
658 const MInt tmpCount =
659 (domainId() == noDomains() - 1)
662 const MLong tmpOffset = domainId() * (m_noPartitionCellsGlobal / noDomains());
663 MFloatScratchSpace partitionCellsWorkload(tmpCount, AT_, "partitionCellsWorkload");
664 MLongScratchSpace partitionCellsGlobalId(tmpCount, AT_, "partitionCellsGlobalId");
665 MLongScratchSpace partitionCellOffsets(noDomains() + 1, AT_, "partitionCellOffsets");
666 MLongScratchSpace globalIdOffsets(noDomains() + 1, AT_, "globalIdOffsets");
667 grid.setOffset(tmpCount, tmpOffset);
668 grid.readArray(partitionCellsWorkload.data(), "partitionCellsWorkload");
669 grid.readArray(partitionCellsGlobalId.data(), "partitionCellsGlobalId");
670 domainPartitioningParallel(tmpCount, tmpOffset, &partitionCellsWorkload[0], &partitionCellsGlobalId[0],
671 totalWorkload, &partitionCellOffsets[0], &globalIdOffsets[0]);
672 }
674 if(domainId() == 0) std::cerr << std::endl;
675 logDuration(partitionTimeStart, "Partition");
676
677 if(domainId() == 0) std::cerr << " * create data structure for " << m_noCellsGlobal << " cells" << std::endl;
678 m_log << " * create data structure for " << m_noCellsGlobal << " cells" << std::endl;
679
680 // 3. Read cellInfo (8bit unsigned integer, bits 0-3: noChildIds, bits 4-6: position in childIds of parent cell, bit
681 // 7: cell is on minLevel)
682 const MFloat readCellInfoTimeStart = wallTime();
683 std::vector<MUchar> cellInfo(m_noInternalCells);
685 grid.readArray(cellInfo.data(), "cellInfo");
686 logDuration(readCellInfoTimeStart, "Read cell info");
687
688
689 // 4. Check cellInfo for a partition level shift and in this case correct the domain offsets
690 const MFloat correctPlsTimeStart = wallTime();
691 const MInt noPartitionLevelAncestorsGlobal = correctDomainOffsetsAtPartitionLevelShifts(cellInfo);
692 TERMM_IF_COND(noPartitionLevelAncestorsGlobal != m_noPartitionLevelAncestorsGlobal,
693 "Wrong number of partition level ancestors: " + std::to_string(noPartitionLevelAncestorsGlobal)
694 + " != " + std::to_string(m_noPartitionLevelAncestorsGlobal));
695 logDuration(correctPlsTimeStart, "Correct offsets PLS");
696
697
698 // 5. Reset data structure
699 m_tree.clear();
701 for(MInt i = 0; i < m_noInternalCells; ++i) {
702 resetCell(i);
703 }
704
705 // Read solver use info from grid
706 const MFloat readSolverInfoTimeStart = wallTime();
707 {
708 MUcharScratchSpace solver(m_noInternalCells, AT_, "solver");
709 if(noSolvers == 1 && !g_multiSolverGrid) {
710 // If number of solvers is one, all cells belong to it
711 std::fill(solver.begin(), solver.end(), 1);
712 } else {
713 // Otherwise read info from grid
714 // Note: the number of internal cells might have changed due to
715 // correctDomainOffsetsAtPartitionLevelShifts() and thus we have to set the offset again for
716 // reading the solver bits!
718 grid.readArray(&solver[0], "solver");
719 }
720
721 for(MInt cellId = 0; cellId < m_noInternalCells; cellId++) {
722 m_tree.solverFromBits(cellId, solver[cellId]);
723 }
724
725 // set new solver flag for all cells belonging to the reference solver!
726 if(grid_.m_addSolverToGrid) {
727 const MInt newSolverId = noSolvers;
728 noSolvers++;
729 // loop over all cells and set flag to cells which also have the referenceSolverId
730 for(MInt cellId = 0; cellId < m_noInternalCells; cellId++) {
731 if(m_tree.solver(cellId, grid_.m_referenceSolver)) {
732 m_tree.solver(cellId, newSolverId) = true;
733 }
734 }
735 }
736 }
737 logDuration(readSolverInfoTimeStart, "Read solver info");
738
739 if(domainId() == 0) {
741 std::cerr << " * setup grid connectivity on level " << m_minLevel << std::endl;
742 else
743 std::cerr << " * setup grid connectivity from level " << m_minLevel << " to " << m_maxLevel << std::endl;
744 }
746 m_log << " * setup grid connectivity on level " << m_minLevel << std::endl;
747 else
748 m_log << " * setup grid connectivity from level " << m_minLevel << " to " << m_maxLevel << std::endl;
749
750
751 // If no restartGrid is loaded, introduce the offset for the first time. Otherwise no further shift is neccessary.
752 if(m_32BitOffset > 0)
753 m_32BitOffset = 0;
754 else
755 m_32BitOffset = _32BitOffsetBuffer;
756
757 // 6. Set globalId and mapping
758 for(MInt i = 0; i < noDomains() + 1; ++i) {
760 }
761 m_globalToLocalId.clear();
762 for(MInt i = 0; i < m_noInternalCells; ++i) {
765 }
766
767
768 // 7. Determine min level cells on this domain (identified by the last bit of cellInfo)
769 MInt noMinLevelCells = 0;
770 std::vector<MInt> localMinLevelCells;
771 ScratchSpace<MInt> localMinLevelId(m_noInternalCells, AT_, "localMinLevelId");
772 localMinLevelId.fill(-1);
773 for(MInt i = 0; i < m_noInternalCells; ++i) {
774 MUint tmpBit = static_cast<MUint>(cellInfo[i]);
775 MUint isMinLevel = (tmpBit >> 7) & 1;
776 if(isMinLevel) {
777 localMinLevelId(i) = noMinLevelCells;
778 a_level(i) = m_minLevel;
779 a_parentId(i) = -1;
780 localMinLevelCells.push_back(i);
781 noMinLevelCells++;
782 }
783 }
784
785
786 // 8. Read treeId and nghbrIds of local min level cells
787 const MFloat readMinCellInfoTimeStart = wallTime();
788 MLongScratchSpace minLevelCellsTreeId(mMax(1, noMinLevelCells), AT_, "minLevelCellsTreeId");
789 {
790 MLongScratchSpace minLevelCellsNghbrIds(noMinLevelCells, m_noDirs, AT_, "minLevelCellsNghbrIds");
791 MInt minLevelCellOffset = 0;
792 MPI_Exscan(&noMinLevelCells, &minLevelCellOffset, 1, MPI_INT, MPI_SUM, mpiComm(), AT_, "noMinLevelCells",
793 "minLevelCellOffset");
794
795 grid.setOffset(noMinLevelCells, minLevelCellOffset);
796 grid.readArray(minLevelCellsTreeId.data(), "minLevelCellsTreeId");
797 grid.setOffset(m_noDirs * noMinLevelCells, m_noDirs * minLevelCellOffset);
798 grid.readArray(minLevelCellsNghbrIds.data(), "minLevelCellsNghbrIds");
799
800 for(MInt i = 0; i < noMinLevelCells; i++) {
801 MInt cellId = localMinLevelCells[i];
802 for(MInt j = 0; j < m_noDirs; j++) {
803 a_neighborId(cellId, j) = (minLevelCellsNghbrIds(i, j) > -1) ? minLevelCellsNghbrIds(i, j) + m_32BitOffset
804 : minLevelCellsNghbrIds(i, j);
805 }
806 }
807 }
808 logDuration(readMinCellInfoTimeStart, "Read min cell info");
809
810
811 // 9. Compute coordinates of local min level cells
812 for(MInt i = 0; i < noMinLevelCells; i++) {
813 maia::grid::hilbert::treeIdToCoordinates<nDim>(&a_coordinate(localMinLevelCells[i], 0), minLevelCellsTreeId[i],
814 static_cast<MLong>(grid_.m_targetGridMinLevel),
815 &(grid_.m_targetGridCenterOfGravity[0]),
816 grid_.m_targetGridLengthLevel0);
817 }
818 for(MInt i = 0; i < m_noPartitionCells; i++) {
820 }
821
822 // Reintroduced the offset since it is used in communicatePartitionLevelAncestorData.
823 if(m_restart) m_32BitOffset = _32BitOffsetBuffer;
824
825
826 // 10. Exchange partition level ancestors, if any, determine their connectivity, and append them at the back of the
827 // cell collector
828 const MFloat exchangePlaTimeStart = wallTime();
829 {
830 // exchange and assemble data of all partition level ancestors
831 // afterwards, the tree between the minLevel and the partition level is set (level, childId, noChildren, parentId,
832 // coordinate, and noOffsprings)
833 communicatePartitionLevelAncestorData(noMinLevelCells, localMinLevelCells.data(), &localMinLevelId[0],
834 &minLevelCellsTreeId[0], &cellInfo[0]);
835
836 for(MInt i = m_noInternalCells; i < m_tree.size(); ++i) {
837 ASSERT(a_hasProperty(i, Cell::IsPartLvlAncestor), "Error: Cell not marked as partition level ancestor.");
838 a_hasProperty(i, Cell::IsHalo) = true;
839 }
840 }
841 logDuration(exchangePlaTimeStart, "Exchange partition level ancestors");
842
843 // Set the offset to 0 in case a restartGrid is used. traverseAndSetupSubtree has already the offset of the
844 // restartGrid.
845 if(m_restart) m_32BitOffset = 0;
846
847 // 11. Recursively setup the local subtrees (level, childIds, noChildren, parentId, coordinates, and noOffspring)
848 // starting from each partition cell
849 const MFloat setupSubtreesTimeStart = wallTime();
850 for(MInt i = 0; i < m_noPartitionCells; i++) {
852 if(a_level(cellId) < m_minLevel || a_level(cellId) > m_minLevel + m_maxPartitionLevelShift) {
853 TERMM(1, "Wrong level for partition cell: level " + std::to_string(a_level(cellId)) + ", partitionCellGlobalId "
854 + std::to_string(m_localPartitionCellGlobalIds[i]) + " i" + std::to_string(i));
855 }
856 traverseAndSetupSubtree(cellId, cellInfo.data());
857 a_hasProperty(cellId, Cell::IsPartitionCell) = true;
858 }
859 logDuration(setupSubtreesTimeStart, "Setup local grid subtrees");
860
861 // Reintroduced the offset since it is used in propagateNeighborsFromMinLevelToMaxLevel()
862 if(m_restart) m_32BitOffset = _32BitOffsetBuffer;
863
864 // 12. Set neighborIds on each level > minLevel
865 const MFloat neighborsTimeStart = wallTime();
867 logDuration(neighborsTimeStart, "Propagate neighbors");
868
869 // Finally, set the offset to the original value.
870 m_32BitOffset = _32BitOffsetBuffer;
871
872 // 13. Convert global to local ids
873 if(noDomains() > 1) {
874 grid_.createGlobalToLocalIdMapping();
875 grid_.changeGlobalToLocalIds();
876 }
877
878 for(MInt i = 0; i < m_noPartitionCells; i++) {
880 }
881
882
883 // Fix for old converted grids with bad bounding box/center of gravity information
884 if(!grid_.m_hasMultiSolverBoundingBox && !g_multiSolverGrid) {
885 MBool boundingBoxDiff = false;
886 for(MInt i = 0; i < 2 * nDim; i++) {
887 if(!approx(grid_.m_boundingBoxBackup[i], m_boundingBox[i], MFloatEps)) {
888 boundingBoxDiff = true;
889 }
890 }
891
892 // this part may be removed if transition to new grid format is complete and no grids
893 // written with the grid-converter are used anymore. Remove the following line once
894 // labels:GRID transition to new grid format is complete. This hack is necessary as the grid files
895 // obtained from the converter have a bad bounding box information
896 copy_n(&grid_.m_boundingBoxBackup[0], 2 * nDim, &m_boundingBox[0]);
897
898 // Recompute center of gravity
899 MBool centerOfGravityDiff = false;
900 for(MInt dir = 0; dir < nDim; dir++) {
901 const MFloat tmpCenter = m_boundingBox[dir] + 0.5 * (m_boundingBox[dir + nDim] - m_boundingBox[dir]);
902
903 if(!approx(tmpCenter, m_centerOfGravity[dir], MFloatEps)) {
904 centerOfGravityDiff = true;
905 m_centerOfGravity[dir] = tmpCenter;
906 }
907 }
908 copy(m_centerOfGravity, m_centerOfGravity + nDim, grid_.m_targetGridCenterOfGravity);
909
910 if(boundingBoxDiff || centerOfGravityDiff) {
911 cerr0 << std::endl
912 << "WARNING: the grid information from the grid file have been updated/corrected, "
913 "which should only be necessary in case of a converted old grid file with bad "
914 "bounding box/center of gravity information (see m_log for the changes)."
915 << std::endl
916 << std::endl;
918 "updated/corrected grid information with geometry bounding box");
919 }
920 }
921
922 if(grid_.m_addSolverToGrid && !g_multiSolverGrid) {
923 g_multiSolverGrid = true;
924 }
925
926 // done
927 if(domainId() == 0) std::cerr << "done." << std::endl;
928 m_log << "done." << std::endl;
929 logDuration(gridTimeStart, "Load grid total");
930 }
931
932
933 //-----------------------------------------------------------------------
934
935
941 TRACE();
942
943 constexpr MInt noInternalConnections = (nDim == 2) ? 4 : 12;
944 constexpr MInt connectionDirs[12] = {1, 1, 3, 3, 1, 1, 3, 3, 5, 5, 5, 5};
945 constexpr MInt childs0[12] = {0, 2, 0, 1, 4, 6, 4, 5, 0, 1, 2, 3};
946 constexpr MInt childs1[12] = {1, 3, 2, 3, 5, 7, 6, 7, 4, 5, 6, 7};
947 constexpr MInt dirStencil[3][8] = {{0, 1, 0, 1, 0, 1, 0, 1}, {2, 2, 3, 3, 2, 2, 3, 3}, {4, 4, 4, 4, 5, 5, 5, 5}};
948 constexpr MInt sideIds[3][8] = {{0, 1, 0, 1, 0, 1, 0, 1}, {0, 0, 1, 1, 0, 0, 1, 1}, {0, 0, 0, 0, 1, 1, 1, 1}};
949 constexpr MInt otherSide[2] = {1, 0};
950 constexpr MInt revDir[6] = {1, 0, 3, 2, 5, 4};
951
952 std::map<MLong, MInt> exchangeCells;
953 for(MInt level = m_minLevel; level < m_maxLevel; ++level) {
954 exchangeCells.clear();
955 MInt noExchangeCells = 0;
956 for(MInt i = 0; i < m_tree.size(); ++i) {
957 if(level == a_level(i)) {
958 for(MInt d = 0; d < m_noDirs; d++) {
959 if(a_hasNeighbor(i, d) > 0 && a_neighborId(i, d) > -1) {
960 MLong nghbrId = a_neighborId(i, d);
961 if(nghbrId < m_domainOffsets[domainId()] || nghbrId >= m_domainOffsets[domainId() + 1]) {
962 if(exchangeCells.count(nghbrId) == 0) {
963 MInt cpu = -1;
964 for(MInt c = 0; c < noDomains(); c++) {
965 if(nghbrId >= m_domainOffsets[c] && nghbrId < m_domainOffsets[c + 1]) {
966 cpu = c;
967 c = noDomains();
968 }
969 }
970 if(cpu < 0 || cpu == domainId()) {
971 mTerm(1, AT_, "Neighbor domain not found " + std::to_string(cpu) + " " + std::to_string(nghbrId));
972 }
973 exchangeCells[nghbrId] = cpu;
974 noExchangeCells++;
975 }
976 }
977 }
978 }
979 }
980 }
981 MInt noCellsToQuery = exchangeCells.size();
982 MLongScratchSpace queryGlobalIds(mMax(1, noCellsToQuery), AT_, "queryGlobalIds");
983 MInt cnt = 0;
984 for(auto it = exchangeCells.begin(); it != exchangeCells.end(); it++) {
985 queryGlobalIds[cnt] = it->first;
986 it->second = cnt;
987 cnt++;
988 }
989 MLongScratchSpace recvChildIds(mMax(1, noCellsToQuery), m_maxNoChilds, AT_, "recvChildIds");
990 queryGlobalData(noCellsToQuery, &queryGlobalIds[0], &recvChildIds[0], &a_childId(0, 0), m_maxNoChilds);
991
992 for(MInt i = 0; i < m_tree.size(); ++i) {
993 if(level == a_level(i)) {
994 for(MInt child = 0; child < m_maxNoChilds; child++) {
995 if(a_childId(i, child) < 0) continue;
996 MInt childId = globalIdToLocalId(a_childId(i, child));
997 if(childId < 0) continue;
998 ASSERT(a_level(childId) == level + 1,
999 "wrong child level cell" + std::to_string(i) + " child" + std::to_string(childId));
1000 MInt nodeId[3];
1001 MInt revNodeId[3];
1002 for(MInt j = 0; j < nDim; j++) {
1003 nodeId[j] = sideIds[j][child] * IPOW2(j);
1004 revNodeId[j] = otherSide[sideIds[j][child]] * IPOW2(j);
1005 }
1006 for(MInt d = 0; d < nDim; d++) {
1007 const MInt dir = dirStencil[d][child];
1008 if(a_hasNeighbor(i, dir) == 0) {
1009 a_neighborId(childId, dir) = -1;
1010 } else {
1011 const MInt childNode = child - nodeId[d] + revNodeId[d];
1012 const MLong nghbr0 = a_neighborId(i, dir);
1013 MInt nghbrId = globalIdToLocalId(nghbr0);
1014 MLong childNghbrId = -1;
1015 if(nghbrId > -1) {
1016 childNghbrId = a_childId(nghbrId, childNode);
1017 } else {
1018 if(exchangeCells.count(nghbr0) == 0) {
1019 mTerm(1, AT_, "Exchange cell not found.");
1020 }
1021 MInt idx = exchangeCells[nghbr0];
1022 if(queryGlobalIds[idx] != nghbr0) mTerm(1, AT_, "Cell inconsistency.");
1023 childNghbrId = recvChildIds(idx, childNode);
1024 }
1025 if(childNghbrId < 0) { // may happen due to cells deleted at the boundaries
1026 a_neighborId(childId, dir) = -1;
1027 } else {
1028 a_neighborId(childId, dir) = childNghbrId;
1029 MInt childNghbrLocalId = globalIdToLocalId(childNghbrId);
1030 if(childNghbrLocalId > -1) {
1031 a_neighborId(childNghbrLocalId, revDir[dir]) = a_childId(i, child);
1032 if(a_neighborId(childNghbrLocalId, revDir[dir]) >= m_noCellsGlobal + m_32BitOffset)
1033 mTerm(1, AT_, "Nghbr inconsistency.");
1034 }
1035 }
1036 }
1037 }
1038 }
1039 for(MInt c = 0; c < noInternalConnections; c++) {
1040 MInt dir = connectionDirs[c];
1041 MLong child0 = a_childId(i, childs0[c]);
1042 MLong child1 = a_childId(i, childs1[c]);
1043 MInt localId0 = globalIdToLocalId(child0);
1044 MInt localId1 = globalIdToLocalId(child1);
1045 if(localId0 > -1) {
1046 a_neighborId(localId0, dir) = child1;
1047 if(a_neighborId(localId0, dir) >= m_noCellsGlobal + m_32BitOffset) mTerm(1, AT_, "Nghbr inconsistency.");
1048 }
1049 if(localId1 > -1) {
1050 a_neighborId(localId1, revDir[dir]) = child0;
1051 if(a_neighborId(localId1, revDir[dir]) >= m_noCellsGlobal + m_32BitOffset)
1052 mTerm(1, AT_, "Nghbr inconsistency.");
1053 }
1054 }
1055 }
1056 }
1057 }
1058 }
1059
1060
1061 //-----------------------------------------------------------------------
1062
1063
1064 inline MInt globalIdToLocalId(const MLong globalId) { return grid_.globalIdToLocalId(globalId); }
1065
1066
1067 //-----------------------------------------------------------------------
1068
1069
1070 MInt findNeighborDomainId(MLong globalId) { return grid_.findNeighborDomainId(globalId); }
1071
1072
1073 //-----------------------------------------------------------------------
1074
1075
1076 template <class ITERATOR, typename U>
1077 MInt findIndex(ITERATOR first, ITERATOR last, const U& val) {
1078 return grid_.findIndex(first, last, val);
1079 }
1080
1081
1082 //-----------------------------------------------------------------------
1083
1084
1089 void collectDistributedGroupData(const MInt noLocalData, const MLong localDataOffset, const MLong* const intData,
1090 const MFloat* const fltData, const MInt noGroups, const MLong* const groupOffsets,
1091 const MInt noRanksPerGroup, const MLong noGlobalData, const MInt noRecvData,
1092 MLong* const recvInt, MFloat* const recvFlt, MPI_Comm comm, MInt rank) {
1093 MInt recvCount = 0;
1094 MInt locCnt = 0;
1095 MInt noReqs = 0;
1096 ScratchSpace<MPI_Request> requests(2 * mMin(noRecvData, noGroups * noRanksPerGroup) + 3 * noGroups, AT_,
1097 "requests");
1098 MLongScratchSpace sendBuf(noGroups, 2, AT_, "sendBuf");
1099 requests.fill(MPI_REQUEST_NULL);
1100
1101 // 1. all ranks send their local data to the corresponding group masters as determined by the groupOffsets
1102 while(locCnt < noLocalData) {
1103 MInt sendCnt = 0;
1104 MInt group = 0;
1105 while(!(localDataOffset + locCnt >= groupOffsets[group] && localDataOffset + locCnt < groupOffsets[group + 1]))
1106 group++;
1107 ASSERT(group < noGroups, "");
1108 while(locCnt + sendCnt < noLocalData && localDataOffset + locCnt + sendCnt >= groupOffsets[group]
1109 && localDataOffset + locCnt + sendCnt < groupOffsets[group + 1])
1110 sendCnt++;
1111 MInt locOffset = localDataOffset + locCnt - groupOffsets[group];
1112 MInt ndom = group * noRanksPerGroup;
1113 if(ndom == rank) {
1114 ASSERT(locOffset + sendCnt <= noRecvData, "");
1115 std::copy(&fltData[locCnt], &fltData[locCnt] + sendCnt, &recvFlt[locOffset]);
1116 std::copy(&intData[locCnt], &intData[locCnt] + sendCnt, &recvInt[locOffset]);
1117 recvCount += sendCnt;
1118 } else {
1119 ASSERT(groupOffsets[group] + locOffset + sendCnt <= noGlobalData, "");
1120 sendBuf(group, 0) = sendCnt;
1121 sendBuf(group, 1) = locOffset;
1122 MPI_Isend(&sendBuf(group, 0), 2, MPI_LONG, ndom, 0, comm, &requests[noReqs++], AT_,
1123 "sendBuf(group"); // send data count and offset in local group array
1124#if defined(HOST_Klogin)
1125 MPI_Isend(const_cast<MLong*>(&intData[locCnt]), sendCnt, MPI_LONG, ndom, 1, comm, &requests[noReqs++], AT_,
1126 "const_cast<MLong*>(&intData[locCnt])"); // send partition cell ids
1127 MPI_Isend(const_cast<MFloat*>(&fltData[locCnt]), sendCnt, MPI_DOUBLE, ndom, 2, comm, &requests[noReqs++], AT_,
1128 "const_cast<MFloat*>(&fltData[locCnt])"); // send partition cell workloads
1129#else
1130 MPI_Isend(&intData[locCnt], sendCnt, MPI_LONG, ndom, 1, comm, &requests[noReqs++], AT_,
1131 "intData[locCnt]"); // send partition cell ids
1132 MPI_Isend(&fltData[locCnt], sendCnt, MPI_DOUBLE, ndom, 2, comm, &requests[noReqs++], AT_,
1133 "fltData[locCnt]"); // send partition cell workloads
1134#endif
1135 }
1136 locCnt += sendCnt;
1137 }
1138
1139 // 2. the group masters receive all partition data residing in their group
1140 const MFloat time0 = MPI_Wtime();
1141 MFloat time1 = MPI_Wtime();
1142 while(recvCount < noRecvData) {
1143 MPI_Status status;
1144 MInt flag;
1145 MPI_Iprobe(MPI_ANY_SOURCE, 0, comm, &flag, &status); // wait for messages (asynchronously)
1146 if(flag) {
1147 MInt source = status.MPI_SOURCE;
1148 MLong recvBuf[2];
1149 MPI_Recv(recvBuf, 2, MPI_LONG, source, 0, comm, &status, AT_,
1150 "recvBuf"); // receive data count and offset in local group array
1151 MInt recvSize = recvBuf[0];
1152 MLong locOffset = recvBuf[1];
1153 ASSERT(locOffset + recvSize <= noRecvData, "");
1154 MPI_Irecv(&recvInt[locOffset], recvSize, MPI_LONG, source, 1, comm, &requests[noReqs++], AT_,
1155 "recvInt[locOffset]"); // receive partition cell ids
1156 MPI_Irecv(&recvFlt[locOffset], recvSize, MPI_DOUBLE, source, 2, comm, &requests[noReqs++], AT_,
1157 "recvFlt[locOffset]"); // receive partition cell workloads
1158 recvCount += recvSize;
1159 }
1160 if((MInt)((MPI_Wtime() - time0) / 10) > (MInt)((time1 - time0) / 10)) {
1161 std::cerr << "Rank " << rank << " already waiting " << MPI_Wtime() - time0
1162 << " seconds for incoming partition data..." << std::endl;
1163 }
1164 time1 = MPI_Wtime();
1165 }
1166
1167 if(noReqs) MPI_Waitall(noReqs, &requests[0], MPI_STATUSES_IGNORE, AT_); // finish asynchronous communication
1168 }
1169
1170
1171 //-----------------------------------------------------------------------
1172
1173
1177 MInt correctDomainOffsetsAtPartitionLevelShifts(std::vector<MUchar>& cellInfo) {
1178 TRACE();
1179
1180 MIntScratchSpace isPartitionLevelAncestor(m_noInternalCells, AT_, "isPartitionLevelAncestor");
1181 isPartitionLevelAncestor.fill(1);
1182 MInt noPartitionLevelAncestorsGlobal = 0;
1183 MInt noPartitionLevelAncestorsLocal = m_noInternalCells;
1184
1185 // Loop over all partition cells and flag all offspring cells (flag 0), only partition level
1186 // ancestor cells will not be traversed (flag 1), the local number of partion level ancestors is
1187 // then the number of internal cells minus the number of traversed cells
1188 for(MInt i = 0; i < m_noPartitionCells; i++) {
1190 noPartitionLevelAncestorsLocal -=
1191 traverseAndFlagSubtree(cellId, cellInfo.data(), m_noInternalCells,
1192 &isPartitionLevelAncestor[0]); // flag all parent cells of partition level
1193 }
1194
1195 // Determine global number of partition level ancestor cells
1196 m_noPartitionLevelAncestors = noPartitionLevelAncestorsLocal;
1197 MPI_Allreduce(&noPartitionLevelAncestorsLocal, &noPartitionLevelAncestorsGlobal, 1, MPI_INT, MPI_SUM, mpiComm(),
1198 AT_, "noPartitionLevelAncestorsLocal", "noPartitionLevelAncestorsGlobal");
1199
1200 if(noPartitionLevelAncestorsGlobal > 0) {
1201 MInt domainShift = 0;
1202 MInt lastId = m_noInternalCells - 1;
1203
1204 // Increase the domain shift as long as the last cell is still a partition level ancestor (no
1205 // descendant partition cell on this domain!)
1206 while(isPartitionLevelAncestor[lastId]) {
1207 domainShift++;
1208 lastId--;
1209 }
1210
1211 if(domainId() == noDomains() - 1 && domainShift) {
1212 TERMM(1, "The last domain cannot have a domain shift since there is no following domain.");
1213 }
1214
1215 // Gather the domain shifts
1216 MIntScratchSpace domainOffsetsDelta(noDomains(), AT_, "domainOffsetsDelta");
1217 MPI_Allgather(&domainShift, 1, MPI_INT, domainOffsetsDelta.data(), 1, MPI_INT, mpiComm(), AT_, "domainShift",
1218 "domainOffsetsDelta.data()");
1219
1220 MPI_Request req = MPI_REQUEST_NULL;
1221 ScratchSpace<MUchar> sendBuf(domainShift, AT_, "sendBuf");
1222
1223 if(domainShift) {
1224 // Send the partition level ancestors (without a descendant partition cell on this domain)
1225 // to the next domain and delete these cells on this domain
1226 std::copy(&cellInfo[m_noInternalCells - domainShift], &cellInfo[m_noInternalCells - domainShift] + domainShift,
1227 &sendBuf[0]);
1228 MPI_Issend(&sendBuf[0], domainShift, MPI_UNSIGNED_CHAR, domainId() + 1, 657, mpiComm(), &req, AT_,
1229 "sendBuf[0]");
1230 m_noInternalCells -= domainShift;
1231 cellInfo.resize(m_noInternalCells);
1232 }
1233 if(domainId() > 0 && domainOffsetsDelta[domainId() - 1]) {
1234 // Receive the partition level ancestors from the previous domain and store at the beginning
1235 // of the cellInfo array
1236 const MInt delta = domainOffsetsDelta[domainId() - 1];
1237 cellInfo.resize(m_noInternalCells + delta);
1238 memmove(&cellInfo[delta], &cellInfo[0], m_noInternalCells * sizeof(MUchar));
1239 MPI_Recv(&cellInfo[0], delta, MPI_UNSIGNED_CHAR, domainId() - 1, 657, mpiComm(), MPI_STATUS_IGNORE, AT_,
1240 "cellInfo[0]");
1241 m_noInternalCells += delta;
1242 }
1243
1244 // Correct global domain offsets
1245 for(MInt i = 0; i < noDomains(); ++i) {
1246 m_domainOffsets[i + 1] -= domainOffsetsDelta[i];
1247 }
1248 MPI_Wait(&req, MPI_STATUS_IGNORE, AT_);
1249 }
1250
1251 return noPartitionLevelAncestorsGlobal;
1252 }
1253
1254
1255 //-----------------------------------------------------------------------
1256
1257
1262 void communicatePartitionLevelAncestorData(const MInt noLocalMinLevelCells, const MInt* const localMinLevelCells,
1263 const MInt* const localMinLevelId, const MLong* const minLevelCellsTreeId,
1264 const MUchar* const cellInfo) {
1265 TRACE();
1266
1267 using namespace std;
1268
1275
1277 return;
1278 }
1279
1280 // Mark partiton cells
1281 MIntScratchSpace isPartitionCell(m_noInternalCells, AT_, "isPartitionCell");
1282 isPartitionCell.fill(0);
1283 for(MInt i = 0; i < m_noPartitionCells; i++) {
1285 isPartitionCell(cellId) = 1;
1286 }
1287
1288 // Determine global id of first min level cell on this domain
1289 MLong firstMinLevelCell = std::numeric_limits<MLong>::max();
1290 for(MInt i = 0; i < noLocalMinLevelCells; i++) {
1291 firstMinLevelCell = mMin(firstMinLevelCell, a_globalId(localMinLevelCells[i]));
1292 }
1293
1294 // Gather first min level cell ids of all domains
1295 MLongScratchSpace minLevelCellGlobalIdOffsets(noDomains() + 1, AT_, "minLevelCellGlobalIdOffsets");
1296 MPI_Allgather(&firstMinLevelCell, 1, MPI_LONG, minLevelCellGlobalIdOffsets.data(), 1, MPI_LONG, mpiComm(), AT_,
1297 "firstMinLevelCell", "minLevelCellGlobalIdOffsets.data()");
1298 minLevelCellGlobalIdOffsets[noDomains()] = m_noCellsGlobal + m_32BitOffset;
1299
1300 for(MInt cpu = noDomains() - 1; cpu >= 0; cpu--) { // some domains may not have any min level cells at all
1301 if(minLevelCellGlobalIdOffsets[cpu] > m_noCellsGlobal + m_32BitOffset) {
1302 minLevelCellGlobalIdOffsets[cpu] = minLevelCellGlobalIdOffsets[cpu + 1];
1303 }
1304 }
1305
1306 std::vector<std::tuple<MLong, MInt, MInt>> partitionLevelAncestorsOnMinLevel;
1307 std::vector<std::tuple<MLong, MInt, MInt>> partitionLevelAncestorsOther;
1308 MIntScratchSpace isPartitionLevelAncestor(m_noInternalCells, AT_, "isPartitionLevelAncestor");
1309 isPartitionLevelAncestor.fill(1);
1310
1311 MInt noPartitionLevelAncestorsGlobal = 0;
1312 MInt noPartitionLevelAncestorsLocal = m_noInternalCells;
1313 TERMM_IF_NOT_COND(m_noInternalCells == m_tree.size(),
1314 "Error: tree should only contain internal cells at this moment.");
1315
1316 // Determine partition level ancestor cells and set the corresponding cell property
1317 for(MInt i = 0; i < m_noPartitionCells; i++) {
1319 noPartitionLevelAncestorsLocal -=
1320 traverseAndFlagSubtree(cellId, cellInfo, m_tree.size(),
1321 &isPartitionLevelAncestor[0]); // flag all parent cells of partition level
1322 }
1323
1324 m_noPartitionLevelAncestors = noPartitionLevelAncestorsLocal;
1325 // Determine global number of partition level ancestor cells
1326 MPI_Allreduce(&noPartitionLevelAncestorsLocal, &noPartitionLevelAncestorsGlobal, 1, MPI_INT, MPI_SUM, mpiComm(),
1327 AT_, "noPartitionLevelAncestorsLocal", "noPartitionLevelAncestorsGlobal");
1328 m_noPartitionLevelAncestorsGlobal = noPartitionLevelAncestorsGlobal;
1329
1330 for(MInt i = 0; i < m_noInternalCells; i++) {
1331 a_hasProperty(i, Cell::IsPartLvlAncestor) = isPartitionLevelAncestor(i);
1332 }
1333
1334 for(MInt i = 0; i < m_noPartitionCells; i++) {
1336 if(localMinLevelId[cellId] < 0) {
1337 isPartitionLevelAncestor[cellId] = 1; // also flag partition cells which are not on min level
1338 }
1339 }
1340
1341 for(MInt cellId = 0; cellId < m_noInternalCells; cellId++) {
1342 const MLong globalId = a_globalId(cellId);
1343 if(isPartitionLevelAncestor(cellId)) {
1344 if(localMinLevelId[cellId] > -1) {
1345 // Partition level ancestor is a min cell
1346 partitionLevelAncestorsOnMinLevel.push_back(std::make_tuple(globalId, cellId, domainId()));
1347 } else {
1348 // Partition level ancestor is not a min-cell, find the domain which has the corresponding
1349 // parent min-cell
1350 MInt ndom = -1;
1351 for(MInt cpu = 0; cpu < noDomains(); cpu++) {
1352 if(globalId >= minLevelCellGlobalIdOffsets[cpu] && globalId < minLevelCellGlobalIdOffsets[cpu + 1]) {
1353 ndom = cpu;
1354 break;
1355 }
1356 }
1357 TERMM_IF_COND(ndom < 0 || ndom >= noDomains(), "domain for globalId not found " + std::to_string(ndom));
1358 partitionLevelAncestorsOther.push_back(std::make_tuple(globalId, cellId, ndom));
1359 }
1360 }
1361 }
1362
1363 MIntScratchSpace noQuery(noDomains(), AT_, "noQuery");
1364 MIntScratchSpace queryOffsets(noDomains() + 1, AT_, "queryOffsets");
1365 noQuery.fill(0);
1366
1367 // Count the number of queries per domain
1368 for(MUint i = 0; i < partitionLevelAncestorsOther.size(); i++) {
1369 noQuery[get<2>(partitionLevelAncestorsOther[i])]++;
1370 }
1371 queryOffsets(0) = 0;
1372 for(MInt c = 0; c < noDomains(); c++) {
1373 queryOffsets(c + 1) = queryOffsets(c) + noQuery[c];
1374 }
1375
1376 MLongScratchSpace queryGlobalId(queryOffsets[noDomains()], AT_, "queryGlobalId");
1377 ScratchSpace<MUchar> queryCellInfo(queryOffsets[noDomains()], AT_, "queryCellInfo");
1378 MIntScratchSpace queryIsPartitionCell(queryOffsets[noDomains()], AT_, "queryIsPartitionCell");
1379 noQuery.fill(0);
1380
1381 // Gather the local partition level ancestor data that needs to be communicated
1382 for(MUint i = 0; i < partitionLevelAncestorsOther.size(); i++) {
1383 MLong globalId = get<0>(partitionLevelAncestorsOther[i]);
1384 MInt cellId = get<1>(partitionLevelAncestorsOther[i]);
1385 MInt cpu = get<2>(partitionLevelAncestorsOther[i]);
1386 queryGlobalId(queryOffsets(cpu) + noQuery(cpu)) = globalId;
1387 queryCellInfo(queryOffsets(cpu) + noQuery(cpu)) = cellInfo[cellId];
1388 queryIsPartitionCell(queryOffsets(cpu) + noQuery(cpu)) = isPartitionCell[cellId];
1389 // it->second = queryOffsets(cpu) + noQuery(cpu);
1390 noQuery[cpu]++;
1391 }
1392
1393 MIntScratchSpace noCollect(noDomains(), AT_, "noCollect");
1394 MIntScratchSpace collectOffsets(noDomains() + 1, AT_, "collectOffsets");
1395
1396 // Communicate the number of queries of all domains with each other
1397 MPI_Alltoall(&noQuery[0], 1, MPI_INT, &noCollect[0], 1, MPI_INT, mpiComm(), AT_, "noQuery[0]", "noCollect[0]");
1398 // Compute offsets
1399 collectOffsets(0) = 0;
1400 for(MInt c = 0; c < noDomains(); c++) {
1401 collectOffsets(c + 1) = collectOffsets(c) + noCollect[c];
1402 }
1403
1404 // Buffers for received data
1405 MLongScratchSpace collectGlobalId(collectOffsets[noDomains()], AT_, "collectGlobalId");
1406 ScratchSpace<MUchar> collectCellInfo(collectOffsets[noDomains()], AT_, "collectCellInfo");
1407 MIntScratchSpace collectIsPartitionCell(collectOffsets[noDomains()], AT_, "collectIsPartitionCell");
1408 ScratchSpace<MPI_Request> queryReq(3 * noDomains(), AT_, "queryReq");
1409 queryReq.fill(MPI_REQUEST_NULL);
1410
1411 // Send the local partition level ancestor data to the domain with the corresponding min-cells
1412 MInt queryCnt = 0;
1413 for(MInt c = 0; c < noDomains(); c++) {
1414 if(noQuery[c] == 0) continue;
1415 MPI_Issend(&queryGlobalId[queryOffsets[c]], noQuery[c], MPI_LONG, c, 456, mpiComm(), &queryReq[queryCnt++], AT_,
1416 "queryGlobalId[queryOffsets[c]]");
1417 MPI_Issend(&queryCellInfo[queryOffsets[c]], noQuery[c], MPI_UNSIGNED_CHAR, c, 457, mpiComm(),
1418 &queryReq[queryCnt++], AT_, "queryCellInfo[queryOffsets[c]]");
1419 MPI_Issend(&queryIsPartitionCell[queryOffsets[c]], noQuery[c], MPI_INT, c, 458, mpiComm(), &queryReq[queryCnt++],
1420 AT_, "queryIsPartitionCell[queryOffsets[c]]");
1421 }
1422
1423 // Receive the data if there is any
1424 MInt collectCnt = 0;
1425 for(MInt c = 0; c < noDomains(); c++) {
1426 if(noCollect[c] == 0) continue;
1427 MPI_Recv(&collectGlobalId[collectOffsets[c]], noCollect[c], MPI_LONG, c, 456, mpiComm(), MPI_STATUS_IGNORE, AT_,
1428 "collectGlobalId[collectOffsets[c]]");
1429 MPI_Recv(&collectCellInfo[collectOffsets[c]], noCollect[c], MPI_UNSIGNED_CHAR, c, 457, mpiComm(),
1430 MPI_STATUS_IGNORE, AT_, "collectCellInfo[collectOffsets[c]]");
1431 MPI_Recv(&collectIsPartitionCell[collectOffsets[c]], noCollect[c], MPI_INT, c, 458, mpiComm(), MPI_STATUS_IGNORE,
1432 AT_, "collectIsPartitionCell[collectOffsets[c]]");
1433 collectCnt++;
1434 }
1435
1436 if(queryCnt > 0) MPI_Waitall(queryCnt, &queryReq[0], MPI_STATUSES_IGNORE, AT_);
1437
1438 // Collect the local min-level partition level ancestor data
1439 std::vector<std::tuple<MLong, MUchar, MInt>> collectData;
1440 for(MUint i = 0; i < partitionLevelAncestorsOnMinLevel.size(); i++) {
1441 MLong globalId = get<0>(partitionLevelAncestorsOnMinLevel[i]);
1442 MInt localId = get<1>(partitionLevelAncestorsOnMinLevel[i]);
1443 ASSERT(domainId() == get<2>(partitionLevelAncestorsOnMinLevel[i]), "");
1444 collectData.push_back(std::make_tuple(globalId, cellInfo[localId], isPartitionCell[localId]));
1445 }
1446
1447 // ... and that of all descendent partition level ancestors (local and on other domains)
1448 for(MInt c = 0; c < noDomains(); c++) {
1449 for(MInt d = 0; d < noCollect[c]; d++) {
1450 MInt id = collectOffsets[c] + d;
1451 collectData.push_back(std::make_tuple(collectGlobalId[id], collectCellInfo[id], collectIsPartitionCell[id]));
1452 }
1453 }
1454
1455 sort(collectData.begin(), collectData.end()); // sort by globalId to retain depth-first ordering starting from min
1456 // level, truncated at partition level
1457
1458 constexpr MInt noData = 5 + m_maxNoChilds + m_noDirs;
1459 MLongScratchSpace collectMinLevelTreeId(collectData.size(), AT_, "collectMinLevelTreeId");
1460 MLongScratchSpace collectParentId(collectData.size(), AT_, "collectParentId");
1461 MIntScratchSpace collectLevel(collectData.size(), AT_, "collectLevel");
1462 MIntScratchSpace collectNoChildren(collectData.size(), AT_, "collectNoChildren");
1463 MIntScratchSpace collectNoOffspring(collectData.size(), AT_, "collectNoOffspring");
1464 MLongScratchSpace collectChildIds(collectData.size(), m_maxNoChilds, AT_, "collectChildIds");
1465 MLongScratchSpace collectNghbrIds(collectData.size(), m_noDirs, AT_, "collectNghbrIds");
1466 std::vector<MInt> newCells;
1467 MInt newCellsMaxLevel = m_minLevel;
1468 std::map<MLong, MInt> collectMap;
1469 collectMinLevelTreeId.fill(-1);
1470 collectParentId.fill(-1);
1471 collectLevel.fill(-1);
1472 collectNoChildren.fill(-1);
1473 collectNoOffspring.fill(-1);
1474 collectChildIds.fill(-1);
1475 collectNghbrIds.fill(-1);
1476
1477 // Create mapping: part. lvl ancestor globalId -> local position in collectData
1478 for(MUint i = 0; i < collectData.size(); i++) {
1479 collectMap[get<0>(collectData[i])] = i;
1480 }
1481
1482 // Collect data of min-level partition level ancestors
1483 for(MUint i = 0; i < partitionLevelAncestorsOnMinLevel.size(); i++) {
1484 MLong globalId = get<0>(partitionLevelAncestorsOnMinLevel[i]);
1485 MInt localId = get<1>(partitionLevelAncestorsOnMinLevel[i]);
1486 MInt id = collectMap[globalId];
1487 collectLevel(id) = m_minLevel;
1488 collectParentId(id) = -1;
1489 for(MInt j = 0; j < m_noDirs; j++) {
1490 collectNghbrIds(id, j) = a_neighborId(localId, j);
1491 }
1492 MInt minId = localMinLevelId[localId];
1493 ASSERT(minId > -1, "");
1494 collectMinLevelTreeId(id) = minLevelCellsTreeId[minId];
1495 collectNoOffspring(id) = (minId == noLocalMinLevelCells - 1)
1496 ? minLevelCellGlobalIdOffsets[domainId() + 1] - globalId
1497 : a_globalId(localMinLevelCells[minId + 1]) - globalId;
1498 }
1499
1500 // Setup the truncated subtrees between the min-level partition level ancestors and the
1501 // partition cells (i.e. fill the collect* data buffers)
1502 for(MUint i = 0; i < partitionLevelAncestorsOnMinLevel.size(); i++) {
1503 MInt counter = collectMap[get<0>(partitionLevelAncestorsOnMinLevel[i])];
1504 traverseAndSetupTruncatedSubtree(counter, collectData, &collectParentId[0], &collectLevel[0],
1505 &collectNoChildren[0], &collectChildIds[0], &collectNoOffspring[0]);
1506 }
1507
1508 // Set the child ids and the number of offsprings for the min-level partition level ancestors
1509 for(MUint i = 0; i < partitionLevelAncestorsOnMinLevel.size(); i++) {
1510 MLong globalId = get<0>(partitionLevelAncestorsOnMinLevel[i]);
1511 MInt localId = get<1>(partitionLevelAncestorsOnMinLevel[i]);
1512 MInt id = collectMap[globalId];
1513 a_noOffsprings(localId) = collectNoOffspring(id);
1514 for(MInt j = 0; j < m_maxNoChilds; j++) {
1515 a_childId(localId, j) = collectChildIds(id, j);
1516 }
1517 }
1518
1519
1520 MIntScratchSpace returnOffsets(collectCnt + 1, AT_, "returnOffsets");
1521 std::vector<MLong> returnData;
1522
1523 if(collectCnt > 0) {
1524 returnOffsets(0) = 0;
1525 collectCnt = 0;
1526 for(MInt c = 0; c < noDomains(); c++) {
1527 if(noCollect[c] > 0) {
1528 // Create map of cells to return to this domain
1529 std::map<MLong, MInt> returnChain;
1530
1531 for(MInt d = 0; d < noCollect[c]; d++) {
1532 const MInt id0 = collectOffsets[c] + d; // position in collected data buffers
1533 MLong globalId = collectGlobalId[id0];
1534 MInt id1 = collectMap[globalId]; // local position in collectData
1535 if(collectIsPartitionCell[id0]) { // do not send back partition cells to save some communicaton overhead
1536 id1 = collectMap[collectParentId[id1]];
1537 globalId = get<0>(collectData[id1]);
1538 }
1539 while(globalId > -1) {
1540 if(returnChain.count(globalId) == 0) { // cell is not already flagged for return
1541 returnChain[globalId] = id1; // add to map and continue with parent cell if existing
1542 if(collectParentId[id1] > -1) {
1543 id1 = collectMap[collectParentId[id1]];
1544 globalId = get<0>(collectData[id1]);
1545 } else {
1546 globalId = -1;
1547 }
1548 } else {
1549 globalId = -1;
1550 }
1551 }
1552 }
1553
1554 for(auto it = returnChain.begin(); it != returnChain.end(); it++) {
1555 MLong globalId = it->first;
1556 MInt id1 = it->second;
1557
1558 // Setup the (missing) cells on the current domain
1559 if(c == domainId()) {
1560 MInt localId = -1;
1561 auto it1 = m_globalToLocalId.find(globalId);
1562 if(it1 == m_globalToLocalId.end()) {
1563 // Cell is not an internal cells (not existing), append to tree
1564 localId = m_tree.size();
1565 m_tree.append();
1566 resetCell(localId);
1567 a_globalId(localId) = globalId;
1568 a_hasProperty(localId, Cell::IsPartLvlAncestor) = true;
1569 m_globalToLocalId[globalId] = localId;
1570 } else {
1571 // Cell is an internal cells and exists, perform sanity checks
1572 localId = it1->second;
1573 ASSERT(a_globalId(localId) == globalId, "");
1574 ASSERT(a_hasProperty(localId, Cell::IsPartLvlAncestor), "");
1575 }
1576 a_parentId(localId) = collectParentId(id1);
1577 a_noOffsprings(localId) = collectNoOffspring(id1);
1578 a_level(localId) = collectLevel(id1);
1579
1580 for(MInt i = 0; i < m_maxNoChilds; i++) {
1581 a_childId(localId, i) = collectChildIds(id1, i);
1582 auto it2 = m_globalToLocalId.find(a_childId(localId, i));
1583 // Set the parent id if the child cell exists
1584 if(it2 != m_globalToLocalId.end()) {
1585 a_parentId(it2->second) = globalId;
1586 }
1587 }
1588
1589 newCells.push_back(localId);
1590 newCellsMaxLevel = mMax(newCellsMaxLevel, a_level(localId));
1591 } else {
1592 // Assemble the cell information to return to another domain
1593 MInt offs = returnData.size();
1594 returnData.resize(offs + noData);
1595 returnData[offs++] = globalId;
1596 returnData[offs++] = collectMinLevelTreeId(id1);
1597 returnData[offs++] = collectLevel(id1);
1598 returnData[offs++] = collectParentId(id1);
1599 returnData[offs++] = collectNoOffspring(id1);
1600 for(MInt i = 0; i < m_maxNoChilds; i++) {
1601 returnData[offs++] = collectChildIds(id1, i);
1602 }
1603 for(MInt i = 0; i < m_noDirs; i++) {
1604 returnData[offs++] = collectNghbrIds(id1, i);
1605 }
1606 }
1607 }
1608 if(c != domainId()) {
1609 returnOffsets(collectCnt + 1) = returnData.size();
1610 collectCnt++;
1611 }
1612 }
1613 }
1614 }
1615
1616 ScratchSpace<MPI_Request> returnReq(collectCnt, AT_, "returnReq");
1617 ScratchSpace<MInt> noReturnDataBuffer(collectCnt, AT_, "noReturnDataBuffer");
1618 returnReq.fill(MPI_REQUEST_NULL);
1619 MInt returnCnt = 0;
1620
1621 // Send the amount of cell information which will be transfered back to the other domains
1622 if(collectCnt > 0) {
1623 for(MInt c = 0; c < noDomains(); c++) {
1624 if(c == domainId()) continue;
1625 if(noCollect[c] == 0) continue;
1626 // Note: use persistent buffer which does not go out of scope until the send is finished
1627 noReturnDataBuffer[returnCnt] = returnOffsets(returnCnt + 1) - returnOffsets(returnCnt);
1628 MPI_Issend(&noReturnDataBuffer[returnCnt], 1, MPI_INT, c, 459, mpiComm(), &returnReq[returnCnt], AT_,
1629 "&noReturnDataBuffer[returnCnt]");
1630 returnCnt++;
1631 }
1632 }
1633
1634 MIntScratchSpace noReceiveData(std::max(queryCnt, 1), AT_, "noReceiveData");
1635 MIntScratchSpace receiveOffs(queryCnt + 1, AT_, "receiveOffs");
1636 queryCnt = 0;
1637 receiveOffs(0) = 0;
1638 // Receive the cell information counts
1639 for(MInt c = 0; c < noDomains(); c++) {
1640 if(c == domainId()) continue;
1641 if(noQuery[c] == 0) continue;
1642 MPI_Recv(&noReceiveData[queryCnt], 1, MPI_INT, c, 459, mpiComm(), MPI_STATUS_IGNORE, AT_,
1643 "noReceiveData[queryCnt]");
1644 receiveOffs(queryCnt + 1) = receiveOffs(queryCnt) + noReceiveData[queryCnt];
1645 queryCnt++;
1646 }
1647
1648 if(returnCnt > 0) MPI_Waitall(returnCnt, &returnReq[0], MPI_STATUSES_IGNORE, AT_);
1649
1650 MLongScratchSpace receiveData(std::max(receiveOffs(queryCnt), 1), AT_, "receiveData");
1651
1652 returnReq.fill(MPI_REQUEST_NULL);
1653 returnCnt = 0;
1654 // Send the cell information and receive on corresponding domains
1655 if(collectCnt > 0) {
1656 for(MInt c = 0; c < noDomains(); c++) {
1657 if(c == domainId()) continue;
1658 if(noCollect[c] == 0) continue;
1659 MInt noReturnData = returnOffsets(returnCnt + 1) - returnOffsets(returnCnt);
1660 MInt offs = returnOffsets(returnCnt);
1661 MPI_Issend(&returnData[offs], noReturnData, MPI_LONG, c, 460, mpiComm(), &returnReq[returnCnt++], AT_,
1662 "returnData[offs]");
1663 }
1664 }
1665 queryCnt = 0;
1666 for(MInt c = 0; c < noDomains(); c++) {
1667 if(c == domainId()) continue;
1668 if(noQuery[c] == 0) continue;
1669 MPI_Recv(&receiveData(receiveOffs(queryCnt)), noReceiveData(queryCnt), MPI_LONG, c, 460, mpiComm(),
1670 MPI_STATUS_IGNORE, AT_, "receiveData(receiveOffs(queryCnt))");
1671 queryCnt++;
1672 }
1673
1674 if(returnCnt > 0) MPI_Waitall(returnCnt, &returnReq[0], MPI_STATUSES_IGNORE, AT_);
1675
1676 queryCnt = 0;
1677 for(MInt c = 0; c < noDomains(); c++) {
1678 if(c == domainId()) continue;
1679 if(noQuery[c] == 0) continue;
1680 MInt offs = receiveOffs(queryCnt);
1681 // Loop over all received cells from this domain and add to tree or just set its information
1682 while(offs < receiveOffs(queryCnt + 1)) {
1683 MLong globalId = receiveData(offs++);
1684 MInt localId = -1;
1685 auto it = m_globalToLocalId.find(globalId);
1686 if(it == m_globalToLocalId.end()) {
1687 // Cell is not already existing, append to tree
1688 localId = m_tree.size();
1689 m_tree.append();
1690 resetCell(localId);
1691 a_globalId(localId) = globalId;
1692 a_hasProperty(localId, Cell::IsPartLvlAncestor) = true;
1693 m_globalToLocalId[globalId] = localId;
1694 } else {
1695 // Cell exists
1696 localId = it->second;
1697 ASSERT(a_globalId(localId) == globalId, "");
1698 ASSERT(a_hasProperty(localId, Cell::IsPartLvlAncestor), "");
1699 }
1700 MLong treeId = receiveData(offs++);
1701 a_level(localId) = receiveData(offs++);
1702 a_parentId(localId) = receiveData(offs++);
1703 a_noOffsprings(localId) = receiveData(offs++);
1704
1705 newCells.push_back(localId);
1706 newCellsMaxLevel = mMax(newCellsMaxLevel, a_level(localId));
1707
1708 for(MInt i = 0; i < m_maxNoChilds; i++) {
1709 a_childId(localId, i) = receiveData(offs++);
1710 auto it1 = m_globalToLocalId.find(a_childId(localId, i));
1711 // Set the parent id if the child cell exists
1712 if(it1 != m_globalToLocalId.end()) {
1713 a_parentId(it1->second) = globalId;
1714 }
1715 }
1716 for(MInt i = 0; i < m_noDirs; i++) {
1717 a_neighborId(localId, i) = receiveData(offs++);
1718 }
1719 // Compute the cell coordinates of the min-level cells
1720 if(a_level(localId) == m_minLevel) {
1721 maia::grid::hilbert::treeIdToCoordinates<nDim>(
1722 &a_coordinate(localId, 0), treeId, (MLong)grid_.m_targetGridMinLevel,
1723 &grid_.m_targetGridCenterOfGravity[0], grid_.m_targetGridLengthLevel0);
1724 }
1725 }
1726 queryCnt++;
1727 }
1728
1730
1731 for(MInt i = 0; i < m_noPartitionCells; i++) {
1733 MInt cellId = globalId - m_domainOffsets[domainId()];
1734 if(localMinLevelId[cellId] < 0) { // partition cell is not on min-level
1735 MInt parentId = m_globalToLocalId[a_parentId(cellId)];
1736 a_level(cellId) = a_level(parentId) + 1; // set the level of the partition cell
1737 }
1738 }
1739
1740 static const MFloat childStencil[8][3] = {{-F1, -F1, -F1}, {F1, -F1, -F1}, {-F1, F1, -F1}, {F1, F1, -F1},
1741 {-F1, -F1, F1}, {F1, -F1, F1}, {-F1, F1, F1}, {F1, F1, F1}};
1742 // Loop up starting from the min-level and compute the cell coordinates of the child cells
1743 for(MInt lvl = m_minLevel; lvl <= newCellsMaxLevel; lvl++) {
1744 for(MUint i = 0; i < newCells.size(); i++) {
1745 if(a_level(newCells[i]) == lvl) {
1746 for(MInt child = 0; child < m_maxNoChilds; child++) {
1747 if(a_childId(newCells[i], child) > -1) {
1748 auto it = m_globalToLocalId.find(a_childId(newCells[i], child));
1749 if(it != m_globalToLocalId.end()) {
1750 for(MInt j = 0; j < nDim; ++j) {
1751 a_coordinate(it->second, j) =
1752 a_coordinate(newCells[i], j) + childStencil[child][j] * F1B2 * cellLengthAtLevel(lvl + 1);
1753 }
1754 }
1755 }
1756 }
1757 }
1758 }
1759 }
1760
1761 for(MInt cellId = m_noInternalCells; cellId < m_tree.size(); cellId++) {
1762 a_hasProperty(cellId, Cell::IsHalo) = true;
1763 }
1764
1765 for(MUint i = 0; i < newCells.size(); i++) {
1766 MInt cellId = newCells[i];
1767 ASSERT(a_hasProperty(cellId, Cell::IsPartLvlAncestor), "");
1768 // Store partition level ancestor cell ids and their child ids
1769 m_partitionLevelAncestorIds.push_back(cellId);
1770 for(MInt child = 0; child < m_maxNoChilds; child++) {
1771 m_partitionLevelAncestorChildIds.push_back(a_childId(cellId, child));
1772 }
1773
1774 MInt ndom = findNeighborDomainId(a_globalId(cellId));
1775 if(ndom == domainId()) {
1776 MLong lastId = a_globalId(cellId) + a_noOffsprings(cellId) - 1;
1777 MInt firstDom = domainId() + 1;
1778 MInt lastDom = findNeighborDomainId(lastId);
1779 for(MInt d = firstDom; d <= lastDom; d++) {
1780 MInt idx =
1782 if(idx < 0) {
1785 m_partitionLevelAncestorHaloCells.push_back(std::vector<MInt>());
1786 m_partitionLevelAncestorWindowCells.push_back(std::vector<MInt>());
1787 }
1788 }
1789 } else {
1790 MInt idx =
1792 if(idx < 0) {
1795 m_partitionLevelAncestorHaloCells.push_back(std::vector<MInt>());
1796 m_partitionLevelAncestorWindowCells.push_back(std::vector<MInt>());
1797 }
1798 }
1799 }
1800
1803 .end()); // sort by ascending neighbor domain id to avoid send/recv deadlocks
1804
1805 for(MUint i = 0; i < newCells.size(); i++) {
1806 MInt cellId = newCells[i];
1807 ASSERT(a_hasProperty(cellId, Cell::IsPartLvlAncestor), "");
1808 MInt ndom = findNeighborDomainId(a_globalId(cellId));
1809 if(ndom == domainId()) {
1810 MLong lastId = a_globalId(cellId) + a_noOffsprings(cellId) - 1;
1811 MInt firstDom = domainId() + 1;
1812 MInt lastDom = findNeighborDomainId(lastId);
1813 for(MInt d = firstDom; d <= lastDom; d++) {
1814 MInt idx =
1816 ASSERT(idx > -1, "");
1817 m_partitionLevelAncestorWindowCells[idx].push_back(cellId);
1818 }
1819 } else {
1820 MInt idx =
1822 ASSERT(idx > -1, "");
1823 m_partitionLevelAncestorHaloCells[idx].push_back(cellId);
1824 }
1825 }
1826
1827 // note that window/halo cells are already sorted by ascending globalIds and ascending neighborDomainIds!
1828 for(MUint i = 1; i < m_partitionLevelAncestorNghbrDomains.size(); i++) {
1830 mTerm(1, AT_, "Invalid sorting of neighbor domains.");
1831 }
1832 }
1833 for(MUint i = 0; i < m_partitionLevelAncestorNghbrDomains.size(); i++) {
1834 for(MUint j = 1; j < m_partitionLevelAncestorHaloCells[i].size(); j++) {
1836 mTerm(1, AT_, "Invalid sorting of halo cells.");
1837 }
1838 }
1839 for(MUint j = 1; j < m_partitionLevelAncestorWindowCells[i].size(); j++) {
1841 mTerm(1, AT_, "Invalid sorting of window cells.");
1842 }
1843 }
1844 }
1845 }
1846
1847
1848 //-----------------------------------------------------------------------
1849
1850
1855 void queryGlobalData(const MInt noCellsToQuery, const MLong* const queryGlobalIds, MLong* const receiveData,
1856 const MLong* const dataSource, const MInt dataWidth) {
1857 TRACE();
1858
1859 MIntScratchSpace noRecv(noDomains(), AT_, "noRecv");
1860 MIntScratchSpace noSend(noDomains(), AT_, "noSend");
1861 MIntScratchSpace recvOffsets(noDomains() + 1, AT_, "recvOffsets");
1862 MIntScratchSpace sendOffsets(noDomains() + 1, AT_, "sendOffsets");
1863 MIntScratchSpace queryDomains(noCellsToQuery, AT_, "queryDomains");
1864 MIntScratchSpace originalId(noCellsToQuery, AT_, "originalId");
1865 noRecv.fill(0);
1866 queryDomains.fill(-1);
1867 originalId.fill(-1);
1868
1869 for(MInt i = 0; i < noCellsToQuery; i++) {
1870 MLong cellId = queryGlobalIds[i];
1871 MInt cpu = -1;
1872 for(MInt c = 0; c < noDomains(); c++) {
1873 if(cellId >= m_domainOffsets[c] && cellId < m_domainOffsets[c + 1]) {
1874 cpu = c;
1875 c = noDomains();
1876 }
1877 }
1878 // if ( cpu < 0 || cpu == domainId() ) {
1879 if(cpu < 0) {
1880 mTerm(1, AT_, "Neighbor domain not found.");
1881 }
1882 queryDomains[i] = cpu;
1883 noRecv[cpu]++;
1884 }
1885
1886 recvOffsets(0) = 0;
1887 for(MInt c = 0; c < noDomains(); c++) {
1888 recvOffsets(c + 1) = recvOffsets(c) + noRecv[c];
1889 }
1890 MLongScratchSpace recvIds(recvOffsets[noDomains()], AT_, "recvIds");
1891 noRecv.fill(0);
1892
1893 for(MInt i = 0; i < noCellsToQuery; i++) {
1894 MLong cellId = queryGlobalIds[i];
1895 MInt cpu = queryDomains[i];
1896 recvIds(recvOffsets(cpu) + noRecv(cpu)) = cellId;
1897 originalId(recvOffsets(cpu) + noRecv(cpu)) = i;
1898 noRecv[cpu]++;
1899 }
1900 MPI_Alltoall(&noRecv[0], 1, MPI_INT, &noSend[0], 1, MPI_INT, mpiComm(), AT_, "noRecv[0]", "noSend[0]");
1901 sendOffsets(0) = 0;
1902 for(MInt c = 0; c < noDomains(); c++) {
1903 sendOffsets(c + 1) = sendOffsets(c) + noSend[c];
1904 }
1905
1906 MLongScratchSpace recvData(recvOffsets[noDomains()], dataWidth, AT_, "recvChildIds");
1907 MLongScratchSpace sendData(sendOffsets[noDomains()], dataWidth, AT_, "sendChildIds");
1908 MLongScratchSpace sendIds(sendOffsets[noDomains()], AT_, "sendIds");
1909 ScratchSpace<MPI_Request> sendReq(noDomains(), AT_, "sendReq");
1910
1911 sendReq.fill(MPI_REQUEST_NULL);
1912 MInt recvCnt = 0;
1913 for(MInt c = 0; c < noDomains(); c++) {
1914 if(noRecv[c] == 0) continue;
1915 MPI_Issend(&recvIds[recvOffsets[c]], noRecv[c], MPI_LONG, c, 123, mpiComm(), &sendReq[recvCnt], AT_,
1916 "recvIds[recvOffsets[c]]");
1917 recvCnt++;
1918 }
1919 for(MInt c = 0; c < noDomains(); c++) {
1920 if(noSend[c] == 0) continue;
1921 MPI_Recv(&sendIds[sendOffsets[c]], noSend[c], MPI_LONG, c, 123, mpiComm(), MPI_STATUS_IGNORE, AT_,
1922 "sendIds[sendOffsets[c]]");
1923 }
1924 if(recvCnt > 0) MPI_Waitall(recvCnt, &sendReq[0], MPI_STATUSES_IGNORE, AT_);
1925
1926
1927 for(MInt k = 0; k < sendOffsets[noDomains()]; k++) {
1928 if(sendIds[k] < m_domainOffsets[domainId()] || sendIds[k] >= m_domainOffsets[domainId() + 1])
1929 mTerm(1, AT_, "Invalid exchange.");
1930 auto it = m_globalToLocalId.find(sendIds[k]);
1931 if(it == m_globalToLocalId.end()) mTerm(1, AT_, "Invalid exchange2.");
1932 MInt cellId = it->second;
1933 for(MInt c = 0; c < dataWidth; c++) {
1934 sendData(k, c) = dataSource[cellId * dataWidth + c];
1935 }
1936 }
1937
1938 sendReq.fill(MPI_REQUEST_NULL);
1939 MInt sendCnt = 0;
1940 for(MInt c = 0; c < noDomains(); c++) {
1941 if(noSend[c] == 0) continue;
1942 MPI_Issend(&sendData(sendOffsets[c], 0), noSend[c] * dataWidth, MPI_LONG, c, 124, mpiComm(), &sendReq[sendCnt],
1943 AT_, "sendData(sendOffsets[c]");
1944 sendCnt++;
1945 }
1946 for(MInt c = 0; c < noDomains(); c++) {
1947 if(noRecv[c] == 0) continue;
1948 MPI_Recv(&recvData(recvOffsets[c], 0), noRecv[c] * dataWidth, MPI_LONG, c, 124, mpiComm(), MPI_STATUS_IGNORE, AT_,
1949 "recvData(recvOffsets[c]");
1950 }
1951 if(sendCnt > 0) MPI_Waitall(sendCnt, &sendReq[0], MPI_STATUSES_IGNORE, AT_);
1952
1953 for(MInt k = 0; k < recvOffsets[noDomains()]; k++) {
1954 MInt cellId = originalId[k];
1955 for(MInt c = 0; c < dataWidth; c++) {
1956 receiveData[cellId * dataWidth + c] = recvData[k * dataWidth + c];
1957 }
1958 }
1959 }
1960
1961
1962 //-----------------------------------------------------------------------
1963
1964
1969 void traverseAndSetupSubtree(const MInt& cellId, const MUchar* const cellInfo) {
1970 static constexpr MFloat childStencil[8][3] = {{-F1, -F1, -F1}, {F1, -F1, -F1}, {-F1, F1, -F1}, {F1, F1, -F1},
1971 {-F1, -F1, F1}, {F1, -F1, F1}, {-F1, F1, F1}, {F1, F1, F1}};
1972 const MUint childCnt =
1973 static_cast<MUint>(cellInfo[cellId]) & 15u; // bitwise operations on char evoke integer promotion
1974 a_noOffsprings(cellId) = 1;
1975 std::fill(&a_childId(cellId, 0), &a_childId(cellId, 0) + m_maxNoChilds, -1);
1976 //---
1977 for(MUint child = 0; child < childCnt; child++) {
1978 MInt childId = cellId + a_noOffsprings(cellId);
1979 ASSERT(childId < m_tree.size(),
1980 "child out of range: " + std::to_string(childId) + " " + std::to_string(m_tree.size()));
1981 MUint position = (static_cast<MUint>(cellInfo[childId]) >> 4u) & 7u;
1982 a_level(childId) = a_level(cellId) + 1;
1983 a_childId(cellId, (MInt)position) = a_globalId(cellId) + a_noOffsprings(cellId);
1984 a_parentId(childId) = a_globalId(cellId);
1985 for(MInt j = 0; j < nDim; ++j) {
1986 a_coordinate(childId, j) =
1987 a_coordinate(cellId, j) + childStencil[(MUint)position][j] * F1B2 * cellLengthAtCell(childId);
1988 }
1989 traverseAndSetupSubtree(childId, cellInfo);
1990 a_noOffsprings(cellId) += a_noOffsprings(childId);
1991 }
1992 }
1993
1994
1995 //-----------------------------------------------------------------------
1996
1997
2002 MInt traverseAndFlagSubtree(const MUint& cellId, const MUchar* const cellInfo, const MInt N, MInt* const flag) {
2003 const MUint childCnt =
2004 static_cast<MUint>(cellInfo[cellId]) & 15u; // bitwise operations on char evoke integer promotion
2005 flag[cellId] = 0;
2006 MInt noFlagged = 1;
2007 MUint childId = cellId;
2008 MUint totalNoChilds = childCnt;
2009 //---
2010 while(totalNoChilds) {
2011 childId++;
2012 ASSERT((MInt)childId < N, "child out of range: " + std::to_string(childId) + " " + std::to_string(N));
2013 MUint isMinLevel = (static_cast<MUint>(cellInfo[childId]) >> 7) & 1;
2014 ASSERT(!isMinLevel, "");
2015 flag[childId] = 0;
2016 noFlagged++;
2017 totalNoChilds--;
2018 totalNoChilds += static_cast<MUint>(cellInfo[childId]) & 15u;
2019 }
2020 return noFlagged;
2021 }
2022
2023
2024 //-----------------------------------------------------------------------
2025
2026
2030 void traverseAndSetupTruncatedSubtree(MInt& counter, const std::vector<std::tuple<MLong, MUchar, MInt>>& collectData,
2031 MLong* const collectParentId, MInt* const collectLevel,
2032 MInt* const collectNoChildren, MLong* const collectChildIds,
2033 MInt* const collectNoOffspring) {
2034 using namespace std;
2035
2036 ASSERT(counter < (signed)collectData.size(), "");
2037 const MLong globalId = get<0>(collectData[counter]);
2038 const MInt counterParent = counter;
2039 const MInt isPartitionCell = get<2>(collectData[counterParent]);
2040 const MUint childCnt = static_cast<MUint>(get<1>(collectData[counterParent]))
2041 & 15u; // bitwise operations on char evoke integer promotion
2042 if(isPartitionCell) return;
2043 std::fill(&collectChildIds[m_maxNoChilds * counterParent],
2044 &collectChildIds[m_maxNoChilds * counterParent] + m_maxNoChilds, -1);
2045 //---
2046 for(MUint child = 0; child < childCnt; child++) {
2047 counter++;
2048 ASSERT(counter < (signed)collectData.size(), "");
2049 const MLong childId = get<0>(collectData[counter]);
2050 const MUint position = (static_cast<MUint>(get<1>(collectData[counter])) >> 4u) & 7u;
2051 collectLevel[counter] = collectLevel[counterParent] + 1;
2052 collectChildIds[m_maxNoChilds * counterParent + (MInt)position] = childId;
2053 collectParentId[counter] = globalId;
2054 MLong nextId = -1;
2055 if(child < childCnt - 1) {
2056 MUint totalNoChilds = 1;
2057 MInt tmpCnt = 0;
2058 while(totalNoChilds) {
2059 ASSERT(counter + tmpCnt < (signed)collectData.size(), "");
2060 if(!get<2>(collectData[counter + tmpCnt])) {
2061 totalNoChilds += static_cast<MUint>(get<1>(collectData[counter + tmpCnt])) & 15u;
2062 }
2063 tmpCnt++;
2064 totalNoChilds--;
2065 ASSERT(counter + tmpCnt < (signed)collectData.size(), "");
2066 nextId = get<0>(collectData[counter + tmpCnt]);
2067 }
2068 } else {
2069 nextId = globalId + collectNoOffspring[counterParent];
2070 }
2071 collectNoOffspring[counter] = nextId - childId;
2072 traverseAndSetupTruncatedSubtree(counter, collectData, collectParentId, collectLevel, collectNoChildren,
2073 collectChildIds, collectNoOffspring);
2074 }
2075 collectNoChildren[counterParent] = childCnt;
2076 }
2077
2078
2079 //-----------------------------------------------------------------------
2080
2081
2086 template <class CELLTYPE>
2087 void saveGrid(const MChar* fileName, Collector<CELLTYPE>* cpu_cells,
2088 const std::vector<std::vector<MInt>>& cpu_haloCells,
2089 const std::vector<std::vector<MInt>>& cpu_windowCells,
2090 const std::vector<std::vector<MInt>>& cpu_azimuthalHaloCells,
2091 const std::vector<MInt>& cpu_azimuthalUnmappedHaloCells,
2092 const std::vector<std::vector<MInt>>& cpu_azimuthalWindowCells, MInt* const recalcIdTree) {
2093 grid_.setLevel();
2094
2095 const MInt cpu_noCells = grid_.m_tree.size();
2096
2097 MLong tmpCnt = 0; // Variable used for various counting activities.
2098
2099 /* Mark all halo cells and communicate the no. of real cells for each domain
2100 * -------------------------------------------------------------------------
2101 *
2102 * MInt noHaloCellsTotal == The number of halo cells this domain has.
2103 * MIntScratchSpace noCellsPar[cpuCnt] == Contains for each domain the number of cell without halos.
2104 */
2105
2106 std::vector<std::vector<MInt>> partitionLevelAncestorHaloCells;
2107 std::vector<std::vector<MInt>> partitionLevelAncestorWindowCells;
2108 partitionLevelAncestorHaloCells.resize(noNeighborDomains());
2109 partitionLevelAncestorWindowCells.resize(noNeighborDomains());
2110
2111 MIntScratchSpace noCellsPar(noDomains(), AT_, "noCellsPar");
2112 MIntScratchSpace offspringPrefix(noDomains(), AT_, "offspringPrefix");
2113 MInt noHaloCellsTotal = 0;
2114 MInt noWindowCellsTotal = 0;
2115 MInt noHalo0 = cpu_noCells - m_noInternalCells;
2116 if(noNeighborDomains() > 0) {
2117 for(MInt i = 0; i < noNeighborDomains(); ++i) {
2118 noHaloCellsTotal += (signed)cpu_haloCells[i].size();
2119 noWindowCellsTotal += (signed)cpu_windowCells[i].size();
2120 for(MInt j = 0; j < (signed)cpu_haloCells[i].size(); ++j) {
2121 if(a_hasProperty(cpu_haloCells[i][j], Cell::IsPartLvlAncestor)) {
2122 partitionLevelAncestorHaloCells[i].push_back(cpu_haloCells[i][j]);
2123 }
2124 }
2125 for(MInt j = 0; j < (signed)cpu_windowCells[i].size(); ++j) {
2126 if(a_hasProperty(cpu_windowCells[i][j], Cell::IsPartLvlAncestor)) {
2127 partitionLevelAncestorWindowCells[i].push_back(cpu_windowCells[i][j]);
2128 }
2129 }
2130 }
2131 }
2132 // Azimuthal periodicity
2133 if(grid_.m_azimuthalPer) {
2134 if(noAzimuthalNeighborDomains() > 0) {
2135 for(MInt i = 0; i < noAzimuthalNeighborDomains(); ++i) {
2136 noHaloCellsTotal += (signed)cpu_azimuthalHaloCells[i].size();
2137 noWindowCellsTotal += (signed)cpu_azimuthalWindowCells[i].size();
2138 }
2139 noHaloCellsTotal += (signed)cpu_azimuthalUnmappedHaloCells.size();
2140 }
2141 }
2142 ASSERT(noHalo0 == noHaloCellsTotal, std::to_string(noHalo0) + " != " + std::to_string(noHaloCellsTotal));
2143
2144 // Note: the following code was originally used. However, since right now only the FV solver uses
2145 // this method, it was made the default behavior.
2146 // // only valid for gcell collector !!!
2147 // noCellsPar.p[domainId()] = cpu_noCells - noHaloCellsTotal;
2148 // MInt noInternalCells = cpu_noCells;
2149
2150 // if (std::is_same<CELLTYPE, FvCell>::value) {
2151 // noCellsPar.p[domainId()] = m_noInternalCells ;
2152 // noInternalCells = m_noInternalCells;
2153 // }
2154
2155 // NOTE: for minLevelIncrease skip all cells below the m_newMinLevel!
2156 // the default is -1
2157 MInt changeSize = grid_.m_newMinLevel > 0 ? m_noInternalCells : 1;
2158 MIntScratchSpace changeId2(changeSize, AT_, "changeId2");
2159 changeId2.fill(-1);
2160
2161 MInt noInternalCells = m_noInternalCells;
2162 if(grid_.m_newMinLevel > 0) { // re-count noInternalCells
2163 noInternalCells = 0;
2164 for(MInt cellId = 0; cellId < cpu_noCells; cellId++) {
2165 if(a_level(cellId) < grid_.m_newMinLevel) continue;
2166 if(a_hasProperty(cpu_cells, cellId, Cell::IsHalo)) continue;
2167 changeId2[cellId] = noInternalCells;
2168 noInternalCells++;
2169 }
2170 }
2171 noCellsPar.p[domainId()] = noInternalCells;
2172
2173 if(grid_.m_newMinLevel < 0) {
2174 ASSERT(!a_hasProperty(cpu_cells, m_noInternalCells - 1, Cell::IsHalo), "");
2175 if(noNeighborDomains() > 0) {
2176 ASSERT(a_hasProperty(cpu_cells, m_noInternalCells, Cell::IsHalo), "");
2177 }
2178 }
2179
2180 if(1 < noDomains()) {
2181 tmpCnt = noCellsPar.p[domainId()];
2182 MInt sndBuf = static_cast<MInt>(tmpCnt);
2183 MInt* rcvBuf = noCellsPar.getPointer();
2184 MPI_Allgather(&sndBuf, 1, MPI_INT, rcvBuf, 1, MPI_INT, mpiComm(), AT_, "sndBuf", "rcvBuf");
2185 tmpCnt = sndBuf;
2186 }
2187
2188 // Calculate noOffsprings and workload
2189 calculateNoOffspringsAndWorkload(cpu_cells, cpu_noCells, cpu_haloCells, cpu_windowCells,
2190 partitionLevelAncestorWindowCells, partitionLevelAncestorHaloCells);
2191
2192 // Create minLevelCells std::vector of all most coarse cells
2193 std::vector<MInt> minLevelCells;
2194 for(MInt i = 0; i < m_noInternalCells; ++i) {
2195 if(a_hasProperty(cpu_cells, i, Cell::IsHalo)) { // If we have a halo cell, continue with the next cell.
2196 continue;
2197 }
2198 // increase the minLevel by one when writing the restartGrid
2199 if(a_level(i) == grid_.m_newMinLevel) {
2200 minLevelCells.push_back(i);
2201 }
2202 if(grid_.m_newMinLevel > 0) continue;
2203
2204 if(a_parentId(i) == -1) {
2205 ASSERT(a_level(i) == m_minLevel, std::to_string(a_level(i)) + " " + std::to_string(m_minLevel));
2206 minLevelCells.push_back(i);
2207 }
2208 }
2209
2210 // Calculating hilbert id
2211 const MInt minLevelCellsCnt = minLevelCells.size();
2212 const MInt minLevel = grid_.m_newMinLevel > 0 ? grid_.m_newMinLevel : m_minLevel;
2213
2214
2215 MIntScratchSpace minLevelCells_id(mMax(1, minLevelCellsCnt), AT_, "minLevelCells_id");
2216 MLongScratchSpace minLevelCells_hId(mMax(1, minLevelCellsCnt), AT_, "minLevelCells_hId");
2217 minLevelCells_hId.fill(0);
2218 for(MInt i = 0; i < minLevelCellsCnt; ++i) {
2219 minLevelCells_id[i] = minLevelCells[i];
2220 minLevelCells_hId[i] = generateHilbertIndex(minLevelCells[i], minLevel);
2221 }
2222
2223 // sorting minLevel cells after hilbert id
2224 sortAfterHilbertIds(minLevelCells_hId.getPointer(), minLevelCells_id.getPointer(), minLevelCellsCnt);
2225
2226 // count offSprings for minLevel cells!
2227 MInt offspringSum = 0;
2228 for(MInt i = 0; i < minLevelCellsCnt; ++i) {
2229 ASSERT(a_noOffsprings(cpu_cells, minLevelCells_id.p[i]) > -1, "minLevelCells_id.p[i]) " << minLevelCells_id.p[i]);
2230 offspringSum += a_noOffsprings(cpu_cells, minLevelCells_id.p[i]);
2231 }
2232 MPI_Allgather(&offspringSum, 1, MPI_INT, offspringPrefix.getPointer(), 1, MPI_INT, mpiComm(), AT_, "offspringSum",
2233 "offspringPrefix.getPointer()");
2234
2235
2236 // Calculating Offset
2237 tmpCnt = m_32BitOffset;
2238 for(MInt i = 0; i < domainId(); ++i) {
2239 // tmpCnt += noCellsPar.p[i];
2240 tmpCnt += offspringPrefix.p[i];
2241 }
2242
2243
2244 // Calculating the new id (sorted grid-cellId) for all minCells
2245 MLongScratchSpace minLevelCells_newId(mMax(1, minLevelCellsCnt), AT_, "minLevelCells_newId");
2246 for(MInt i = 0; i < minLevelCellsCnt; ++i) {
2247 minLevelCells_newId.p[i] = tmpCnt;
2248 tmpCnt += a_noOffsprings(cpu_cells, minLevelCells_id.p[i]);
2249 }
2250
2251
2252 if(grid_.m_newMinLevel < 0) {
2253 for(MInt i = 0; i < minLevelCellsCnt; ++i) {
2254 ASSERT(minLevelCells_newId.p[i] == a_globalId(minLevelCells_id.p[i]),
2255 "minLevelCells_newId.p[i] " << minLevelCells_newId.p[i] << "a_globalId(minLevelCells_id.p[i] "
2256 << a_globalId(minLevelCells_id.p[i]));
2257 }
2258 }
2259
2260 // Creating new order of ids for own cells
2261
2262 // changeId from unsorted to sorted grid cells after globalId!
2263 MLongScratchSpace changeId(cpu_noCells, AT_, "changeId");
2264 // old unsorted ordering
2265 MIntScratchSpace oldId(noCellsPar[domainId()], AT_, "oldId");
2266 // new sorted ordering
2267 MLongScratchSpace newId(noCellsPar[domainId()], AT_, "newId");
2268
2269 tmpCnt = -1;
2270 tmpCnt = createTreeOrderingOfIds(cpu_cells, m_noInternalCells, oldId.getPointer(), newId.getPointer(),
2271 minLevelCells_id.getPointer(), minLevelCells_newId.getPointer(), minLevelCellsCnt,
2272 changeId.getPointer(), partitionLevelAncestorWindowCells,
2273 partitionLevelAncestorHaloCells);
2274
2275 ASSERT(tmpCnt == noCellsPar[domainId()], "tmpCnt: " << tmpCnt << " noCellsPar: " << noCellsPar[domainId()]);
2276
2277 // Fill this so that it can be used in the solvers to write their restart-File in the
2278 // same ordering!
2279 if(recalcIdTree) {
2280 for(MInt i = 0; i < tmpCnt; i++) {
2281 ASSERT(i < tmpCnt, "index out of range");
2282 recalcIdTree[i] = oldId[i];
2283 ASSERT(recalcIdTree[i] > -1 && recalcIdTree[i] < m_noInternalCells, "recalcId out of range");
2284 }
2285 }
2286
2287 // Create the changeId array in wich: changeId.p[oldId] = newId.
2288 // changeId is -2 for halo-cells!
2289 for(MInt i = 0; i < cpu_noCells; ++i) {
2290 changeId[i] = -2;
2291 }
2292
2293 for(MInt i = 0; i < noCellsPar[domainId()]; ++i) {
2294 changeId[oldId[i]] = newId[i];
2295 }
2296
2297 // Communicate new id of the halo cells.
2298 if(1 < noDomains() && (noWindowCellsTotal > 0) && (noHaloCellsTotal > 0)) {
2299 // Creating ScratchSpace for the communication
2300 MLongScratchSpace sndBuf(noWindowCellsTotal, AT_, "sndBuf");
2301 MLongScratchSpace rcvBuf(noHaloCellsTotal, AT_, "rcvBuf");
2302 MIntScratchSpace sndDsp(noNeighborDomains(), AT_, "sndDsp");
2303 MIntScratchSpace rcvDsp(noNeighborDomains(), AT_, "rcvDsp");
2304 MIntScratchSpace sndCnt(noNeighborDomains(), AT_, "sndCnt");
2305 MIntScratchSpace rcvCnt(noNeighborDomains(), AT_, "rcvCnt");
2306
2307 tmpCnt = 0;
2308 for(MInt i = 0; i < noNeighborDomains(); ++i) {
2309 // Fill the send buffer with the new ids of the own window cells.
2310 for(MInt j = 0; j < (signed)cpu_windowCells[i].size(); ++j) {
2311 sndBuf.p[tmpCnt] = changeId.p[cpu_windowCells[i][j]];
2312 ++tmpCnt;
2313 }
2314
2315 // Calculate the offsets for sending and receiving datas.
2316 if(0 == i) {
2317 sndDsp.p[0] = 0;
2318 rcvDsp.p[0] = 0;
2319 } else {
2320 sndDsp.p[i] = sndDsp.p[i - 1] + ((signed)cpu_windowCells[i - 1].size());
2321 rcvDsp.p[i] = rcvDsp.p[i - 1] + ((signed)cpu_haloCells[i - 1].size());
2322 }
2323
2324 sndCnt.p[i] = (signed)cpu_windowCells[i].size();
2325 rcvCnt.p[i] = (signed)cpu_haloCells[i].size();
2326 }
2327
2328 // Note: it might be required to use a unique mpi-tag for the communication below to avoid any
2329 // conflicting messages and possibly deadlocks in case there are still other open MPI requests
2330 const MInt mpiTag = 0;
2331 if(0 < noNeighborDomains()) {
2332 MPI_Request* mpiReq;
2333 mpiReq = new MPI_Request[noNeighborDomains()];
2334 MInt cntS = 0;
2335 for(MInt i = 0; i < noNeighborDomains(); i++) {
2336 mpiReq[i] = MPI_REQUEST_NULL;
2337 MInt bufSize = (signed)cpu_windowCells[i].size();
2338 if(bufSize == 0) continue;
2339 MPI_Issend(&(sndBuf[cntS]), bufSize, MPI_LONG, m_nghbrDomains[i], mpiTag, mpiComm(), &mpiReq[i], AT_,
2340 "(sndBuf[cntS])");
2341 cntS += bufSize;
2342 }
2343 MPI_Status status;
2344 MInt cntR = 0;
2345 for(MInt i = 0; i < noNeighborDomains(); i++) {
2346 MInt bufSize = (signed)cpu_haloCells[i].size();
2347 if(bufSize == 0) continue;
2348 MPI_Recv(&(rcvBuf[cntR]), bufSize, MPI_LONG, m_nghbrDomains[i], mpiTag, mpiComm(), &status, AT_,
2349 "(rcvBuf[cntR])");
2350 cntR += bufSize;
2351 }
2352 for(MInt i = 0; i < noNeighborDomains(); i++) {
2353 if((signed)cpu_windowCells[i].size() == 0) continue;
2354 MPI_Wait(&mpiReq[i], &status, AT_);
2355 }
2356 }
2357
2358
2359 // Use the new datas of the receive buffer to complete the missing ids in changeId.
2360 tmpCnt = 0;
2361 for(MInt i = 0; i < noNeighborDomains(); ++i) {
2362 for(MInt j = 0; j < (signed)cpu_haloCells[i].size(); ++j) {
2363 changeId.p[cpu_haloCells[i][j]] = rcvBuf.p[tmpCnt];
2364 ++tmpCnt;
2365 }
2366 }
2367 }
2368
2369 /* Filtering partitionCells using m_partitionCellOffspringThreshold
2370 * ------------------------------------------
2371 * To ensure that later we have an 'optimal' distribution of cells on all domains, we now filters out cells that are
2372 * too big (too many offsprings) or too heavy (too much workload) using a threshold value defined through the
2373 * property partitionCellOffspringThreshold.
2374 *
2375 * Choosing a right number for partitionCellOffspringThreshold is a user choice... the lower the value, the more
2376 * domains can be used for calcution but also more cells gets filtered and the amount of partitionCells used for the
2377 * distribution increases, which can result in increases in time while loading the grid and a bigger grid file.
2378 * Choosing a value too high will reduce the amount of domains usable for calculation on the grid but could increase
2379 * the speed at which the grid gets loaded.
2380 *
2381 * 1. Calculate the maximum number of all cells our grid has and the threshold for workload and noOffsprings.
2382 * 2. Set a vector containing all minLevelCells so far.
2383 * 3. First filter step: noOffsprings
2384 * - If a cell has more offsprings than the offspringsThreshold or a cell has more Offspring than the max number
2385 * of cells allowed on a single domain. Delete it and replace it through her childrens.
2386 * - Re-filter each child.
2387 * - Continue with next cell.
2388 * 4. Second filter step: workload
2389 * - If a cell has more workload than the workload threshold. Delete it and replace it through her childrens.
2390 * - Re-filter each child.
2391 * - Continue with next cell.
2392 * 5. Compute the level difference each cell has to the minLevelCells from the beginning. This value is used in the
2393 * distribution to check if we have a real partitionCell or a splitted one.
2394 * 6. Communicate partitionCellsFiltered.size() to all domains and calculate the number of all filtered cells,
2395 * needed later for offset calculation on file writing.
2396 *
2397 * ZfSInt noCellsParMax == addition of all cells on all domains.
2398 * MInt offSpringThreshold == How much offsprings a cell can have before becoming filtered and splitted into her
2399 * childrens. MFloat workloadThreshold == How much weight a cell can have before becoming filtered and splitted
2400 * into her childrens. MInt maxCells == maximum number of cells a single domains can hold. iNTscratchSpace
2401 * partitionCellLevelDiff == holds the level difference between the cells and the partitionCells. MIntScratchSpace
2402 * partitionCellsFilteredSizePar == holds the amount of filtered cells for each domains. MInt
2403 * partitionCellsFilteredSizeParMax == addition of all partitionCellsFilteredSize on all domains.
2404 *
2405 */
2406
2407 // Calculating Offset
2408 tmpCnt = m_32BitOffset;
2409 for(MInt i = 0; i < domainId(); ++i) {
2410 tmpCnt += noCellsPar.p[i];
2411 }
2412
2413 for(MInt i = 0; i < noCellsPar[domainId()]; ++i) {
2414 ASSERT(changeId[oldId[i]] >= tmpCnt && changeId[oldId[i]] < tmpCnt + noCellsPar.p[domainId()], "");
2415 }
2416
2417 // Setting thresholds
2418 const MInt offspringsThreshold = m_partitionCellOffspringThreshold;
2419 const MFloat workloadThreshold = m_partitionCellWorkloadThreshold;
2420
2421 std::vector<MLong> partitionCellsFiltered;
2422
2423 if(!grid_.m_updatedPartitionCells && grid_.m_updatePartitionCellsOnRestart) {
2424 std::vector<MInt> partitionCellsGlobalIds;
2425
2426 // 2. Vector containing all minLevelCells
2427 for(MInt i = 0; i < minLevelCellsCnt; ++i) {
2428 ASSERT(minLevelCells_newId.p[i] >= tmpCnt && minLevelCells_newId.p[i] < tmpCnt + noCellsPar.p[domainId()],
2429 "minLevelCells_newId:" << minLevelCells_newId.p[i] << " tmpCnt: " << tmpCnt
2430 << " noCellsPar: " << noCellsPar.p[domainId()]);
2431 ASSERT(minLevelCells_newId.p[i] == changeId[oldId.p[minLevelCells_newId.p[i] - tmpCnt]], "");
2432 }
2433
2434 // 3.Filter steps: noOffsprings/workload
2435 std::vector<MInt> tmpPartitionCells;
2436 for(MInt cellId = 0; cellId < cpu_noCells; ++cellId) {
2437 // if (a_parentId(cpu_cells, cellId) == -1) {
2438 if(a_level(cellId) == minLevel && !a_hasProperty(cpu_cells, cellId, Cell::IsPeriodic)) {
2439 // ASSERT( a_level(cpu_cells, cellId) == m_minLevel, "" );
2440 if(!grid_.m_newMinLevel) {
2441 ASSERT(a_parentId(cellId) == -1, "");
2442 }
2443 tmpPartitionCells.push_back(cellId);
2444 }
2445 }
2446 // TODO labels:GRID,ADAPTATION @ansgar add multisolver check for leaf cells!
2447 while(!tmpPartitionCells.empty()) {
2448 MInt cellId = tmpPartitionCells.front();
2449 tmpPartitionCells.erase(tmpPartitionCells.begin());
2450 if(a_noOffsprings(cpu_cells, cellId) > offspringsThreshold
2451 || a_workload(cpu_cells, cellId) > workloadThreshold) {
2452 MInt cnt = 0;
2453 for(MUint j = 0; j < IPOW2(nDim); ++j) {
2454 if(a_childId(cellId, j) > -1) {
2455 tmpPartitionCells.insert(tmpPartitionCells.begin() + cnt, a_childId(cellId, j));
2456 cnt++;
2457 }
2458 }
2459 } else if(!a_hasProperty(cpu_cells, cellId, Cell::IsHalo)) {
2460 partitionCellsFiltered.push_back(changeId.p[cellId]);
2461 partitionCellsGlobalIds.push_back(grid_.a_globalId(cellId));
2462 }
2463 }
2464
2465 // update partition cell offsets and partition-cell localIds which are required when writing LPT-restart files!
2466 MLong noPartitionCellsGlobal = partitionCellsFiltered.size();
2467 MPI_Allreduce(MPI_IN_PLACE, &noPartitionCellsGlobal, 1, type_traits<MLong>::mpiType(), MPI_SUM, mpiComm(), AT_,
2468 "MPI_IN_PLACE", "m_noPartitionCellsGlobal");
2469 if(noPartitionCellsGlobal != grid_.m_noPartitionCellsGlobal) {
2470 cerr0 << "Partition cell update in restart-file! " << noPartitionCellsGlobal << " "
2471 << grid_.m_noPartitionCellsGlobal << std::endl;
2472 }
2473
2474 MInt noLocalPartitionCells = partitionCellsFiltered.size();
2475
2476 if(grid_.m_localPartitionCellLocalIdsRestart != nullptr) {
2477 mDeallocate(grid_.m_localPartitionCellLocalIdsRestart);
2478 }
2479 mAlloc(grid_.m_localPartitionCellLocalIdsRestart, noLocalPartitionCells, "m_localPartitionCellLocalIdsRestart",
2480 -1, AT_);
2481
2482 // sort by globalIds
2483 sort(partitionCellsGlobalIds.begin(), partitionCellsGlobalIds.end());
2484
2485 MIntScratchSpace localPartitionCellCounts(noDomains(), AT_, "localPartitionCellCounts");
2486 // determine offset within GLOBAL!!! partition cells
2487 MPI_Allgather(&noLocalPartitionCells, 1, MPI_INT, &localPartitionCellCounts[0], 1, MPI_INT, mpiComm(), AT_,
2488 "noLocalPartitionCells", "localPartitionCellCounts[0]");
2489
2490 // sum up all previous domains partition cells
2491 MInt offset = 0;
2492 for(MInt dId = 0; dId < domainId(); dId++) {
2493 offset += localPartitionCellCounts[dId];
2494 }
2495
2496 // Set local partition cell offset, the next offset and the number of global partition cells
2497 grid_.m_localPartitionCellOffsetsRestart[0] = offset;
2498 grid_.m_localPartitionCellOffsetsRestart[1] = offset + noLocalPartitionCells;
2499 grid_.m_localPartitionCellOffsetsRestart[2] = noPartitionCellsGlobal;
2500
2501 for(MInt i = 0; i < noLocalPartitionCells; i++) {
2502 grid_.m_localPartitionCellLocalIdsRestart[i] = grid_.globalIdToLocalId(partitionCellsGlobalIds[i]);
2503 }
2504
2505 } else {
2506 for(MInt cellId = 0; cellId < cpu_noCells; ++cellId) {
2507 if(a_hasProperty(cellId, Cell::IsPeriodic) || a_hasProperty(cellId, Cell::IsHalo)) {
2508 continue;
2509 }
2510 if(a_level(cellId) < grid_.m_newMinLevel) continue;
2511
2512 if(a_hasProperty(cellId, Cell::IsPartitionCell)) {
2513 partitionCellsFiltered.push_back(changeId.p[cellId]);
2514 }
2515 }
2516 }
2517
2518 sort(partitionCellsFiltered.begin(), partitionCellsFiltered.end());
2519
2520 // 5. Compute level difference
2521 MIntScratchSpace partitionCellLevelDiff(partitionCellsFiltered.size(), AT_, "partitionCellLevelDiff");
2522 for(MInt i = 0; i < (signed)partitionCellsFiltered.size(); ++i) {
2523 partitionCellLevelDiff.p[i] = a_level(oldId.p[partitionCellsFiltered[i] - tmpCnt]) - minLevel;
2524 }
2525
2526 // 6. Communicate partitionCellsFiltered.size() into partitionCellsFilteredSizePar
2527 MIntScratchSpace partitionCellsFilteredSizePar(noDomains(), AT_, "partitionCellsFilteredSizePar");
2528 partitionCellsFilteredSizePar.p[domainId()] = partitionCellsFiltered.size();
2529
2530 if(1 < noDomains()) {
2531 tmpCnt = partitionCellsFilteredSizePar.p[domainId()];
2532 MInt sendVar = static_cast<MInt>(tmpCnt);
2533 MInt* rcvBuf = partitionCellsFilteredSizePar.getPointer();
2534 MPI_Allgather(&sendVar, 1, MPI_INT, rcvBuf, 1, MPI_INT, mpiComm(), AT_, "sendVar", "rcvBuf");
2535 tmpCnt = sendVar;
2536 }
2537
2538 /* Write to parallel file
2539 * ----------------------
2540 */
2541
2542 writeParallelGridFile(fileName, cpu_cells, partitionCellsFilteredSizePar.data(), noCellsPar.data(), oldId.data(),
2543 changeId.data(), partitionCellsFiltered, partitionCellLevelDiff.data(), changeId2.data());
2544 }
2545
2546
2561 void sortAfterHilbertIds(MLong* array2Sort, MInt* followUp1, MInt cell2SortCnt) {
2562 TRACE();
2563 // Comb Sort... (perhaps later change to a better sort)
2564 MInt gap = cell2SortCnt;
2565 MBool swapped = true;
2566 while((gap > 1) || swapped) {
2567 gap = MInt(gap / 1.247330950103979);
2568 gap = mMax(gap, 1);
2569 MInt i = 0;
2570 swapped = false;
2571 while(i + gap < cell2SortCnt) {
2572 if(array2Sort[i] > array2Sort[i + gap]) {
2573 std::swap(array2Sort[i], array2Sort[i + gap]);
2574 std::swap(followUp1[i], followUp1[i + gap]);
2575 swapped = true;
2576 }
2577 ++i;
2578 }
2579 }
2580 }
2581
2582
2583 // -----------------------------------------------------------------------------
2584
2585
2599 template <typename CELLTYPE>
2601 const std::vector<std::vector<MInt>>& cpu_haloCells,
2602 const std::vector<std::vector<MInt>>& cpu_windowCells,
2603 const std::vector<std::vector<MInt>>& partitionLevelAncestorWindowCells,
2604 const std::vector<std::vector<MInt>>& partitionLevelAncestorHaloCells) {
2605 TRACE();
2606
2607 MInt recvSize = 0;
2608 for(MInt d = 0; d < noNeighborDomains(); d++) {
2609 recvSize += (signed)partitionLevelAncestorWindowCells[d].size();
2610 }
2611 MIntScratchSpace recvBuffer(mMax(1, recvSize), AT_, "recvBuffer");
2612 MFloatScratchSpace recvBuffer2(mMax(1, recvSize), AT_, "recvBuffer2");
2613 MIntScratchSpace tmpNoOffs(input_noCells, AT_, "tmpNoOffs");
2614 MFloatScratchSpace tmpWorkload(input_noCells, AT_, "tmpWorkload");
2615
2616 MInt noChilds = IPOW2(nDim);
2617
2618 for(MInt i = 0; i < input_noCells; ++i) {
2619 if(a_level(i) < grid_.m_newMinLevel) {
2620 a_noOffsprings(input_cells, i) = 0;
2621 a_workload(input_cells, i) = 0.0;
2622 continue;
2623 }
2624 if(!a_hasProperty(input_cells, i, Cell::IsHalo)) {
2625 a_noOffsprings(input_cells, i) = 1;
2626 a_workload(input_cells, i) = a_weight(i);
2627 ASSERT(!std::isnan(a_weight(i)), "cellId " + std::to_string(i) + " g" + std::to_string(a_globalId(i)));
2628 } else {
2629 a_noOffsprings(input_cells, i) = 0;
2630 a_workload(input_cells, i) = 0.0;
2631 }
2632 }
2633
2634 const MInt minLevel = (grid_.m_newMinLevel > 0) ? grid_.m_newMinLevel : m_minLevel;
2635
2636 for(MInt level_ = m_maxLevel; level_ >= minLevel; --level_) {
2637 for(MInt i = 0; i < input_noCells; ++i) {
2638 if(level_ == a_level(i)) {
2639 if(0 != a_noChildren(i)) {
2640 for(MInt j = 0; j < noChilds; ++j) {
2641 if(-1 != a_childId(i, j)) {
2642 a_noOffsprings(input_cells, i) += a_noOffsprings(input_cells, a_childId(i, j));
2643 a_workload(input_cells, i) += a_workload(input_cells, a_childId(i, j));
2644 }
2645 }
2646 }
2647 }
2648 }
2649 }
2650
2651
2653 for(MInt d = 0; d < noNeighborDomains(); d++) {
2654 for(MInt j = 0; j < (signed)partitionLevelAncestorHaloCells[d].size(); j++) {
2655 tmpNoOffs[partitionLevelAncestorHaloCells[d][j]] =
2656 a_noOffsprings(input_cells, partitionLevelAncestorHaloCells[d][j]);
2657 tmpWorkload[partitionLevelAncestorHaloCells[d][j]] =
2658 a_workload(input_cells, partitionLevelAncestorHaloCells[d][j]);
2659 }
2660 }
2661 maia::mpi::reverseExchangeData(m_nghbrDomains, partitionLevelAncestorHaloCells, partitionLevelAncestorWindowCells,
2662 mpiComm(), &tmpNoOffs[0], &recvBuffer[0]);
2663 maia::mpi::reverseExchangeData(m_nghbrDomains, partitionLevelAncestorHaloCells, partitionLevelAncestorWindowCells,
2664 mpiComm(), &tmpWorkload[0], &recvBuffer2[0]);
2665 MInt cnt = 0;
2666 for(MInt d = 0; d < noNeighborDomains(); d++) {
2667 for(MInt j = 0; j < (signed)partitionLevelAncestorWindowCells[d].size(); j++) {
2668 MInt cellId = partitionLevelAncestorWindowCells[d][j];
2669 a_noOffsprings(input_cells, cellId) += recvBuffer[cnt];
2670 a_workload(input_cells, cellId) += recvBuffer2[cnt];
2671 cnt++;
2672 }
2673 }
2674
2675 for(MInt d = 0; d < noNeighborDomains(); d++) {
2676 for(MInt j = 0; j < (signed)partitionLevelAncestorWindowCells[d].size(); j++) {
2677 tmpNoOffs[partitionLevelAncestorWindowCells[d][j]] =
2678 a_noOffsprings(input_cells, partitionLevelAncestorWindowCells[d][j]);
2679 tmpWorkload[partitionLevelAncestorWindowCells[d][j]] =
2680 a_workload(input_cells, partitionLevelAncestorWindowCells[d][j]);
2681 }
2682 }
2683 maia::mpi::exchangeData(m_nghbrDomains, partitionLevelAncestorHaloCells, partitionLevelAncestorWindowCells,
2684 mpiComm(), &tmpNoOffs[0]);
2685 maia::mpi::exchangeData(m_nghbrDomains, partitionLevelAncestorHaloCells, partitionLevelAncestorWindowCells,
2686 mpiComm(), &tmpWorkload[0]);
2687 for(MInt d = 0; d < noNeighborDomains(); d++) {
2688 for(MInt j = 0; j < (signed)partitionLevelAncestorHaloCells[d].size(); j++) {
2689 a_noOffsprings(input_cells, partitionLevelAncestorHaloCells[d][j]) =
2690 tmpNoOffs[partitionLevelAncestorHaloCells[d][j]];
2691 a_workload(input_cells, partitionLevelAncestorHaloCells[d][j]) =
2692 tmpWorkload[partitionLevelAncestorHaloCells[d][j]];
2693 }
2694 }
2695 }
2696
2697 for(MInt i = 0; i < noNeighborDomains(); i++) {
2698 for(MInt j = 0; j < (signed)cpu_windowCells[i].size(); j++) {
2699 tmpNoOffs[cpu_windowCells[i][j]] = a_noOffsprings(input_cells, cpu_windowCells[i][j]);
2700 tmpWorkload[cpu_windowCells[i][j]] = a_workload(input_cells, cpu_windowCells[i][j]);
2701 }
2702 }
2703 if(noNeighborDomains() > 0) {
2704 maia::mpi::exchangeData(m_nghbrDomains, cpu_haloCells, cpu_windowCells, mpiComm(), &tmpNoOffs[0]);
2705 maia::mpi::exchangeData(m_nghbrDomains, cpu_haloCells, cpu_windowCells, mpiComm(), &tmpWorkload[0]);
2706 }
2707 for(MInt i = 0; i < noNeighborDomains(); i++) {
2708 for(MInt j = 0; j < (signed)cpu_haloCells[i].size(); j++) {
2709 a_noOffsprings(input_cells, cpu_haloCells[i][j]) = tmpNoOffs[cpu_haloCells[i][j]];
2710 a_workload(input_cells, cpu_haloCells[i][j]) = tmpWorkload[cpu_haloCells[i][j]];
2711 }
2712 }
2713 }
2714
2715
2716 // -----------------------------------------------------------------------------
2717
2718
2735 template <typename CELLTYPE>
2736 MInt createTreeOrderingOfIds(Collector<CELLTYPE>* input_cells, MInt noInternalCells, MInt* oldId, MLong* newId,
2737 MInt* minLevelCells_id, MLong* minLevelCells_newId, MInt minLevelCellsCnt, MLong* tmp_id,
2738 std::vector<std::vector<MInt>>& partitionLevelAncestorWindowCells,
2739 std::vector<std::vector<MInt>>& partitionLevelAncestorHaloCells) {
2740 TRACE();
2741
2742 std::fill(&tmp_id[0], &tmp_id[0] + a_noCells(input_cells), -1);
2743 MIntScratchSpace childOffsprings(a_noCells(input_cells), IPOW2(nDim), AT_, "childOffsprings");
2744 childOffsprings.fill(0);
2745 for(MInt cellId = 0; cellId < grid_.m_tree.size(); cellId++) {
2746 if(a_level(cellId) < grid_.m_newMinLevel) continue;
2747 if(!a_hasProperty(input_cells, cellId, Cell::IsHalo)) {
2748 for(MInt k = 0; k < IPOW2(nDim); ++k) {
2749 if(a_childId(cellId, k) > -1) {
2750 childOffsprings(cellId, k) = a_noOffsprings(input_cells, a_childId(cellId, k));
2751 }
2752 }
2753 }
2754 }
2755
2756 if(m_maxPartitionLevelShift > 0) {
2757 // gather child no offsprings here...
2758 MIntScratchSpace noSend(noNeighborDomains(), AT_, "noSend");
2759 MIntScratchSpace noRecv(noNeighborDomains(), AT_, "noRecv");
2760 noSend.fill(0);
2761 noRecv.fill(0);
2762 for(MInt d = 0; d < noNeighborDomains(); d++) {
2763 for(MInt j = 0; j < (signed)partitionLevelAncestorHaloCells[d].size(); j++) {
2764 MInt haloId = partitionLevelAncestorHaloCells[d][j];
2765 for(MUint k = 0; k < IPOW2(nDim); ++k) {
2766 MInt childId = a_childId(haloId, k);
2767 if(childId > -1) {
2768 if(!a_hasProperty(input_cells, childId, Cell::IsHalo)) {
2769 noSend(d)++;
2770 }
2771 }
2772 }
2773 }
2774 }
2775 MIntScratchSpace sendOffsets(noNeighborDomains() + 1, AT_, "sendOffsets");
2776 sendOffsets(0) = 0;
2777 for(MInt d = 0; d < noNeighborDomains(); d++) {
2778 sendOffsets(d + 1) = sendOffsets(d) + noSend(d);
2779 }
2780 MIntScratchSpace sendData(sendOffsets(noNeighborDomains()), 3, AT_, "sendData");
2781 noSend.fill(0);
2782 for(MInt d = 0; d < noNeighborDomains(); d++) {
2783 for(MInt j = 0; j < (signed)partitionLevelAncestorHaloCells[d].size(); j++) {
2784 MInt haloId = partitionLevelAncestorHaloCells[d][j];
2785 for(MUint k = 0; k < IPOW2(nDim); ++k) {
2786 MInt childId = a_childId(haloId, k);
2787 if(childId > -1) {
2788 if(!a_hasProperty(input_cells, childId, Cell::IsHalo)) {
2789 sendData(sendOffsets(d) + noSend(d), 0) = a_noOffsprings(input_cells, childId);
2790 sendData(sendOffsets(d) + noSend(d), 1) = j;
2791 sendData(sendOffsets(d) + noSend(d), 2) = k;
2792 noSend(d)++;
2793 }
2794 }
2795 }
2796 }
2797 }
2798
2799 ScratchSpace<MPI_Request> sendReq(noNeighborDomains(), AT_, "sendReq");
2800 sendReq.fill(MPI_REQUEST_NULL);
2801 MInt sendCnt = 0;
2802 for(MInt c = 0; c < noNeighborDomains(); c++) {
2803 MPI_Issend(&noSend[c], 1, MPI_INT, m_nghbrDomains[c], 12343, mpiComm(), &sendReq[sendCnt], AT_, "noSend[c]");
2804 sendCnt++;
2805 }
2806 for(MInt c = 0; c < noNeighborDomains(); c++) {
2807 MPI_Recv(&noRecv[c], 1, MPI_INT, m_nghbrDomains[c], 12343, mpiComm(), MPI_STATUS_IGNORE, AT_, "noRecv[c]");
2808 }
2809 if(sendCnt > 0) MPI_Waitall(sendCnt, &sendReq[0], MPI_STATUSES_IGNORE, AT_);
2810
2811 MIntScratchSpace recvOffsets(noNeighborDomains() + 1, AT_, "recvOffsets");
2812 recvOffsets(0) = 0;
2813 for(MInt d = 0; d < noNeighborDomains(); d++) {
2814 recvOffsets(d + 1) = recvOffsets(d) + noRecv(d);
2815 }
2816 MIntScratchSpace recvData(recvOffsets(noNeighborDomains()), 3, AT_, "recvData");
2817
2818 sendReq.fill(MPI_REQUEST_NULL);
2819 sendCnt = 0;
2820 for(MInt c = 0; c < noNeighborDomains(); c++) {
2821 if(noSend[c] == 0) continue;
2822 MPI_Issend(&sendData(sendOffsets[c], 0), 3 * noSend[c], MPI_INT, m_nghbrDomains[c], 12344, mpiComm(),
2823 &sendReq[sendCnt], AT_, "sendData(sendOffsets[c]");
2824 sendCnt++;
2825 }
2826 for(MInt c = 0; c < noNeighborDomains(); c++) {
2827 if(noRecv[c] == 0) continue;
2828 MPI_Recv(&recvData(recvOffsets[c], 0), 3 * noRecv[c], MPI_INT, m_nghbrDomains[c], 12344, mpiComm(),
2829 MPI_STATUS_IGNORE, AT_, "recvData(recvOffsets[c]");
2830 }
2831 if(sendCnt > 0) MPI_Waitall(sendCnt, &sendReq[0], MPI_STATUSES_IGNORE, AT_);
2832
2833 for(MInt d = 0; d < noNeighborDomains(); d++) {
2834 for(MInt k = 0; k < noRecv[d]; k++) {
2835 MInt idx = recvData(recvOffsets[d] + k, 1);
2836 MInt child = recvData(recvOffsets[d] + k, 2);
2837 ASSERT(child > -1 && child < IPOW2(nDim), "");
2838 MInt windowId = partitionLevelAncestorWindowCells[d][idx];
2839 childOffsprings(windowId, child) = recvData(recvOffsets[d] + k, 0);
2840 }
2841 }
2842
2843
2844 if(noNeighborDomains() > 0) {
2845 maia::mpi::exchangeData(m_nghbrDomains, partitionLevelAncestorHaloCells, partitionLevelAncestorWindowCells,
2846 mpiComm(), &childOffsprings[0], IPOW2(nDim));
2847 }
2848 }
2849
2850 for(MInt i = 0; i < minLevelCellsCnt; ++i) {
2851 tmp_id[minLevelCells_id[i]] = minLevelCells_newId[i];
2852 ASSERT(tmp_id[minLevelCells_id[i]] > -1, "");
2853 }
2854
2855 if(m_maxPartitionLevelShift > 0 && noDomains() > 1) {
2856 maia::mpi::exchangeData(m_nghbrDomains, partitionLevelAncestorHaloCells, partitionLevelAncestorWindowCells,
2857 mpiComm(), &tmp_id[0]);
2858 }
2859
2860 const MInt minLevel = (grid_.m_newMinLevel > 0) ? grid_.m_newMinLevel : m_minLevel;
2861
2862 for(MInt level = minLevel; level < m_maxLevel; level++) {
2863 for(MInt cellId = 0; cellId < grid_.m_tree.size(); cellId++) {
2864 if(a_level(cellId) == level) {
2865 // skipping all halo cells except for partition-level-ancestors
2866 if(tmp_id[cellId] < 0) continue;
2867 MLong cntId = tmp_id[cellId] + 1;
2868 for(MInt k = 0; k < IPOW2(nDim); ++k) {
2869 MInt childId = a_childId(cellId, k);
2870 if(childId > -1) {
2871 tmp_id[childId] = cntId;
2872 }
2873 cntId += childOffsprings(cellId, k);
2874 }
2875 }
2876 }
2877 }
2878 std::map<MLong, MInt> cellMap;
2879 for(MInt cellId = 0; cellId < noInternalCells; cellId++) {
2880 if(a_level(cellId) < grid_.m_newMinLevel) continue;
2881 if(!a_hasProperty(input_cells, cellId, Cell::IsHalo)) {
2882 ASSERT(tmp_id[cellId] > -1, "");
2883 cellMap.insert(std::make_pair(tmp_id[cellId], cellId));
2884 }
2885 }
2886
2887 MInt tmpCnt = 0;
2888 for(auto it = cellMap.begin(); it != cellMap.end(); ++it) {
2889 oldId[tmpCnt] = it->second;
2890 newId[tmpCnt] = it->first;
2891 tmpCnt++;
2892 }
2893
2894 return tmpCnt;
2895 }
2896
2897
2898 // -----------------------------------------------------------------------------
2899
2900
2905 template <typename CELLTYPE>
2906 void writeParallelGridFile(const MChar* fileName, Collector<CELLTYPE>* input_cells,
2907 MInt* partitionCellsFilteredSizePar, MInt* noCellsPar, MInt* oldId, MLong* changeId,
2908 const std::vector<MLong>& partitionCellsFiltered, MInt* partitionCellLevelDiff,
2909 MInt* changeId2) {
2910 TRACE();
2911
2912 // Create File.
2913 using namespace maia::parallel_io;
2914 ParallelIo grid(fileName, PIO_REPLACE, mpiComm());
2915
2916 // Calculate Offsets
2917 MLongScratchSpace partitionOffset(noDomains(), AT_, "partitionOffset");
2918 MLongScratchSpace allOffset(noDomains(), AT_, "allOffset");
2919
2920 MLong partitionCellsFilteredSizeParMax = 0;
2921 MLong noCellsParMax = 0;
2922 MLong tmp = 0;
2923
2924 partitionOffset[0] = 0;
2925 allOffset[0] = m_32BitOffset;
2926
2927 for(MInt i = 1; i < noDomains(); ++i) {
2928 partitionOffset[i] = partitionOffset[i - 1] + partitionCellsFilteredSizePar[i - 1];
2929 allOffset[i] = allOffset[i - 1] + noCellsPar[i - 1];
2930 }
2931
2932 for(MInt i = 0; i < noDomains(); ++i) {
2933 partitionCellsFilteredSizeParMax += (MLong)partitionCellsFilteredSizePar[i];
2934 noCellsParMax += (MLong)noCellsPar[i];
2935 tmp += (MLong)noCellsPar[i];
2936 }
2937 // test whether grid can be written out
2938 m_log << "total number of cells to be written out " << tmp << std::endl;
2939
2940 MInt minLevel = std::numeric_limits<MInt>::max();
2941 MInt maxLevel = -1;
2942
2943
2944 const MInt noCells = grid_.m_newMinLevel > 0 ? m_noInternalCells : noCellsPar[domainId()];
2945
2946 MInt newCellCount = 0;
2947 for(MInt i = 0; i < noCells; ++i) {
2948 MInt cellId = i;
2949 if(grid_.m_newMinLevel > 0) cellId = changeId2[i];
2950 if(a_level(oldId[cellId]) < grid_.m_newMinLevel) continue;
2951 minLevel = mMin(minLevel, a_level(oldId[cellId]));
2952 maxLevel = mMax(maxLevel, a_level(oldId[cellId]));
2953 newCellCount++;
2954 }
2955
2956 if(grid_.m_newMinLevel > 0) {
2957 std::cerr << "new-cout " << newCellCount << " old " << noCellsPar[domainId()] << std::endl;
2958 ASSERT(newCellCount == noCellsPar[domainId()], "");
2959 }
2960
2961 MPI_Allreduce(&m_minLevel, &minLevel, 1, MPI_INT, MPI_MIN, mpiComm(), AT_, "m_minLevel", "minLevel");
2962 MPI_Allreduce(&m_maxLevel, &maxLevel, 1, MPI_INT, MPI_MAX, mpiComm(), AT_, "m_maxLevel", "maxLevel");
2963
2964
2965 MInt noMinLevelCells = 0;
2966 MLong noLeafCells = 0;
2967
2968 if(grid_.m_newMinLevel > 0) minLevel = grid_.m_newMinLevel;
2969
2970 std::vector<std::pair<MLong, MInt>> minLevelCells;
2971 MIntScratchSpace isMinLevelCell(noCellsPar[domainId()], AT_, "isMinLevelCell");
2972 isMinLevelCell.fill(0);
2973 for(MInt i = 0; i < noCells; ++i) {
2974 if(grid_.m_newMinLevel > 0 && changeId2[i] < 0) continue;
2975 MInt cellId = i;
2976 if(grid_.m_newMinLevel > 0) cellId = changeId2[i];
2977
2978 if(a_level(oldId[cellId]) == minLevel) {
2979 if(grid_.m_newMinLevel < 0) {
2980 ASSERT(a_parentId(oldId[cellId]) == -1, "");
2981 }
2982 minLevelCells.push_back(std::make_pair(changeId[oldId[cellId]], oldId[cellId]));
2983 isMinLevelCell(cellId) = 1;
2984 noMinLevelCells++;
2985 }
2986 if(a_noChildren(oldId[cellId]) == 0) noLeafCells++;
2987 }
2988 MPI_Allreduce(MPI_IN_PLACE, &noLeafCells, 1, MPI_LONG, MPI_SUM, mpiComm(), AT_, "MPI_IN_PLACE", "noLeafCells");
2989 // sort( minLevelCells.begin(), minLevelCells.end() );
2990
2991 MIntScratchSpace noMinLevelCellsPerDomain(noDomains(), AT_, "noMinLevelCellsPerDomain");
2992 MPI_Allgather(&noMinLevelCells, 1, MPI_INT, noMinLevelCellsPerDomain.getPointer(), 1, MPI_INT, mpiComm(), AT_,
2993 "noMinLevelCells", "noMinLevelCellsPerDomain.getPointer()");
2994 MLong minLevelCellOffset = 0;
2995 for(MInt d = 0; d < domainId(); d++) {
2996 minLevelCellOffset += (MLong)noMinLevelCellsPerDomain[d];
2997 }
2998 MLong noTotalMinLevelCells = 0;
2999 for(MInt d = 0; d < noDomains(); d++) {
3000 noTotalMinLevelCells += (MLong)noMinLevelCellsPerDomain[d];
3001 }
3002
3003 std::set<MLong> partitionLevelAncestorIds;
3004 MInt partitionLevelShift = 0;
3005 MInt maxPartitionLevelShift = 0;
3006 for(MInt i = 0; i < partitionCellsFilteredSizePar[domainId()]; ++i) {
3007 MInt levelDiff = a_level(oldId[partitionCellsFiltered[i] - allOffset[domainId()]]) - minLevel;
3008 ASSERT(levelDiff == partitionCellLevelDiff[i], "");
3009 partitionLevelShift = mMax(levelDiff, partitionLevelShift);
3010 }
3011 if(partitionLevelShift) {
3012 for(MInt i = 0; i < partitionCellsFilteredSizePar[domainId()]; ++i) {
3013 MInt cellId = oldId[partitionCellsFiltered[i] - allOffset[domainId()]];
3014 MInt parentId = a_parentId(cellId);
3015 while(parentId > -1 && a_level(cellId) >= grid_.m_newMinLevel) {
3016 if(!a_hasProperty(input_cells, parentId, Cell::IsHalo)) {
3017 partitionLevelAncestorIds.insert(changeId[parentId]);
3018 parentId = a_parentId(parentId);
3019 } else {
3020 parentId = -1;
3021 }
3022 }
3023 }
3024 }
3025 MLong totalNoPartitionLevelAncestors = 0;
3026 MLong localPartitionLevelAncestorCount = (signed)partitionLevelAncestorIds.size();
3027 MPI_Allreduce(&localPartitionLevelAncestorCount, &totalNoPartitionLevelAncestors, 1, MPI_LONG, MPI_SUM, mpiComm(),
3028 AT_, "localPartitionLevelAncestorCount", "totalNoPartitionLevelAncestors");
3029 MPI_Allreduce(&partitionLevelShift, &maxPartitionLevelShift, 1, MPI_INT, MPI_MAX, mpiComm(), AT_,
3030 "partitionLevelShift", "maxPartitionLevelShift");
3031
3032
3033 MLong maxNoOffsprings = 0;
3034 MFloat maxNoCPUs = F0;
3035 MFloat totalWorkload = F0;
3036 MFloat maxWorkload = F0;
3037 MLong partitionCellOffspringThreshold = (MLong)m_partitionCellOffspringThreshold;
3038 for(MInt i = 0; i < partitionCellsFilteredSizePar[domainId()]; ++i) {
3039 maxNoOffsprings =
3040 mMax((MLong)a_noOffsprings(input_cells, oldId[partitionCellsFiltered[i] - allOffset[domainId()]]),
3041 maxNoOffsprings);
3042 totalWorkload += a_workload(input_cells, oldId[partitionCellsFiltered[i] - allOffset[domainId()]]);
3043 maxWorkload =
3044 mMax(maxWorkload, a_workload(input_cells, oldId[partitionCellsFiltered[i] - allOffset[domainId()]]));
3045 }
3046 MPI_Allreduce(MPI_IN_PLACE, &maxNoOffsprings, 1, MPI_LONG, MPI_MAX, mpiComm(), AT_, "MPI_IN_PLACE",
3047 "maxNoOffsprings");
3048 MPI_Allreduce(MPI_IN_PLACE, &maxWorkload, 1, MPI_DOUBLE, MPI_MAX, mpiComm(), AT_, "MPI_IN_PLACE", "maxWorkload");
3049 MPI_Allreduce(MPI_IN_PLACE, &totalWorkload, 1, MPI_DOUBLE, MPI_SUM, mpiComm(), AT_, "MPI_IN_PLACE",
3050 "totalWorkload");
3051 // maxNoCPUs = noCellsParMax / maxNoOffsprings;
3052 maxNoCPUs = totalWorkload / maxWorkload;
3053 MFloat avgWorkload = totalWorkload / ((MFloat)partitionCellsFilteredSizeParMax);
3054 MFloat avgOffspring = ((MFloat)noCellsParMax) / ((MFloat)partitionCellsFilteredSizeParMax);
3055
3056 MInt noSolvers = m_tree.noSolvers();
3057
3058 if(g_multiSolverGrid) {
3059 MLong solverCount;
3060 for(MInt b = 0; b < noSolvers; b++) {
3061 solverCount = 0;
3062 for(MInt i = 0; i < m_noInternalCells; i++) {
3063 if(a_level(i) < grid_.m_newMinLevel) continue;
3064 if(m_tree.solver(i, b) == true) {
3065 solverCount++;
3066 }
3067 }
3068 MPI_Allreduce(MPI_IN_PLACE, &solverCount, 1, MPI_LONG, MPI_SUM, mpiComm(), AT_, "MPI_IN_PLACE", "solverCount");
3069 grid.setAttributes(&solverCount, "noCells_" + std::to_string(b), 1);
3070 }
3071 }
3072
3073 MInt nodims = nDim;
3074 grid.setAttributes(&nodims, "nDim", 1);
3075 grid.setAttributes(&noSolvers, "noSolvers", 1);
3076 grid.setAttributes(&globalTimeStep, "globalTimeStep", 1);
3077 grid.setAttributes(&noCellsParMax, "noCells", 1);
3078 grid.setAttributes(&noLeafCells, "noLeafCells", 1);
3079 grid.setAttributes(&noTotalMinLevelCells, "noMinLevelCells", 1);
3080 grid.setAttributes(&partitionCellsFilteredSizeParMax, "noPartitionCells", 1);
3081 grid.setAttributes(&totalNoPartitionLevelAncestors, "noPartitionLevelAncestors", 1);
3082 grid.setAttributes(&minLevel, "minLevel", 1);
3083 grid.setAttributes(&maxLevel, "maxLevel", 1);
3084 grid.setAttributes(&m_maxUniformRefinementLevel, "maxUniformRefinementLevel", 1);
3085 grid.setAttributes(&maxPartitionLevelShift, "maxPartitionLevelShift", 1);
3086 grid.setAttributes(&m_lengthLevel0, "lengthLevel0", 1);
3087 grid.setAttributes(&m_centerOfGravity[0], "centerOfGravity", nDim);
3088 grid.setAttributes(&m_boundingBox[0], "boundingBox", 2 * nDim);
3089
3090 // Add additional multisolver information if grid is reordered by a different Hilbert curve
3091 if(grid_.m_hasMultiSolverBoundingBox) {
3092 grid.setAttributes(&grid_.m_targetGridLengthLevel0, "multiSolverLengthLevel0", 1);
3093 grid.setAttributes(&grid_.m_targetGridMinLevel, "multiSolverMinLevel", 1);
3094 grid.setAttributes(&(grid_.m_targetGridCenterOfGravity[0]), "multiSolverCenterOfGravity", nDim);
3095 grid.setAttributes(&(grid_.m_targetGridBoundingBox[0]), "multiSolverBoundingBox", 2 * nDim);
3096 }
3097
3098 grid.setAttributes(&m_reductionFactor, "reductionFactor", 1);
3099 grid.setAttributes(&m_decisiveDirection, "decisiveDirection", 1);
3100 grid.setAttributes(&totalWorkload, "totalWorkload", 1);
3101 grid.setAttributes(&maxWorkload, "partitionCellMaxWorkload", 1);
3102 grid.setAttributes(&avgWorkload, "partitionCellAverageWorkload", 1);
3103 grid.setAttributes(&maxNoOffsprings, "partitionCellMaxNoOffspring", 1);
3104 grid.setAttributes(&avgOffspring, "partitionCellAverageNoOffspring", 1);
3105 grid.setAttributes(&m_partitionCellWorkloadThreshold, "partitionCellWorkloadThreshold", 1);
3106 grid.setAttributes(&partitionCellOffspringThreshold, "partitionCellOffspringThreshold", 1);
3107 grid.setAttributes(&maxNoCPUs, "maxNoBalancedCPUs", 1);
3108 if(m_32BitOffset > 0) {
3109 grid.setAttributes(&m_32BitOffset, "bitOffset", 1);
3110 }
3111
3112 grid.defineArray(PIO_LONG, "partitionCellsGlobalId", partitionCellsFilteredSizeParMax);
3113 grid.defineArray(PIO_FLOAT, "partitionCellsWorkload", partitionCellsFilteredSizeParMax);
3114
3115 grid.defineArray(PIO_LONG, "minLevelCellsTreeId", noTotalMinLevelCells);
3116 grid.defineArray(PIO_LONG, "minLevelCellsNghbrIds", 2 * nDim * noTotalMinLevelCells);
3117
3118 grid.defineArray(PIO_UCHAR, "cellInfo", noCellsParMax);
3119 if(m_tree.noSolvers() > 1 || g_multiSolverGrid) {
3120 grid.defineArray(PIO_UCHAR, "solver", noCellsParMax);
3121 }
3122
3123 m_log << "header definition finished" << std::endl;
3124
3125 // Writing Data.
3126
3127 // settings Offset to write partitionCells data.
3128 grid.setOffset(partitionCellsFilteredSizePar[domainId()], partitionOffset[domainId()]);
3129
3130 // partitionCellsId
3131 grid.writeArray(&partitionCellsFiltered[0], "partitionCellsGlobalId");
3132
3133 // partitionCellsWorkLoad.
3134 {
3135 MFloatScratchSpace tmp_partitionCellsWorkLoad(partitionCellsFilteredSizePar[domainId()], AT_,
3136 "tmp_partitionCellsWorkLoad");
3137 for(MInt i = 0; i < partitionCellsFilteredSizePar[domainId()]; ++i) {
3138 tmp_partitionCellsWorkLoad[i] =
3139 a_workload(input_cells, oldId[partitionCellsFiltered[i] - allOffset[domainId()]]);
3140 }
3141 grid.writeArray(tmp_partitionCellsWorkLoad.data(), "partitionCellsWorkload");
3142 }
3143
3144
3145 // partitionCellsNghbrIds.
3146 {
3147 grid.setOffset(m_noDirs * noMinLevelCells, m_noDirs * minLevelCellOffset);
3148 MLongScratchSpace tmp_nghbrIds(noMinLevelCells, m_noDirs, AT_, "tmp_nghbrIds");
3149 for(MInt i = 0; i < (signed)minLevelCells.size(); i++) {
3150 for(MInt j = 0; j < m_noDirs; j++) {
3151 if(a_neighborId(minLevelCells[i].second, j) > -1) {
3152 tmp_nghbrIds(i, j) = changeId[a_neighborId(minLevelCells[i].second, j)];
3153 } else {
3154 tmp_nghbrIds(i, j) = -1;
3155 }
3156 }
3157 }
3158 grid.writeArray(tmp_nghbrIds.data(), "minLevelCellsNghbrIds");
3159 }
3160
3161 // partitionCellsCoordinates.
3162 {
3163 grid.setOffset(noMinLevelCells, minLevelCellOffset);
3164 MLongScratchSpace tmp_treeId(noMinLevelCells, AT_, "tmp_treeId");
3165 for(MUint i = 0; i < minLevelCells.size(); i++) {
3166 maia::grid::hilbert::coordinatesToTreeId<nDim>(
3167 tmp_treeId[i], &a_coordinate(minLevelCells[i].second, 0), (MLong)grid_.m_targetGridMinLevel,
3168 &(grid_.m_targetGridCenterOfGravity[0]), grid_.m_targetGridLengthLevel0);
3169 }
3170 grid.writeArray(tmp_treeId.data(), "minLevelCellsTreeId");
3171 }
3172
3173 // cellInfo
3174 {
3175 ScratchSpace<MUchar> tmp_cellInfo(noCellsPar[domainId()], AT_, "tmp_cellInfo");
3176 MInt cellCount = 0;
3177 for(MInt i = 0; i < noCells; ++i) {
3178 MInt cellId = i;
3179 if(grid_.m_newMinLevel > 0) cellId = changeId2[i];
3180 if(a_level(oldId[cellId]) < grid_.m_newMinLevel) continue;
3181 MUint noChilds = (MUint)a_noChildren(oldId[cellId]);
3182 MUint isMinLevel = (MUint)isMinLevelCell(cellId);
3183 MUint position = 0;
3184 MInt parentId = a_parentId(oldId[cellId]);
3185 if(parentId > -1 && a_level(oldId[cellId]) > grid_.m_newMinLevel) {
3186 for(MUint j = 0; j < (unsigned)m_maxNoChilds; j++) {
3187 if(a_childId(parentId, j) == oldId[cellId]) position = j;
3188 }
3189 }
3190 MUint tmpBit = noChilds | (position << 4) | (isMinLevel << 7);
3191 tmp_cellInfo[cellCount] = static_cast<MUchar>(tmpBit);
3192 cellCount++;
3193 }
3194 grid.setOffset(noCellsPar[domainId()], allOffset[domainId()] - m_32BitOffset);
3195 grid.writeArray(tmp_cellInfo.data(), "cellInfo");
3196 }
3197
3198
3199 // solver
3200 if(m_tree.noSolvers() > 1 || g_multiSolverGrid) {
3201 ScratchSpace<MUchar> tmptmp(m_tree.size(), AT_, "tmptmp");
3202 for(MInt i = 0; i < noCells; ++i) {
3203 MInt cellId = i;
3204 if(grid_.m_newMinLevel > 0) cellId = changeId2[i];
3205 if(a_level(oldId[cellId]) < grid_.m_newMinLevel) continue;
3206 MUint tmpBit = 0;
3207 for(MInt solver = 0; solver < m_tree.noSolvers(); solver++) {
3208 if(m_tree.solver(oldId[cellId], solver)) {
3209 tmpBit |= (1 << solver);
3210 }
3211 }
3212 tmptmp[cellId] = static_cast<MUchar>(tmpBit);
3213 }
3214 grid.setOffset(noCellsPar[domainId()], allOffset[domainId()] - m_32BitOffset);
3215 grid.writeArray(tmptmp.data(), "solver");
3216 }
3217 }
3218
3219
3220 // -----------------------------------------------------------------------------
3221};
3222
3223} // namespace grid
3224} // namespace maia
3225
3226//#if defined(MAIA_GCC_COMPILER)
3227//#pragma GCC diagnostic pop
3228//#endif
3229
3230#endif // CARTESIANGRIDIO_H
void mAlloc(T *&a, const MLong N, const MString &objectName, MString function)
allocates memory for one-dimensional array 'a' of size N
Definition: alloc.h:173
MBool mDeallocate(T *&a)
deallocates the memory previously allocated for element 'a'
Definition: alloc.h:544
static size_t getAvailableMemory()
Returns the amount of available memory in scratch.
Definition: scratch.h:148
This class is a ScratchSpace.
Definition: scratch.h:758
size_type size() const
Definition: scratch.h:302
pointer data()
Definition: scratch.h:289
T * getPointer() const
Deprecated: use begin() instead!
Definition: scratch.h:316
void fill(T val)
fill the scratch with a given value
Definition: scratch.h:311
pointer p
Deprecated: use [] instead!
Definition: scratch.h:315
iterator end()
Definition: scratch.h:281
iterator begin()
Definition: scratch.h:273
void propagateNeighborsFromMinLevelToMaxLevel()
Given neighbor information on the minLevel, create neighbor information on all higher levels.
static constexpr MInt m_noDirs
maia::grid::cell::BitsetType::reference a_hasProperty(const MInt cellId, const Cell p)
MInt & m_maxUniformRefinementLevel
MLong & a_neighborId(const MInt cellId, const MInt dir)
std::vector< MLong > & m_partitionLevelAncestorChildIds
void loadDonorGridPartition(const MLong *const partitionCellsId, const MInt noPartitionCells)
MLong & m_noPartitionCellsGlobal
MLong & m_noCellsGlobal
MInt a_noChildren(const MInt cellId)
MLong generateHilbertIndex(const MInt cellId, const MInt minLevel)
std::vector< MInt > & m_partitionLevelAncestorNghbrDomains
MInt createTreeOrderingOfIds(Collector< CELLTYPE > *input_cells, MInt noInternalCells, MInt *oldId, MLong *newId, MInt *minLevelCells_id, MLong *minLevelCells_newId, MInt minLevelCellsCnt, MLong *tmp_id, std::vector< std::vector< MInt > > &partitionLevelAncestorWindowCells, std::vector< std::vector< MInt > > &partitionLevelAncestorHaloCells)
Create a tree ordering of Ids.
static void save(Grid &g, const MChar *fileName, Collector< CELLTYPE > *cpu_cells, const std::vector< std::vector< MInt > > &cpu_haloCells, const std::vector< std::vector< MInt > > &cpu_windowCells, const std::vector< std::vector< MInt > > &cpu_azimuthalHaloCells, const std::vector< MInt > &cpu_azimuthalUnmappedHaloCells, const std::vector< std::vector< MInt > > &cpu_azimuthalWindowCells, MInt *const recalcIdTree)
void queryGlobalData(const MInt noCellsToQuery, const MLong *const queryGlobalIds, MLong *const receiveData, const MLong *const dataSource, const MInt dataWidth)
Generic communication of data given by dataSource, matched by queryGlobalIds, stored in receiveData.
void calculateNoOffspringsAndWorkload(Collector< CELLTYPE > *input_cells, MInt input_noCells, const std::vector< std::vector< MInt > > &cpu_haloCells, const std::vector< std::vector< MInt > > &cpu_windowCells, const std::vector< std::vector< MInt > > &partitionLevelAncestorWindowCells, const std::vector< std::vector< MInt > > &partitionLevelAncestorHaloCells)
Caluclate the number of offsprings and the workload for each cell.
MInt traverseAndFlagSubtree(const MUint &cellId, const MUchar *const cellInfo, const MInt N, MInt *const flag)
Recursively flag subtree cells from partition level to the leaf level.
MLong(& m_localPartitionCellOffsets)[3]
std::vector< std::vector< MInt > > & m_partitionLevelAncestorHaloCells
MLong & m_noPartitionLevelAncestorsGlobal
void writeParallelGridFile(const MChar *fileName, Collector< CELLTYPE > *input_cells, MInt *partitionCellsFilteredSizePar, MInt *noCellsPar, MInt *oldId, MLong *changeId, const std::vector< MLong > &partitionCellsFiltered, MInt *partitionCellLevelDiff, MInt *changeId2)
Save a grid file.
MInt correctDomainOffsetsAtPartitionLevelShifts(std::vector< MUchar > &cellInfo)
Check for partition level shifts and in this case make sure the first partition level ancestors are o...
MInt a_hasNeighbor(const MInt cellId, const MInt dir) const
MFloat(& m_centerOfGravity)[3]
static constexpr MInt m_maxNoChilds
MInt a_noCells(Collector< U > *NotUsed(cells_))
static void calculateNoOffspringsAndWorkload(Grid &g, Collector< CELLTYPE > *cpu_cells, MInt cpu_noCells, const std::vector< std::vector< MInt > > &cpu_haloCells, const std::vector< std::vector< MInt > > &cpu_windowCells, const std::vector< std::vector< MInt > > &partitionLevelAncestorWindowCells, const std::vector< std::vector< MInt > > &partitionLevelAncestorHaloCells)
MFloat & a_coordinate(const MInt cellId, const MInt dir)
MFloat cellLengthAtLevel(const MInt level) const
void saveGrid(const MChar *fileName, Collector< CELLTYPE > *cpu_cells, const std::vector< std::vector< MInt > > &cpu_haloCells, const std::vector< std::vector< MInt > > &cpu_windowCells, const std::vector< std::vector< MInt > > &cpu_azimuthalHaloCells, const std::vector< MInt > &cpu_azimuthalUnmappedHaloCells, const std::vector< std::vector< MInt > > &cpu_azimuthalWindowCells, MInt *const recalcIdTree)
Save the grid.
MFloat(& m_boundingBox)[6]
std::map< MLong, MInt > & m_globalToLocalId
MBool & m_loadGridPartition
MInt & m_noMinLevelCellsGlobal
static void propagateNeighborsFromMinLevelToMaxLevel(Grid &g)
MFloat a_weight(const MInt cellId)
MInt findNeighborDomainId(MLong globalId)
void collectDistributedGroupData(const MInt noLocalData, const MLong localDataOffset, const MLong *const intData, const MFloat *const fltData, const MInt noGroups, const MLong *const groupOffsets, const MInt noRanksPerGroup, const MLong noGlobalData, const MInt noRecvData, MLong *const recvInt, MFloat *const recvFlt, MPI_Comm comm, MInt rank)
Collect distributed data located in intData and fltData on the master rank (0) of each communicator/g...
typename Grid::Tree Tree
std::vector< std::vector< MInt > > & m_partitionLevelAncestorWindowCells
MInt & a_noOffsprings(Collector< U > *NotUsed(cells_), const MInt cellId)
void sortAfterHilbertIds(MLong *array2Sort, MInt *followUp1, MInt cell2SortCnt)
Sorting after hilbert id.
static void communicatePartitionLevelAncestorData(Grid &g, const MInt noLocalMinLevelCells, const MInt *const localMinLevelCells, const MInt *const localMinLevelId, const MLong *const minLevelCellsTreeId, const MUchar *const cellInfo)
MInt & m_noHaloPartitionLevelAncestors
void communicatePartitionLevelAncestorData(const MInt noLocalMinLevelCells, const MInt *const localMinLevelCells, const MInt *const localMinLevelId, const MLong *const minLevelCellsTreeId, const MUchar *const cellInfo)
The subtrees between the local minLevel and the partitionLevel is gathered on the domain which holds ...
static void load(Grid &g, MString const &fileName)
MInt & m_maxPartitionLevelShift
MLong *& m_localPartitionCellGlobalIds
typename Grid::MInt_dyn_array MInt_dyn_array
MPI_Comm mpiComm() const
MFloat & m_lengthLevel0
void traverseAndSetupSubtree(const MInt &cellId, const MUchar *const cellInfo)
Recursively setup subtree connection from partition level to the leaf level.
std::vector< MInt > & m_partitionLevelAncestorIds
maia::grid::cell::BitsetType::reference a_hasProperty(Collector< U > *NotUsed(cells_), const MInt cellId, const Cell p)
MInt findIndex(ITERATOR first, ITERATOR last, const U &val)
void loadGrid(const MString &fileName)
Load a grid file.
MInt & a_level(const MInt cellId)
MLong & a_parentId(const MInt cellId)
void resetCell(const MInt &cellId)
MInt noDomains() const
MLong & a_childId(const MInt cellId, const MInt pos)
MInt & m_decisiveDirection
MLong & m_32BitOffset
MLong *& m_domainOffsets
MInt solverId() const
void domainPartitioningParallel(const MInt tmpCount, const MLong tmpOffset, const MFloat *const partitionCellsWorkload, const MLong *const partitionCellsGlobalId, const MFloat totalWorkload, MLong *const partitionCellOffsets, MLong *const globalIdOffsets, const MBool computeOnlyPartition=false)
Parallel domain partitioning.
MInt *& m_localPartitionCellLocalIds
MInt & m_noInternalCells
static constexpr MInt nDim
static void partitionParallel(Grid &g, const MInt tmpCount, const MLong tmpOffset, const MFloat *const partitionCellsWorkload, const MLong *const partitionCellsGlobalId, const MFloat totalWorkload, MLong *const partitionCellOffsets, MLong *const globalIdOffsets, const MBool computeOnlyPartition=false)
MInt & m_noPartitionCells
std::vector< MInt > & m_nghbrDomains
MInt & m_partitionCellOffspringThreshold
MFloat cellLengthAtCell(const MInt cellId) const
MInt & a_noOffsprings(const MInt cellId)
void traverseAndSetupTruncatedSubtree(MInt &counter, const std::vector< std::tuple< MLong, MUchar, MInt > > &collectData, MLong *const collectParentId, MInt *const collectLevel, MInt *const collectNoChildren, MLong *const collectChildIds, MInt *const collectNoOffspring)
Recursively setup truncated subtree between minLevel and partitionLevel (used to setup partition leve...
MLong & a_globalId(const MInt cellId)
MInt noNeighborDomains() const
MInt noAzimuthalNeighborDomains() const
MInt domainId() const
MInt & m_noPartitionLevelAncestors
MFloat & m_partitionCellWorkloadThreshold
typename Tree::Cell Cell
MInt globalIdToLocalId(const MLong globalId)
MFloat & a_workload(Collector< U > *NotUsed(cells_), const MInt cellId)
MFloat & m_reductionFactor
void checkMultiSolverGridExtents(const MInt nDim, const MFloat *centerOfGravity, const MFloat lengthLevel0, const MInt minLevel, const MFloat *targetGridCenterOfGravity, const MFloat targetGridLengthLevel0, const MInt targetGridMinLevel)
Checks if the given grid extents and cell sizes match when creating a multisolver grid and correspond...
Definition: functions.cpp:112
void mTerm(const MInt errorCode, const MString &location, const MString &message)
Definition: functions.cpp:29
const MString const MString & message
Definition: functions.h:37
MBool approx(const T &, const U &, const T)
Definition: functions.h:272
constexpr T mMin(const T &x, const T &y)
Definition: functions.h:90
constexpr T mMax(const T &x, const T &y)
Definition: functions.h:94
MFloat wallTime()
Definition: functions.h:80
MInt globalTimeStep
MBool g_multiSolverGrid
InfoOutFile m_log
std::ostream cerr0
constexpr MLong IPOW2(MInt x)
constexpr MFloat FPOW2(MInt x)
int32_t MInt
Definition: maiatypes.h:62
uint32_t MUint
Definition: maiatypes.h:63
unsigned char MUchar
Definition: maiatypes.h:57
std::basic_string< char > MString
Definition: maiatypes.h:55
MInt noSolvers
Definition: maiatypes.h:73
double MFloat
Definition: maiatypes.h:52
int64_t MLong
Definition: maiatypes.h:64
bool MBool
Definition: maiatypes.h:58
char MChar
Definition: maiatypes.h:56
int MPI_Recv(void *buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Status *status, const MString &name, const MString &varname)
same as MPI_Recv
int MPI_Isend(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request, const MString &name, const MString &varname)
same as MPI_Isend
int MPI_Allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, MPI_Comm comm, const MString &name, const MString &sndvarname, const MString &rcvvarname)
same as MPI_Allgatherv
int MPI_Irecv(void *buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Request *request, const MString &name, const MString &varname)
same as MPI_Irecv
int MPI_Issend(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request, const MString &name, const MString &varname)
same as MPI_Issend
int MPI_Wait(MPI_Request *request, MPI_Status *status, const MString &name)
same as MPI_Wait
int MPI_Exscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, const MString &name, const MString &sndvarname, const MString &rcvvarname)
same as MPI_Exscan
int MPI_Waitall(int count, MPI_Request *request, MPI_Status *status, const MString &name)
same as MPI_Waitall
int MPI_Allreduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, const MString &name, const MString &sndvarname, const MString &rcvvarname)
same as MPI_Allreduce
int MPI_Alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm, const MString &name, const MString &sndvarname, const MString &rcvvarname)
same as MPI_Alltoall
int MPI_Iprobe(int source, int tag, MPI_Comm comm, int *flag, MPI_Status *status, const MString &name)
Iprobe MPI to get status without actually receiving the message.
int MPI_Bcast(void *buffer, int count, MPI_Datatype datatype, int root, MPI_Comm comm, const MString &name, const MString &varname)
same as MPI_Bcast
int MPI_Allgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm, const MString &name, const MString &sndvarname, const MString &rcvvarname)
same as MPI_Allgather
int MPI_Send(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, const MString &name, const MString &varname)
same as MPI_Send
void partitionParallelSplit(const WeightType *const localWeights, const IdType noLocalWeights, const IdType localWeightOffset, const IdType noDomains, const IdType domainId, const MPI_Comm mpiComm, IdType *const offsets)
Definition: partition.h:325
WeightType optimalPartitioningSerial(const WeightType *const weights, const IdType noWeights, const IdType noPartitions, IdType *const offsets)
Serial algorithm to find the optimal (not ideal) partitioning with given workloads based on a probing...
Definition: partition.h:61
void heuristicPartitioningParallel(const WeightType *const localWeights, const IdType noLocalWeights, const MLong localOffset, const MLong noGlobalWeights, const WeightType totalWorkload, const IdType noGlobalPartitions, const IdType globalPartitionId, const IdType noGroups, const MPI_Comm comm, MPI_Comm &commGroup, MPI_Comm &commMasters, IdType &groupId, IdType &groupLocalId, MLong *const groupOffsets)
Fully parallel partitioning into <noGroups> partitions based on a heuristical approach.
Definition: partition.h:225
void exchangeData(const MInt noNghbrDomains, const MInt *const nghbrDomains, const MInt *const noHaloCells, const MInt **const, const MInt *const noWindowCells, const MInt **const windowCells, const MPI_Comm comm, const U *const data, U *const haloBuffer, const MInt noDat=1)
Generic exchange of data.
Definition: mpiexchange.h:295
void reverseExchangeData(const MInt noNghbrDomains, const MInt *const nghbrDomains, const MInt *const noHaloCells, const MInt **const haloCells, const MInt *const noWindowCells, const MInt **const, const MPI_Comm comm, const U *const data, U *const windowBuffer, const MInt noDat=1)
Generic reverse exchange of data.
Definition: mpiexchange.h:400
Namespace for auxiliary functions/classes.
PARALLELIO_DEFAULT_BACKEND ParallelIo
Definition: parallelio.h:292
MFloat dist(const Point< DIM > &p, const Point< DIM > &q)
Definition: pointbox.h:54
void logDuration_(const MFloat timeStart, const MString module, const MString comment, const MPI_Comm comm, const MInt domainId, const MInt noDomains)
Output the min/max/average duration of a code section over the ranks in a communicator Note: only use...
Definition: timer.cpp:171