#ifndef CARTESIANGRIDIO_H
#define CARTESIANGRIDIO_H
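// Grid I/O for the Cartesian grid: loading a grid file, computing or loading a parallel
// partition, exchanging partition level ancestor data, and saving/writing a parallel grid file.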
template <typename Grid>

  using Tree = typename Grid::Tree;
  using Cell = typename Tree::Cell;

    grid_.loadDonorGridPartition(partitionCellsId, noPartitionCells);

    return grid_.generateHilbertIndex(cellId, minLevel);

    return grid_.a_hasProperty(cellId, p);

    return grid_.a_hasProperty(cellId, p);

    return grid_.a_noOffsprings(cellId);

    return grid_.a_workload(cellId);
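  // The accessors above are thin wrappers that simply forward to the corresponding grid/tree
  // members (grid_.a_*), so the I/O code below can use a uniform interface.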
  template <class CELLTYPE>
                   const std::vector<std::vector<MInt>>& cpu_haloCells,
                   const std::vector<std::vector<MInt>>& cpu_windowCells,
                   const std::vector<std::vector<MInt>>& cpu_azimuthalHaloCells,
                   const std::vector<MInt>& cpu_azimuthalUnmappedHaloCells,
                   const std::vector<std::vector<MInt>>& cpu_azimuthalWindowCells, MInt* const recalcIdTree) {
    IO(g).saveGrid(fileName, cpu_cells, cpu_haloCells, cpu_windowCells, cpu_azimuthalHaloCells,
                   cpu_azimuthalUnmappedHaloCells, cpu_azimuthalWindowCells, recalcIdTree);

  template <class CELLTYPE>
                   const std::vector<std::vector<MInt>>& cpu_haloCells,
                   const std::vector<std::vector<MInt>>& cpu_windowCells,
                   const std::vector<std::vector<MInt>>& partitionLevelAncestorWindowCells,
                   const std::vector<std::vector<MInt>>& partitionLevelAncestorHaloCells) {
                   partitionLevelAncestorWindowCells, partitionLevelAncestorHaloCells);

                   const MFloat* const partitionCellsWorkload, const MLong* const partitionCellsGlobalId,
                   const MFloat totalWorkload, MLong* const partitionCellOffsets,
                   MLong* const globalIdOffsets, const MBool computeOnlyPartition = false) {
                   partitionCellOffsets, globalIdOffsets, computeOnlyPartition);

                   const MInt* const localMinLevelCells,
                   const MInt* const localMinLevelId,
                   const MLong* const minLevelCellsTreeId,
                   const MUchar* const cellInfo) {
                   minLevelCellsTreeId, cellInfo);
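  // The partitioning routine below distributes the global list of partition cells over all ranks
  // with approximately equal workload: the ranks are split into coarse partition groups, each
  // group master receives its group's partition cell ids and workloads, partitions them locally,
  // and the resulting partitionCellOffsets/globalIdOffsets are gathered across the masters and
  // then broadcast within each group.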
                   const MFloat* const partitionCellsWorkload, const MLong* const partitionCellsGlobalId,
                   const MFloat totalWorkload, MLong* const partitionCellOffsets,
                   MLong* const globalIdOffsets, const MBool computeOnlyPartition = false) {
    const MFloat safetyFac = 1.5;
    const MInt maxLocalCells =
    const MInt maxNoPartitionCellsPerGroup =
    const MInt noGroups =
        intermediateNoGroups_));
    MPI_Comm mpiCommGroup = mpiComm();
    MPI_Comm mpiCommMasters = MPI_COMM_SELF;
        domainId(), noGroups, mpiComm(), mpiCommGroup, mpiCommMasters, groupId, groupLocalId, &groupOffsets[0]);
    MLong groupPartitionOffset0 = groupOffsets[groupId];
    MLong groupPartitionOffset1 = groupOffsets[groupId + 1];
    MBool isGroupMaster = (groupLocalId == 0);
    MInt noGroupRanks = 0;
    MPI_Comm_size(mpiCommGroup, &noGroupRanks);
    MInt groupPartitionCellsCnt = isGroupMaster ? groupPartitionOffset1 - groupPartitionOffset0 : 0;
    ASSERT(groupPartitionCellsCnt >= 0, "");
    MFloatScratchSpace partitionCellsWorkloadLocal(mMax(1, groupPartitionCellsCnt), AT_, "partitionCellsWorkloadLocal");
    MLongScratchSpace partitionCellsIdLocal(mMax(1, groupPartitionCellsCnt), AT_, "partitionCellsIdLocal");
        &partitionCellsIdLocal[0], &partitionCellsWorkloadLocal[0], mpiComm(), domainId());
    MLongScratchSpace partitionCellOffsetsLocal(noGroupRanks + 1, AT_, "partitionCellOffsetsLocal");
    MLongScratchSpace globalIdOffsetsLocal(noGroupRanks + 1, AT_, "globalIdOffsetsLocal");
        &partitionCellsWorkloadLocal[0], static_cast<MLong>(groupPartitionCellsCnt), static_cast<MLong>(noGroupRanks),
        &partitionCellOffsetsLocal[0]);
    MFloat maxDeviation = maxWorkloadGroup / idealWorkload;
    MPI_Allreduce(MPI_IN_PLACE, &maxDeviation, 1, MPI_DOUBLE, MPI_MAX, mpiCommMasters, AT_, "MPI_IN_PLACE",
      std::cerr << "global workload imbalance is " << (maxDeviation - 1.0) * 100.0 << "% ";
    m_log << "global workload imbalance is " << (maxDeviation - 1.0) * 100.0 << "% (using " << noGroups
          << " coarse partition groups, each with at least " << noRanksPerGroup << " ranks);"
          << " ideal workload is " << idealWorkload << " per rank." << std::endl;
    for(MLong i = 0; i < noGroupRanks; i++) {
      globalIdOffsetsLocal[i] = partitionCellsIdLocal[partitionCellOffsetsLocal[i]];
      partitionCellOffsetsLocal[i] += groupPartitionOffset0;
    for(MLong i = 0; i < noGroups; i++) {
      recvDispl[i] = i * noRanksPerGroup;
      recvCnt[i] = (i == noGroups - 1) ? noDomains() - i * noRanksPerGroup : noRanksPerGroup;
    MPI_Allgatherv(&globalIdOffsetsLocal[0], noGroupRanks, MPI_LONG, &globalIdOffsets[0], &recvCnt[0], &recvDispl[0],
                   MPI_LONG, mpiCommMasters, AT_, "globalIdOffsetsLocal[0]", "globalIdOffsets[0]");
    MPI_Allgatherv(&partitionCellOffsetsLocal[0], noGroupRanks, MPI_LONG, &partitionCellOffsets[0], &recvCnt[0],
                   &recvDispl[0], MPI_LONG, mpiCommMasters, AT_, "partitionCellOffsetsLocal[0]",
                   "partitionCellOffsets[0]");
    for(MInt i = 0; i < noGroupRanks; i++) {
      partitionCellOffsetsLocal[i] -= groupPartitionOffset0;
    partitionCellOffsets[0] = 0;
    MPI_Bcast(&globalIdOffsets[0], noDomains() + 1, MPI_LONG, 0, mpiCommGroup, AT_, "globalIdOffsets[0]");
    MPI_Bcast(&partitionCellOffsets[0], noDomains() + 1, MPI_LONG, 0, mpiCommGroup, AT_, "partitionCellOffsets[0]");
    if(computeOnlyPartition) {
      for(MInt i = 1; i < noGroupRanks; i++) {
        MLong sendOffset = partitionCellOffsetsLocal[i];
        MLong sendCount = partitionCellOffsetsLocal[i + 1] - partitionCellOffsetsLocal[i];
        MPI_Send(&partitionCellsIdLocal[sendOffset], sendCount, MPI_LONG, i, 66, mpiCommGroup, AT_,
                 "partitionCellsIdLocal[sendOffset]");
      MInt sendCount = partitionCellOffsetsLocal[1] - partitionCellOffsetsLocal[0];
               AT_, "m_localPartitionCellGlobalIds[0]");
      mTerm(1, AT_, "Invalid domain offsets A.");
      cout << "m_domainOffsets[i+1]: " << m_domainOffsets[i + 1] << std::endl;
      mTerm(1, AT_, "Invalid domain offsets B.");

    auto logDuration = [this](const MFloat timeStart, const MString comment) {
    auto logGridInfo = [](const MInt minLevel, const MFloat lengthLevel0, const MFloat* const boundingBox,
          << " * " << message << ":" << endl
          << " - minLevel = " << minLevel << endl
          << " - boundingBox = ";
        msg << setprecision(15) << "[" << boundingBox[i] << ", " << boundingBox[nDim + i] << "]";
      msg << endl << " - centerOfGravity = [";
        msg << setprecision(15) << centerOfGravity[i];
      msg << "]" << endl << " - lengthLevel0 = " << setprecision(15) << lengthLevel0;
      m_log << msg.str() << std::endl;
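    // Grid loading below proceeds in stages: open the file, read the global attributes (nDim,
    // levels, bounding box, multi-solver information), compute or load the parallel partition,
    // read the per-cell arrays ("cellInfo", "solver", min-level cell data), and finally set up
    // the grid connectivity; each stage is timed via logDuration.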
    if(domainId() == 0) std::cerr << "Loading grid file " << fileName << std::endl;
    m_log << "Loading grid file..." << std::endl;
    logDuration(openGridTimeStart, "Open grid file");
    MFloat totalWorkload = F0;
    grid.getAttribute(&gridDim, "nDim");
    if(grid.hasAttribute("maxUniformRefinementLevel"))
    grid.getAttribute(&totalWorkload, "totalWorkload");
    if(grid.hasAttribute("boundingBox")) grid.getAttribute(&m_boundingBox[0], "boundingBox", 2 * nDim);
    if(grid.hasAttribute("reductionFactor")) grid.getAttribute(&m_reductionFactor, "reductionFactor", 1);
    if(grid.hasAttribute("decisiveDirection")) grid.getAttribute(&m_decisiveDirection, "decisiveDirection", 1);
    if(nDim != gridDim) mTerm(1, AT_, "Number of dimensions mismatch with grid file.");
    if(grid_.paraViewPlugin()) {
      TERMM(1, "Error: specified default property maxRfnmntLvl < maxLevel! (" + std::to_string(m_maxRfnmntLvl) + " < "
    if(m_maxUniformRefinementLevel < m_minLevel || m_maxUniformRefinementLevel > m_maxLevel) {
      mTerm(1, AT_, "Warning: maxUniformRefinementLevel invalid.");
    grid_.m_hasMultiSolverBoundingBox = grid.hasAttribute("multiSolverBoundingBox");
    if(grid_.m_hasMultiSolverBoundingBox) {
      std::vector<MFloat> multiSolverBoundingBox(2 * nDim);
      grid.getAttribute(&multiSolverBoundingBox[0], "multiSolverBoundingBox", 2 * nDim);
      std::copy_n(&multiSolverBoundingBox[0], 2 * nDim, &(grid_.m_targetGridBoundingBox[0]));
      std::vector<MFloat> multiSolverCoG(nDim);
      grid.getAttribute(&multiSolverCoG[0], "multiSolverCenterOfGravity", nDim);
      MInt multiSolverMinLevel = -1;
      grid.getAttribute(&multiSolverMinLevel, "multiSolverMinLevel");
      TERMM_IF_COND(multiSolverMinLevel < 0,
                    "ERROR: invalid multiSolverMinLevel " + std::to_string(multiSolverMinLevel));
      MFloat multiSolverLength0 = -1.0;
      grid.getAttribute(&multiSolverLength0, "multiSolverLengthLevel0");
      MFloat lengthLevel0 = 0.0;
        grid_.m_targetGridCenterOfGravity[i] = 0.5 * (multiSolverBoundingBox[nDim + i] + multiSolverBoundingBox[i]);
        TERMM_IF_NOT_COND(approx(grid_.m_targetGridCenterOfGravity[i], multiSolverCoG[i], MFloatEps),
                          "center of gravity mismatch");
            (F1 + F1 / FPOW2(30)) * std::fabs(multiSolverBoundingBox[nDim + i] - multiSolverBoundingBox[i]);
        lengthLevel0 = std::max(lengthLevel0, dist);
      TERMM_IF_NOT_COND(approx(lengthLevel0, multiSolverLength0, MFloatEps),
                        "length level 0 mismatch: " + to_string(lengthLevel0) + " != " + to_string(multiSolverLength0));
      grid_.m_targetGridLengthLevel0 = lengthLevel0;
      grid_.m_targetGridMinLevel = multiSolverMinLevel;
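      // The checks above recompute the multi-solver target grid from its bounding box and compare
      // against the stored attributes; per the code this amounts to
      //   centerOfGravity[i] = 0.5 * (bbox[nDim + i] + bbox[i]);
      //   lengthLevel0       = max_i (F1 + F1 / FPOW2(30)) * fabs(bbox[nDim + i] - bbox[i]);
      // i.e. the level-0 cell length is the largest bounding-box extent, padded by a tiny factor.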
                  &grid_.m_targetGridCenterOfGravity[0], grid_.m_targetGridLengthLevel0,
                  grid_.m_targetGridMinLevel);
      logGridInfo(grid_.m_targetGridMinLevel, grid_.m_targetGridLengthLevel0, &multiSolverBoundingBox[0],
                  &(grid_.m_targetGridCenterOfGravity[0]), "multisolver grid information from grid file");
    if(grid.hasAttribute("boundingBox")) {
    grid.getAttribute(&noSolvers, "noSolvers");
    ASSERT(grid_.m_addSolverToGrid, "");
    if(grid.hasAttribute("bitOffset")) {
    logDuration(attTimeStart, "Read attributes etc");
    if(domainId() == 0) std::cerr << " * partition grid on " << noDomains() << " domains... ";
      grid.setOffset(noDataToRead, 0);
      grid.readArray(partitionCellsGlobalId.data(), "partitionCellsGlobalId");
    } else if(grid_.m_loadPartition) {
      MString gridPartitionFileName = "";
      gridPartitionFileName = Context::getBasicProperty<MString>("partitionFileName", AT_);
      grid_.loadPartitionFile(gridPartitionFileName, &partitionCellOffsets[0]);
                    "globalIdOffsetsLocal", "m_domainOffsets");
    } else if(grid_.m_partitionParallelSplit) {
      MLong offsetPartitionCells = domainId() * noLocalPartitionCells;
      if(domainId() < missingPartitionCells) {
        noLocalPartitionCells++;
        offsetPartitionCells += missingPartitionCells;
      MFloatScratchSpace partitionCellsWorkLoad(noLocalPartitionCells, AT_, "partitionCellsWorkload");
      grid.setOffset(noLocalPartitionCells, offsetPartitionCells);
      grid.readArray(&partitionCellsWorkLoad[0], "partitionCellsWorkload");
                        &partitionCellOffsets[0]);
                    "globalIdOffsetsLocal", "m_domainOffsets");
      const MInt tmpCount =
      MLongScratchSpace partitionCellsGlobalId(tmpCount, AT_, "partitionCellsGlobalId");
      grid.setOffset(tmpCount, tmpOffset);
      grid.readArray(partitionCellsWorkload.data(), "partitionCellsWorkload");
      grid.readArray(partitionCellsGlobalId.data(), "partitionCellsGlobalId");
                        totalWorkload, &partitionCellOffsets[0], &globalIdOffsets[0]);
    if(domainId() == 0) std::cerr << std::endl;
    logDuration(partitionTimeStart, "Partition");
    grid.readArray(cellInfo.data(), "cellInfo");
    logDuration(readCellInfoTimeStart, "Read cell info");
          "Wrong number of partition level ancestors: " + std::to_string(noPartitionLevelAncestorsGlobal)
    logDuration(correctPlsTimeStart, "Correct offsets PLS");
      std::fill(solver.begin(), solver.end(), 1);
      grid.readArray(&solver[0], "solver");
      m_tree.solverFromBits(cellId, solver[cellId]);
    if(grid_.m_addSolverToGrid) {
        if(m_tree.solver(cellId, grid_.m_referenceSolver)) {
          m_tree.solver(cellId, newSolverId) = true;
    logDuration(readSolverInfoTimeStart, "Read solver info");
      std::cerr << " * setup grid connectivity on level " << m_minLevel << std::endl;
      std::cerr << " * setup grid connectivity from level " << m_minLevel << " to " << m_maxLevel << std::endl;
    m_log << " * setup grid connectivity on level " << m_minLevel << std::endl;
    MInt noMinLevelCells = 0;
    std::vector<MInt> localMinLevelCells;
    localMinLevelId.fill(-1);
      MUint tmpBit = static_cast<MUint>(cellInfo[i]);
      MUint isMinLevel = (tmpBit >> 7) & 1;
        localMinLevelId(i) = noMinLevelCells;
        localMinLevelCells.push_back(i);
    MInt minLevelCellOffset = 0;
    MPI_Exscan(&noMinLevelCells, &minLevelCellOffset, 1, MPI_INT, MPI_SUM, mpiComm(), AT_, "noMinLevelCells",
               "minLevelCellOffset");
    grid.setOffset(noMinLevelCells, minLevelCellOffset);
    grid.readArray(minLevelCellsTreeId.data(), "minLevelCellsTreeId");
    grid.readArray(minLevelCellsNghbrIds.data(), "minLevelCellsNghbrIds");
    for(MInt i = 0; i < noMinLevelCells; i++) {
      MInt cellId = localMinLevelCells[i];
                              : minLevelCellsNghbrIds(i, j);
    logDuration(readMinCellInfoTimeStart, "Read min cell info");
    for(MInt i = 0; i < noMinLevelCells; i++) {
      maia::grid::hilbert::treeIdToCoordinates<nDim>(&a_coordinate(localMinLevelCells[i], 0), minLevelCellsTreeId[i],
                                                     static_cast<MLong>(grid_.m_targetGridMinLevel),
                                                     &(grid_.m_targetGridCenterOfGravity[0]),
                                                     grid_.m_targetGridLengthLevel0);
                                   &minLevelCellsTreeId[0], &cellInfo[0]);
      ASSERT(a_hasProperty(i, Cell::IsPartLvlAncestor), "Error: Cell not marked as partition level ancestor.");
    logDuration(exchangePlaTimeStart, "Exchange partition level ancestors");
      TERMM(1, "Wrong level for partition cell: level " + std::to_string(a_level(cellId)) + ", partitionCellGlobalId "
    logDuration(setupSubtreesTimeStart, "Setup local grid subtrees");
    logDuration(neighborsTimeStart, "Propagate neighbors");
    grid_.createGlobalToLocalIdMapping();
    grid_.changeGlobalToLocalIds();
    MBool boundingBoxDiff = false;
    for(MInt i = 0; i < 2 * nDim; i++) {
        boundingBoxDiff = true;
    MBool centerOfGravityDiff = false;
    for(MInt dir = 0; dir < nDim; dir++) {
        centerOfGravityDiff = true;
    if(boundingBoxDiff || centerOfGravityDiff) {
          << "WARNING: the grid information from the grid file have been updated/corrected, "
             "which should only be necessary in case of a converted old grid file with bad "
             "bounding box/center of gravity information (see m_log for the changes)."
                  "updated/corrected grid information with geometry bounding box");
    if(domainId() == 0) std::cerr << "done." << std::endl;
    m_log << "done." << std::endl;
    logDuration(gridTimeStart, "Load grid total");

    constexpr MInt noInternalConnections = (nDim == 2) ? 4 : 12;
    constexpr MInt connectionDirs[12] = {1, 1, 3, 3, 1, 1, 3, 3, 5, 5, 5, 5};
    constexpr MInt childs0[12] = {0, 2, 0, 1, 4, 6, 4, 5, 0, 1, 2, 3};
    constexpr MInt childs1[12] = {1, 3, 2, 3, 5, 7, 6, 7, 4, 5, 6, 7};
    constexpr MInt dirStencil[3][8] = {{0, 1, 0, 1, 0, 1, 0, 1}, {2, 2, 3, 3, 2, 2, 3, 3}, {4, 4, 4, 4, 5, 5, 5, 5}};
    constexpr MInt sideIds[3][8] = {{0, 1, 0, 1, 0, 1, 0, 1}, {0, 0, 1, 1, 0, 0, 1, 1}, {0, 0, 0, 0, 1, 1, 1, 1}};
    constexpr MInt otherSide[2] = {1, 0};
    constexpr MInt revDir[6] = {1, 0, 3, 2, 5, 4};
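    // The tables above drive the neighbor propagation from the min-level to finer levels (cf.
    // propagateNeighborsFromMinLevelToMaxLevel): connectionDirs/childs0/childs1 list, per internal
    // sibling connection, its direction and the two children involved (4 connections in 2D, 12 in
    // 3D); dirStencil/sideIds give each child's face direction and side per spatial axis; and
    // otherSide/revDir flip a side or direction to its opposite.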
    std::map<MLong, MInt> exchangeCells;
      exchangeCells.clear();
      MInt noExchangeCells = 0;
            if(exchangeCells.count(nghbrId) == 0) {
                mTerm(1, AT_, "Neighbor domain not found " + std::to_string(cpu) + " " + std::to_string(nghbrId));
              exchangeCells[nghbrId] = cpu;
      MInt noCellsToQuery = exchangeCells.size();
      for(auto it = exchangeCells.begin(); it != exchangeCells.end(); it++) {
        queryGlobalIds[cnt] = it->first;
          if(childId < 0) continue;
          ASSERT(a_level(childId) == level + 1,
                 "wrong child level cell" + std::to_string(i) + " child" + std::to_string(childId));
            nodeId[j] = sideIds[j][child] * IPOW2(j);
            revNodeId[j] = otherSide[sideIds[j][child]] * IPOW2(j);
            const MInt dir = dirStencil[d][child];
            const MInt childNode = child - nodeId[d] + revNodeId[d];
            MLong childNghbrId = -1;
                childNghbrId = a_childId(nghbrId, childNode);
                if(exchangeCells.count(nghbr0) == 0) {
                  mTerm(1, AT_, "Exchange cell not found.");
                MInt idx = exchangeCells[nghbr0];
                if(queryGlobalIds[idx] != nghbr0) mTerm(1, AT_, "Cell inconsistency.");
                childNghbrId = recvChildIds(idx, childNode);
            if(childNghbrId < 0) {
              if(childNghbrLocalId > -1) {
                  mTerm(1, AT_, "Nghbr inconsistency.");
          for(MInt c = 0; c < noInternalConnections; c++) {
            MInt dir = connectionDirs[c];
                mTerm(1, AT_, "Nghbr inconsistency.");

  template <class ITERATOR, typename U>
    return grid_.findIndex(first, last, val);
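  // The routine below redistributes the global partition-cell ids and workloads to the group
  // master ranks: each rank copies or MPI_Isend's the chunks of its local data that fall into a
  // group's [groupOffsets[g], groupOffsets[g+1]) range, while the masters collect incoming chunks
  // via MPI_Iprobe/MPI_Recv until noRecvData entries have arrived.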
                              const MFloat* const fltData, const MInt noGroups, const MLong* const groupOffsets,
                              const MInt noRanksPerGroup, const MLong noGlobalData, const MInt noRecvData,
                              MLong* const recvInt, MFloat* const recvFlt, MPI_Comm comm, MInt rank) {
    requests.fill(MPI_REQUEST_NULL);
    while(locCnt < noLocalData) {
      while(!(localDataOffset + locCnt >= groupOffsets[group] && localDataOffset + locCnt < groupOffsets[group + 1]))
      ASSERT(group < noGroups, "");
      while(locCnt + sendCnt < noLocalData && localDataOffset + locCnt + sendCnt >= groupOffsets[group]
            && localDataOffset + locCnt + sendCnt < groupOffsets[group + 1])
      MInt locOffset = localDataOffset + locCnt - groupOffsets[group];
      MInt ndom = group * noRanksPerGroup;
        ASSERT(locOffset + sendCnt <= noRecvData, "");
        std::copy(&fltData[locCnt], &fltData[locCnt] + sendCnt, &recvFlt[locOffset]);
        std::copy(&intData[locCnt], &intData[locCnt] + sendCnt, &recvInt[locOffset]);
        recvCount += sendCnt;
        ASSERT(groupOffsets[group] + locOffset + sendCnt <= noGlobalData, "");
        sendBuf(group, 0) = sendCnt;
        sendBuf(group, 1) = locOffset;
        MPI_Isend(&sendBuf(group, 0), 2, MPI_LONG, ndom, 0, comm, &requests[noReqs++], AT_,
#if defined(HOST_Klogin)
        MPI_Isend(const_cast<MLong*>(&intData[locCnt]), sendCnt, MPI_LONG, ndom, 1, comm, &requests[noReqs++], AT_,
                  "const_cast<MLong*>(&intData[locCnt])");
        MPI_Isend(const_cast<MFloat*>(&fltData[locCnt]), sendCnt, MPI_DOUBLE, ndom, 2, comm, &requests[noReqs++], AT_,
                  "const_cast<MFloat*>(&fltData[locCnt])");
        MPI_Isend(&intData[locCnt], sendCnt, MPI_LONG, ndom, 1, comm, &requests[noReqs++], AT_,
        MPI_Isend(&fltData[locCnt], sendCnt, MPI_DOUBLE, ndom, 2, comm, &requests[noReqs++], AT_,
    const MFloat time0 = MPI_Wtime();
    MFloat time1 = MPI_Wtime();
    while(recvCount < noRecvData) {
      MPI_Iprobe(MPI_ANY_SOURCE, 0, comm, &flag, &status);
        MInt source = status.MPI_SOURCE;
        MPI_Recv(recvBuf, 2, MPI_LONG, source, 0, comm, &status, AT_,
        MInt recvSize = recvBuf[0];
        MLong locOffset = recvBuf[1];
        ASSERT(locOffset + recvSize <= noRecvData, "");
        MPI_Irecv(&recvInt[locOffset], recvSize, MPI_LONG, source, 1, comm, &requests[noReqs++], AT_,
                  "recvInt[locOffset]");
        MPI_Irecv(&recvFlt[locOffset], recvSize, MPI_DOUBLE, source, 2, comm, &requests[noReqs++], AT_,
                  "recvFlt[locOffset]");
        recvCount += recvSize;
      if((MInt)((MPI_Wtime() - time0) / 10) > (MInt)((time1 - time0) / 10)) {
        std::cerr << "Rank " << rank << " already waiting " << MPI_Wtime() - time0
                  << " seconds for incoming partition data..." << std::endl;
      time1 = MPI_Wtime();
    if(noReqs) MPI_Waitall(noReqs, &requests[0], MPI_STATUSES_IGNORE, AT_);

    isPartitionLevelAncestor.fill(1);
    MInt noPartitionLevelAncestorsGlobal = 0;
    noPartitionLevelAncestorsLocal -=
        &isPartitionLevelAncestor[0]);
    MPI_Allreduce(&noPartitionLevelAncestorsLocal, &noPartitionLevelAncestorsGlobal, 1, MPI_INT, MPI_SUM, mpiComm(),
                  AT_, "noPartitionLevelAncestorsLocal", "noPartitionLevelAncestorsGlobal");
    if(noPartitionLevelAncestorsGlobal > 0) {
      MInt domainShift = 0;
        while(isPartitionLevelAncestor[lastId]) {
          TERMM(1, "The last domain cannot have a domain shift since there is no following domain.");
              "domainOffsetsDelta.data()");
      MPI_Request req = MPI_REQUEST_NULL;
      MPI_Wait(&req, MPI_STATUS_IGNORE, AT_);
    return noPartitionLevelAncestorsGlobal;
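  // Partition level ancestors (cells flagged Cell::IsPartLvlAncestor) are ancestors of partition
  // cells that sit above the partition level; when present they can shift the domain offsets (see
  // the domainShift handling above). The routine below gathers their tree data (cellInfo, parent,
  // children, offspring counts) from the domains that own the corresponding min-level cells.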
                                   const MInt* const localMinLevelId, const MLong* const minLevelCellsTreeId,
                                   const MUchar* const cellInfo) {
    using namespace std;
    isPartitionCell.fill(0);
      isPartitionCell(cellId) = 1;
    MLong firstMinLevelCell = std::numeric_limits<MLong>::max();
    for(MInt i = 0; i < noLocalMinLevelCells; i++) {
      firstMinLevelCell = mMin(firstMinLevelCell, a_globalId(localMinLevelCells[i]));
                  "firstMinLevelCell", "minLevelCellGlobalIdOffsets.data()");
        minLevelCellGlobalIdOffsets[cpu] = minLevelCellGlobalIdOffsets[cpu + 1];
    std::vector<std::tuple<MLong, MInt, MInt>> partitionLevelAncestorsOnMinLevel;
    std::vector<std::tuple<MLong, MInt, MInt>> partitionLevelAncestorsOther;
    isPartitionLevelAncestor.fill(1);
    MInt noPartitionLevelAncestorsGlobal = 0;
           "Error: tree should only contain internal cells at this moment.");
    noPartitionLevelAncestorsLocal -=
        &isPartitionLevelAncestor[0]);
    MPI_Allreduce(&noPartitionLevelAncestorsLocal, &noPartitionLevelAncestorsGlobal, 1, MPI_INT, MPI_SUM, mpiComm(),
                  AT_, "noPartitionLevelAncestorsLocal", "noPartitionLevelAncestorsGlobal");
      a_hasProperty(i, Cell::IsPartLvlAncestor) = isPartitionLevelAncestor(i);
      if(localMinLevelId[cellId] < 0) {
        isPartitionLevelAncestor[cellId] = 1;
      if(isPartitionLevelAncestor(cellId)) {
        if(localMinLevelId[cellId] > -1) {
          partitionLevelAncestorsOnMinLevel.push_back(std::make_tuple(globalId, cellId, domainId()));
            if(globalId >= minLevelCellGlobalIdOffsets[cpu] && globalId < minLevelCellGlobalIdOffsets[cpu + 1]) {
          TERMM_IF_COND(ndom < 0 || ndom >= noDomains(), "domain for globalId not found " + std::to_string(ndom));
          partitionLevelAncestorsOther.push_back(std::make_tuple(globalId, cellId, ndom));
    for(MUint i = 0; i < partitionLevelAncestorsOther.size(); i++) {
      noQuery[get<2>(partitionLevelAncestorsOther[i])]++;
    queryOffsets(0) = 0;
      queryOffsets(c + 1) = queryOffsets(c) + noQuery[c];
    for(MUint i = 0; i < partitionLevelAncestorsOther.size(); i++) {
      MLong globalId = get<0>(partitionLevelAncestorsOther[i]);
      MInt cellId = get<1>(partitionLevelAncestorsOther[i]);
      MInt cpu = get<2>(partitionLevelAncestorsOther[i]);
      queryGlobalId(queryOffsets(cpu) + noQuery(cpu)) = globalId;
      queryCellInfo(queryOffsets(cpu) + noQuery(cpu)) = cellInfo[cellId];
      queryIsPartitionCell(queryOffsets(cpu) + noQuery(cpu)) = isPartitionCell[cellId];
    MPI_Alltoall(&noQuery[0], 1, MPI_INT, &noCollect[0], 1, MPI_INT, mpiComm(), AT_, "noQuery[0]", "noCollect[0]");
    collectOffsets(0) = 0;
      collectOffsets(c + 1) = collectOffsets(c) + noCollect[c];
    queryReq.fill(MPI_REQUEST_NULL);
      if(noQuery[c] == 0) continue;
      MPI_Issend(&queryGlobalId[queryOffsets[c]], noQuery[c], MPI_LONG, c, 456, mpiComm(), &queryReq[queryCnt++], AT_,
                 "queryGlobalId[queryOffsets[c]]");
      MPI_Issend(&queryCellInfo[queryOffsets[c]], noQuery[c], MPI_UNSIGNED_CHAR, c, 457, mpiComm(),
                 &queryReq[queryCnt++], AT_, "queryCellInfo[queryOffsets[c]]");
      MPI_Issend(&queryIsPartitionCell[queryOffsets[c]], noQuery[c], MPI_INT, c, 458, mpiComm(), &queryReq[queryCnt++],
                 AT_, "queryIsPartitionCell[queryOffsets[c]]");
    MInt collectCnt = 0;
      if(noCollect[c] == 0) continue;
      MPI_Recv(&collectGlobalId[collectOffsets[c]], noCollect[c], MPI_LONG, c, 456, mpiComm(), MPI_STATUS_IGNORE, AT_,
               "collectGlobalId[collectOffsets[c]]");
      MPI_Recv(&collectCellInfo[collectOffsets[c]], noCollect[c], MPI_UNSIGNED_CHAR, c, 457, mpiComm(),
               MPI_STATUS_IGNORE, AT_, "collectCellInfo[collectOffsets[c]]");
      MPI_Recv(&collectIsPartitionCell[collectOffsets[c]], noCollect[c], MPI_INT, c, 458, mpiComm(), MPI_STATUS_IGNORE,
               AT_, "collectIsPartitionCell[collectOffsets[c]]");
    if(queryCnt > 0) MPI_Waitall(queryCnt, &queryReq[0], MPI_STATUSES_IGNORE, AT_);
    std::vector<std::tuple<MLong, MUchar, MInt>> collectData;
    for(MUint i = 0; i < partitionLevelAncestorsOnMinLevel.size(); i++) {
      MLong globalId = get<0>(partitionLevelAncestorsOnMinLevel[i]);
      MInt localId = get<1>(partitionLevelAncestorsOnMinLevel[i]);
      ASSERT(domainId() == get<2>(partitionLevelAncestorsOnMinLevel[i]), "");
      collectData.push_back(std::make_tuple(globalId, cellInfo[localId], isPartitionCell[localId]));
      for(MInt d = 0; d < noCollect[c]; d++) {
        MInt id = collectOffsets[c] + d;
        collectData.push_back(std::make_tuple(collectGlobalId[id], collectCellInfo[id], collectIsPartitionCell[id]));
    sort(collectData.begin(), collectData.end());
    MLongScratchSpace collectMinLevelTreeId(collectData.size(), AT_, "collectMinLevelTreeId");
    MIntScratchSpace collectNoChildren(collectData.size(), AT_, "collectNoChildren");
    MIntScratchSpace collectNoOffspring(collectData.size(), AT_, "collectNoOffspring");
    std::vector<MInt> newCells;
    std::map<MLong, MInt> collectMap;
    collectMinLevelTreeId.fill(-1);
    collectParentId.fill(-1);
    collectLevel.fill(-1);
    collectNoChildren.fill(-1);
    collectNoOffspring.fill(-1);
    collectChildIds.fill(-1);
    collectNghbrIds.fill(-1);
    for(MUint i = 0; i < collectData.size(); i++) {
      collectMap[get<0>(collectData[i])] = i;
    for(MUint i = 0; i < partitionLevelAncestorsOnMinLevel.size(); i++) {
      MLong globalId = get<0>(partitionLevelAncestorsOnMinLevel[i]);
      MInt localId = get<1>(partitionLevelAncestorsOnMinLevel[i]);
      MInt id = collectMap[globalId];
      collectParentId(id) = -1;
      MInt minId = localMinLevelId[localId];
      ASSERT(minId > -1, "");
      collectMinLevelTreeId(id) = minLevelCellsTreeId[minId];
      collectNoOffspring(id) = (minId == noLocalMinLevelCells - 1)
                                   ? minLevelCellGlobalIdOffsets[domainId() + 1] - globalId
                                   : a_globalId(localMinLevelCells[minId + 1]) - globalId;
    for(MUint i = 0; i < partitionLevelAncestorsOnMinLevel.size(); i++) {
      MInt counter = collectMap[get<0>(partitionLevelAncestorsOnMinLevel[i])];
                                &collectNoChildren[0], &collectChildIds[0], &collectNoOffspring[0]);
    for(MUint i = 0; i < partitionLevelAncestorsOnMinLevel.size(); i++) {
      MLong globalId = get<0>(partitionLevelAncestorsOnMinLevel[i]);
      MInt localId = get<1>(partitionLevelAncestorsOnMinLevel[i]);
      MInt id = collectMap[globalId];
        a_childId(localId, j) = collectChildIds(id, j);
    std::vector<MLong> returnData;
    if(collectCnt > 0) {
      returnOffsets(0) = 0;
        if(noCollect[c] > 0) {
          std::map<MLong, MInt> returnChain;
          for(MInt d = 0; d < noCollect[c]; d++) {
            const MInt id0 = collectOffsets[c] + d;
            MLong globalId = collectGlobalId[id0];
            MInt id1 = collectMap[globalId];
            if(collectIsPartitionCell[id0]) {
              id1 = collectMap[collectParentId[id1]];
              globalId = get<0>(collectData[id1]);
            while(globalId > -1) {
              if(returnChain.count(globalId) == 0) {
                returnChain[globalId] = id1;
                if(collectParentId[id1] > -1) {
                  id1 = collectMap[collectParentId[id1]];
                  globalId = get<0>(collectData[id1]);
          for(auto it = returnChain.begin(); it != returnChain.end(); it++) {
            MLong globalId = it->first;
            MInt id1 = it->second;
              localId = it1->second;
              ASSERT(a_hasProperty(localId, Cell::IsPartLvlAncestor), "");
              a_level(localId) = collectLevel(id1);
                a_childId(localId, i) = collectChildIds(id1, i);
              newCells.push_back(localId);
              newCellsMaxLevel = mMax(newCellsMaxLevel, a_level(localId));
            MInt offs = returnData.size();
            returnData.resize(offs + noData);
            returnData[offs++] = globalId;
            returnData[offs++] = collectMinLevelTreeId(id1);
            returnData[offs++] = collectLevel(id1);
            returnData[offs++] = collectParentId(id1);
            returnData[offs++] = collectNoOffspring(id1);
              returnData[offs++] = collectChildIds(id1, i);
              returnData[offs++] = collectNghbrIds(id1, i);
        returnOffsets(collectCnt + 1) = returnData.size();
    returnReq.fill(MPI_REQUEST_NULL);
    if(collectCnt > 0) {
        if(noCollect[c] == 0) continue;
        noReturnDataBuffer[returnCnt] = returnOffsets(returnCnt + 1) - returnOffsets(returnCnt);
        MPI_Issend(&noReturnDataBuffer[returnCnt], 1, MPI_INT, c, 459, mpiComm(), &returnReq[returnCnt], AT_,
                   "&noReturnDataBuffer[returnCnt]");
    MIntScratchSpace noReceiveData(std::max(queryCnt, 1), AT_, "noReceiveData");
      if(noQuery[c] == 0) continue;
      MPI_Recv(&noReceiveData[queryCnt], 1, MPI_INT, c, 459, mpiComm(), MPI_STATUS_IGNORE, AT_,
               "noReceiveData[queryCnt]");
      receiveOffs(queryCnt + 1) = receiveOffs(queryCnt) + noReceiveData[queryCnt];
    if(returnCnt > 0) MPI_Waitall(returnCnt, &returnReq[0], MPI_STATUSES_IGNORE, AT_);
    MLongScratchSpace receiveData(std::max(receiveOffs(queryCnt), 1), AT_, "receiveData");
    returnReq.fill(MPI_REQUEST_NULL);
    if(collectCnt > 0) {
        if(noCollect[c] == 0) continue;
        MInt noReturnData = returnOffsets(returnCnt + 1) - returnOffsets(returnCnt);
        MInt offs = returnOffsets(returnCnt);
        MPI_Issend(&returnData[offs], noReturnData, MPI_LONG, c, 460, mpiComm(), &returnReq[returnCnt++], AT_,
                   "returnData[offs]");
      if(noQuery[c] == 0) continue;
      MPI_Recv(&receiveData(receiveOffs(queryCnt)), noReceiveData(queryCnt), MPI_LONG, c, 460, mpiComm(),
               MPI_STATUS_IGNORE, AT_, "receiveData(receiveOffs(queryCnt))");
    if(returnCnt > 0) MPI_Waitall(returnCnt, &returnReq[0], MPI_STATUSES_IGNORE, AT_);
      if(noQuery[c] == 0) continue;
      MInt offs = receiveOffs(queryCnt);
      while(offs < receiveOffs(queryCnt + 1)) {
        MLong globalId = receiveData(offs++);
          localId = it->second;
          ASSERT(a_hasProperty(localId, Cell::IsPartLvlAncestor), "");
        MLong treeId = receiveData(offs++);
        a_level(localId) = receiveData(offs++);
          newCells.push_back(localId);
          newCellsMaxLevel = mMax(newCellsMaxLevel, a_level(localId));
          a_childId(localId, i) = receiveData(offs++);
        maia::grid::hilbert::treeIdToCoordinates<nDim>(
            &grid_.m_targetGridCenterOfGravity[0], grid_.m_targetGridLengthLevel0);
      if(localMinLevelId[cellId] < 0) {
    static const MFloat childStencil[8][3] = {{-F1, -F1, -F1}, {F1, -F1, -F1}, {-F1, F1, -F1}, {F1, F1, -F1},
                                              {-F1, -F1, F1},  {F1, -F1, F1},  {-F1, F1, F1},  {F1, F1, F1}};
    for(MUint i = 0; i < newCells.size(); i++) {
      if(a_level(newCells[i]) == lvl) {
        if(a_childId(newCells[i], child) > -1) {
    for(MUint i = 0; i < newCells.size(); i++) {
      MInt cellId = newCells[i];
      for(MInt d = firstDom; d <= lastDom; d++) {
    for(MUint i = 0; i < newCells.size(); i++) {
      MInt cellId = newCells[i];
      for(MInt d = firstDom; d <= lastDom; d++) {
        ASSERT(idx > -1, "");
        ASSERT(idx > -1, "");
        mTerm(1, AT_, "Invalid sorting of neighbor domains.");
        mTerm(1, AT_, "Invalid sorting of halo cells.");
        mTerm(1, AT_, "Invalid sorting of window cells.");

                         const MLong* const dataSource, const MInt dataWidth) {
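    // Generic communication of data given by dataSource, matched by queryGlobalIds, and stored in
    // receiveData: each queried global id is sent to its owning domain, the owner copies dataWidth
    // values per id from dataSource, and the results are returned and written back to receiveData
    // in the original query order (via originalId).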
    queryDomains.fill(-1);
    originalId.fill(-1);
    for(MInt i = 0; i < noCellsToQuery; i++) {
      MLong cellId = queryGlobalIds[i];
        mTerm(1, AT_, "Neighbor domain not found.");
      queryDomains[i] = cpu;
      recvOffsets(c + 1) = recvOffsets(c) + noRecv[c];
    for(MInt i = 0; i < noCellsToQuery; i++) {
      MLong cellId = queryGlobalIds[i];
      MInt cpu = queryDomains[i];
      recvIds(recvOffsets(cpu) + noRecv(cpu)) = cellId;
      originalId(recvOffsets(cpu) + noRecv(cpu)) = i;
    MPI_Alltoall(&noRecv[0], 1, MPI_INT, &noSend[0], 1, MPI_INT, mpiComm(), AT_, "noRecv[0]", "noSend[0]");
      sendOffsets(c + 1) = sendOffsets(c) + noSend[c];
    sendReq.fill(MPI_REQUEST_NULL);
      if(noRecv[c] == 0) continue;
      MPI_Issend(&recvIds[recvOffsets[c]], noRecv[c], MPI_LONG, c, 123, mpiComm(), &sendReq[recvCnt], AT_,
                 "recvIds[recvOffsets[c]]");
      if(noSend[c] == 0) continue;
      MPI_Recv(&sendIds[sendOffsets[c]], noSend[c], MPI_LONG, c, 123, mpiComm(), MPI_STATUS_IGNORE, AT_,
               "sendIds[sendOffsets[c]]");
    if(recvCnt > 0) MPI_Waitall(recvCnt, &sendReq[0], MPI_STATUSES_IGNORE, AT_);
        mTerm(1, AT_, "Invalid exchange.");
      MInt cellId = it->second;
      for(MInt c = 0; c < dataWidth; c++) {
        sendData(k, c) = dataSource[cellId * dataWidth + c];
    sendReq.fill(MPI_REQUEST_NULL);
      if(noSend[c] == 0) continue;
      MPI_Issend(&sendData(sendOffsets[c], 0), noSend[c] * dataWidth, MPI_LONG, c, 124, mpiComm(), &sendReq[sendCnt],
                 AT_, "sendData(sendOffsets[c]");
      if(noRecv[c] == 0) continue;
      MPI_Recv(&recvData(recvOffsets[c], 0), noRecv[c] * dataWidth, MPI_LONG, c, 124, mpiComm(), MPI_STATUS_IGNORE, AT_,
               "recvData(recvOffsets[c]");
    if(sendCnt > 0) MPI_Waitall(sendCnt, &sendReq[0], MPI_STATUSES_IGNORE, AT_);
      MInt cellId = originalId[k];
      for(MInt c = 0; c < dataWidth; c++) {
        receiveData[cellId * dataWidth + c] = recvData[k * dataWidth + c];
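    // childStencil below holds, for each of the up to eight children, the sign pattern (-1/+1 per
    // coordinate direction) of the child's position relative to its parent cell center. Child
    // coordinates are presumably obtained along the lines of
    //   coordinate(child, d) = coordinate(parent, d) + 0.5 * childCellLength * childStencil[child][d]
    // where childCellLength stands for the level-dependent child cell edge length; the exact
    // scaling factor is not visible in this excerpt.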
    static constexpr MFloat childStencil[8][3] = {{-F1, -F1, -F1}, {F1, -F1, -F1}, {-F1, F1, -F1}, {F1, F1, -F1},
                                                  {-F1, -F1, F1},  {F1, -F1, F1},  {-F1, F1, F1},  {F1, F1, F1}};
    const MUint childCnt = static_cast<MUint>(cellInfo[cellId]) & 15u;
    for(MUint child = 0; child < childCnt; child++) {
      ASSERT(childId < m_tree.size(),
             "child out of range: " + std::to_string(childId) + " " + std::to_string(m_tree.size()));
      MUint position = (static_cast<MUint>(cellInfo[childId]) >> 4u) & 7u;

    const MUint childCnt = static_cast<MUint>(cellInfo[cellId]) & 15u;
    MUint childId = cellId;
    MUint totalNoChilds = childCnt;
    while(totalNoChilds) {
      ASSERT((MInt)childId < N, "child out of range: " + std::to_string(childId) + " " + std::to_string(N));
      MUint isMinLevel = (static_cast<MUint>(cellInfo[childId]) >> 7) & 1;
      ASSERT(!isMinLevel, "");
      totalNoChilds += static_cast<MUint>(cellInfo[childId]) & 15u;
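    // The cellInfo byte packs the per-cell tree information used throughout this file; from the
    // bit operations above and the encoding used when writing the grid file below
    // (noChilds | (position << 4) | (isMinLevel << 7)), the layout is:
    //   bits 0-3: number of children        -> static_cast<MUint>(cellInfo[i]) & 15u
    //   bits 4-6: child position in parent  -> (static_cast<MUint>(cellInfo[i]) >> 4u) & 7u
    //   bit  7  : min-level flag            -> (static_cast<MUint>(cellInfo[i]) >> 7u) & 1u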
                                    MLong* const collectParentId, MInt* const collectLevel,
                                    MInt* const collectNoChildren, MLong* const collectChildIds,
                                    MInt* const collectNoOffspring) {
    using namespace std;
    ASSERT(counter < (signed)collectData.size(), "");
    const MLong globalId = get<0>(collectData[counter]);
    const MInt counterParent = counter;
    const MInt isPartitionCell = get<2>(collectData[counterParent]);
    const MUint childCnt = static_cast<MUint>(get<1>(collectData[counterParent]))
    if(isPartitionCell) return;
    for(MUint child = 0; child < childCnt; child++) {
      ASSERT(counter < (signed)collectData.size(), "");
      const MLong childId = get<0>(collectData[counter]);
      const MUint position = (static_cast<MUint>(get<1>(collectData[counter])) >> 4u) & 7u;
      collectLevel[counter] = collectLevel[counterParent] + 1;
      collectParentId[counter] = globalId;
      if(child < childCnt - 1) {
        MUint totalNoChilds = 1;
        while(totalNoChilds) {
          ASSERT(counter + tmpCnt < (signed)collectData.size(), "");
          if(!get<2>(collectData[counter + tmpCnt])) {
            totalNoChilds += static_cast<MUint>(get<1>(collectData[counter + tmpCnt])) & 15u;
        ASSERT(counter + tmpCnt < (signed)collectData.size(), "");
        nextId = get<0>(collectData[counter + tmpCnt]);
        nextId = globalId + collectNoOffspring[counterParent];
      collectNoOffspring[counter] = nextId - childId;
                    collectChildIds, collectNoOffspring);
    collectNoChildren[counterParent] = childCnt;
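  // saveGrid: renumber the local cells into a global tree (depth-first) ordering, exchange the new
  // ids for window/halo cells, filter the partition cells, and finally write the parallel grid
  // file (attributes plus the partitionCellsGlobalId/-Workload, minLevelCells*, cellInfo and
  // solver arrays) via writeParallelGridFile below.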
  template <class CELLTYPE>
                   const std::vector<std::vector<MInt>>& cpu_haloCells,
                   const std::vector<std::vector<MInt>>& cpu_windowCells,
                   const std::vector<std::vector<MInt>>& cpu_azimuthalHaloCells,
                   const std::vector<MInt>& cpu_azimuthalUnmappedHaloCells,
                   const std::vector<std::vector<MInt>>& cpu_azimuthalWindowCells, MInt* const recalcIdTree) {
    const MInt cpu_noCells = grid_.m_tree.size();
    std::vector<std::vector<MInt>> partitionLevelAncestorHaloCells;
    std::vector<std::vector<MInt>> partitionLevelAncestorWindowCells;
    MInt noHaloCellsTotal = 0;
    MInt noWindowCellsTotal = 0;
      noHaloCellsTotal += (signed)cpu_haloCells[i].size();
      noWindowCellsTotal += (signed)cpu_windowCells[i].size();
      for(MInt j = 0; j < (signed)cpu_haloCells[i].size(); ++j) {
        if(a_hasProperty(cpu_haloCells[i][j], Cell::IsPartLvlAncestor)) {
          partitionLevelAncestorHaloCells[i].push_back(cpu_haloCells[i][j]);
      for(MInt j = 0; j < (signed)cpu_windowCells[i].size(); ++j) {
        if(a_hasProperty(cpu_windowCells[i][j], Cell::IsPartLvlAncestor)) {
          partitionLevelAncestorWindowCells[i].push_back(cpu_windowCells[i][j]);
    if(grid_.m_azimuthalPer) {
        noHaloCellsTotal += (signed)cpu_azimuthalHaloCells[i].size();
        noWindowCellsTotal += (signed)cpu_azimuthalWindowCells[i].size();
      noHaloCellsTotal += (signed)cpu_azimuthalUnmappedHaloCells.size();
    ASSERT(noHalo0 == noHaloCellsTotal, std::to_string(noHalo0) + " != " + std::to_string(noHaloCellsTotal));
    if(grid_.m_newMinLevel > 0) {
      noInternalCells = 0;
      for(MInt cellId = 0; cellId < cpu_noCells; cellId++) {
        if(a_hasProperty(cpu_cells, cellId, Cell::IsHalo)) continue;
        changeId2[cellId] = noInternalCells;
      noCellsPar.p[domainId()] = noInternalCells;
    if(grid_.m_newMinLevel < 0) {
      MInt sndBuf = static_cast<MInt>(tmpCnt);
                                       partitionLevelAncestorWindowCells, partitionLevelAncestorHaloCells);
    std::vector<MInt> minLevelCells;
        minLevelCells.push_back(i);
        if(grid_.m_newMinLevel > 0) continue;
        minLevelCells.push_back(i);
    const MInt minLevelCellsCnt = minLevelCells.size();
    minLevelCells_hId.fill(0);
    for(MInt i = 0; i < minLevelCellsCnt; ++i) {
      minLevelCells_id[i] = minLevelCells[i];
    MInt offspringSum = 0;
    for(MInt i = 0; i < minLevelCellsCnt; ++i) {
      ASSERT(a_noOffsprings(cpu_cells, minLevelCells_id.p[i]) > -1,
             "minLevelCells_id.p[i]) " << minLevelCells_id.p[i]);
                  "offspringPrefix.getPointer()");
      tmpCnt += offspringPrefix.p[i];
    for(MInt i = 0; i < minLevelCellsCnt; ++i) {
      minLevelCells_newId.p[i] = tmpCnt;
    if(grid_.m_newMinLevel < 0) {
      for(MInt i = 0; i < minLevelCellsCnt; ++i) {
        ASSERT(minLevelCells_newId.p[i] == a_globalId(minLevelCells_id.p[i]),
               "minLevelCells_newId.p[i] " << minLevelCells_newId.p[i] << "a_globalId(minLevelCells_id.p[i] "
                           changeId.getPointer(), partitionLevelAncestorWindowCells,
                           partitionLevelAncestorHaloCells);
    ASSERT(tmpCnt == noCellsPar[domainId()], "tmpCnt: " << tmpCnt << " noCellsPar: " << noCellsPar[domainId()]);
      for(MInt i = 0; i < tmpCnt; i++) {
        ASSERT(i < tmpCnt, "index out of range");
        recalcIdTree[i] = oldId[i];
        ASSERT(recalcIdTree[i] > -1 && recalcIdTree[i] < m_noInternalCells, "recalcId out of range");
    for(MInt i = 0; i < cpu_noCells; ++i) {
      changeId[oldId[i]] = newId[i];
    if(1 < noDomains() && (noWindowCellsTotal > 0) && (noHaloCellsTotal > 0)) {
        for(MInt j = 0; j < (signed)cpu_windowCells[i].size(); ++j) {
          sndBuf.p[tmpCnt] = changeId.p[cpu_windowCells[i][j]];
        sndDsp.p[i] = sndDsp.p[i - 1] + ((signed)cpu_windowCells[i - 1].size());
        rcvDsp.p[i] = rcvDsp.p[i - 1] + ((signed)cpu_haloCells[i - 1].size());
        sndCnt.p[i] = (signed)cpu_windowCells[i].size();
        rcvCnt.p[i] = (signed)cpu_haloCells[i].size();
      const MInt mpiTag = 0;
      MPI_Request* mpiReq;
        mpiReq[i] = MPI_REQUEST_NULL;
        MInt bufSize = (signed)cpu_windowCells[i].size();
        if(bufSize == 0) continue;
        MInt bufSize = (signed)cpu_haloCells[i].size();
        if(bufSize == 0) continue;
        if((signed)cpu_windowCells[i].size() == 0) continue;
        MPI_Wait(&mpiReq[i], &status, AT_);
        for(MInt j = 0; j < (signed)cpu_haloCells[i].size(); ++j) {
          changeId.p[cpu_haloCells[i][j]] = rcvBuf.p[tmpCnt];
      tmpCnt += noCellsPar.p[i];
      ASSERT(changeId[oldId[i]] >= tmpCnt && changeId[oldId[i]] < tmpCnt + noCellsPar.p[domainId()], "");
    std::vector<MLong> partitionCellsFiltered;
    if(!grid_.m_updatedPartitionCells && grid_.m_updatePartitionCellsOnRestart) {
      std::vector<MInt> partitionCellsGlobalIds;
      for(MInt i = 0; i < minLevelCellsCnt; ++i) {
        ASSERT(minLevelCells_newId.p[i] >= tmpCnt && minLevelCells_newId.p[i] < tmpCnt + noCellsPar.p[domainId()],
               "minLevelCells_newId:" << minLevelCells_newId.p[i] << " tmpCnt: " << tmpCnt
                                      << " noCellsPar: " << noCellsPar.p[domainId()]);
        ASSERT(minLevelCells_newId.p[i] == changeId[oldId.p[minLevelCells_newId.p[i] - tmpCnt]], "");
      std::vector<MInt> tmpPartitionCells;
      for(MInt cellId = 0; cellId < cpu_noCells; ++cellId) {
        if(!grid_.m_newMinLevel) {
          tmpPartitionCells.push_back(cellId);
      while(!tmpPartitionCells.empty()) {
        MInt cellId = tmpPartitionCells.front();
        tmpPartitionCells.erase(tmpPartitionCells.begin());
           || a_workload(cpu_cells, cellId) > workloadThreshold) {
            tmpPartitionCells.insert(tmpPartitionCells.begin() + cnt, a_childId(cellId, j));
        } else if(!a_hasProperty(cpu_cells, cellId, Cell::IsHalo)) {
          partitionCellsFiltered.push_back(changeId.p[cellId]);
          partitionCellsGlobalIds.push_back(grid_.a_globalId(cellId));
      MLong noPartitionCellsGlobal = partitionCellsFiltered.size();
                    "MPI_IN_PLACE", "m_noPartitionCellsGlobal");
      if(noPartitionCellsGlobal != grid_.m_noPartitionCellsGlobal) {
        cerr0 << "Partition cell update in restart-file! " << noPartitionCellsGlobal << " "
              << grid_.m_noPartitionCellsGlobal << std::endl;
      MInt noLocalPartitionCells = partitionCellsFiltered.size();
      if(grid_.m_localPartitionCellLocalIdsRestart != nullptr) {
      mAlloc(grid_.m_localPartitionCellLocalIdsRestart, noLocalPartitionCells, "m_localPartitionCellLocalIdsRestart",
      sort(partitionCellsGlobalIds.begin(), partitionCellsGlobalIds.end());
      MPI_Allgather(&noLocalPartitionCells, 1, MPI_INT, &localPartitionCellCounts[0], 1, MPI_INT, mpiComm(), AT_,
                    "noLocalPartitionCells", "localPartitionCellCounts[0]");
        offset += localPartitionCellCounts[dId];
      grid_.m_localPartitionCellOffsetsRestart[0] = offset;
      grid_.m_localPartitionCellOffsetsRestart[1] = offset + noLocalPartitionCells;
      grid_.m_localPartitionCellOffsetsRestart[2] = noPartitionCellsGlobal;
      for(MInt i = 0; i < noLocalPartitionCells; i++) {
        grid_.m_localPartitionCellLocalIdsRestart[i] = grid_.globalIdToLocalId(partitionCellsGlobalIds[i]);
      for(MInt cellId = 0; cellId < cpu_noCells; ++cellId) {
          partitionCellsFiltered.push_back(changeId.p[cellId]);
    sort(partitionCellsFiltered.begin(), partitionCellsFiltered.end());
    MIntScratchSpace partitionCellLevelDiff(partitionCellsFiltered.size(), AT_, "partitionCellLevelDiff");
    for(MInt i = 0; i < (signed)partitionCellsFiltered.size(); ++i) {
      partitionCellLevelDiff.p[i] = a_level(oldId.p[partitionCellsFiltered[i] - tmpCnt]) - minLevel;
    partitionCellsFilteredSizePar.p[domainId()] = partitionCellsFiltered.size();
    tmpCnt = partitionCellsFilteredSizePar.p[domainId()];
    MInt sendVar = static_cast<MInt>(tmpCnt);
                          changeId.data(), partitionCellsFiltered, partitionCellLevelDiff.data(), changeId2.data());

    MInt gap = cell2SortCnt;
    MBool swapped = true;
    while((gap > 1) || swapped) {
      gap = MInt(gap / 1.247330950103979);
      while(i + gap < cell2SortCnt) {
        if(array2Sort[i] > array2Sort[i + gap]) {
          std::swap(array2Sort[i], array2Sort[i + gap]);
          std::swap(followUp1[i], followUp1[i + gap]);
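    // The loop above is a comb sort (gap shrink factor ~1.2473) that sorts array2Sort and applies
    // the same swaps to followUp1, so the two arrays stay aligned.

  // calculateNoOffspringsAndWorkload: accumulate, for every cell, the number of offsprings and the
  // workload, including contributions that arrive from other domains through the
  // partition-level-ancestor and regular window/halo cell exchanges below.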
  template <typename CELLTYPE>
                   const std::vector<std::vector<MInt>>& cpu_haloCells,
                   const std::vector<std::vector<MInt>>& cpu_windowCells,
                   const std::vector<std::vector<MInt>>& partitionLevelAncestorWindowCells,
                   const std::vector<std::vector<MInt>>& partitionLevelAncestorHaloCells) {
      recvSize += (signed)partitionLevelAncestorWindowCells[d].size();
    for(MInt i = 0; i < input_noCells; ++i) {
      ASSERT(!std::isnan(a_weight(i)), "cellId " + std::to_string(i) + " g" + std::to_string(a_globalId(i)));
    for(MInt i = 0; i < input_noCells; ++i) {
      for(MInt j = 0; j < noChilds; ++j) {
      for(MInt j = 0; j < (signed)partitionLevelAncestorHaloCells[d].size(); j++) {
        tmpNoOffs[partitionLevelAncestorHaloCells[d][j]] =
            a_noOffsprings(input_cells, partitionLevelAncestorHaloCells[d][j]);
        tmpWorkload[partitionLevelAncestorHaloCells[d][j]] =
            a_workload(input_cells, partitionLevelAncestorHaloCells[d][j]);
                 mpiComm(), &tmpNoOffs[0], &recvBuffer[0]);
                 mpiComm(), &tmpWorkload[0], &recvBuffer2[0]);
      for(MInt j = 0; j < (signed)partitionLevelAncestorWindowCells[d].size(); j++) {
        MInt cellId = partitionLevelAncestorWindowCells[d][j];
        a_workload(input_cells, cellId) += recvBuffer2[cnt];
      for(MInt j = 0; j < (signed)partitionLevelAncestorWindowCells[d].size(); j++) {
        tmpNoOffs[partitionLevelAncestorWindowCells[d][j]] =
            a_noOffsprings(input_cells, partitionLevelAncestorWindowCells[d][j]);
        tmpWorkload[partitionLevelAncestorWindowCells[d][j]] =
            a_workload(input_cells, partitionLevelAncestorWindowCells[d][j]);
      for(MInt j = 0; j < (signed)partitionLevelAncestorHaloCells[d].size(); j++) {
        a_noOffsprings(input_cells, partitionLevelAncestorHaloCells[d][j]) =
            tmpNoOffs[partitionLevelAncestorHaloCells[d][j]];
        a_workload(input_cells, partitionLevelAncestorHaloCells[d][j]) =
            tmpWorkload[partitionLevelAncestorHaloCells[d][j]];
      for(MInt j = 0; j < (signed)cpu_windowCells[i].size(); j++) {
        tmpNoOffs[cpu_windowCells[i][j]] = a_noOffsprings(input_cells, cpu_windowCells[i][j]);
        tmpWorkload[cpu_windowCells[i][j]] = a_workload(input_cells, cpu_windowCells[i][j]);
      for(MInt j = 0; j < (signed)cpu_haloCells[i].size(); j++) {
        a_noOffsprings(input_cells, cpu_haloCells[i][j]) = tmpNoOffs[cpu_haloCells[i][j]];
        a_workload(input_cells, cpu_haloCells[i][j]) = tmpWorkload[cpu_haloCells[i][j]];
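  // createTreeOrderingOfIds: assign each internal cell a new global id in tree (depth-first)
  // order. Min-level cells receive their new ids directly; descendants are numbered level by
  // level using the childOffsprings counts (gathered from remote partition-level-ancestor
  // children below), and the resulting (newId -> oldId) pairs are emitted via the sorted cellMap.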
  template <typename CELLTYPE>
                   MInt* minLevelCells_id, MLong* minLevelCells_newId, MInt minLevelCellsCnt, MLong* tmp_id,
                   std::vector<std::vector<MInt>>& partitionLevelAncestorWindowCells,
                   std::vector<std::vector<MInt>>& partitionLevelAncestorHaloCells) {
    std::fill(&tmp_id[0], &tmp_id[0] + a_noCells(input_cells), -1);
    childOffsprings.fill(0);
    for(MInt cellId = 0; cellId < grid_.m_tree.size(); cellId++) {
      for(MInt j = 0; j < (signed)partitionLevelAncestorHaloCells[d].size(); j++) {
        MInt haloId = partitionLevelAncestorHaloCells[d][j];
      sendOffsets(d + 1) = sendOffsets(d) + noSend(d);
      for(MInt j = 0; j < (signed)partitionLevelAncestorHaloCells[d].size(); j++) {
        MInt haloId = partitionLevelAncestorHaloCells[d][j];
          sendData(sendOffsets(d) + noSend(d), 0) = a_noOffsprings(input_cells, childId);
          sendData(sendOffsets(d) + noSend(d), 1) = j;
          sendData(sendOffsets(d) + noSend(d), 2) = k;
    sendReq.fill(MPI_REQUEST_NULL);
    if(sendCnt > 0) MPI_Waitall(sendCnt, &sendReq[0], MPI_STATUSES_IGNORE, AT_);
      recvOffsets(d + 1) = recvOffsets(d) + noRecv(d);
    sendReq.fill(MPI_REQUEST_NULL);
      if(noSend[c] == 0) continue;
                 &sendReq[sendCnt], AT_, "sendData(sendOffsets[c]");
      if(noRecv[c] == 0) continue;
               MPI_STATUS_IGNORE, AT_, "recvData(recvOffsets[c]");
    if(sendCnt > 0) MPI_Waitall(sendCnt, &sendReq[0], MPI_STATUSES_IGNORE, AT_);
      for(MInt k = 0; k < noRecv[d]; k++) {
        MInt idx = recvData(recvOffsets[d] + k, 1);
        MInt child = recvData(recvOffsets[d] + k, 2);
        ASSERT(child > -1 && child < IPOW2(nDim), "");
        MInt windowId = partitionLevelAncestorWindowCells[d][idx];
        childOffsprings(windowId, child) = recvData(recvOffsets[d] + k, 0);
    for(MInt i = 0; i < minLevelCellsCnt; ++i) {
      tmp_id[minLevelCells_id[i]] = minLevelCells_newId[i];
      ASSERT(tmp_id[minLevelCells_id[i]] > -1, "");
      for(MInt cellId = 0; cellId < grid_.m_tree.size(); cellId++) {
        if(a_level(cellId) == level) {
          if(tmp_id[cellId] < 0) continue;
          MLong cntId = tmp_id[cellId] + 1;
            tmp_id[childId] = cntId;
            cntId += childOffsprings(cellId, k);
    std::map<MLong, MInt> cellMap;
    for(MInt cellId = 0; cellId < noInternalCells; cellId++) {
      ASSERT(tmp_id[cellId] > -1, "");
      cellMap.insert(std::make_pair(tmp_id[cellId], cellId));
    for(auto it = cellMap.begin(); it != cellMap.end(); ++it) {
      oldId[tmpCnt] = it->second;
      newId[tmpCnt] = it->first;
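  // writeParallelGridFile: gather the global counts and offsets, compute grid statistics (min/max
  // level, leaf and min-level cell counts, partition level shift, workload figures), write them
  // as attributes, and then write the arrays "partitionCellsGlobalId", "partitionCellsWorkload",
  // "minLevelCellsTreeId", "minLevelCellsNghbrIds", "cellInfo" and "solver" in parallel.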
  template <typename CELLTYPE>
                   MInt* partitionCellsFilteredSizePar, MInt* noCellsPar, MInt* oldId, MLong* changeId,
                   const std::vector<MLong>& partitionCellsFiltered, MInt* partitionCellLevelDiff,
    MLong partitionCellsFilteredSizeParMax = 0;
    MLong noCellsParMax = 0;
    partitionOffset[0] = 0;
      partitionOffset[i] = partitionOffset[i - 1] + partitionCellsFilteredSizePar[i - 1];
      allOffset[i] = allOffset[i - 1] + noCellsPar[i - 1];
      partitionCellsFilteredSizeParMax += (MLong)partitionCellsFilteredSizePar[i];
      noCellsParMax += (MLong)noCellsPar[i];
      tmp += (MLong)noCellsPar[i];
    m_log << "total number of cells to be written out " << tmp << std::endl;
    MInt minLevel = std::numeric_limits<MInt>::max();
    MInt newCellCount = 0;
    for(MInt i = 0; i < noCells; ++i) {
      if(grid_.m_newMinLevel > 0) cellId = changeId2[i];
      if(a_level(oldId[cellId]) < grid_.m_newMinLevel) continue;
      minLevel = mMin(minLevel, a_level(oldId[cellId]));
      maxLevel = mMax(maxLevel, a_level(oldId[cellId]));
    if(grid_.m_newMinLevel > 0) {
      std::cerr << "new-cout " << newCellCount << " old " << noCellsPar[domainId()] << std::endl;
      ASSERT(newCellCount == noCellsPar[domainId()], "");
    MInt noMinLevelCells = 0;
    MLong noLeafCells = 0;
    if(grid_.m_newMinLevel > 0) minLevel = grid_.m_newMinLevel;
    std::vector<std::pair<MLong, MInt>> minLevelCells;
    isMinLevelCell.fill(0);
    for(MInt i = 0; i < noCells; ++i) {
      if(grid_.m_newMinLevel > 0 && changeId2[i] < 0) continue;
      if(grid_.m_newMinLevel > 0) cellId = changeId2[i];
      if(a_level(oldId[cellId]) == minLevel) {
        if(grid_.m_newMinLevel < 0) {
        minLevelCells.push_back(std::make_pair(changeId[oldId[cellId]], oldId[cellId]));
        isMinLevelCell(cellId) = 1;
    MPI_Allreduce(MPI_IN_PLACE, &noLeafCells, 1, MPI_LONG, MPI_SUM, mpiComm(), AT_, "MPI_IN_PLACE", "noLeafCells");
                  "noMinLevelCells", "noMinLevelCellsPerDomain.getPointer()");
    MLong minLevelCellOffset = 0;
      minLevelCellOffset += (MLong)noMinLevelCellsPerDomain[d];
    MLong noTotalMinLevelCells = 0;
      noTotalMinLevelCells += (MLong)noMinLevelCellsPerDomain[d];
    std::set<MLong> partitionLevelAncestorIds;
    MInt partitionLevelShift = 0;
    MInt maxPartitionLevelShift = 0;
    for(MInt i = 0; i < partitionCellsFilteredSizePar[domainId()]; ++i) {
      MInt levelDiff = a_level(oldId[partitionCellsFiltered[i] - allOffset[domainId()]]) - minLevel;
      ASSERT(levelDiff == partitionCellLevelDiff[i], "");
      partitionLevelShift = mMax(levelDiff, partitionLevelShift);
    if(partitionLevelShift) {
      for(MInt i = 0; i < partitionCellsFilteredSizePar[domainId()]; ++i) {
        MInt cellId = oldId[partitionCellsFiltered[i] - allOffset[domainId()]];
        while(parentId > -1 && a_level(cellId) >= grid_.m_newMinLevel) {
          partitionLevelAncestorIds.insert(changeId[parentId]);
    MLong totalNoPartitionLevelAncestors = 0;
    MLong localPartitionLevelAncestorCount = (signed)partitionLevelAncestorIds.size();
    MPI_Allreduce(&localPartitionLevelAncestorCount, &totalNoPartitionLevelAncestors, 1, MPI_LONG, MPI_SUM, mpiComm(),
                  AT_, "localPartitionLevelAncestorCount", "totalNoPartitionLevelAncestors");
    MPI_Allreduce(&partitionLevelShift, &maxPartitionLevelShift, 1, MPI_INT, MPI_MAX, mpiComm(), AT_,
                  "partitionLevelShift", "maxPartitionLevelShift");
    MLong maxNoOffsprings = 0;
    MFloat totalWorkload = F0;
    for(MInt i = 0; i < partitionCellsFilteredSizePar[domainId()]; ++i) {
      totalWorkload += a_workload(input_cells, oldId[partitionCellsFiltered[i] - allOffset[domainId()]]);
    MPI_Allreduce(MPI_IN_PLACE, &maxNoOffsprings, 1, MPI_LONG, MPI_MAX, mpiComm(), AT_, "MPI_IN_PLACE",
    MPI_Allreduce(MPI_IN_PLACE, &maxWorkload, 1, MPI_DOUBLE, MPI_MAX, mpiComm(), AT_, "MPI_IN_PLACE", "maxWorkload");
    MPI_Allreduce(MPI_IN_PLACE, &totalWorkload, 1, MPI_DOUBLE, MPI_SUM, mpiComm(), AT_, "MPI_IN_PLACE",
    maxNoCPUs = totalWorkload / maxWorkload;
    MFloat avgWorkload = totalWorkload / ((MFloat)partitionCellsFilteredSizeParMax);
    MFloat avgOffspring = ((MFloat)noCellsParMax) / ((MFloat)partitionCellsFilteredSizeParMax);
        if(m_tree.solver(i, b) == true) {
      MPI_Allreduce(MPI_IN_PLACE, &solverCount, 1, MPI_LONG, MPI_SUM, mpiComm(), AT_, "MPI_IN_PLACE", "solverCount");
      grid.setAttributes(&solverCount, "noCells_" + std::to_string(b), 1);
    grid.setAttributes(&nodims, "nDim", 1);
    grid.setAttributes(&noSolvers, "noSolvers", 1);
    grid.setAttributes(&noCellsParMax, "noCells", 1);
    grid.setAttributes(&noLeafCells, "noLeafCells", 1);
    grid.setAttributes(&noTotalMinLevelCells, "noMinLevelCells", 1);
    grid.setAttributes(&partitionCellsFilteredSizeParMax, "noPartitionCells", 1);
    grid.setAttributes(&totalNoPartitionLevelAncestors, "noPartitionLevelAncestors", 1);
    grid.setAttributes(&minLevel, "minLevel", 1);
    grid.setAttributes(&maxLevel, "maxLevel", 1);
    grid.setAttributes(&maxPartitionLevelShift, "maxPartitionLevelShift", 1);
    if(grid_.m_hasMultiSolverBoundingBox) {
      grid.setAttributes(&grid_.m_targetGridLengthLevel0, "multiSolverLengthLevel0", 1);
      grid.setAttributes(&grid_.m_targetGridMinLevel, "multiSolverMinLevel", 1);
      grid.setAttributes(&(grid_.m_targetGridCenterOfGravity[0]), "multiSolverCenterOfGravity", nDim);
      grid.setAttributes(&(grid_.m_targetGridBoundingBox[0]), "multiSolverBoundingBox", 2 * nDim);
    grid.setAttributes(&totalWorkload, "totalWorkload", 1);
    grid.setAttributes(&maxWorkload, "partitionCellMaxWorkload", 1);
    grid.setAttributes(&avgWorkload, "partitionCellAverageWorkload", 1);
    grid.setAttributes(&maxNoOffsprings, "partitionCellMaxNoOffspring", 1);
    grid.setAttributes(&avgOffspring, "partitionCellAverageNoOffspring", 1);
    grid.setAttributes(&partitionCellOffspringThreshold, "partitionCellOffspringThreshold", 1);
    grid.setAttributes(&maxNoCPUs, "maxNoBalancedCPUs", 1);
    grid.defineArray(PIO_LONG, "partitionCellsGlobalId", partitionCellsFilteredSizeParMax);
    grid.defineArray(PIO_FLOAT, "partitionCellsWorkload", partitionCellsFilteredSizeParMax);
    grid.defineArray(PIO_LONG, "minLevelCellsTreeId", noTotalMinLevelCells);
    grid.defineArray(PIO_LONG, "minLevelCellsNghbrIds", 2 * nDim * noTotalMinLevelCells);
    grid.defineArray(PIO_UCHAR, "cellInfo", noCellsParMax);
    grid.defineArray(PIO_UCHAR, "solver", noCellsParMax);
    m_log << "header definition finished" << std::endl;
    grid.setOffset(partitionCellsFilteredSizePar[domainId()], partitionOffset[domainId()]);
    grid.writeArray(&partitionCellsFiltered[0], "partitionCellsGlobalId");
                                               "tmp_partitionCellsWorkLoad");
    for(MInt i = 0; i < partitionCellsFilteredSizePar[domainId()]; ++i) {
      tmp_partitionCellsWorkLoad[i] =
    grid.writeArray(tmp_partitionCellsWorkLoad.data(), "partitionCellsWorkload");
    for(MInt i = 0; i < (signed)minLevelCells.size(); i++) {
        tmp_nghbrIds(i, j) = changeId[a_neighborId(minLevelCells[i].second, j)];
        tmp_nghbrIds(i, j) = -1;
    grid.writeArray(tmp_nghbrIds.data(), "minLevelCellsNghbrIds");
    grid.setOffset(noMinLevelCells, minLevelCellOffset);
    for(MUint i = 0; i < minLevelCells.size(); i++) {
      maia::grid::hilbert::coordinatesToTreeId<nDim>(
          &(grid_.m_targetGridCenterOfGravity[0]), grid_.m_targetGridLengthLevel0);
    grid.writeArray(tmp_treeId.data(), "minLevelCellsTreeId");
    for(MInt i = 0; i < noCells; ++i) {
      if(grid_.m_newMinLevel > 0) cellId = changeId2[i];
      if(a_level(oldId[cellId]) < grid_.m_newMinLevel) continue;
      MUint isMinLevel = (MUint)isMinLevelCell(cellId);
      if(parentId > -1 && a_level(oldId[cellId]) > grid_.m_newMinLevel) {
          if(a_childId(parentId, j) == oldId[cellId]) position = j;
      MUint tmpBit = noChilds | (position << 4) | (isMinLevel << 7);
      tmp_cellInfo[cellCount] = static_cast<MUchar>(tmpBit);
    grid.writeArray(tmp_cellInfo.data(), "cellInfo");
    for(MInt i = 0; i < noCells; ++i) {
      if(grid_.m_newMinLevel > 0) cellId = changeId2[i];
      if(a_level(oldId[cellId]) < grid_.m_newMinLevel) continue;
      for(MInt solver = 0; solver < m_tree.noSolvers(); solver++) {
        if(m_tree.solver(oldId[cellId], solver)) {
          tmpBit |= (1 << solver);
      tmptmp[cellId] = static_cast<MUchar>(tmpBit);
    grid.writeArray(tmptmp.data(), "solver");
void mAlloc(T *&a, const MLong N, const MString &objectName, MString function)
allocates memory for one-dimensional array 'a' of size N
MBool mDeallocate(T *&a)
deallocates the memory previously allocated for element 'a'
static size_t getAvailableMemory()
Returns the amount of available memory in scratch.
This class is a ScratchSpace.
T * getPointer() const
Deprecated: use begin() instead!
void fill(T val)
fill the scratch with a given value
pointer p
Deprecated: use [] instead!
void propagateNeighborsFromMinLevelToMaxLevel()
Given neighbor information on the minLevel, create neighbor information on all higher levels.
static constexpr MInt m_noDirs
maia::grid::cell::BitsetType::reference a_hasProperty(const MInt cellId, const Cell p)
MInt & m_maxUniformRefinementLevel
MLong & a_neighborId(const MInt cellId, const MInt dir)
std::vector< MLong > & m_partitionLevelAncestorChildIds
void loadDonorGridPartition(const MLong *const partitionCellsId, const MInt noPartitionCells)
MLong & m_noPartitionCellsGlobal
MInt a_noChildren(const MInt cellId)
MLong generateHilbertIndex(const MInt cellId, const MInt minLevel)
std::vector< MInt > & m_partitionLevelAncestorNghbrDomains
MInt createTreeOrderingOfIds(Collector< CELLTYPE > *input_cells, MInt noInternalCells, MInt *oldId, MLong *newId, MInt *minLevelCells_id, MLong *minLevelCells_newId, MInt minLevelCellsCnt, MLong *tmp_id, std::vector< std::vector< MInt > > &partitionLevelAncestorWindowCells, std::vector< std::vector< MInt > > &partitionLevelAncestorHaloCells)
Create a tree ordering of Ids.
static void save(Grid &g, const MChar *fileName, Collector< CELLTYPE > *cpu_cells, const std::vector< std::vector< MInt > > &cpu_haloCells, const std::vector< std::vector< MInt > > &cpu_windowCells, const std::vector< std::vector< MInt > > &cpu_azimuthalHaloCells, const std::vector< MInt > &cpu_azimuthalUnmappedHaloCells, const std::vector< std::vector< MInt > > &cpu_azimuthalWindowCells, MInt *const recalcIdTree)
void queryGlobalData(const MInt noCellsToQuery, const MLong *const queryGlobalIds, MLong *const receiveData, const MLong *const dataSource, const MInt dataWidth)
Generic communication of data given by dataSource, matched by queryGlobalIds, stored in receiveData.
void calculateNoOffspringsAndWorkload(Collector< CELLTYPE > *input_cells, MInt input_noCells, const std::vector< std::vector< MInt > > &cpu_haloCells, const std::vector< std::vector< MInt > > &cpu_windowCells, const std::vector< std::vector< MInt > > &partitionLevelAncestorWindowCells, const std::vector< std::vector< MInt > > &partitionLevelAncestorHaloCells)
Calculate the number of offsprings and the workload for each cell.
MInt traverseAndFlagSubtree(const MUint &cellId, const MUchar *const cellInfo, const MInt N, MInt *const flag)
Recursively flag subtree cells from partition level to the leaf level.
MLong(& m_localPartitionCellOffsets)[3]
std::vector< std::vector< MInt > > & m_partitionLevelAncestorHaloCells
MLong & m_noPartitionLevelAncestorsGlobal
void writeParallelGridFile(const MChar *fileName, Collector< CELLTYPE > *input_cells, MInt *partitionCellsFilteredSizePar, MInt *noCellsPar, MInt *oldId, MLong *changeId, const std::vector< MLong > &partitionCellsFiltered, MInt *partitionCellLevelDiff, MInt *changeId2)
Save a grid file.
MInt correctDomainOffsetsAtPartitionLevelShifts(std::vector< MUchar > &cellInfo)
Check for partition level shifts and in this case make sure the first partition level ancestors are o...
MInt a_hasNeighbor(const MInt cellId, const MInt dir) const
MFloat(& m_centerOfGravity)[3]
static constexpr MInt m_maxNoChilds
MInt a_noCells(Collector< U > *NotUsed(cells_))
static void calculateNoOffspringsAndWorkload(Grid &g, Collector< CELLTYPE > *cpu_cells, MInt cpu_noCells, const std::vector< std::vector< MInt > > &cpu_haloCells, const std::vector< std::vector< MInt > > &cpu_windowCells, const std::vector< std::vector< MInt > > &partitionLevelAncestorWindowCells, const std::vector< std::vector< MInt > > &partitionLevelAncestorHaloCells)
MFloat & a_coordinate(const MInt cellId, const MInt dir)
MFloat cellLengthAtLevel(const MInt level) const
void saveGrid(const MChar *fileName, Collector< CELLTYPE > *cpu_cells, const std::vector< std::vector< MInt > > &cpu_haloCells, const std::vector< std::vector< MInt > > &cpu_windowCells, const std::vector< std::vector< MInt > > &cpu_azimuthalHaloCells, const std::vector< MInt > &cpu_azimuthalUnmappedHaloCells, const std::vector< std::vector< MInt > > &cpu_azimuthalWindowCells, MInt *const recalcIdTree)
Save the grid.
MFloat(& m_boundingBox)[6]
std::map< MLong, MInt > & m_globalToLocalId
MBool & m_loadGridPartition
MInt & m_noMinLevelCellsGlobal
static void propagateNeighborsFromMinLevelToMaxLevel(Grid &g)
MFloat a_weight(const MInt cellId)
MInt findNeighborDomainId(MLong globalId)
void collectDistributedGroupData(const MInt noLocalData, const MLong localDataOffset, const MLong *const intData, const MFloat *const fltData, const MInt noGroups, const MLong *const groupOffsets, const MInt noRanksPerGroup, const MLong noGlobalData, const MInt noRecvData, MLong *const recvInt, MFloat *const recvFlt, MPI_Comm comm, MInt rank)
Collect distributed data located in intData and fltData on the master rank (0) of each communicator/g...
std::vector< std::vector< MInt > > & m_partitionLevelAncestorWindowCells
MInt & a_noOffsprings(Collector< U > *NotUsed(cells_), const MInt cellId)
void sortAfterHilbertIds(MLong *array2Sort, MInt *followUp1, MInt cell2SortCnt)
Sort the given array by Hilbert id and reorder the accompanying follow-up array consistently.
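As a rough illustration of this kind of co-sorting (order one array, keep a companion array aligned), one can sort an index permutation and gather both arrays through it. The sketch below is an assumption about the intended behaviour, not the MAIA routine, and all names are made up for the example.

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <numeric>
#include <vector>

// Illustrative sketch: sort `ids` ascending and reorder `followUp` in the same way.
void sortByHilbertId(std::vector<std::int64_t>& ids, std::vector<int>& followUp) {
  std::vector<std::size_t> perm(ids.size());
  std::iota(perm.begin(), perm.end(), 0);
  std::sort(perm.begin(), perm.end(), [&](const std::size_t a, const std::size_t b) { return ids[a] < ids[b]; });

  std::vector<std::int64_t> sortedIds(ids.size());
  std::vector<int> sortedFollowUp(followUp.size());
  for(std::size_t i = 0; i < perm.size(); ++i) {
    sortedIds[i] = ids[perm[i]];
    sortedFollowUp[i] = followUp[perm[i]];
  }
  ids.swap(sortedIds);
  followUp.swap(sortedFollowUp);
}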
static void communicatePartitionLevelAncestorData(Grid &g, const MInt noLocalMinLevelCells, const MInt *const localMinLevelCells, const MInt *const localMinLevelId, const MLong *const minLevelCellsTreeId, const MUchar *const cellInfo)
MInt & m_noHaloPartitionLevelAncestors
void communicatePartitionLevelAncestorData(const MInt noLocalMinLevelCells, const MInt *const localMinLevelCells, const MInt *const localMinLevelId, const MLong *const minLevelCellsTreeId, const MUchar *const cellInfo)
The subtrees between the local minLevel and the partitionLevel are gathered on the domain which holds ...
static void load(Grid &g, MString const &fileName)
MInt & m_maxPartitionLevelShift
MLong *& m_localPartitionCellGlobalIds
typename Grid::MInt_dyn_array MInt_dyn_array
void traverseAndSetupSubtree(const MInt &cellId, const MUchar *const cellInfo)
Recursively setup subtree connection from partition level to the leaf level.
std::vector< MInt > & m_partitionLevelAncestorIds
maia::grid::cell::BitsetType::reference a_hasProperty(Collector< U > *NotUsed(cells_), const MInt cellId, const Cell p)
MInt findIndex(ITERATOR first, ITERATOR last, const U &val)
void loadGrid(const MString &fileName)
Load a grid file.
MInt & a_level(const MInt cellId)
MLong & a_parentId(const MInt cellId)
void resetCell(const MInt &cellId)
MLong & a_childId(const MInt cellId, const MInt pos)
MInt & m_decisiveDirection
void domainPartitioningParallel(const MInt tmpCount, const MLong tmpOffset, const MFloat *const partitionCellsWorkload, const MLong *const partitionCellsGlobalId, const MFloat totalWorkload, MLong *const partitionCellOffsets, MLong *const globalIdOffsets, const MBool computeOnlyPartition=false)
Parallel domain partitioning.
MInt *& m_localPartitionCellLocalIds
static constexpr MInt nDim
static void partitionParallel(Grid &g, const MInt tmpCount, const MLong tmpOffset, const MFloat *const partitionCellsWorkload, const MLong *const partitionCellsGlobalId, const MFloat totalWorkload, MLong *const partitionCellOffsets, MLong *const globalIdOffsets, const MBool computeOnlyPartition=false)
MInt & m_noPartitionCells
std::vector< MInt > & m_nghbrDomains
MInt & m_partitionCellOffspringThreshold
MFloat cellLengthAtCell(const MInt cellId) const
MInt & a_noOffsprings(const MInt cellId)
void traverseAndSetupTruncatedSubtree(MInt &counter, const std::vector< std::tuple< MLong, MUchar, MInt > > &collectData, MLong *const collectParentId, MInt *const collectLevel, MInt *const collectNoChildren, MLong *const collectChildIds, MInt *const collectNoOffspring)
Recursively setup truncated subtree between minLevel and partitionLevel (used to setup partition leve...
MLong & a_globalId(const MInt cellId)
MInt noNeighborDomains() const
MInt noAzimuthalNeighborDomains() const
MInt & m_noPartitionLevelAncestors
MFloat & m_partitionCellWorkloadThreshold
MInt globalIdToLocalId(const MLong globalId)
MFloat & a_workload(Collector< U > *NotUsed(cells_), const MInt cellId)
MFloat & m_reductionFactor
void checkMultiSolverGridExtents(const MInt nDim, const MFloat *centerOfGravity, const MFloat lengthLevel0, const MInt minLevel, const MFloat *targetGridCenterOfGravity, const MFloat targetGridLengthLevel0, const MInt targetGridMinLevel)
Checks if the given grid extents and cell sizes match when creating a multisolver grid and correspond...
void mTerm(const MInt errorCode, const MString &location, const MString &message)
MBool approx(const T &, const U &, const T)
constexpr T mMin(const T &x, const T &y)
constexpr T mMax(const T &x, const T &y)
constexpr MLong IPOW2(MInt x)
constexpr MFloat FPOW2(MInt x)
std::basic_string< char > MString
int MPI_Recv(void *buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Status *status, const MString &name, const MString &varname)
same as MPI_Recv
int MPI_Isend(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request, const MString &name, const MString &varname)
same as MPI_Isend
int MPI_Allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, MPI_Comm comm, const MString &name, const MString &sndvarname, const MString &rcvvarname)
same as MPI_Allgatherv
int MPI_Irecv(void *buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Request *request, const MString &name, const MString &varname)
same as MPI_Irecv
int MPI_Issend(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request, const MString &name, const MString &varname)
same as MPI_Issend
int MPI_Wait(MPI_Request *request, MPI_Status *status, const MString &name)
same as MPI_Wait
int MPI_Exscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, const MString &name, const MString &sndvarname, const MString &rcvvarname)
same as MPI_Exscan
int MPI_Waitall(int count, MPI_Request *request, MPI_Status *status, const MString &name)
same as MPI_Waitall
int MPI_Allreduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, const MString &name, const MString &sndvarname, const MString &rcvvarname)
same as MPI_Allreduce
int MPI_Alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm, const MString &name, const MString &sndvarname, const MString &rcvvarname)
same as MPI_Alltoall
int MPI_Iprobe(int source, int tag, MPI_Comm comm, int *flag, MPI_Status *status, const MString &name)
Probe (MPI_Iprobe) to get the status of a message without actually receiving it.
int MPI_Bcast(void *buffer, int count, MPI_Datatype datatype, int root, MPI_Comm comm, const MString &name, const MString &varname)
same as MPI_Bcast
int MPI_Allgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm, const MString &name, const MString &sndvarname, const MString &rcvvarname)
same as MPI_Allgather
int MPI_Send(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, const MString &name, const MString &varname)
same as MPI_Send
void partitionParallelSplit(const WeightType *const localWeights, const IdType noLocalWeights, const IdType localWeightOffset, const IdType noDomains, const IdType domainId, const MPI_Comm mpiComm, IdType *const offsets)
WeightType optimalPartitioningSerial(const WeightType *const weights, const IdType noWeights, const IdType noPartitions, IdType *const offsets)
Serial algorithm to find the optimal (not ideal) partitioning with given workloads based on a probing...
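The actual routine searches for the optimum by probing, as the brief above states; the sketch below only illustrates the output contract (offsets[p] is the index of the first weight assigned to partition p, and the return value is the heaviest partition workload) using a simple greedy split instead of the probing algorithm. All names are hypothetical.

#include <algorithm>
#include <cstddef>
#include <vector>

// Hypothetical greedy illustration of the offsets contract (NOT the probing algorithm):
// split `weights` into `noPartitions` contiguous ranges of roughly equal workload.
double greedyPartitioning(const std::vector<double>& weights, const int noPartitions, std::vector<long>& offsets) {
  double total = 0.0;
  for(const double w : weights) total += w;
  const double ideal = total / noPartitions;

  offsets.assign(noPartitions + 1, static_cast<long>(weights.size()));
  offsets[0] = 0;
  double acc = 0.0, load = 0.0, maxLoad = 0.0;
  int p = 1;
  for(std::size_t i = 0; i < weights.size(); ++i) {
    acc += weights[i];
    load += weights[i];
    // Close partition p-1 once the running sum reaches p times the ideal share.
    if(p < noPartitions && acc >= p * ideal) {
      offsets[p++] = static_cast<long>(i + 1);
      maxLoad = std::max(maxLoad, load);
      load = 0.0;
    }
  }
  maxLoad = std::max(maxLoad, load);
  return maxLoad; // workload of the most loaded partition
}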
void heuristicPartitioningParallel(const WeightType *const localWeights, const IdType noLocalWeights, const MLong localOffset, const MLong noGlobalWeights, const WeightType totalWorkload, const IdType noGlobalPartitions, const IdType globalPartitionId, const IdType noGroups, const MPI_Comm comm, MPI_Comm &commGroup, MPI_Comm &commMasters, IdType &groupId, IdType &groupLocalId, MLong *const groupOffsets)
Fully parallel partitioning into <noGroups> partitions based on a heuristic approach.
void exchangeData(const MInt noNghbrDomains, const MInt *const nghbrDomains, const MInt *const noHaloCells, const MInt **const, const MInt *const noWindowCells, const MInt **const windowCells, const MPI_Comm comm, const U *const data, U *const haloBuffer, const MInt noDat=1)
Generic exchange of data.
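As an illustration of this window/halo pattern, the sketch below gathers the window-cell values for each neighbor domain, exchanges them with non-blocking MPI, and writes the received values into a flat halo buffer. It uses plain MPI calls and double data instead of the wrapped MPI functions and the generic type U of the actual routine; the buffer layout and all names are assumptions for the example.

#include <mpi.h>
#include <cstddef>
#include <vector>

// Illustrative window->halo exchange for one double per cell (assumed layout, not the MAIA routine).
// windowCells[n] / haloCells[n] hold the local cell ids exchanged with neighbor rank nghbrDomains[n];
// haloBuffer must provide one slot per halo cell, ordered neighbor by neighbor.
void exchangeWindowHalo(const std::vector<int>& nghbrDomains,
                        const std::vector<std::vector<int>>& windowCells,
                        const std::vector<std::vector<int>>& haloCells,
                        const std::vector<double>& data, std::vector<double>& haloBuffer, const MPI_Comm comm) {
  const int noNghbrs = static_cast<int>(nghbrDomains.size());
  std::vector<std::vector<double>> sendBuf(noNghbrs), recvBuf(noNghbrs);
  std::vector<MPI_Request> requests;

  // Post receives for the halo data of every neighbor domain.
  for(int n = 0; n < noNghbrs; ++n) {
    recvBuf[n].resize(haloCells[n].size());
    MPI_Request req;
    MPI_Irecv(recvBuf[n].data(), static_cast<int>(recvBuf[n].size()), MPI_DOUBLE, nghbrDomains[n], 0, comm, &req);
    requests.push_back(req);
  }
  // Gather the window-cell values and send them.
  for(int n = 0; n < noNghbrs; ++n) {
    for(const int cellId : windowCells[n]) sendBuf[n].push_back(data[cellId]);
    MPI_Request req;
    MPI_Isend(sendBuf[n].data(), static_cast<int>(sendBuf[n].size()), MPI_DOUBLE, nghbrDomains[n], 0, comm, &req);
    requests.push_back(req);
  }
  MPI_Waitall(static_cast<int>(requests.size()), requests.data(), MPI_STATUSES_IGNORE);

  // Copy the received values into the flat halo buffer, neighbor by neighbor.
  std::size_t offset = 0;
  for(int n = 0; n < noNghbrs; ++n) {
    for(std::size_t i = 0; i < recvBuf[n].size(); ++i) haloBuffer[offset + i] = recvBuf[n][i];
    offset += recvBuf[n].size();
  }
}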
void reverseExchangeData(const MInt noNghbrDomains, const MInt *const nghbrDomains, const MInt *const noHaloCells, const MInt **const haloCells, const MInt *const noWindowCells, const MInt **const, const MPI_Comm comm, const U *const data, U *const windowBuffer, const MInt noDat=1)
Generic reverse exchange of data.
Namespace for auxiliary functions/classes.
PARALLELIO_DEFAULT_BACKEND ParallelIo
MFloat dist(const Point< DIM > &p, const Point< DIM > &q)
void logDuration_(const MFloat timeStart, const MString module, const MString comment, const MPI_Comm comm, const MInt domainId, const MInt noDomains)
Output the min/max/average duration of a code section over the ranks in a communicator. Note: only use...