7#ifndef GRIDCONTROLLER_H_
8#define GRIDCONTROLLER_H_
96 std::vector<std::unique_ptr<Coupling>>* couplers);
155 MLong*
const globalIdOffsets);
164 const MFloat domainWeight,
const MFloat*
const weights);
167 const MLong*
const newPartitionCellOffsets,
168 MLong*
const globalOffsets);
171 MInt*
const noCellsToSendByDomain,
172 MInt*
const noCellsToReceiveByDomain,
173 MInt*
const sortedCellId,
174 MInt*
const bufferIdToCellId);
185 const MInt*
const bufferIdToCellId, std::vector<std::vector<MInt>>& sendSizeVector,
186 std::vector<std::vector<MInt>>& recvSizeVector);
189 std::vector<MInt>& recvSizeVector,
const MInt*
const bufferIdToCellId,
const MInt noCells,
190 std::vector<MInt*>& intDataRecv, std::vector<MLong*>& longDataRecv,
191 std::vector<MFloat*>& floatDataRecv, std::vector<MInt>& dataTypes);
193 void setDataDlb(
const MInt solverId,
const MInt mode, std::vector<MInt*>& intDataRecv,
194 std::vector<MLong*>& longDataRecv, std::vector<MFloat*>& floatDataRecv, std::vector<MInt>& dataTypes,
195 const MBool freeMemory);
346 std::vector<std::function<void(
const MInt)>> vec(
noSolvers());
353 std::vector<std::function<void(
const MInt)>> vec(
noSolvers());
370 std::placeholders::_3);
375 std::vector<std::function<void()>> vec(
noSolvers());
382 std::vector<std::function<void(
const MInt)>> vec(
noSolvers());
399 std::vector<std::unique_ptr<Coupling>>* couplers)
402 m_refineCellSolver(refineCellVec()),
403 m_removeChildsSolver(removeChildsVec()),
404 m_swapProxySolver(swapProxyVec()),
405 m_cellOutside(cellOutsideVec()),
406 m_resizeGridMapSolver(resizeGridMapVec()),
407 m_removeCellSolver(removeCellVec()),
408 m_couplers(couplers) {
503 m_outputDir = Context::getBasicProperty<MString>(
"outputDir", AT_);
507 for(
MInt i = 0; i < maxNoCell; i++) {
522 m_loadBalancingInterval = 0;
524 if(domainId() == 0) {
525 std::cerr <<
"Property 'onlineRestartInterval' is deprecated, please rename it to "
526 "'loadBalancingInterval'."
529 m_loadBalancingInterval = Context::getBasicProperty<MInt>(
"onlineRestartInterval", AT_, &m_loadBalancingInterval);
544 m_loadBalancingInterval = Context::getBasicProperty<MInt>(
"loadBalancingInterval", AT_, &m_loadBalancingInterval);
559 m_balance = (m_loadBalancingInterval > 0);
560 m_balance = Context::getBasicProperty<MBool>(
"balance", AT_, &m_balance);
566 m_forceBalance = Context::getBasicProperty<MInt>(
"forceBalance", AT_, &m_forceBalance);
579 m_balanceAfterAdaptation = (m_adaptation && m_balance);
580 m_balanceAfterAdaptation = Context::getBasicProperty<MBool>(
"balanceAfterAdaptation", AT_, &m_balanceAfterAdaptation);
590 m_balanceAdaptationInterval = 1;
591 m_balanceAdaptationInterval =
592 Context::getBasicProperty<MInt>(
"balanceAfterAdaptationInterval", AT_, &m_balanceAdaptationInterval);
594 m_loadBalancingOffset = 0;
595 m_loadBalancingOffset = Context::getBasicProperty<MInt>(
"loadBalancingOffset", AT_, &m_loadBalancingOffset);
597 const MBool balanceOnlyAfterAdapt = (m_balanceAfterAdaptation && m_loadBalancingInterval <= 0);
598 if(balanceOnlyAfterAdapt) {
599 m_loadBalancingInterval = std::numeric_limits<MInt>::max();
601 m_balance = m_balance || m_balanceAfterAdaptation;
606 m_performanceOutput =
true;
607 m_performanceOutput = Context::getBasicProperty<MBool>(
"performanceOutput", AT_, &m_performanceOutput);
608 m_loadBalancingMode = 1;
612 interval = interval * 5;
615 m_loadBalancingInterval = std::max(interval, 10);
617 if(!m_performanceOutput) {
619 m_log <<
"Performance output: disabled." << std::endl;
623 m_log <<
"Performance output: enabled - every " << m_loadBalancingInterval <<
" time steps." << std::endl;
625 m_log <<
"Dynamic load balancing disabled." << std::endl;
642 m_loadBalancingMode = 0;
643 m_loadBalancingMode = Context::getBasicProperty<MInt>(
"loadBalancingMode", AT_, &m_loadBalancingMode);
658 m_loadBalancingTimerMode = 0;
659 m_loadBalancingTimerMode = Context::getBasicProperty<MInt>(
"loadBalancingTimerMode", AT_, &m_loadBalancingTimerMode);
673 m_loadBalancingStartTimeStep = 0;
674 m_loadBalancingStartTimeStep =
675 Context::getBasicProperty<MInt>(
"loadBalancingStartTimeStep", AT_, &m_loadBalancingStartTimeStep);
690 m_loadBalancingStopTimeStep = std::numeric_limits<MInt>::max();
691 m_loadBalancingStopTimeStep =
692 Context::getBasicProperty<MInt>(
"loadBalancingStopTimeStep", AT_, &m_loadBalancingStopTimeStep);
707 m_loadBalancingTimerStartOffset = std::floor(0.2 * m_loadBalancingInterval);
710 if(balanceOnlyAfterAdapt) {
711 if(m_loadBalancingOffset == 0) {
712 m_loadBalancingTimerStartOffset = std::floor(0.2 * m_adaptationInterval);
714 m_loadBalancingTimerStartOffset = std::floor(0.2 * m_loadBalancingOffset);
717 m_loadBalancingTimerStartOffset =
718 Context::getBasicProperty<MInt>(
"loadBalancingTimerStartOffset", AT_, &m_loadBalancingTimerStartOffset);
721 if(m_loadBalancingTimerStartOffset >= m_loadBalancingInterval) {
722 TERMM(1,
"DLB timerStartOffset = " + std::to_string(m_loadBalancingTimerStartOffset)
723 +
" must be smaller than dlb-interval = " + std::to_string(m_loadBalancingInterval) +
".");
740 m_forceLoadBalancing = Context::getBasicProperty<MBool>(
"forceLoadBalancing", AT_, &m_forceLoadBalancing);
755 m_testDynamicLoadBalancing =
false;
756 m_testDynamicLoadBalancing =
757 Context::getBasicProperty<MBool>(
"testDynamicLoadBalancing", AT_, &m_testDynamicLoadBalancing);
767 m_testUpdatePartitionCells =
false;
768 m_testUpdatePartitionCells =
769 Context::getBasicProperty<MBool>(
"testUpdatePartitionCells", AT_, &m_testUpdatePartitionCells);
771 if(m_testUpdatePartitionCells) {
780 m_testUpdatePartCellsOffspringThreshold = 10;
781 m_testUpdatePartCellsOffspringThreshold = Context::getBasicProperty<MInt>(
782 "testUpdatePartCellsOffspringThreshold", AT_, &m_testUpdatePartCellsOffspringThreshold);
792 m_testUpdatePartCellsWorkloadThreshold = 100.0;
793 m_testUpdatePartCellsWorkloadThreshold = Context::getBasicProperty<MFloat>(
794 "testUpdatePartCellsWorkloadThreshold", AT_, &m_testUpdatePartCellsWorkloadThreshold);
796 m_log <<
"Testing partition cell update before DLB: offspringThreshold = "
797 << m_testUpdatePartCellsOffspringThreshold
798 <<
"; workloadThreshold = " << m_testUpdatePartCellsWorkloadThreshold << std::endl;
814 m_debugBalance =
false;
815 m_debugBalance = Context::getBasicProperty<MBool>(
"debugBalance", AT_, &m_debugBalance);
817 if(m_testDynamicLoadBalancing) {
819 m_forceLoadBalancing =
true;
820 m_debugBalance =
true;
836 m_outputDlbTimings =
false;
837 m_outputDlbTimings = Context::getBasicProperty<MBool>(
"outputDlbTimings", AT_, &m_outputDlbTimings);
839 m_log <<
"Dynamic load balancing activated (mode = " << m_loadBalancingMode
840 <<
", interval = " << m_loadBalancingInterval <<
", startTimeStep = " << m_loadBalancingStartTimeStep
841 <<
", stopTimeStep = " << m_loadBalancingStopTimeStep
842 <<
", timerStartOffset = " << m_loadBalancingTimerStartOffset <<
", force = " << m_forceLoadBalancing
843 <<
", test = " << m_testDynamicLoadBalancing <<
")" << std::endl;
862 const MString dlbPartitionMethod = Context::getBasicProperty<MString>(
"dlbPartitionMethod", AT_);
863 m_dlbPartitionMethod =
string2enum(dlbPartitionMethod);
867 if(m_testDynamicLoadBalancing) {
879 m_dlbUpdatePartitionCells =
false;
880 m_dlbUpdatePartitionCells =
881 Context::getBasicProperty<MBool>(
"dlbUpdatePartitionCells", AT_, &m_dlbUpdatePartitionCells);
883 m_useDomainFactor =
false;
884 m_useDomainFactor = Context::getBasicProperty<MBool>(
"dlbPartitionDomainFactor", AT_, &m_useDomainFactor);
886 m_dlbSmoothGlobalShifts =
true;
887 m_dlbSmoothGlobalShifts = Context::getBasicProperty<MBool>(
"dlbSmoothGlobalShifts", AT_, &m_dlbSmoothGlobalShifts);
889 m_dlbNoLocalShifts = 0;
890 m_dlbNoLocalShifts = Context::getBasicProperty<MInt>(
"dlbNoLocalShifts", AT_, &m_dlbNoLocalShifts);
892 m_dlbNoFinalLocalShifts = 0;
893 m_dlbNoFinalLocalShifts = Context::getBasicProperty<MInt>(
"dlbNoFinalLocalShifts", AT_, &m_dlbNoFinalLocalShifts);
896 m_dlbMaxWorkloadLimit = 1.5;
897 m_dlbMaxWorkloadLimit = Context::getBasicProperty<MFloat>(
"dlbMaxWorkloadLimit", AT_, &m_dlbMaxWorkloadLimit);
898 if(m_dlbMaxWorkloadLimit > 0.0 && m_dlbMaxWorkloadLimit < 1.0) {
899 TERMM(1,
"DLB: maximum workload limit needs to be > 1.0, is " + std::to_string(m_dlbMaxWorkloadLimit)
900 +
"; or set to <= 0.0 to disable this feature.");
919 m_dlbImbalanceThreshold = 0.05;
920 m_dlbImbalanceThreshold = Context::getBasicProperty<MFloat>(
"dlbImbalanceThreshold", AT_, &m_dlbImbalanceThreshold);
933 m_maxPerformanceVarThreshold = 0.15;
934 m_maxPerformanceVarThreshold =
935 Context::getBasicProperty<MFloat>(
"maxPerformanceVarThreshold", AT_, &m_maxPerformanceVarThreshold);
937 m_lastLoadBalancingTimeStep = m_loadBalancingStartTimeStep;
939 if(m_loadBalancingMode == 1) {
940 m_log <<
"Dynamic load balancing: using partition method #" << m_dlbPartitionMethod
941 <<
"; imbalance percentage threshold: " << m_dlbImbalanceThreshold * 100.0
942 <<
"%; maxPerfVarThreshold: " << m_maxPerformanceVarThreshold << std::endl;
943 m_log <<
"DLB settings: domainFactor=" << m_useDomainFactor <<
"; noLocalShifts=" << m_dlbNoLocalShifts
944 <<
"; noFinalLocalShifts=" << m_dlbNoFinalLocalShifts <<
"; maxWorkloadLimit=" << m_dlbMaxWorkloadLimit
945 <<
"; smooth shifts=" << m_dlbSmoothGlobalShifts << std::endl;
948 m_dlbRestartWeights =
false;
950 m_dlbRestartWeights =
true;
952 m_dlbRestartWeights = Context::getBasicProperty<MBool>(
"dlbRestartWeights", AT_, &m_dlbRestartWeights);
954 m_dlbStaticWeights =
nullptr;
958 m_dlbStaticWeightMode = -1;
960 m_dlbStaticWeightMode = 0;
962 cerr0 <<
"Dlb static Load size does not match!" << globalNoLoadTypes() <<
" "
965 mAlloc(m_dlbStaticWeights, globalNoLoadTypes(),
"m_dlbStaticWeight", AT_);
966 for(
MInt i = 0; i < globalNoLoadTypes(); i++) {
967 m_dlbStaticWeights[i] = Context::getBasicProperty<MFloat>(
"dlbStaticWeights", AT_, i);
970 m_dlbStaticWeightMode = Context::getBasicProperty<MInt>(
"dlbStaticWeightMode", AT_, &m_dlbStaticWeightMode);
978 NEW_TIMER_GROUP_NOCREATE(m_timerGroup,
"GridController (noSolvers = " + std::to_string(
noSolvers()) +
")");
980 NEW_TIMER_NOCREATE(m_timers[
Timers::Controller],
"total object lifetime", m_timerGroup);
997 const MInt noSolversAndCouplers =
noSolvers() + noCouplers();
998 m_solverTimerGroups.resize(noSolversAndCouplers);
999 m_solverTimers.resize(noSolversAndCouplers);
1001 for(
MInt b = 0;
b < noSolversAndCouplers;
b++) {
1002 m_solverTimerGroups[
b] = -1;
1003 m_solverTimers[
b].fill(-1);
1006 const MString groupName = (isSolver) ?
"solverId = " + std::to_string(solver(
b).solverId())
1007 :
"couplerId = " + std::to_string(coupler(
b -
noSolvers()).couplerId());
1009 NEW_TIMER_GROUP_NOCREATE(m_solverTimerGroups[
b],
"GridController: " + groupName);
1014 NEW_SUB_TIMER_NOCREATE(m_solverTimers[
b][
SolverTimers::DLB],
"Dynamic load balancing", solverTimer);
1059 if(!m_balance || noDomains() == 1) {
1064 gridb().m_wasBalanced =
false;
1066 std::vector<std::pair<MFloat, MString>> durations{};
1067 auto logDuration = [&durations](
const MFloat time,
const MString comment,
const MBool fromTimer =
false) {
1069 durations.push_back(std::make_pair(duration, comment));
1078 if(noDlbTimers == 0 && m_loadBalancingMode == 1 && !m_testDynamicLoadBalancing) {
1079 std::cerr <<
"There are no DLB timers, but loadBalancingMode is 1 and testing is off; "
1080 "switching to loadBalancingMode = 0."
1082 m_loadBalancingMode = 0;
1086 MFloat localRunTime = 0.0;
1087 MFloat localIdleTime = 0.0;
1089 for(
MInt i = 0; i < noDlbTimers; i++) {
1093 if(m_loadBalancingMode == 1 && !m_testDynamicLoadBalancing && (loadRecord < 0.0 || idleRecord < 0.0)) {
1094 TERMM(1,
"Load/Idle record for dlb timer #" + std::to_string(i) +
" is less than zero on global domain #"
1095 + std::to_string(domainId()) +
": " + std::to_string(loadRecord) +
", "
1096 + std::to_string(idleRecord));
1099 localRunTime += loadRecord;
1100 localIdleTime += idleRecord;
1106 const MFloat localRunTimeLastStep = localRunTime - m_dlbPreviousLocalRunTime;
1107 const MFloat localIdleTimeLastStep = localIdleTime - m_dlbPreviousLocalIdleTime;
1110 m_dlbPreviousLocalRunTime = localRunTime;
1111 m_dlbPreviousLocalIdleTime = localIdleTime;
1114 m_dlbTimings.push_back(localRunTimeLastStep);
1117 if(m_outputDlbTimings) {
1118 m_dlbTimeStepsAll.push_back(timeStep);
1119 m_dlbRunTimeAll.push_back(localRunTimeLastStep);
1120 m_dlbIdleTimeAll.push_back(localIdleTimeLastStep);
1130 MBool resetTimeStep = (m_lastLoadBalancingTimeStep + m_loadBalancingTimerStartOffset == timeStep
1131 || timeStep == m_loadBalancingStartTimeStep + m_loadBalancingTimerStartOffset);
1134 if(m_balanceAfterAdaptation) {
1136 resetTimeStep = resetTimeStep || (timeStep - m_lastAdaptationTimeStep == m_loadBalancingTimerStartOffset);
1138 resetTimeStep = resetTimeStep || (timeStep - m_lastAdaptationTimeStep == 0);
1141 MBool dlbTimeStep = ((timeStep - m_loadBalancingOffset) % m_loadBalancingInterval == 0);
1142 if(m_balanceAfterAdaptation) {
1143 dlbTimeStep = dlbTimeStep
1144 || (((timeStep - m_lastAdaptationTimeStep) == m_loadBalancingOffset)
1145 && (m_nAdaptationsSinceBalance >= m_balanceAdaptationInterval || m_dlbStep < 2)
1146 && (m_loadBalancingStopTimeStep < 0 ||
globalTimeStep < m_loadBalancingStopTimeStep));
1149 if(resetTimeStep && !dlbTimeStep) {
1159 if(!force && !finalTimeStep && (m_loadBalancingInterval <= 0 || !dlbTimeStep)) {
1165 const MBool initialBalance = (timeStep == -1 && force);
1166 const MBool initialAdaptationOnly = (timeStep == -1 && m_performanceOutput);
1167 const MBool afterLastDlbStep = (timeStep > m_loadBalancingStopTimeStep && m_loadBalancingStopTimeStep != -1);
1168 const MBool beforeFirstDlbStep = (timeStep <= m_loadBalancingStartTimeStep);
1170 const MBool performanceOutput =
1171 (m_performanceOutput || finalTimeStep || afterLastDlbStep || beforeFirstDlbStep) && !initialBalance;
1174 if(!dlbTimeStep && (!initialBalance || initialAdaptationOnly)) {
1176 if((!force && !finalTimeStep) || initialAdaptationOnly) {
1182 if(!performanceOutput) {
1183 m_log <<
"Dynamic load balancing at time step " << timeStep << std::endl;
1184 cerr0 <<
"=== Dynamic load balancing at time step " << timeStep << std::endl;
1187 logTimerStatistics(
"before balance");
1190 const MBool lastDlbStep =
1191 (timeStep + m_loadBalancingInterval > m_loadBalancingStopTimeStep && m_loadBalancingStopTimeStep != -1)
1192 && !m_balanceAfterAdaptation;
1195 const MInt isDlbRevertStep =
1196 (m_loadBalancingMode == 1 && !lastDlbStep && m_loadBalancingStopTimeStep != -1 && m_dlbNoFinalLocalShifts > 0
1197 && timeStep == m_loadBalancingStopTimeStep - m_loadBalancingInterval * (m_dlbNoFinalLocalShifts + 1));
1202 MInt noTimeSteps = timeStep - m_dlbLastResetTimeStep;
1203 if(timeStep < 0) noTimeSteps = 0;
1205 if(m_loadBalancingMode == 1) {
1206 ASSERT(noTimeSteps > 0,
"ERROR: noTimeSteps = " + std::to_string(noTimeSteps));
1209 if(noTimeSteps != (
MInt)m_dlbTimings.size()) {
1210 m_log <<
"DLB: Number of timings does not match: " + std::to_string(m_dlbTimings.size()) +
" "
1211 + std::to_string(noTimeSteps) +
" (beforeFirstStep = " + std::to_string(beforeFirstDlbStep)
1212 +
"; afterLastDlbStep = " + std::to_string(afterLastDlbStep) +
")"
1214 std::cerr <<
"WARNING: number of timings mismatch! " << noTimeSteps <<
" " << m_dlbTimings.size() << std::endl;
1215 noTimeSteps = m_dlbTimings.size();
1219 std::vector<MFloat> timings(m_dlbTimings.begin(), m_dlbTimings.end());
1221 std::sort(timings.begin(), timings.end());
1222 const MInt noSamples = timings.size();
1223 if(noSamples < 1 && !initialBalance) {
1224 TERMM(1,
"ERROR: number of samples is < 1");
1227 const MBool truncatedMean = !m_balanceAfterAdaptation;
1229 const MFloat trim = 0.25;
1230 const MInt lowerBound = std::floor(trim * noSamples);
1231 const MInt upperBound = std::max(
MInt(std::floor((1 - trim) * noSamples)), std::min(noSamples, 1));
1232 const MInt noSamplesTruncated = upperBound - lowerBound;
1234 if(m_loadBalancingMode == 1 && noSamplesTruncated < 1 && !initialBalance) {
1235 std::cerr <<
"ERROR no samples truncated " << noSamplesTruncated << std::endl;
1239 const MFloat truncatedMeanRunTime =
1240 std::accumulate(&timings[lowerBound], &timings[upperBound], 0.0) / noSamplesTruncated;
1241 const MFloat meanRunTime = std::accumulate(timings.begin(), timings.end(), 0.0) / noSamples;
1242 const MFloat meanRunTimeTrigger = (truncatedMean) ? truncatedMeanRunTime : meanRunTime;
1248 MFloat timePerStep = (localRunTime + localIdleTime) / noTimeSteps;
1249 MFloat maxRunTime = localRunTime / noTimeSteps;
1250 const MFloat localTimePerStep = timePerStep;
1252 MPI_Allreduce(MPI_IN_PLACE, &timePerStep, 1, MPI_DOUBLE, MPI_MAX, gridb().mpiComm(), AT_,
"MPI_IN_PLACE",
1254 MPI_Allreduce(MPI_IN_PLACE, &maxRunTime, 1, MPI_DOUBLE, MPI_MAX, gridb().mpiComm(), AT_,
"MPI_IN_PLACE",
1257 MFloat maxIdleTime = localIdleTime / noTimeSteps;
1258 MFloat minIdleTime = localIdleTime / noTimeSteps;
1259 MPI_Allreduce(MPI_IN_PLACE, &maxIdleTime, 1, MPI_DOUBLE, MPI_MAX, gridb().mpiComm(), AT_,
"MPI_IN_PLACE",
1261 MPI_Allreduce(MPI_IN_PLACE, &minIdleTime, 1, MPI_DOUBLE, MPI_MIN, gridb().mpiComm(), AT_,
"MPI_IN_PLACE",
1264 const MFloat minIdleTimeRel = minIdleTime / timePerStep;
1266 m_log << timeStep <<
" * Average time per step: " << timePerStep <<
" ; local: " << localTimePerStep
1267 <<
", idle/comp = " << localIdleTime / localRunTime
1268 <<
", idle/timePerStep = " << localIdleTime / (noTimeSteps * timePerStep) << std::endl;
1269 m_log << timeStep <<
" * Relative idle time: max = " << maxIdleTime / timePerStep <<
", min "
1270 << minIdleTime / timePerStep << std::endl;
1271 m_log << timeStep <<
" * maxRunTime " << maxRunTime << std::endl;
1273 logDuration(dlbStartTime,
"DLB preparation/timers");
1279 MBool loadBalance = initialBalance;
1281 loadBalance = needLoadBalancing(meanRunTimeTrigger, &loads[0], imbalance) || force || (m_forceBalance > 0);
1283 if(m_forceBalance == 1) {
1287 logDuration(imbalanceStartTime,
"Imbalance evaluation");
1291 MFloat localRunTimeVariance = 0.0;
1292 for(
MInt i = 0; i < noSamples; i++) {
1293 localRunTimeVariance +=
POW2(timings[i] - meanRunTime);
1295 localRunTimeVariance /= noSamples;
1296 const MFloat localRunTimeStdev = std::sqrt(localRunTimeVariance);
1298 const MFloat performanceVariation = localRunTimeStdev / meanRunTime;
1300 perfVarMax[0] = performanceVariation;
1302 perfVarMax[1] = (loads[domainId()] > 0.85) ? performanceVariation : 0.0;
1305 "MPI_IN_PLACE",
"perfVarMax");
1306 const MFloat maxPerformanceVariation = perfVarMax[1];
1308 const MBool performanceVariationCheck = (maxPerformanceVariation > m_maxPerformanceVarThreshold && !m_adaptation);
1310 m_log << timeStep <<
" * DLB: Performance variation: load>0.85 " << maxPerformanceVariation <<
"; all "
1311 << perfVarMax[0] << std::endl;
1313 m_log << timeStep <<
" * Average timings: avgRunTime = " << localRunTime / noTimeSteps
1314 <<
", truncated = " << truncatedMean <<
" " << truncatedMeanRunTime <<
", diff "
1315 << meanRunTime - truncatedMeanRunTime <<
", noTimeSteps = " << noTimeSteps << std::endl;
1317 m_log << timeStep <<
" * timePerStep = " << timePerStep <<
"; imbalance = " << imbalance
1318 <<
"; maxRunTime = " << maxRunTime <<
"; minIdleTimeRel = " << minIdleTimeRel << std::endl;
1320 if(m_loadBalancingMode == 1 && performanceVariationCheck && !lastDlbStep && !performanceOutput
1321 && !m_testDynamicLoadBalancing) {
1326 std::stringstream perfMessage;
1327 perfMessage <<
"DLB: Performance variation " << maxPerformanceVariation <<
", skip DLB step." << std::endl;
1328 m_log << perfMessage.str();
1329 if(domainId() == 0) {
1330 std::cout << perfMessage.str();
1335 m_lastLoadBalancingTimeStep = timeStep;
1336 m_nAdaptationsSinceBalance = 0;
1341 if(m_loadBalancingMode == 0) {
1342 m_lastLoadBalancingTimeStep = timeStep;
1343 m_nAdaptationsSinceBalance = 0;
1350 MBool newBestTimePerStep =
false;
1351 if(m_loadBalancingMode == 1 && !performanceOutput && !performanceVariationCheck && !m_testDynamicLoadBalancing
1352 && ((timePerStep < m_timePerStepTotal && imbalance < 1.05 * m_imbalance)
1353 || (imbalance < m_imbalance && timePerStep < 1.05 * m_timePerStepTotal))) {
1354 if(m_optPartitionCellOffsetTotal == -1) {
1355 m_timePerStepTotal = -1.0;
1358 m_log << timeStep <<
" * Storing new best domain partitioning: timePerStep = " << timePerStep <<
" ("
1359 << m_timePerStepTotal <<
"); imbalance = " << imbalance <<
" (" << m_imbalance <<
"), maxRunTime "
1360 << maxRunTime <<
" minIdleTimeRel " << minIdleTimeRel << std::endl;
1361 newBestTimePerStep =
true;
1362 m_timePerStepTotal = timePerStep;
1363 m_imbalance = imbalance;
1364 m_optPartitionCellOffsetTotal = gridb().m_localPartitionCellOffsets[0];
1371 std::stringstream partitionFileName;
1372 partitionFileName <<
"partition_n" << noDomains();
1373 gridb().savePartitionFile(partitionFileName.str(), m_optPartitionCellOffsetTotal);
1375 }
else if(m_testDynamicLoadBalancing && finalTimeStep) {
1377 std::stringstream partitionFileName;
1378 partitionFileName <<
"partition_n" << noDomains();
1379 gridb().savePartitionFile(partitionFileName.str(), gridb().m_localPartitionCellOffsets[0]);
1383 if(!loadBalance && !m_forceLoadBalancing && (isDlbRevertStep == 0)) {
1384 m_log <<
" * no load imbalance detected at timestep " << timeStep <<
"!" << std::endl;
1387 if(!(lastDlbStep && !newBestTimePerStep)) {
1392 if(!performanceOutput) {
1393 m_log <<
" * load imbalance detected at timestep " << timeStep <<
"!" << std::endl;
1398 if(performanceOutput) {
1401 m_log <<
"Performance evaluation: clear timings at timestep " << timeStep <<
"!" << std::endl;
1410 printDomainStatistics(
"before load balancing");
1413 MInt backupLevel = -1;
1414 if(gridb().m_newMinLevel > 0) {
1415 backupLevel = gridb().m_newMinLevel;
1416 gridb().m_newMinLevel = -1;
1419 gridb().computeGlobalIds();
1420 gridb().storeMinLevelCells();
1429 solver(i).cancelMpiRequests();
1436 MBool partitionCellChange =
false;
1444 if(m_dlbUpdatePartitionCells && !lastDlbStep) {
1445 MInt offspringThreshold = gridb().m_partitionCellOffspringThreshold;
1446 MFloat workloadThreshold = gridb().m_partitionCellWorkloadThreshold;
1449 if(m_testUpdatePartitionCells && m_dlbStep % 2 == 0) {
1450 offspringThreshold = m_testUpdatePartCellsOffspringThreshold;
1451 workloadThreshold = m_testUpdatePartCellsWorkloadThreshold;
1455 partitionCellChange = gridb().updatePartitionCells(offspringThreshold, workloadThreshold);
1457 if(partitionCellChange) {
1459 m_saveGridNewPartitionCells =
true;
1462 m_optPartitionCellOffsetTotal = -1;
1465 logDuration(updatePartCellsStartTime,
"Update partition cells");
1468 MLongScratchSpace partitionCellOffsets(noDomains() + 1, AT_,
"partitionCellOffsets");
1473 if(m_loadBalancingMode == 0 || timeStep == -1) {
1475 accumulateCellWeights();
1478 partition(&partitionCellOffsets[0], &globalIdOffsets[0],
false);
1488 MBool newPartition =
false;
1490 if((!lastDlbStep || m_testDynamicLoadBalancing) && ((isDlbRevertStep == 0) || newBestTimePerStep)) {
1492 newPartition = loadBalancingPartition(&loads[0], imbalance, &partitionCellOffsets[0], &globalIdOffsets[0]);
1495 if(((!newBestTimePerStep && !m_adaptation) || (isDlbRevertStep != 0)) && m_optPartitionCellOffsetTotal != -1) {
1496 m_log << timeStep <<
" * DLB reverting to best configuration." << std::endl;
1501 MLongScratchSpace localPartitionCellOffsets(noDomains() + 1, AT_,
"localPartitionCellOffsets");
1504 "gridb().m_localPartitionCellOffsets[0]",
"localPartitionCellOffsets[0]");
1505 localPartitionCellOffsets[noDomains()] = gridb().m_noPartitionCellsGlobal;
1510 "partitionCellOffsets[0]");
1511 partitionCellOffsets[noDomains()] = gridb().m_noPartitionCellsGlobal;
1515 (std::mismatch(partitionCellOffsets.
begin(), partitionCellOffsets.
end(), &localPartitionCellOffsets[0]))
1517 != partitionCellOffsets.
end();
1519 loadBalancingCalcNewGlobalOffsets(&localPartitionCellOffsets[0], &partitionCellOffsets[0], &globalIdOffsets[0]);
1524 newPartition =
false;
1531 if(!newPartition && !partitionCellChange && !m_forceLoadBalancing) {
1532 m_log <<
"Dynamic load balancing: load imbalance detected but partition did not change at "
1534 << timeStep <<
"!" << std::endl;
1539 logDuration(partitionStartTime,
"New partitioning");
1555 solver(i).resetSolver();
1561 communicateGlobalSolverVars(&solver(i));
1566 logDuration(resetStartTime,
" Reset solver #" + std::to_string(i));
1570 gridb().deletePeriodicConnection(
false);
1572 std::vector<MInt>().swap(gridb().m_minLevelCells);
1574 gridb().localToGlobalIds();
1576 const MInt oldNoCells = gridb().treeb().size();
1578 MIntScratchSpace noCellsToReceiveByDomain(noDomains() + 1, AT_,
"noCellsToReceiveByDomain");
1579 MIntScratchSpace noCellsToSendByDomain(noDomains() + 1, AT_,
"noCellsToSendByDomain");
1584 bufferIdToCellId.
fill(-1);
1586 loadBalancingCalcNoCellsToSend(&globalIdOffsets[0], &noCellsToSendByDomain[0], &noCellsToReceiveByDomain[0],
1587 &sortedCellId[0], &bufferIdToCellId[0]);
1588 logDuration(prepareGridStartTime,
" Prepare grid");
1590 std::vector<std::vector<MInt>> dataSendSize{};
1591 std::vector<std::vector<MInt>> dataRecvSize{};
1600 determineDataSizesDlb(i, 0, &noCellsToSendByDomain[0], &bufferIdToCellId[0], dataSendSize, dataRecvSize);
1605 solver(i).localToGlobalIds();
1610 logDuration(dataStartTime,
" Data sizes solver #" + std::to_string(i));
1614 std::vector<std::vector<MInt>> dataSendSizeCoupler{};
1615 std::vector<std::vector<MInt>> dataRecvSizeCoupler{};
1618 for(
MInt i = 0; i < noCouplers(); i++) {
1625 determineDataSizesDlb(i, 1, &noCellsToSendByDomain[0], &bufferIdToCellId[0], dataSendSizeCoupler,
1626 dataRecvSizeCoupler);
1631 logDuration(dataStartTime,
" Data sizes coupler #" + std::to_string(i));
1636 logDuration(prepareStartTime,
"Prepare balancing");
1641 gridb().balance(&noCellsToReceiveByDomain[0], &noCellsToSendByDomain[0], &sortedCellId[0], &partitionCellOffsets[0],
1642 &globalIdOffsets[0]);
1644 logDuration(balanceGridStartTime,
"Balance grid");
1656 if(!solver(i).hasSplitBalancing()) {
1657 solver(i).balance(&noCellsToReceiveByDomain[0], &noCellsToSendByDomain[0], &sortedCellId[0], oldNoCells);
1660 std::vector<MInt*> intDataRecv{};
1661 std::vector<MLong*> longDataRecv{};
1662 std::vector<MFloat*> floatDataRecv{};
1663 std::vector<MInt> dataTypes{};
1668 redistributeDataDlb(i, 0, dataSendSize[i], dataRecvSize[i], &bufferIdToCellId[0], oldNoCells, intDataRecv,
1669 longDataRecv, floatDataRecv, dataTypes);
1672 "Redistribute solver #" + std::to_string(i),
true);
1677 solver(i).balancePre();
1683 setDataDlb(i, 0, intDataRecv, longDataRecv, floatDataRecv, dataTypes,
false);
1685 logDuration(RETURN_TIMER(m_solverTimers[i][
SolverTimers::SetData]),
"SetData1 solver #" + std::to_string(i),
1690 solver(i).globalToLocalIds();
1695 solver(i).balancePost();
1698 "BalancePost solver #" + std::to_string(i),
true);
1702 setDataDlb(i, 0, intDataRecv, longDataRecv, floatDataRecv, dataTypes,
true);
1704 logDuration(RETURN_TIMER(m_solverTimers[i][
SolverTimers::SetData]),
"SetData2 solver #" + std::to_string(i),
1707 intDataRecv.clear();
1708 longDataRecv.clear();
1709 floatDataRecv.clear();
1713 logDuration(balanceSolverStartTime,
"Balance solver #" + std::to_string(i));
1719 logDuration(balanceSolversStartTime,
"Balance solvers");
1724 for(
MInt i = 0; i < noCouplers(); i++) {
1732 std::vector<MInt*> intDataRecv{};
1733 std::vector<MLong*> longDataRecv{};
1734 std::vector<MFloat*> floatDataRecv{};
1735 std::vector<MInt> dataTypes{};
1739 redistributeDataDlb(i, 1, dataSendSizeCoupler[i], dataRecvSizeCoupler[i], &bufferIdToCellId[0], oldNoCells,
1740 intDataRecv, longDataRecv, floatDataRecv, dataTypes);
1745 coupler(i).balancePre();
1749 setDataDlb(i, 1, intDataRecv, longDataRecv, floatDataRecv, dataTypes,
false);
1754 coupler(i).balancePost();
1759 setDataDlb(i, 1, intDataRecv, longDataRecv, floatDataRecv, dataTypes,
true);
1762 intDataRecv.clear();
1763 longDataRecv.clear();
1764 floatDataRecv.clear();
1772 logDuration(balanceCouplersStartTime,
"Balance couplers");
1778 solver(i).finalizeBalance();
1779 for(
MInt j = 0; j < noCouplers(); j++) {
1781 coupler(j).finalizeBalance(i);
1782 logDuration(finalizeCouplerStartTime,
1783 "Finalize balance #" + std::to_string(i) +
" coupler #" + std::to_string(j));
1785 logDuration(finalizeSolverStartTime,
"Finalize balance solver #" + std::to_string(i));
1787 logDuration(finalizeBalanceStartTime,
"Finalize balance total");
1790 printDomainStatistics(
"after load balancing");
1798 m_dlbPreviousLocalRunTime = 0.0;
1799 m_dlbPreviousLocalIdleTime = 0.0;
1804 m_lastLoadBalancingTimeStep = timeStep;
1805 m_nAdaptationsSinceBalance = 0;
1808 if(m_outputDlbTimings) {
1812 logDuration(dlbStartTime,
"Balance total");
1816 std::stringstream dlbMessage;
1817 dlbMessage <<
"=== Dynamic load balancing performed at timestep " << timeStep <<
"! Duration: " << dlbTimeTotal
1818 <<
" s" << std::endl;
1819 m_log << dlbMessage.str();
1820 cerr0 << dlbMessage.str();
1825 if(backupLevel > 0) {
1826 gridb().m_newMinLevel = backupLevel;
1837 MLong* globalIdOffsets,
1838 const MBool onlyPartitionOffsets) {
1840 if(gridb().m_maxPartitionLevelShift > 0) {
1841 cerr0 <<
"WARNING: Load balancing with partition level shifts might not fully work!" << std::endl;
1845 const MLong noPartitionCells = gridb().m_noPartitionCells;
1848 MFloat totalWorkload = 0.0;
1849 MFloatScratchSpace partitionCellsWorkload(std::max(noPartitionCells, 1L), AT_,
"partitionCellsWorkload");
1850 MLongScratchSpace partitionCellsGlobalId(std::max(noPartitionCells, 1L), AT_,
"partitionCellsGlobalId");
1852 gridb().calculateNoOffspringsAndWorkload(
static_cast<Collector<void>*
>(
nullptr), gridb().treeb().size());
1854 for(
MUint i = 0; i < noPartitionCells; i++) {
1855 const MLong globalCellId = gridb().m_localPartitionCellGlobalIds[i];
1856 const MInt cellId = gridb().globalIdToLocalId(globalCellId,
true);
1858 partitionCellsWorkload(i) = gridb().a_workload(cellId);
1859 partitionCellsGlobalId(i) = gridb().a_globalId(cellId);
1860 ASSERT(globalCellId == partitionCellsGlobalId(i),
1861 "global cell id mismatch! " + std::to_string(globalCellId) +
" " + std::to_string(partitionCellsGlobalId(i))
1862 +
" " + std::to_string(cellId));
1864 ASSERT(i == 0 || partitionCellsGlobalId(i) > partitionCellsGlobalId(i - 1),
1865 "Partition cells not sorted by global id: " + std::to_string(partitionCellsGlobalId(i))
1866 +
" <= " + std::to_string(partitionCellsGlobalId(i - 1)));
1867 totalWorkload += gridb().a_workload(cellId);
1870 MPI_Allreduce(MPI_IN_PLACE, &totalWorkload, 1, MPI_DOUBLE, MPI_SUM, gridb().mpiComm(), AT_,
"MPI_IN_PLACE",
1873 MBool calcGlobalOffsets =
false;
1875 if(gridb().m_partitionParallelSplit) {
1877 gridb().m_localPartitionCellOffsets[0],
static_cast<MLong>(noDomains()),
1878 static_cast<MLong>(domainId()), gridb().mpiComm(), &partitionCellOffsets[0]);
1879 partitionCellOffsets[noDomains()] = gridb().m_noPartitionCellsGlobal;
1881 if(!onlyPartitionOffsets) {
1883 calcGlobalOffsets =
true;
1886 gridb().partitionParallel(gridb().m_noPartitionCells, gridb().m_localPartitionCellOffsets[0],
1887 &partitionCellsWorkload[0], &partitionCellsGlobalId[0], totalWorkload,
1888 partitionCellOffsets, globalIdOffsets,
true);
1890 if(!onlyPartitionOffsets && gridb().m_maxPartitionLevelShift > 0) {
1894 calcGlobalOffsets =
true;
1899 if(calcGlobalOffsets) {
1900 MLongScratchSpace localPartitionCellCounts(noDomains(), AT_,
"localPartitionCellCounts");
1901 MLongScratchSpace localPartitionCellOffsets(noDomains() + 1, AT_,
"localPartitionCellOffsets");
1903 gridb().determineNoPartitionCellsAndOffsets(&localPartitionCellCounts[0], &localPartitionCellOffsets[0]);
1906 loadBalancingCalcNewGlobalOffsets(&localPartitionCellOffsets[0], &partitionCellOffsets[0], &globalIdOffsets[0]);
1909 if(domainId() == 0) std::cerr << std::endl;
// Fragment of the domain-statistics output routine (presumably
// printDomainStatistics(status); the function header and several intermediate
// lines are not part of this excerpt). It collects local grid/solver cell
// counts, reduces them over gridb().mpiComm() and prints a summary on domain 0.
1922 MLong noGridLeafCells = 0;
1923 minNoSolverCells.
fill(0);
1924 maxNoSolverCells.
fill(0);
1925 avgNoSolverCells.
fill(0);
1926 globalNoSolverCells.
fill(0);
// Visit every cell of the grid tree; the condition selects non-halo cells
// without children, i.e. local leaf cells (the loop body is elided here,
// presumably it increments noGridLeafCells).
1928 for(
MInt cellId = 0; cellId < gridb().treeb().size(); cellId++) {
1929 if(!gridb().a_hasProperty(cellId, Cell::IsHalo) && gridb().a_noChildren(cellId) == 0) {
// Seed the reduction variables with the local values of this domain.
1934 MLong globalGridNoLeafCells = noGridLeafCells;
1935 MLong globalGridNoCells = (
MLong)gridb().noInternalCells();
1936 MInt maxGridNoCells = gridb().noInternalCells();
1937 MInt minGridNoCells = gridb().noInternalCells();
// Per-solver local internal cell counts (loop header over solvers not shown).
1940 minNoSolverCells(i) = solver(i).noInternalCells();
1941 maxNoSolverCells(i) = solver(i).noInternalCells();
1942 globalNoSolverCells(i) = solver(i).noInternalCells();
// In-place global reductions: SUM for totals, MAX/MIN for the extremes.
// NOTE(review): MPI_LONG is used for MLong variables - assumes MLong has the
// same width as C 'long'; confirm against the type definition.
1945 MPI_Allreduce(MPI_IN_PLACE, &globalGridNoLeafCells, 1, MPI_LONG, MPI_SUM, gridb().mpiComm(), AT_,
"MPI_IN_PLACE",
1946 "globalGridNoLeafCells");
1947 MPI_Allreduce(MPI_IN_PLACE, &globalGridNoCells, 1, MPI_LONG, MPI_SUM, gridb().mpiComm(), AT_,
"MPI_IN_PLACE",
1948 "globalGridNoCells");
1949 MPI_Allreduce(MPI_IN_PLACE, &maxGridNoCells, 1, MPI_INT, MPI_MAX, gridb().mpiComm(), AT_,
"MPI_IN_PLACE",
1951 MPI_Allreduce(MPI_IN_PLACE, &minGridNoCells, 1, MPI_INT, MPI_MIN, gridb().mpiComm(), AT_,
"MPI_IN_PLACE",
1954 "MPI_IN_PLACE",
"maxNoSolverCells[0]");
1956 "MPI_IN_PLACE",
"minNoSolverCells[0]");
1957 MPI_Allreduce(MPI_IN_PLACE, &globalNoSolverCells[0],
noSolvers(), MPI_LONG, MPI_SUM, gridb().mpiComm(), AT_,
1958 "MPI_IN_PLACE",
"globalNoSolverCells[0]");
// Average per domain; the cast to MInt truncates the fractional part.
1961 avgNoSolverCells(i) = (
MInt)(((
MFloat)globalNoSolverCells(i)) / ((
MFloat)noDomains()));
1964 const MInt avgGridNoLeafCells = (
MInt)(((
MFloat)globalGridNoLeafCells) / ((
MFloat)noDomains()));
// Absolute deviation of the local counts from the per-domain average.
// NOTE(review): noGridLeafCells is MLong while the result is stored in an
// MInt - possible narrowing; confirm the value range.
1966 MInt devNoGridCells = std::abs(gridb().noInternalCells() - avgGridNoCells);
1967 MInt devNoGridLeafCells = std::abs(noGridLeafCells - avgGridNoLeafCells);
// Largest deviation over all domains.
1970 MPI_Allreduce(MPI_IN_PLACE, &devNoGridCells, 1, MPI_INT, MPI_MAX, gridb().mpiComm(), AT_,
"MPI_IN_PLACE",
1972 MPI_Allreduce(MPI_IN_PLACE, &devNoGridLeafCells, 1, MPI_INT, MPI_MAX, gridb().mpiComm(), AT_,
"MPI_IN_PLACE",
1973 "devNoGridLeafCells");
// Only domain 0 writes the summary to std::cerr; 'status' is an optional label.
1974 if(domainId() == 0) {
1975 std::cerr <<
"Domain statistics ";
1976 if(!status.empty()) std::cerr << status <<
" ";
1977 std::cerr <<
"at global time step " <<
globalTimeStep <<
": avg no cells=" << avgGridNoCells
1978 <<
", min=" << minGridNoCells <<
", max=" << maxGridNoCells <<
", max deviation=" << devNoGridCells
1979 <<
", max deviation leaf=" << devNoGridLeafCells <<
", total=" << globalGridNoCells << std::endl;
// One line per solver (loop header over solvers not shown).
1981 std::cerr <<
"Solver-statistics for solver " << i <<
" min= " << minNoSolverCells(i)
1982 <<
" max= " << maxNoSolverCells(i) <<
" total= " << globalNoSolverCells(i)
1983 <<
" avg= " << avgNoSolverCells(i) << std::endl;
// Fragment of the mesh-adaptation driver (function header, return statements,
// closing braces and several loop headers are not part of this excerpt).
// Visible flow: guard on m_grid, decide between the "split" and the legacy
// adaptation path, run gridb().meshAdaptation(...) and notify solvers/couplers.
1995 if(m_grid ==
nullptr) {
// Reset the grid's adaptation flag before deciding whether to adapt.
2000 gridb().m_wasAdapted =
false;
// Part of the skip condition: no positive interval configured or the current
// time step is not a multiple of it (remaining terms are elided).
2004 && ((m_adaptationInterval <= 0 ||
globalTimeStep % m_adaptationInterval != 0)
2011 cerr0 <<
"=== Initialising solver-adaptation at time-step " <<
globalTimeStep << std::endl;
2013 logTimerStatistics(
"before adaptation");
// The split path is used only if no solver has m_splitAdaptation disabled;
// a solver without it is reported on std::cerr.
2021 MBool splitAdaptation =
true;
2023 if(!solver(i).m_splitAdaptation) {
2024 std::cerr <<
"Update Adaptation to splitAdaptation! solver " << i << std::endl;
2025 splitAdaptation =
false;
2032 solver(i).cancelMpiRequests();
// ---- Split adaptation path ----
2035 if(splitAdaptation) {
2041 solver(i).prepareAdaptation();
2046 for(
MInt i = 0; i < noCouplers(); i++) {
2050 coupler(i).prepareAdaptation();
// Optional property extending the level loop beyond maxRefinementLevel();
// defaults to 0 when not set.
2057 MInt addedAdaptationSteps = 0;
2058 addedAdaptationSteps = Context::getBasicProperty<MInt>(
"addedAdaptationSteps", AT_, &addedAdaptationSteps);
// One adaptation pass per level, starting at the uniform refinement level.
2059 for(
MInt level = gridb().maxUniformRefinementLevel(); level < gridb().maxRefinementLevel() + addedAdaptationSteps;
// Cell counts before this pass, used for logging and the balance trigger.
2061 const MLong oldLocalCnt = gridb().noInternalCells();
2062 const MLong oldCnt = gridb().noCellsGlobal();
// Sensor containers filled by the solvers; sensorCellFlag has one bitset per
// internal cell.
2063 std::vector<std::vector<MFloat>> sensors;
2064 std::vector<std::bitset<CartesianGrid<nDim>::m_maxNoSensors>> sensorCellFlag(gridb().noInternalCells());
2065 std::vector<MFloat> sensorWeight;
2066 std::vector<MInt> sensorSolverId;
2070 solver(i).setSensors(sensors, sensorWeight, sensorCellFlag, sensorSolverId);
// Sensor data is written if at least one solver requests it.
2077 MBool saveSensorData =
false;
2079 saveSensorData = saveSensorData || solver(i).m_saveSensorData;
2081 if(saveSensorData) {
// Grid file name encodes the level and the global time step.
2083 std::stringstream gridFileName;
2084 gridFileName <<
"sensorDataGrid_" << std::to_string(level) <<
"_" <<
globalTimeStep << ParallelIo::fileExt();
2085 gridb().saveGrid((m_outputDir + gridFileName.str()).c_str(), m_recalcIds);
2088 if(solver(i).m_saveSensorData) {
2089 solver(i).saveSensorData(sensors, level, gridFileName.str(), m_recalcIds);
// Perform the actual grid adaptation; the m_*Solver members are the
// per-solver callback vectors set up in the constructor.
2095 gridb().meshAdaptation(sensors, sensorWeight, sensorCellFlag, sensorSolverId, m_refineCellSolver,
2096 m_removeChildsSolver, m_removeCellSolver, m_swapProxySolver, m_cellOutside,
2097 m_resizeGridMapSolver);
2104 solver(i).postAdaptation();
2110 for(
MInt j = 0; j < noCouplers(); j++) {
2114 coupler(j).postAdaptation();
2119 m_log <<
"Mesh adaptation: " << gridb().noCellsGlobal() - oldCnt <<
" cells generated"
2120 <<
" (before: " << oldCnt <<
", now: " << gridb().noCellsGlobal() <<
")." << std::endl;
// A solver with m_singleAdaptation influences skipLoop (body elided;
// presumably it ends the level loop early).
2122 MBool skipLoop =
false;
2124 if(solver(i).m_singleAdaptation) {
// Intermediate load balancing, only for negative global time steps and an
// enabled load balancing interval.
2131 if(globalTimeStep < 0 && m_loadBalancingInterval > 0) {
2132 MInt balanceTrigger = 0;
// Projection: if the cells added in this pass were each refined again
// (m_maxNoChilds children), would the local cell limit be exceeded?
2133 MInt deltaCells = gridb().noInternalCells() - oldLocalCnt;
2134 if(gridb().noInternalCells() + deltaCells * gridb().m_maxNoChilds > gridb().maxNoCells()) {
// MAX-reduce so that every domain sees the trigger if any domain raised it.
2137 MPI_Allreduce(MPI_IN_PLACE, &balanceTrigger, 1, MPI_INT, MPI_MAX, gridb().mpiComm(), AT_,
"MPI_IN_PLACE",
2139 if(balanceTrigger || level == (gridb().maxRefinementLevel() - 1)) {
2140 cerr0 <<
"Performing intermediate load balancing step." << std::endl;
2141#ifdef MAIA_GRID_SANITY_CHECKS
2142 gridb().gridSanityChecks();
2143 gridb().checkWindowHaloConsistency(
true);
// Temporarily force load balancing mode 0 for this balance() call and
// restore the configured mode afterwards.
2147 const MInt backUp = m_loadBalancingMode;
2148 m_loadBalancingMode = 0;
2149 balance(
true,
false,
true);
2150 m_loadBalancingMode = backUp;
// Finalize: each solver first, then every coupler is told which solver
// index was finalized.
2159 solver(i).finalizeAdaptation();
2160 for(
MInt j = 0; j < noCouplers(); j++) {
2161 coupler(j).finalizeAdaptation(i);
// ---- Legacy (non-split) path; presumably the else-branch of the
// 'if(splitAdaptation)' above - the branch keyword is not visible here. ----
2168 const MLong oldCnt = gridb().noCellsGlobal();
2169 std::vector<std::vector<MFloat>> sensors;
2170 std::vector<std::bitset<CartesianGrid<nDim>::m_maxNoSensors>> sensorCellFlag(gridb().noInternalCells());
2171 std::vector<MFloat> sensorWeight;
2172 std::vector<MInt> sensorSolverId;
2175 std::cerr <<
"preparing adaptation for solver " << i << std::endl;
2176 solver(i).prepareAdaptation(sensors, sensorWeight, sensorCellFlag, sensorSolverId);
2180 gridb().meshAdaptation(sensors, sensorWeight, sensorCellFlag, sensorSolverId, m_refineCellSolver,
2181 m_removeChildsSolver, m_removeCellSolver, m_swapProxySolver, m_cellOutside,
2182 m_resizeGridMapSolver);
2186 solver(i).reinitAfterAdaptation();
2189 m_log <<
"Mesh adaptation: " << gridb().noCellsGlobal() - oldCnt <<
" cells generated"
2190 <<
" (before: " << oldCnt <<
", now: " << gridb().noCellsGlobal() <<
")." << std::endl;
// Bookkeeping: this counter is read by the DLB time-step check together with
// m_balanceAdaptationInterval.
2202 m_nAdaptationsSinceBalance = m_nAdaptationsSinceBalance + 1;
2204 printDomainStatistics(
"after adaptation");
2206 cerr0 <<
"=== Finished adaptation" << std::endl;
// Fragment of the cell-weight accumulation routine (presumably
// accumulateCellWeights(); header and some lines are not part of this
// excerpt). It rebuilds the per-cell grid weights from the solver weights.
2222 const MInt noGridCells = gridb().treeb().size();
// Start from zero for every cell of the grid tree.
2225 for(
MInt gridCellId = 0; gridCellId < noGridCells; gridCellId++) {
2226 gridb().treeb().weight(gridCellId) = 0.0;
// Variant 1: no restart weights requested or none available -> ask every
// active solver for its cell weights.
2229 if(!m_dlbRestartWeights || m_dlbLastWeights ==
nullptr) {
2239 solverweight.
fill(F0);
2242 if(solver(i).isActive()) {
2243 solver(i).setCellWeights(&solverweight[0]);
// solverweight is indexed solver-major: entry (i, gridCellId) is stored at
// gridCellId + noGridCells * i; the contributions are summed per cell.
2249 for(
MInt gridCellId = 0; gridCellId < noGridCells; gridCellId++) {
2250 const MInt id = gridCellId + noGridCells * i;
2251 gridb().treeb().weight(gridCellId) += solverweight[
id];
// Variant 2 (presumably the else-branch): use the stored DLB weights.
// weightOffset points to the first weight of the current solver.
2258 MInt weightOffset = 0;
2259 for(
MInt solverId = 0; solverId <
noSolvers(); solverId++) {
2260 if(solver(solverId).isActive()) {
2261 for(
MInt cellId = 0; cellId < noGridCells; cellId++) {
// Halo cells are treated separately (body elided, presumably skipped).
2262 if(gridb().a_hasProperty(cellId, Cell::IsHalo)) {
2265 gridb().a_weight(cellId) += solver(solverId).getCellLoad(cellId, &m_dlbLastWeights[weightOffset]);
// Advance to the next solver's block of weights.
2268 weightOffset += solver(solverId).noLoadTypes();
// Fragment of the weight/workload update routine (presumably
// updateWeightsAndWorkloads(weights, restoreDefaultWeights); the first line
// of the signature is not part of this excerpt).
// restoreDefaultWeights: if true every cell gets weight 1.0 and the given
// weights are ignored; otherwise cell weights are summed from solver loads.
2278 const MBool restoreDefaultWeights) {
2281 const MInt noGridCells = gridb().treeb().size();
// Initial value per cell: 1.0 (default weights) or 0.0 (accumulation base).
2285 const MFloat initWeight = (restoreDefaultWeights) ? 1.0 : 0.0;
2286 for(
MInt gridCellId = 0; gridCellId < noGridCells; gridCellId++) {
2287 gridb().a_weight(gridCellId) = initWeight;
2290 if(!restoreDefaultWeights) {
// weightOffset is the start of the current solver's block inside 'weights'.
2292 MInt weightOffset = 0;
2293 for(
MInt solverId = 0; solverId <
noSolvers(); solverId++) {
2294 if(solver(solverId).isActive()) {
2295 for(
MInt cellId = 0; cellId < noGridCells; cellId++) {
// Halo cells do not contribute.
2296 if(gridb().a_hasProperty(cellId, Cell::IsHalo))
continue;
2297 gridb().a_weight(cellId) += solver(solverId).getCellLoad(cellId, &weights[weightOffset]);
2300 weightOffset += solver(solverId).noLoadTypes();
// Let the grid recompute offspring counts and workloads from the new weights
// (called with a null collector).
2305 gridb().calculateNoOffspringsAndWorkload(
static_cast<Collector<void>*
>(
nullptr), noGridCells);
// Fragment of the routine that updates the partition cell workloads and
// stores them in the grid file (header not part of this excerpt; the name
// used for this block is a guess).
// The whole step is opt-in via the property 'updateGridPartitionWorkloads'
// (default false).
2316 MBool updateGridPartitionWorkloads =
false;
2317 updateGridPartitionWorkloads =
2318 Context::getBasicProperty<MBool>(
"updateGridPartitionWorkloads", AT_, &updateGridPartitionWorkloads);
// Nothing to do when the property is not enabled (body elided).
2319 if(!updateGridPartitionWorkloads) {
2323 m_log <<
"Updating partition cell workloads and save them to grid... " << std::endl;
// Optional property 'restoreDefaultWorkloads' (default false).
2325 MBool restore =
false;
2326 restore = Context::getBasicProperty<MBool>(
"restoreDefaultWorkloads", AT_, &restore);
2329 const MInt noLoadTypes = globalNoLoadTypes();
2330 std::vector<MFloat> weights{};
// Default weights are used when there are no load types or restoring was
// requested; otherwise the user-specified solver weights are read.
2332 if(noLoadTypes < 1 || restore) {
2334 m_log <<
"Restoring default weights" << std::endl;
2336 m_log <<
"Using specified solver weights" << std::endl;
2338 getSpecifiedSolverWeights(weights);
2342 updateWeightsAndWorkloads(weights, restore);
2344 gridb().savePartitionCellWorkloadsGridFile();
2345 m_log <<
"done" << std::endl;
// Hint for the user, printed once by domain 0.
2347 if(domainId() == 0) {
2348 std::cout <<
"Updated cell weights. Restart solver with 'updateGridPartitionWorkloads = false'" << std::endl;
// Fragment of the routine that fills 'weights' with one value per load type
// of every solver (presumably getSpecifiedSolverWeights(weights); header,
// solver loop header and the declaration of 'offset' are not shown).
2359 const MInt noLoadTypes = globalNoLoadTypes();
2360 weights.resize(noLoadTypes);
2361 std::fill(weights.begin(), weights.end(), 0.0);
// Number of load types of solver i; 'names' receives a label per weight.
2365 const MInt solverCount = solver(i).noLoadTypes();
2366 std::vector<MString> names(solverCount);
// Start with the solver's default weights, written at this solver's offset.
2369 solver(i).getDefaultWeights(&weights[offset], names);
// Per-solver override property: "solverWeights_<i>".
2372 const MString propName =
"solverWeights_" + std::to_string(i);
// Abort if the property has the wrong length (the condition itself is
// elided; the message states the expected length solverCount).
2375 TERMM(1,
"wrong length of property '" + propName +
"', should be of length " + std::to_string(solverCount));
// Read the j-th entry of the property for every load type.
2377 for(
MInt j = 0; j < solverCount; j++) {
2378 weights[offset + j] = Context::getBasicProperty<MFloat>(propName, AT_, j);
// Log the weights that are finally used.
2382 for(
MInt j = 0; j < solverCount; j++) {
2383 m_log <<
"Solver #" << i <<
", weight #" << j <<
": " << names[j] <<
", " << weights[offset + j] << std::endl;
// Move on to the next solver's block.
2386 offset += solverCount;
// Fragment of the restart-file writer (header, loop headers over the solvers
// and several lines are not part of this excerpt). Visible flow: pause DLB
// timers, ask solvers whether to write, optionally save the grid, write
// coupler and solver restart files, then re-enable the timers.
2393 if(m_grid ==
nullptr) {
// Remember which solvers had their DLB timers enabled and disable them for
// the duration of the output.
2400 std::vector<MInt> dlbTimersStatus(
noSolvers());
2403 dlbTimersStatus[i] = solver(i).dlbTimersEnabled();
2404 if(dlbTimersStatus[i] != 0) {
2405 solver(i).disableDlbTimers();
// Each solver decides whether a restart file (and a grid file) is needed.
2411 writeRestart[i] = solver(i).prepareRestart(forceRestart, writeGridRestart[i]);
// NOTE: despite the name, allsolversrestart becomes true as soon as ANY
// solver requests a restart file; gridrestart additionally requires that
// this solver requests a grid file.
2414 MBool allsolversrestart =
false;
2415 MBool gridrestart =
false;
2418 if(writeRestart[i]) allsolversrestart =
true;
2419 if(writeRestart[i] && writeGridRestart[i]) gridrestart =
true;
// Periodic connections are removed for the output and restored further below.
2422 gridb().deletePeriodicConnection(
true);
// Save the grid if requested, or if new partition cells have to be stored.
2424 if(gridrestart || (allsolversrestart && m_saveGridNewPartitionCells)) {
2425 accumulateCellWeights();
// Build the grid file name (prefix lines elided): optional "Backup" marker,
// optional "_00<timestep>" suffix, then the I/O file extension.
2428 std::stringstream s;
2430 if(writeBackup) s <<
"Backup";
2431 if(!m_useNonSpecifiedRestartFile || writeBackup) s <<
"_00" <<
globalTimeStep;
2436 s << ParallelIo::fileExt();
2437 m_currentGridFileName = s.str();
2442 solver(i).cancelMpiRequests();
2445 cerr0 <<
"Saving adapted grid file for all solvers " << s.str() <<
"... ";
2447 gridb().saveGrid((m_outputDir + m_currentGridFileName).c_str(), m_recalcIds);
2449 cerr0 <<
"ok." << std::endl;
// The pending request to save new partition cells has been served.
2451 m_saveGridNewPartitionCells =
false;
// Couplers are always called and receive the global restart decision.
2456 for(
MInt c = 0; c < noCouplers(); c++) {
2457 coupler(c).writeRestartFile(allsolversrestart);
2460 if(allsolversrestart) {
// Only active solvers write restart files.
2464 if(solver(i).isActive()) {
2465 solver(i).writeRestartFile(writeRestart[i], writeBackup, m_currentGridFileName, m_recalcIds);
2472 solver(i).reIntAfterRestart(writeRestart[i]);
2475 gridb().restorePeriodicConnection();
// Re-enable the DLB timers that were active on entry.
2479 if(dlbTimersStatus[i] != 0) {
2480 solver(i).enableDlbTimers();
// Fragment (presumably globalNoLoadTypes(); header, loop header and return
// are not shown): sums the number of load types over the solvers.
2489 MInt noLoadTypes = 0;
2491 noLoadTypes += solver(i).noLoadTypes();
// Fragment of the routine collecting the load quantities of all solvers
// (header and loop header not shown; the block name is a guess).
// loadQuantities holds globalNoLoadTypes() entries and is zeroed first.
2501 std::fill_n(&loadQuantities[0], globalNoLoadTypes(), 0);
// Each solver writes its quantities at its own offset; the offset then
// advances by that solver's number of load types.
2505 solver(i).getLoadQuantities(&loadQuantities[offset]);
2506 offset += solver(i).noLoadTypes();
// Fragment of the load-imbalance evaluation (header and many lines are not
// part of this excerpt; the block name is a guess). It computes the
// imbalance from the per-domain run times and prints a text histogram of the
// load distribution, side by side with the one of the previous call.
// Single-domain runs are handled separately (body elided).
2523 if(noDomains() == 1) {
2532 gridb().mpiComm(), AT_,
"localRunTime",
"localRunTimes[0]");
// Average and maximum run time over all domains.
2535 const MFloat averageRunTime = std::accumulate(localRunTimes.
begin(), localRunTimes.
end(), 0.0) / noDomains();
2536 const MFloat maxRunTime = *std::max_element(localRunTimes.
begin(), localRunTimes.
end());
// Imbalance: relative gap between maximum and average run time, scaled by
// noDomains()/(noDomains()-1). The divisor is non-zero only for more than
// one domain (see the single-domain check above).
2540 imbalance = (maxRunTime - averageRunTime) / maxRunTime * noDomains() / (noDomains() - 1.0);
2541 const MBool loadBalance = (imbalance > m_dlbImbalanceThreshold);
// Load of a domain = its run time normalised by the average run time.
2544 for(
MInt i = 0; i < noDomains(); i++) {
2545 const MFloat load = localRunTimes[i] / averageRunTime;
2549 const MFloat maxLoad = *std::max_element(&loads[0], &loads[0] + noDomains());
2550 const MFloat minLoad = *std::min_element(&loads[0], &loads[0] + noDomains());
// NOTE(review): unbounded sprintf; the declaration of 'imb' is not visible
// here - confirm the buffer size or prefer snprintf.
2553 std::sprintf(imb,
"%.2f", imbalance * 100.0);
2556 <<
", t_max = " << maxRunTime << std::endl;
// Histogram of the loads with a bin width of 0.05; a load falls into bin
// floor(load / binWidth), and noBins is chosen so that maxLoad still fits.
2562 const MFloat binWidth = 0.05;
2563 const MInt noBins = std::ceil(maxLoad / binWidth) + 1;
2564 std::vector<MInt> loadBins(noBins);
2565 std::fill(loadBins.begin(), loadBins.end(), 0);
2566 for(
MInt i = 0; i < noDomains(); i++) {
2567 const MInt binId = std::floor(loads[i] / binWidth);
2568 loadBins[binId] += 1;
// Consistency check: every domain must have been counted exactly once.
2571 const MInt sumBins = std::accumulate(loadBins.begin(), loadBins.end(), 0);
2572 if(sumBins != noDomains()) {
2573 m_log <<
"ERROR in load bins, count " << sumBins <<
" != " << noDomains() << std::endl;
// Common extents of the current and the previous histogram, so both can be
// drawn on the same scale.
2576 const MInt noBinsPrev = m_previousLoadBins.size();
2577 const MInt maxNoBins = std::max(noBins, noBinsPrev);
2578 const MInt maxBinCountCurr = *std::max_element(loadBins.begin(), loadBins.end());
2579 const MInt maxBinCountPrev =
2580 (noBinsPrev > 0) ? *std::max_element(m_previousLoadBins.begin(), m_previousLoadBins.end()) : 0;
2581 const MInt maxBinCount = std::max(maxBinCountCurr, maxBinCountPrev);
// Index of the first non-empty bin minus one (one empty bin is kept below
// the data); may be -1 and is clamped to 0 in minFirstBin.
2583 const MInt firstBin =
2584 (std::find_if(loadBins.begin(), loadBins.end(), [](
MInt i) { return i > 0; }) - loadBins.begin()) - 1;
2585 const MInt firstBinPrev =
2586 (std::find_if(m_previousLoadBins.begin(), m_previousLoadBins.end(), [](
MInt i) { return i > 0; })
2587 - m_previousLoadBins.begin())
2590 const MInt minFirstBin = (noBinsPrev > 0) ? std::max(std::min(firstBin, firstBinPrev), 0) : std::max(firstBin, 0);
// Text output: every line is formatted with snprintf into buffer 'b'
// (declaration elided), limited to maxLineLength characters.
2593 const MInt maxLineLength = 256;
2594 const MString dashes(maxLineLength,
'-');
2597 std::stringstream hist;
// Separator lines are built from the dash string via precision-limited %s.
2599 snprintf(
b, maxLineLength,
" |%.126s|\n", dashes.c_str());
2602 snprintf(
b, maxLineLength,
" |%.18s|%.42s|%.42s|%.21s|\n", dashes.c_str(), dashes.c_str(), dashes.c_str(),
// Header line for the current distribution.
2607 snprintf(
b, maxLineLength,
2608 " | Load distribution at global timestep %-8d%*s - imbalance %5.2f%% - t_avg %5.3e - t_max %5.3e - loads "
2609 "[%5.3f,%5.3f] |\n",
2610 globalTimeStep, 2,
" ", imbalance * 100.0, averageRunTime, maxRunTime, minLoad, maxLoad);
2611 hist <<
"\n" << separatorTop <<
b;
// Header line for the previous distribution, if one has been stored.
2613 if(noBinsPrev > 0) {
2614 snprintf(
b, maxLineLength,
2615 " | Load distribution at global timestep %-8d%*s - imbalance %5.2f%% - t_avg %5.3e - t_max %5.3e - loads "
2616 "[%5.3f,%5.3f] |\n",
2617 m_previousLoadInfoStep, 2,
" ", m_previousLoadInfo[0], m_previousLoadInfo[1], m_previousLoadInfo[2],
2618 m_previousLoadInfo[3], m_previousLoadInfo[4]);
2619 hist << separatorTop <<
b;
// Column titles.
2622 snprintf(
b, maxLineLength,
" | load bin | %-32s%8d | %-32s%8d | curr/prev count |\n",
2623 "current distribution - timestep",
globalTimeStep,
"previous distribution - timestep",
2624 m_previousLoadInfoStep);
2625 hist << separator <<
b << separator;
// Counters to verify that all domains appear in the printed rows.
2627 MInt checksum = 0, checksumPrev = 0;
// Rows are printed from the highest load bin downwards.
2629 for(
MInt i = maxNoBins - 1; i >= minFirstBin; i--) {
2630 const MInt maxBarWidth = 40;
// Bins beyond the end of a histogram count as empty.
2631 const MInt count = (i < noBins) ? loadBins[i] : 0;
2632 const MInt countPrev = (i < noBinsPrev) ? m_previousLoadBins[i] : 0;
// Bar width proportional to the count relative to the largest bin; a
// non-empty bin always gets at least one character.
2634 const MInt width = (count > 0) ? std::max((
MInt)(maxBarWidth * (
MFloat)count / (
MFloat)maxBinCount), 1) : 0;
2635 const MInt widthPrev =
2636 (countPrev > 0) ? std::max((
MInt)(maxBarWidth * (
MFloat)countPrev / (
MFloat)maxBinCount), 1) : 0;
2637 const MString bar(width,
'#');
2638 const MString barPrev(widthPrev,
'@');
// The bin starting at load 1.0 (the average) is drawn with '_' padding to
// mark it in the output.
2640 if(
approx(i * binWidth, 1.0, 0.0001)) {
2642 snprintf(
b, maxLineLength,
" | [%6.3f, %-6.3f) |_%-40s_|_%-40s_| %8d | %8d |\n", i * binWidth, (i + 1) * binWidth,
2643 bar.c_str(), barPrev.c_str(), count, countPrev);
2645 snprintf(
b, maxLineLength,
" | [%6.3f, %-6.3f) | %-40s | %-40s | %8d | %8d |\n", i * binWidth, (i + 1) * binWidth,
2646 bar.c_str(), barPrev.c_str(), count, countPrev);
2652 checksumPrev += countPrev;
// Report an error if the rows do not account for all domains.
2656 if(checksum != noDomains() || (checksumPrev != noDomains() && noBinsPrev > 0)) {
2657 m_log <<
"ERROR in load histogram" << std::endl;
2658 cerr0 <<
"ERROR in load histogram" << std::endl;
2660 m_log << hist.str() << std::endl;
2661 cerr0 << hist.str() << std::endl;
// Store the current distribution for the comparison in the next call.
// m_previousLoadInfo layout: [0] imbalance in percent, [1] average run time,
// [2] maximum run time, [3] minimum load, [4] maximum load.
2665 m_previousLoadBins = loadBins;
2667 m_previousLoadInfo[0] = imbalance * 100.0;
2668 m_previousLoadInfo[1] = averageRunTime;
2669 m_previousLoadInfo[2] = maxRunTime;
2670 m_previousLoadInfo[3] = minLoad;
2671 m_previousLoadInfo[4] = maxLoad;
// Fragment of loadBalancingCalcNewGlobalOffsets (the first line of the
// signature, which declares oldPartitionCellOffsets, and several body lines
// are not part of this excerpt).
// newPartitionCellOffsets: for each domain, the index of the first partition
//                          cell of the new partitioning.
// globalOffsets:           output with noDomains() + 1 entries, the new
//                          global cell id offsets.
2688 const MLong*
const newPartitionCellOffsets,
2689 MLong*
const globalOffsets) {
// Copy the global ids of the local partition cells of the current
// partitioning and assert that they are strictly ascending.
2692 const MInt oldNoPartitionCells = gridb().m_noPartitionCells;
2693 MLongScratchSpace partitionCellsGlobalId(oldNoPartitionCells, AT_,
"partitionCellsGlobalId");
2695 for(
MInt i = 0; i < oldNoPartitionCells; i++) {
2696 const MLong globalCellId = gridb().m_localPartitionCellGlobalIds[i];
2698 partitionCellsGlobalId(i) = globalCellId;
2699 ASSERT(i == 0 || partitionCellsGlobalId(i) > partitionCellsGlobalId(i - 1),
2700 "Partition cell global ids not in ascending order.");
// All entries start at zero; each one is set by the domain that currently
// holds the corresponding partition cell.
2704 std::fill_n(&globalOffsets[0], noDomains() + 1, 0);
// Domain 0 is not visited, its entry stays at zero.
2707 for(
MInt i = 1; i < noDomains(); i++) {
2708 const MLong partitionCellId = newPartitionCellOffsets[i];
// Does this domain currently own the partition cell at which domain i
// starts in the new partitioning?
2710 const MBool hasPartitionCell = (oldPartitionCellOffsets[domainId()] <= partitionCellId
2711 && partitionCellId < oldPartitionCellOffsets[domainId() + 1]);
2714 if(hasPartitionCell) {
2716 const MLong partitionCellLocalId = partitionCellId - oldPartitionCellOffsets[domainId()];
2717 globalOffsets[i] = partitionCellsGlobalId[partitionCellLocalId];
// Walk up the parent chain while the parent's global id directly precedes
// the current cell's id and the minimum level has not been reached; the
// declaration and increment of 'shift' are elided (presumably one per step).
2720 MInt currentId = gridb().m_localPartitionCellLocalIds[partitionCellLocalId];
2721 MInt parentId = gridb().a_parentId(currentId);
2729 while(gridb().a_level(currentId) != gridb().minLevel()
2730 && gridb().a_globalId(parentId) == gridb().a_globalId(currentId) - 1) {
2732 currentId = parentId;
2733 parentId = gridb().a_parentId(currentId);
// A shift is only legal if partition level shifts are enabled at all.
2736 TERMM_IF_COND(shift > 0 && gridb().m_maxPartitionLevelShift == 0,
2737 "Error: domain offset has a shift but the maximum partition level shift is 0.");
// Move the offset back so that it starts at the topmost such ancestor.
2740 globalOffsets[i] -= shift;
// The terminating entry is provided by domain 0 only.
2745 if(domainId() == 0) {
2746 globalOffsets[noDomains()] = gridb().domainOffset(noDomains());
// In-place reduction over all domains (the call head with the reduction
// operation is not visible in this excerpt).
2753 gridb().mpiComm(), AT_,
"MPI_IN_PLACE",
"globalOffsets[0]");
// Fragment of the routine that determines, for the new domain offsets, how
// many cells are sent to / received from each domain and in which order the
// cells are placed in the send buffer (the first signature line with
// 'offsets', the declaration of targetDomainsByCell and several body lines
// are not part of this excerpt; the block name is a guess).
// noCellsToSendByDomain / noCellsToReceiveByDomain: noDomains() + 1 entries,
//     the last entry holds the total.
// sortedCellId:     cell id -> buffer position (-1 if the cell is not sent).
// bufferIdToCellId: buffer position -> cell id.
2769 MInt*
const noCellsToSendByDomain,
2770 MInt*
const noCellsToReceiveByDomain,
2771 MInt*
const sortedCellId,
2772 MInt*
const bufferIdToCellId) {
2775 const MInt noCells = gridb().treeb().size();
2778 std::fill_n(&targetDomainsByCell[0], noCells, -1);
2779 std::fill_n(&noCellsToSendByDomain[0], noDomains() + 1, 0);
2783 for(
MInt cellId = 0; cellId < noCells; cellId++) {
// Halo cells are handled separately (body elided, presumably skipped).
2785 if(gridb().a_hasProperty(cellId, Cell::IsHalo)) {
2789 const MLong globalCellId = gridb().a_globalId(cellId);
// Find the first offset that is not smaller than the global cell id.
// NOTE(review): lowerBound may equal &offsets[noDomains()] and is
// dereferenced below - this relies on 'offsets' having noDomains() + 1
// valid entries; confirm against the caller.
2791 auto lowerBound = std::lower_bound(&offsets[0], &offsets[0] + noDomains(), globalCellId);
2792 const MInt dist = std::distance(&offsets[0], lowerBound);
// A cell whose id equals an offset is the first cell of that domain;
// otherwise it belongs to the preceding domain.
2794 const MBool isDomainOffset = (*lowerBound == globalCellId);
2796 const MInt globalDomain = (isDomainOffset) ?
dist :
dist - 1;
2799 targetDomainsByCell[cellId] = globalDomain;
2801 noCellsToSendByDomain[globalDomain]++;
2807 "noCellsToReceiveByDomain[0]");
// Totals are stored in the extra last entry.
2810 noCellsToSendByDomain[noDomains()] =
2811 std::accumulate(&noCellsToSendByDomain[0], &noCellsToSendByDomain[0] + noDomains(), 0);
2812 noCellsToReceiveByDomain[noDomains()] =
2813 std::accumulate(&noCellsToReceiveByDomain[0], &noCellsToReceiveByDomain[0] + noDomains(), 0);
// A domain that sends or receives no cell at all is treated as fatal.
2815 if(noCellsToSendByDomain[noDomains()] < 1) {
2816 TERMM(1, std::to_string(domainId()) +
" noCellsToSend = " + std::to_string(noCellsToSendByDomain[noDomains()]));
2818 if(noCellsToReceiveByDomain[noDomains()] < 1) {
2819 TERMM(1, std::to_string(domainId()) +
" noCellsToRecv = " + std::to_string(noCellsToReceiveByDomain[noDomains()]));
2823 std::fill_n(&sortedCellId[0], noCells, -1);
// Assign buffer positions domain by domain; within a domain the cells are
// ordered by global id (std::map iterates its keys in ascending order).
2826 MInt currentBufferId = 0;
2827 for(
MInt dom = 0; dom < noDomains(); ++dom) {
2829 std::map<MLong, MInt> cellMap;
2830 for(
MInt cellId = 0; cellId < noCells; ++cellId) {
2833 if(targetDomainsByCell[cellId] == dom) {
2834 cellMap[gridb().a_globalId(cellId)] = cellId;
// Store both directions of the mapping (the increment of currentBufferId is
// elided).
2840 for(
auto&& cell : cellMap) {
2841 const MInt cellId = cell.second;
2842 sortedCellId[cellId] = currentBufferId;
2843 bufferIdToCellId[currentBufferId] = cellId;
// Fragment of the routine exchanging a solver's global variables (header and
// the communication calls are largely elided; the block name is a guess).
// The root domain of the solver is looked up first.
2858 const MInt localRootGlobalDomain = solverLocalRootDomain(solver);
// Containers for the integer and floating point variables; they start empty
// (presumably filled by the solver in an elided line).
2860 std::vector<MInt> globalIdVars(0);
2861 std::vector<MFloat> globalFloatVars(0);
2866 const MInt noIdVars = globalIdVars.size();
2867 const MInt noFloatVars = globalFloatVars.size();
2873 AT_,
"globalFloatVars[0]");
// Fragment of the routine that determines, per data field and per target
// domain, how much cell data is exchanged during load balancing (first
// signature line and several body lines are not part of this excerpt; the
// block name is a guess).
// bufferIdToCellId: buffer position -> cell id.
// sendSizeVector / recvSizeVector: one size vector is appended per call.
2883 const MInt*
const bufferIdToCellId,
2884 std::vector<std::vector<MInt>>& sendSizeVector,
2885 std::vector<std::vector<MInt>>& recvSizeVector) {
// mode 0 addresses a solver, any other value a coupler.
2888 const MBool isSolver = (mode == 0);
// Number of data fields provided by the solver/coupler with index 'id'.
2891 MInt dataCount = (isSolver) ? solver(
id).noCellDataDlb() : coupler(
id).noCellDataDlb();
2896 "MPI_IN_PLACE",
"dataCount");
2904 for(
MInt dataId = 0; dataId < dataCount; dataId++) {
// Sum the sizes of all cells that are sent to one domain.
2910 MInt domainDataSize = 0;
2913 for(
MInt i = 0; i < noCellsToSend[domain]; i++) {
2916 const MInt cellId = bufferIdToCellId[bufferId];
2917 ASSERT(cellId > -1,
"");
2918 const MInt dataSize =
2919 (isSolver) ? solver(
id).cellDataSizeDlb(dataId, cellId) : coupler(
id).cellDataSizeDlb(dataId, cellId);
2920 domainDataSize += dataSize;
// 'offset' selects the section of this data field inside the size arrays.
2925 dataSendSize[offset + domain] = domainDataSize;
// Totals over all domains for this data field (assignment targets elided).
2937 std::accumulate(&dataSendSize[offset], &dataSendSize[offset +
globalNoDomains()], 0);
2941 std::accumulate(&dataRecvSize[offset], &dataRecvSize[offset +
globalNoDomains()], 0);
2944 sendSizeVector.push_back(dataSendSize);
2945 recvSizeVector.push_back(dataRecvSize);
// Fragment of the routine that redistributes the cell data of one solver or
// coupler during load balancing (first signature line, the type dispatch and
// the exchange call heads are not part of this excerpt; the block name is a
// guess).
// dataRecvSize:     receive sizes, indexed with a per-field offset.
// bufferIdToCellId: buffer position -> cell id.
// oldNoCells:       number of cells before the redistribution.
// int/long/floatDataRecv: receive buffers, one is appended per data field of
//                   the matching type (allocated here via mAlloc).
// dataTypes:        type tag (MINT/MLONG/MFLOAT) appended per data field.
2952 std::vector<MInt>& dataRecvSize,
const MInt*
const bufferIdToCellId,
2953 const MInt oldNoCells, std::vector<MInt*>& intDataRecv,
2954 std::vector<MLong*>& longDataRecv, std::vector<MFloat*>& floatDataRecv,
2955 std::vector<MInt>& dataTypes) {
// mode 0 addresses a solver, any other value a coupler; exactly one of the
// two pointers below is non-null.
2958 const MBool isSolver = (mode == 0);
2961 Solver*
const solverP = (isSolver) ? &solver(
id) :
nullptr;
2962 Coupling*
const couplerP = (isSolver) ?
nullptr : &coupler(
id);
// Couplers use domain 0 as root.
2968 const MInt localRootGlobalDomain = (isSolver) ? solverLocalRootDomain(solverP) : 0;
2975 "MPI_IN_PLACE",
"dataCount");
// Running index into the receive buffer vector of each type.
2978 MInt intDataCount = 0;
2979 MInt longDataCount = 0;
2980 MInt floatDataCount = 0;
2983 for(
MInt dataId = 0; dataId < dataCount; dataId++) {
// --- Integer data ---
3007 dataTypes.push_back(
MINT);
// At least one element is allocated even if nothing is received.
3009 MInt* newIntData =
nullptr;
3010 mAlloc(newIntData, std::max(recvSize, 1),
"newIntData", AT_);
3011 intDataRecv.push_back(newIntData);
// Gather the data to send from the solver or the coupler.
3019 solverP->
getCellDataDlb(dataId, oldNoCells, &bufferIdToCellId[0], &intDataSend[0]);
3021 couplerP->
getCellDataDlb(dataId, oldNoCells, &bufferIdToCellId[0], &intDataSend[0]);
3028 &dataSendSize[offset], &dataRecvSize[offset], &intDataRecv[intDataCount][0]);
// --- Long data ---
3035 dataTypes.push_back(
MLONG);
3037 MLong* newLongData =
nullptr;
3038 mAlloc(newLongData, std::max(recvSize, 1),
"newLongData", AT_);
3039 longDataRecv.push_back(newLongData);
3047 solverP->
getCellDataDlb(dataId, oldNoCells, &bufferIdToCellId[0], &longDataSend[0]);
3049 couplerP->
getCellDataDlb(dataId, oldNoCells, &bufferIdToCellId[0], &longDataSend[0]);
3056 &dataSendSize[offset], &dataRecvSize[offset], &longDataRecv[longDataCount][0]);
// --- Floating point data ---
3063 dataTypes.push_back(
MFLOAT);
3065 MFloat* newFloatData =
nullptr;
3066 mAlloc(newFloatData, std::max(recvSize, 1),
"newFloatData", AT_);
3067 floatDataRecv.push_back(newFloatData);
3075 solverP->
getCellDataDlb(dataId, oldNoCells, &bufferIdToCellId[0], &floatDataSend[0]);
3077 couplerP->
getCellDataDlb(dataId, oldNoCells, &bufferIdToCellId[0], &floatDataSend[0]);
3084 &dataSendSize[offset], &dataRecvSize[offset], &floatDataRecv[floatDataCount][0]);
// Any other type tag is a fatal error.
3091 TERMM(1,
"Unknown data type: " + std::to_string(dataType) +
".");
// Fragment of setDataDlb (first signature line, the type dispatch and the
// integer branch are not part of this excerpt): hands the received cell data
// back to the solver or coupler.
// long/floatDataRecv: receive buffers filled during the redistribution.
// dataTypes:          type tag per data field, indexed by dataId.
// freeMemory:         not used in the visible lines; presumably controls
//                     releasing the receive buffers - confirm.
3101 std::vector<MLong*>& longDataRecv, std::vector<MFloat*>& floatDataRecv,
3102 std::vector<MInt>& dataTypes,
const MBool freeMemory) {
// mode 0 addresses a solver, any other value a coupler; exactly one of the
// two pointers below is non-null.
3105 const MBool isSolver = (mode == 0);
3108 Solver*
const solverP = (isSolver) ? &solver(
id) :
nullptr;
3109 Coupling*
const couplerP = (isSolver) ?
nullptr : &coupler(
id);
3117 "MPI_IN_PLACE",
"dataCount");
// Running index into the receive buffer vector of each type.
3120 MInt intDataCount = 0;
3121 MInt longDataCount = 0;
3122 MInt floatDataCount = 0;
3125 for(
MInt dataId = 0; dataId < dataCount; dataId++) {
3126 const MInt dataType = dataTypes[dataId];
// Long data.
3143 solverP->
setCellDataDlb(dataId, &longDataRecv[longDataCount][0]);
3145 couplerP->
setCellDataDlb(dataId, &longDataRecv[longDataCount][0]);
// Floating point data.
3155 solverP->
setCellDataDlb(dataId, &floatDataRecv[floatDataCount][0]);
3157 couplerP->
setCellDataDlb(dataId, &floatDataRecv[floatDataCount][0]);
// Any other type tag is a fatal error.
3166 TERMM(1,
"Unknown data type.");
// Fragment of the routine that writes the collected DLB timings of all
// domains to one parallel I/O file (header and the declarations of
// 'timeSteps' and 'data' are not part of this excerpt; the block name is a
// guess).
3185 const MInt noTimings = m_dlbTimeStepsAll.size();
// Two values are stored per timing: run time and idle time.
3186 const MInt noValues = 2;
// Without recorded timings there is nothing to do (body elided).
3188 if(noTimings == 0) {
// File name: <outputDir>timings_<noDomains>_<globalTimeStep><ext>.
3194 std::stringstream fileName;
3195 fileName << m_outputDir <<
"timings_" << noDomains() <<
"_" <<
globalTimeStep << ParallelIo::fileExt();
3197 m_log <<
"Write timings to file: " << fileName.str() << std::endl;
3199 ParallelIo file(fileName.str(), PIO_REPLACE, gridb().mpiComm());
3201 file.defineArray(PIO_INT,
"timeStep", noTimings);
// 'timings' is a 3D array: domain x timing x value.
3204 ParallelIo::size_type dimSizes[] = {noDomains(), noTimings, noValues};
3206 file.defineArray(PIO_FLOAT,
"timings", 3, &dimSizes[0]);
// Attributes describing the dimensions and the two stored values.
3208 file.setAttribute(
"domain index",
"dim_0",
"timings");
3209 file.setAttribute(
"time step index",
"dim_1",
"timings");
3210 file.setAttribute(
"timings index",
"dim_2",
"timings");
3212 file.setAttribute(
"run time",
"var_0",
"timings");
3213 file.setAttribute(
"idle time",
"var_1",
"timings");
// Copy the local records into the output buffers.
3218 for(
MInt i = 0; i < noTimings; i++) {
3219 timeSteps[i] = m_dlbTimeStepsAll[i];
3220 data(i, 0) = m_dlbRunTimeAll[i];
3221 data(i, 1) = m_dlbIdleTimeAll[i];
// The time steps are written by domain 0 only; all other domains take part
// in the write with a zero-length range.
3225 if(domainId() == 0) {
3226 file.setOffset(noTimings, 0);
3228 file.setOffset(0, 0);
3230 file.writeArray(&timeSteps[0],
"timeStep");
// Every domain writes exactly one slice of the 3D array, at its domain id.
3233 file.setOffset(1, domainId(), 3);
3234 file.writeArray(&data[0],
"timings");
// Fragment of the routine that writes loads, load quantities, domain weights
// and weights to a parallel I/O file (the first signature line, which
// declares 'loads' and 'noLoadTypes', is not part of this excerpt; the block
// name is a guess).
// loadQuantities: the local load quantities (one per load type).
// domainWeight:   the weight of this domain.
// weights:        the weights, one per load type.
3243 const MInt*
const loadQuantities,
const MFloat domainWeight,
3244 const MFloat*
const weights) {
3250 if(domainId() == 0) {
3251 std::cerr <<
"Store loads and weights" << std::endl;
// File name: <outputDir>loads_<noDomains>_<8-digit zero padded time step><ext>.
3254 std::stringstream fileName;
3255 fileName << m_outputDir <<
"loads_" << noDomains() <<
"_" << std::setw(8) << std::setfill(
'0') <<
globalTimeStep;
3256 fileName << ParallelIo::fileExt();
3258 ParallelIo file(fileName.str(), PIO_REPLACE, gridb().mpiComm());
3260 file.defineArray(PIO_FLOAT,
"weights", noLoadTypes);
3261 file.defineArray(PIO_FLOAT,
"loads", noDomains());
3262 file.defineArray(PIO_FLOAT,
"domainWeights", noDomains());
// 'loadQuantities' is a 2D array: domain x load type.
3265 ParallelIo::size_type dimSizes[] = {noDomains(), noLoadTypes};
3266 file.defineArray(PIO_INT,
"loadQuantities", 2, &dimSizes[0]);
// The weights are written by domain 0 only; all other domains take part in
// the write with a zero-length range.
3269 if(domainId() == 0) {
3270 file.setOffset(noLoadTypes, 0);
3272 file.setOffset(0, 0);
3274 file.writeArray(&weights[0],
"weights");
// Each domain writes its own load and domain weight at index domainId().
3276 file.setOffset(1, domainId());
3277 file.writeArray(&loads[domainId()],
"loads");
3278 file.writeArray(&domainWeight,
"domainWeights");
// Each domain writes one row of the 2D load quantity array.
3281 file.setOffset(1, domainId(), 2);
3282 file.writeArray(&loadQuantities[0],
"loadQuantities");
// Fragment (presumably solverLocalRootDomain(); only the tail of an in-place
// MPI reduction and the return statement are visible): returns the global
// domain id determined by that reduction.
3292 "MPI_IN_PLACE",
"localRootGlobalDomain");
3293 return localRootGlobalDomain;
// Fragment of the routine that aligns the adaptation interval and start with
// the level difference (the first signature line, which declares maxLevel, is
// not part of this excerpt; the block name is a guess).
// Dividing and multiplying by IPOW2(maxLevel - maxUniformRefinementLevel)
// rounds both values down to a multiple of that factor, assuming the members
// are integers (integer division) - TODO confirm the member types.
3303 MInt maxUniformRefinementLevel) {
3304 this->m_adaptationInterval = this->m_adaptationInterval /
IPOW2(maxLevel - maxUniformRefinementLevel)
3305 *
IPOW2(maxLevel - maxUniformRefinementLevel);
3306 this->m_adaptationStart = this->m_adaptationStart /
IPOW2(maxLevel - maxUniformRefinementLevel)
3307 *
IPOW2(maxLevel - maxUniformRefinementLevel);
// Report the adjusted values (printed by every caller, not only domain 0).
3308 std::cout <<
"Set adaptationStart to: " << this->m_adaptationStart
3309 <<
" and adaptationInterval to: " << this->m_adaptationInterval <<
"\n";
// Fragment of a DLB timing reset routine (header and further lines are not
// part of this excerpt; the block name is a guess).
3317 m_dlbPreviousLocalRunTime = 0.0;
3318 m_dlbPreviousLocalIdleTime = 0.0;
// Swapping with an empty temporary clears m_dlbTimings and releases its
// storage.
3321 std::vector<MFloat>().swap(m_dlbTimings);
// Fragment of the DLB timer statistics output (presumably
// logTimerStatistics(status); header and the declarations of noDlbTimers,
// loadRecord, idleRecord and noTimeSteps are not part of this excerpt).
// 'status' is an optional label added to the log line.
3331 m_log <<
"DLB: Timer statistics ";
3332 if(!status.empty())
m_log << status <<
" ";
// Sum the load and idle records of all DLB timers of this domain.
3336 MFloat localRunTime = 0.0;
3337 MFloat localIdleTime = 0.0;
3339 for(
MInt i = 0; i < noDlbTimers; i++) {
// Negative records are fatal in load balancing mode 1, unless dynamic load
// balancing is only being tested.
3343 if(m_loadBalancingMode == 1 && !m_testDynamicLoadBalancing && (loadRecord < 0.0 || idleRecord < 0.0)) {
3344 TERMM(1,
"Load/Idle record for dlb timer #" + std::to_string(i) +
" is less than zero on global domain #"
3345 + std::to_string(domainId()));
3348 localRunTime += loadRecord;
3349 localIdleTime += idleRecord;
// Per-time-step values; the local time per step is kept before the
// reductions overwrite timePerStep.
3354 MFloat timePerStep = (localRunTime + localIdleTime) / noTimeSteps;
3355 MFloat maxRunTime = localRunTime / noTimeSteps;
3356 const MFloat localTimePerStep = timePerStep;
// After these reductions timePerStep and maxRunTime hold the maxima over all
// domains.
3359 MPI_Allreduce(MPI_IN_PLACE, &timePerStep, 1, MPI_DOUBLE, MPI_MAX, gridb().mpiComm(), AT_,
"MPI_IN_PLACE",
3361 MPI_Allreduce(MPI_IN_PLACE, &maxRunTime, 1, MPI_DOUBLE, MPI_MAX, gridb().mpiComm(), AT_,
"MPI_IN_PLACE",
// Largest and smallest idle time per step over all domains.
3364 MFloat maxIdleTime = localIdleTime / noTimeSteps;
3365 MFloat minIdleTime = localIdleTime / noTimeSteps;
3366 MPI_Allreduce(MPI_IN_PLACE, &maxIdleTime, 1, MPI_DOUBLE, MPI_MAX, gridb().mpiComm(), AT_,
"MPI_IN_PLACE",
3368 MPI_Allreduce(MPI_IN_PLACE, &minIdleTime, 1, MPI_DOUBLE, MPI_MIN, gridb().mpiComm(), AT_,
"MPI_IN_PLACE",
// NOTE(review): the value logged as "Average time per step" is the MAX over
// all domains at this point. localRunTime may be zero, in which case the
// idle/comp ratio is not finite - confirm whether that can occur.
3372 m_log <<
globalTimeStep <<
" Average time per step: " << timePerStep <<
" ; local: " << localTimePerStep
3373 <<
", idle/comp = " << localIdleTime / localRunTime
3374 <<
", idle/timePerStep = " << localIdleTime / (noTimeSteps * timePerStep) << std::endl;
3375 m_log <<
globalTimeStep <<
" Relative idle time: max = " << maxIdleTime / timePerStep <<
", min "
3376 << minIdleTime / timePerStep << std::endl;
// Fragment of the check whether the current time step is a DLB time step,
// followed by optional timer synchronisation (header, return statement and
// the barrier call are not part of this excerpt; the block name is a guess).
// Regular case: the step, shifted by the offset, is a multiple of the
// interval, and the stop time step (if non-negative) is not yet reached.
// NOTE(review): the modulo requires m_loadBalancingInterval != 0; a guard is
// not visible in this excerpt - confirm.
3383 MBool dlbTimeStep = ((
globalTimeStep - m_loadBalancingOffset) % m_loadBalancingInterval == 0)
3384 && (m_loadBalancingStopTimeStep < 0 ||
globalTimeStep < m_loadBalancingStopTimeStep);
// Additional trigger after an adaptation: exactly m_loadBalancingOffset steps
// after the last adaptation, if enough adaptations happened since the last
// balance (or m_dlbStep < 2), subject to the same stop criterion.
3385 if(m_balanceAfterAdaptation) {
3386 dlbTimeStep = dlbTimeStep
3387 || (((
globalTimeStep - m_lastAdaptationTimeStep) == m_loadBalancingOffset)
3388 && (m_nAdaptationsSinceBalance >= m_balanceAdaptationInterval || m_dlbStep < 2)
3389 && (m_loadBalancingStopTimeStep < 0 ||
globalTimeStep < m_loadBalancingStopTimeStep));
// Synchronise when m_syncTimeSteps is set, or when m_syncTimerSteps is set
// and fewer than m_loadBalancingOffset steps have passed since the last
// adaptation.
3391 if((m_syncTimerSteps && ((
globalTimeStep - m_lastAdaptationTimeStep) < m_loadBalancingOffset)) || m_syncTimeSteps) {
3392 cerr0 <<
"Applying Barrier for correct timer!" << std::endl;
// Outside the post-adaptation window, solver 0's load timer is started
// before and stopped after the elided synchronisation.
3393 if(m_syncTimeSteps && !((
globalTimeStep - m_lastAdaptationTimeStep) < m_loadBalancingOffset)) {
3394 solver(0).startLoadTimer(AT_);
3397 if(m_syncTimeSteps && !((
globalTimeStep - m_lastAdaptationTimeStep) < m_loadBalancingOffset)) {
3398 solver(0).stopLoadTimer(AT_);
MLong allocatedBytes()
Return the number of allocated bytes.
void mAlloc(T *&a, const MLong N, const MString &objectName, MString function)
allocates memory for one-dimensional array 'a' of size N
MBool mDeallocate(T *&a)
deallocates the memory previously allocated for element 'a'
MString m_gridInputFileName
void savePartitionFile()
Save current grid partitioning to file.
MInt domainId() const
Return the domainId (rank)
static MInt propertyLength(const MString &name, MInt solverId=m_noSolvers)
Returns the number of elements of a property.
static MBool propertyExists(const MString &name, MInt solver=m_noSolvers)
This function checks if a property exists in general.
virtual void setCellDataDlb(const MInt NotUsed(dataId), const MInt *const NotUsed(data))
virtual MInt cellDataTypeDlb(const MInt NotUsed(dataId)) const
virtual MInt noCellDataDlb() const
Methods to inquire coupler data during balancing.
virtual void getCellDataDlb(const MInt NotUsed(dataId), const MInt NotUsed(oldNoCells), const MInt *const NotUsed(bufferIdToCellId), MInt *const NotUsed(data))
MInt noDlbTimers() const
Return the number of DLB timers.
MFloat returnLoadRecord(const MInt dlbTimerId, const MInt mode=0)
Return the load record of a DLB timer.
void enableAllDlbTimers(const MBool *const wasEnabled=nullptr)
Enable all DLB timers (or those given by the array wasEnabled)
void resetRecords()
Reset the records of all DLB timers.
MFloat returnIdleRecord(const MInt dlbTimerId, const MInt mode=0)
Return the idle record of a DLB timer.
void disableAllDlbTimers(MBool *const wasEnabled=nullptr)
Disable all (enabled) DLB timers.
This class is a ScratchSpace.
void fill(T val)
fill the scratch with a given value
Parent class of all solvers. This class is the base for all solvers, i.e. all solver classes (e....
virtual void removeCell(const MInt)
Remove the given cell.
virtual void setGlobalSolverVars(std::vector< MFloat > &NotUsed(globalFloatVars), std::vector< MInt > &NotUsed(globalIdVars))
virtual MInt domainId() const
Return the domainId (rank)
virtual void removeChilds(const MInt)
Coarsen the given cell.
virtual void resizeGridMap()
Resize the grid map.
virtual void getGlobalSolverVars(std::vector< MFloat > &NotUsed(globalFloatVars), std::vector< MInt > &NotUsed(globalIntVars))
virtual void getCellDataDlb(const MInt NotUsed(dataId), const MInt NotUsed(oldNoCells), const MInt *const NotUsed(bufferIdToCellId), MInt *const NotUsed(data))
virtual MInt cellDataTypeDlb(const MInt NotUsed(dataId)) const
virtual MInt noCellDataDlb() const
Methods to inquire solver data information.
virtual void setCellDataDlb(const MInt NotUsed(dataId), const MInt *const NotUsed(data))
virtual void refineCell(const MInt)
Refine the given cell.
virtual void swapProxy(const MInt, const MInt)
Swap the given cells.
virtual MInt cellOutside(const MFloat *, const MInt, const MInt)
Check whether cell is outside the fluid domain.
Grid controller manages adaptation and load balancing in multi-solver environment.
MBool m_testUpdatePartitionCells
MInt m_lastAdaptationTimeStep
MFloat * m_dlbStaticWeights
MBool m_forceLoadBalancing
MFloat m_maxPerformanceVarThreshold
std::vector< MFloat > m_domainWeights
void writeRestartFile(const MBool, const MBool)
MInt solverLocalRootDomain(Solver *const solver)
Determine the global domain id of the solver local root domain.
std::vector< MInt > m_lastOffsetShiftDirection
MBool m_saveGridNewPartitionCells
std::vector< std::function< MInt(const MFloat *, const MInt, const MInt)> > cellOutsideVec()
MInt m_dlbStaticWeightMode
MInt m_previousLoadInfoStep
MFloat m_dlbImbalanceThreshold
void printDomainStatistics(const MString &status="")
Print statistics regarding the cell distribution among domains.
void logTimerStatistics(const MString &)
void setDataDlb(const MInt solverId, const MInt mode, std::vector< MInt * > &intDataRecv, std::vector< MLong * > &longDataRecv, std::vector< MFloat * > &floatDataRecv, std::vector< MInt > &dataTypes, const MBool freeMemory)
Set the solver/coupler data for load balancing.
MInt m_testUpdatePartCellsOffspringThreshold
Solver & solver(const MInt solverId)
void partition(MLong *partitionCellOffsets, MLong *globalIdOffsets, const MBool onlyPartitionOffsets)
Compute new domain decomposition.
MBool m_timersInitialized
void getSpecifiedSolverWeights(std::vector< MFloat > &weights)
Return the specified (or default) solver weights for all solvers.
MBool m_dlbUpdatePartitionCells
const std::vector< std::function< void(const MInt)> > m_refineCellSolver
const std::vector< std::function< void(const MInt)> > m_removeCellSolver
MInt globalNoLoadTypes()
Return global number of load types of all solvers.
typename CartesianGrid< nDim >::Cell Cell
MBool adaptation(const MBool force=false)
performs mesh adaptation
void computeWeights(const MFloat *loads, const MFloat domainWeight, std::vector< MFloat > &weights)
Compute computational weights for different components in the simulation based on the current distrib...
std::array< MInt, Timers::_count > m_timers
const std::vector< std::unique_ptr< Solver > > * m_solvers
void initTimers()
Initialize timers.
MInt m_loadBalancingStartTimeStep
MFloat m_dlbMaxWorkloadLimit
void determineDataSizesDlb(const MInt solverId, const MInt mode, const MInt *const noCellsToSend, const MInt *const bufferIdToCellId, std::vector< std::vector< MInt > > &sendSizeVector, std::vector< std::vector< MInt > > &recvSizeVector)
Determine the data sizes for a solver/coupler that need to be communicated during load balancing.
const std::vector< std::function< void()> > m_resizeGridMapSolver
const std::vector< std::unique_ptr< Coupling > > * m_couplers
const Coupling & coupler(const MInt couplerId) const
MFloat m_dlbPreviousLocalRunTime
void redistributeDataDlb(const MInt id, const MInt mode, std::vector< MInt > &sendSizeVector, std::vector< MInt > &recvSizeVector, const MInt *const bufferIdToCellId, const MInt noCells, std::vector< MInt * > &intDataRecv, std::vector< MLong * > &longDataRecv, std::vector< MFloat * > &floatDataRecv, std::vector< MInt > &dataTypes)
Communicate all solver data for load balancing according to the send/recv sizes.
std::vector< MFloat > m_dlbRunTimeAll
typename maia::grid::Proxy< nDim > GridProxy
MBool isAdaptationTimeStep()
MInt m_loadBalancingTimerStartOffset
MFloat * m_dlbLastWeights
const Grid & gridb() const
MString m_currentGridFileName
MInt m_dlbLastResetTimeStep
MBool m_dlbSmoothGlobalShifts
void communicateGlobalSolverVars(Solver *const solver)
MInt m_adaptationInterval
MBool needLoadBalancing(const MFloat localRunTime, MFloat *const loads, MFloat &imbalance)
Return if dynamic load balancing is needed and compute domain loads.
void loadBalancingCalcNewGlobalOffsets(const MLong *const oldPartitionCellOffsets, const MLong *const newPartitionCellOffsets, MLong *const globalOffsets)
Calculate new global domain offsets given the current and new global partition cell offsets.
MInt m_loadBalancingInterval
MFloat m_testUpdatePartCellsWorkloadThreshold
MInt m_lastLoadBalancingTimeStep
MInt m_loadBalancingTimerMode
void initDlbProperties()
Read Dynamic Load Balancing properties.
const std::vector< std::function< MInt(const MFloat *, const MInt, const MInt)> > m_cellOutside
MBool loadBalancingPartition(const MFloat *loads, const MFloat imbalance, MLong *const partitionCellOffsets, MLong *const globalIdOffsets)
Determine new partitioning for dynamic load balancing.
std::vector< std::function< void(const MInt)> > refineCellVec()
MFloat m_dlbPreviousLocalIdleTime
MFloat m_timePerStepTotal
MLong m_optPartitionCellOffsetTotal
std::vector< MInt > m_dlbTimeStepsAll
std::vector< MFloat > m_dlbIdleTimeAll
std::vector< std::function< void()> > resizeGridMapVec()
std::vector< MInt > m_solverTimerGroups
MInt m_loadBalancingOffset
MBool balance(const MBool force=false, const MBool finalTimeStep=false, const MBool adaptation=false)
MInt m_dlbPartitionMethod
void loadBalancingCalcNoCellsToSend(const MLong *const offsets, MInt *const noCellsToSendByDomain, MInt *const noCellsToReceiveByDomain, MInt *const sortedCellId, MInt *const bufferIdToCellId)
Based on new domain offsets calculate the number of cells to send/receive to/from each domain.
MInt m_nAdaptationsSinceBalance
MInt m_loadBalancingStopTimeStep
void storeLoadsAndWeights(const MFloat *const loads, const MInt noLoadTypes, const MInt *const loadQuantities, const MFloat domainWeight, const MFloat *const weights)
Store domain loads and additional information to file.
void accumulateCellWeights()
This function handles the setCellWeights functionality of all solvers and writes the accumulated-cell...
MInt m_dlbNoFinalLocalShifts
void castAdaptationIntervalToMultipleOfCoarsestTimeStep(MInt maxLevel, MInt maxUniformRefinementLevel)
MBool m_dlbRestartWeights
MBool m_testDynamicLoadBalancing
MBool m_performanceOutput
std::vector< std::function< void(const MInt)> > removeCellVec()
void updateWeightsAndWorkloads(const std::vector< MFloat > &weights, const MBool restoreDefaultWeights)
Determine the cell weights using the given weighting factors for the different solver load quantities...
Controller(Grid *grid_, std::vector< std::unique_ptr< Solver > > *solvers, std::vector< std::unique_ptr< Coupling > > *couplers)
MInt m_balanceAdaptationInterval
std::array< MFloat, 5 > m_previousLoadInfo
const std::vector< std::function< void(const MInt)> > m_removeChildsSolver
void storeTimings()
Store timings of all timesteps for all domains for performance evaluations (i.e. runtime and idle tim...
void getLoadQuantities(MInt *const loadQuantities)
Return load quantities of all solvers on this domain.
MBool m_useNonSpecifiedRestartFile
std::vector< std::array< MInt, SolverTimers::_count > > m_solverTimers
std::vector< MFloat > m_dlbTimings
Coupling & coupler(const MInt couplerId)
MBool updateGridPartitionWorkloads()
Update the partition cell workloads in the grid file using user specified or default weights for the ...
std::vector< MInt > m_previousLoadBins
std::vector< std::function< void(const MInt)> > removeChildsVec()
MBool m_balanceAfterAdaptation
const std::vector< std::function< void(const MInt, const MInt)> > m_swapProxySolver
const Solver & solver(const MInt solverId) const
std::vector< std::function< void(const MInt, const MInt)> > swapProxyVec()
void estimateParameters(MInt m, MInt n, const MFloat *const A, const MFloat *const b, MFloat *const x)
Solve the parameter estimation problem A*x=b.
MInt string2enum(MString theString)
This global function translates strings into their corresponding enum values (integer values)....
const MString const MString & message
constexpr Real POW2(const Real x)
MBool approx(const T &, const U &, const T)
void printAllocatedMemory(const MLong oldAllocatedBytes, const MString &solverName, const MPI_Comm comm)
Prints currently allocated memory.
MInt globalNoDomains()
Return global number of domains.
MInt globalDomainId()
Return global domain id.
constexpr MLong IPOW2(MInt x)
std::basic_string< char > MString
int MPI_Barrier(MPI_Comm comm, const MString &name)
same as MPI_Barrier
int MPI_Allreduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, const MString &name, const MString &sndvarname, const MString &rcvvarname)
same as MPI_Allreduce
int MPI_Alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm, const MString &name, const MString &sndvarname, const MString &rcvvarname)
same as MPI_Alltoall
int MPI_Bcast(void *buffer, int count, MPI_Datatype datatype, int root, MPI_Comm comm, const MString &name, const MString &varname)
same as MPI_Bcast
int MPI_Allgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm, const MString &name, const MString &sndvarname, const MString &rcvvarname)
same as MPI_Allgather
DlbTimerController g_dlbTimerController
void partitionParallelSplit(const WeightType *const localWeights, const IdType noLocalWeights, const IdType localWeightOffset, const IdType noDomains, const IdType domainId, const MPI_Comm mpiComm, IdType *const offsets)
void exchangeData(const MInt noNghbrDomains, const MInt *const nghbrDomains, const MInt *const noHaloCells, const MInt **const, const MInt *const noWindowCells, const MInt **const windowCells, const MPI_Comm comm, const U *const data, U *const haloBuffer, const MInt noDat=1)
Generic exchange of data.
Namespace for auxiliary functions/classes.
PARALLELIO_DEFAULT_BACKEND ParallelIo
MFloat dist(const Point< DIM > &p, const Point< DIM > &q)
@ CalcDataSizesMpiBlocking
@ RedistributeMpiBlocking
void logDurations(std::vector< std::pair< MFloat, MString > > &durations, const MString module, const MPI_Comm comm, const MInt domainId, const MInt noDomains)
Output the min/max/average durations of provided timed code sections over the ranks in a communicator...