MAIA bb96820c
Multiphysics at AIA
Loading...
Searching...
No Matches
cartesiangridcontroller.h
Go to the documentation of this file.
1// Copyright (C) 2024 The m-AIA AUTHORS
2//
3// This file is part of m-AIA (https://git.rwth-aachen.de/aia/m-AIA/m-AIA)
4//
5// SPDX-License-Identifier: LGPL-3.0-only
6
7#ifndef GRIDCONTROLLER_H_
8#define GRIDCONTROLLER_H_
9
10#include <functional>
11#include "COMM/mpioverride.h"
12#include "COUPLER/coupling.h"
13#include "INCLUDE/maiatypes.h"
14#include "IO/context.h"
15#include "cartesiangrid.h"
16#include "cartesiangridproxy.h"
17#include "partition.h"
18#include "solver.h"
19
20#include "LB/lbsolverdxqy.h"
21
22namespace maia {
23namespace grid {
24
25// Create struct for easy timer identification
26struct Timers {
27 // Enum to store timer "names"
28 enum {
38
41
42 // Special enum value used to initialize timer array
43 _count
44 };
45};
46
47// Timers for each solver
49 // Enum to store timer "names"
50 enum {
53 // CancelMPI,
55 // CommGlobalVars,
56
60
61 // LocalToGlobalIds
63
67
69
73
74 // GlobalToLocalIds
77
79
80 // Special enum value used to initialize timer array
81 _count
82 };
83};
84
85
87template <MInt nDim>
89 public:
93
94 // Constructor/destructor
95 Controller(Grid* grid_, std::vector<std::unique_ptr<Solver>>* solvers,
96 std::vector<std::unique_ptr<Coupling>>* couplers);
98 if(m_timersInitialized) { // Timers are not created if there is no Cartesian grid
99 RECORD_TIMER_STOP(m_timers[Timers::Controller]);
100 }
101 }
102
103 // Dynamic load balancing
104 MBool balance(const MBool force = false, const MBool finalTimeStep = false, const MBool adaptation = false);
105 MBool adaptation(const MBool force = false);
106 void writeRestartFile(const MBool, const MBool);
107 void printDomainStatistics(const MString& status = "");
109 if(!m_balance || !gridb().wasBalancedAtLeastOnce()) {
110 return;
111 }
113 }
114
115 // Update the partition workloads in the grid file using given weights
117
118 // Cast of adaptation interval needed for LB
119 void castAdaptationIntervalToMultipleOfCoarsestTimeStep(MInt maxLevel, MInt maxUniformRefinementLevel);
120
121 void logTimerStatistics(const MString&);
124
125 private:
126 // Grid accessors
127 // Accessor to grid is named "gridb" to indicate that this is the base grid and not a
128 // solver-specific grid
129 Grid& gridb() { return *m_grid; }
130 const Grid& gridb() const { return *m_grid; }
131 // constexpr const GridProxy& gridProxy() const { return m_gridProxy; }
132 // GridProxy& gridProxy() { return m_gridProxy; }
133
134 MInt domainId() { return gridb().domainId(); }
135 MInt noDomains() { return gridb().noDomains(); }
136
137 // Solver accessors
138 Solver& solver(const MInt solverId) { return *m_solvers->at(solverId); }
139 const Solver& solver(const MInt solverId) const { return (const Solver&)m_solvers->at(solverId); }
140 MInt noSolvers() const { return m_solvers->size(); }
142
143 // Coupler accessors
144 Coupling& coupler(const MInt couplerId) { return *m_couplers->at(couplerId); }
145 const Coupling& coupler(const MInt couplerId) const { return *m_couplers->at(couplerId); }
146 MInt noCouplers() const { return m_couplers->size(); }
147
148 void initDlbProperties();
149 void initTimers();
150
151 MBool needLoadBalancing(const MFloat localRunTime, MFloat* const loads, MFloat& imbalance);
152
153 void partition(MLong* partitionCellOffsets, MLong* globalIdOffsets, const MBool onlyPartitionOffsets);
154 MBool loadBalancingPartition(const MFloat* loads, const MFloat imbalance, MLong* const partitionCellOffsets,
155 MLong* const globalIdOffsets);
156
157 void updateWeightsAndWorkloads(const std::vector<MFloat>& weights, const MBool restoreDefaultWeights);
158 void getSpecifiedSolverWeights(std::vector<MFloat>& weights);
159
161
162 void storeTimings();
163 void storeLoadsAndWeights(const MFloat* const loads, const MInt noLoadTypes, const MInt* const loadQuantities,
164 const MFloat domainWeight, const MFloat* const weights);
165
166 void loadBalancingCalcNewGlobalOffsets(const MLong* const oldPartitionCellOffsets,
167 const MLong* const newPartitionCellOffsets,
168 MLong* const globalOffsets);
169
170 void loadBalancingCalcNoCellsToSend(const MLong* const offsets,
171 MInt* const noCellsToSendByDomain,
172 MInt* const noCellsToReceiveByDomain,
173 MInt* const sortedCellId,
174 MInt* const bufferIdToCellId);
175
176 void computeWeights(const MFloat* loads, const MFloat domainWeight, std::vector<MFloat>& weights);
178 void getLoadQuantities(MInt* const loadQuantities);
179
180 void estimateParameters(MInt m, MInt n, const MFloat* const A, const MFloat* const b, MFloat* const x);
181
183
184 void determineDataSizesDlb(const MInt solverId, const MInt mode, const MInt* const noCellsToSend,
185 const MInt* const bufferIdToCellId, std::vector<std::vector<MInt>>& sendSizeVector,
186 std::vector<std::vector<MInt>>& recvSizeVector);
187
188 void redistributeDataDlb(const MInt id, const MInt mode, std::vector<MInt>& sendSizeVector,
189 std::vector<MInt>& recvSizeVector, const MInt* const bufferIdToCellId, const MInt noCells,
190 std::vector<MInt*>& intDataRecv, std::vector<MLong*>& longDataRecv,
191 std::vector<MFloat*>& floatDataRecv, std::vector<MInt>& dataTypes);
192
193 void setDataDlb(const MInt solverId, const MInt mode, std::vector<MInt*>& intDataRecv,
194 std::vector<MLong*>& longDataRecv, std::vector<MFloat*>& floatDataRecv, std::vector<MInt>& dataTypes,
195 const MBool freeMemory);
196
197 void resetAllTimer();
198
199 // Data members
201 // GridProxy& m_gridProxy;
202 const std::vector<std::unique_ptr<Solver>>* m_solvers;
203 const std::vector<std::function<void(const MInt)>> m_refineCellSolver;
204 const std::vector<std::function<void(const MInt)>> m_removeChildsSolver;
205 const std::vector<std::function<void(const MInt, const MInt)>> m_swapProxySolver;
206 const std::vector<std::function<MInt(const MFloat*, const MInt, const MInt)>> m_cellOutside;
207 const std::vector<std::function<void()>> m_resizeGridMapSolver;
208 const std::vector<std::function<void(const MInt)>> m_removeCellSolver;
209
210 const std::vector<std::unique_ptr<Coupling>>* m_couplers;
211
212 // Mesh adaptation
220
221 // Sensors
223
224 // Dynamic load balancing
226 // enforce balance even for low imbalance
228 // Performance output switch if balance is not enabled
230 // Debug mode
232
233 // Load balancing mode
235 // Load balancing timer mode
237
238 // Main properties for dynamic load balancing
239 MInt m_loadBalancingInterval = -1; // Load balancing interval
240 MInt m_loadBalancingOffset = 0; // Load balancing offset
241 MInt m_loadBalancingStartTimeStep = 0; // Load balancing start timestep
242 MInt m_loadBalancingStopTimeStep = -1; // Load balancing stop timestep
243 MInt m_loadBalancingTimerStartOffset = 0; // Offset for DLB timings
244 MBool m_forceLoadBalancing = false; // Switch to force DLB
245 MBool m_testDynamicLoadBalancing = false; // Switch for DLB testcases
246 // Switch for DLB testcases with partition level shift and testing of partition cell update
248 // Thresholds for testing/forcing partition cell updates
251 // starting with large value, to trigger first balance
253
256
257 // Number of performed DLB steps
259 // Last time step at which DLB was performed
261
262 // Last time step at which timers were reset
264
265 // Imbalance threshold to trigger DLB (default 5%)
267 // Performance variation threshold to skip a DLB step if the timings on a domain are not reliable
269 // DLB partition method to use
271 // Update the partition cells before load balancing, i.e., introduce new or remove existing
272 // partition cells on higher levels depending on the size/weight of the local subtree (i.e.
273 // increase/decrease partition level shifts)
275 // Indicates if a grid file with updated partition cells should be written with the next restart
277
278 // Additional parameters for DLB_PARTITION_SHIFT_OFFSETS method
280 // Number of intermediate DLB steps for which the offsets are only shifted locally
282 // Number of final DLB steps for which the offsets are only shifted locally (after reverting to
283 // the best found configuration until then)
285 // Limit for the maximum relative workload on a single domain
287
288 // DLB timer records at previous timestep
291
292 // Timings of time steps since last DLB step (for DLB imbalance evaluation)
293 std::vector<MFloat> m_dlbTimings;
294
295 // Switch to output timings
297 // Storage for timings of all timesteps (for performance evaluations)
298 std::vector<MInt> m_dlbTimeStepsAll;
299 std::vector<MFloat> m_dlbRunTimeAll;
300 std::vector<MFloat> m_dlbIdleTimeAll;
301
302 // Previous load distribution for output as a histogram, plus additional information
303 std::vector<MInt> m_previousLoadBins{};
304 std::array<MFloat, 5> m_previousLoadInfo{};
306
307 // Domain weights for DLB (i.e. processing capacities)
308 std::vector<MFloat> m_domainWeights;
309 // Switch to enable use of domain weight ratios during shifting of offsets
311
312 // Store last direction in which each offset was shifted
313 std::vector<MInt> m_lastOffsetShiftDirection{};
314
315 // DLB: store best time per time step and the corresponding 'optimal' partition-cell
316 // offset (revert to this partitioning after a finite number of load balancing
317 // steps to get the best performance in a non-adaptive simulation)
318 MFloat m_timePerStepTotal = std::numeric_limits<MFloat>::max();
320 MFloat m_imbalance = std::numeric_limits<MFloat>::max();
321
322 // balance after loadBalancingOffset timeSteps after any adaptation
325
326 // use DLB weights at restart
331
332 // Timers
333 // Status of timer initialization
335 // Timer group which holds all controller-wide timers
337 // Stores all controller-wide timers
338 std::array<MInt, Timers::_count> m_timers{};
339 // Timer groups for solver (and coupler) timers
340 std::vector<MInt> m_solverTimerGroups{};
341 // Stores all solver (and coupler) timers
342 std::vector<std::array<MInt, SolverTimers::_count>> m_solverTimers{};
343
344 // Create temporary function pointer vectors to create const class members (lennart)
345 std::vector<std::function<void(const MInt)>> refineCellVec() {
346 std::vector<std::function<void(const MInt)>> vec(noSolvers());
347 for(MInt i = 0; i < noSolvers(); i++) {
348 vec[i] = std::bind(&Solver::refineCell, &solver(i), std::placeholders::_1);
349 }
350 return vec;
351 }
352 std::vector<std::function<void(const MInt)>> removeChildsVec() {
353 std::vector<std::function<void(const MInt)>> vec(noSolvers());
354 for(MInt i = 0; i < noSolvers(); i++) {
355 vec[i] = std::bind(&Solver::removeChilds, &solver(i), std::placeholders::_1);
356 }
357 return vec;
358 }
359 std::vector<std::function<void(const MInt, const MInt)>> swapProxyVec() {
360 std::vector<std::function<void(const MInt, const MInt)>> vec(noSolvers());
361 for(MInt i = 0; i < noSolvers(); i++) {
362 vec[i] = std::bind(&Solver::swapProxy, &solver(i), std::placeholders::_1, std::placeholders::_2);
363 }
364 return vec;
365 }
366 std::vector<std::function<MInt(const MFloat*, const MInt, const MInt)>> cellOutsideVec() {
367 std::vector<std::function<MInt(const MFloat*, const MInt, const MInt)>> vec(noSolvers());
368 for(MInt i = 0; i < noSolvers(); i++) {
369 vec[i] = std::bind(&Solver::cellOutside, &solver(i), std::placeholders::_1, std::placeholders::_2,
370 std::placeholders::_3);
371 }
372 return vec;
373 }
374 std::vector<std::function<void()>> resizeGridMapVec() {
375 std::vector<std::function<void()>> vec(noSolvers());
376 for(MInt i = 0; i < noSolvers(); i++) {
377 vec[i] = std::bind(&Solver::resizeGridMap, &solver(i));
378 }
379 return vec;
380 }
381 std::vector<std::function<void(const MInt)>> removeCellVec() {
382 std::vector<std::function<void(const MInt)>> vec(noSolvers());
383 for(MInt i = 0; i < noSolvers(); i++) {
384 vec[i] = std::bind(&Solver::removeCell, &solver(i), std::placeholders::_1);
385 }
386 return vec;
387 }
388
389 // Restart
394};
395
396
397template <MInt nDim>
398Controller<nDim>::Controller(Grid* grid_, std::vector<std::unique_ptr<Solver>>* solvers,
399 std::vector<std::unique_ptr<Coupling>>* couplers)
400 : m_grid(grid_),
401 m_solvers(solvers),
402 m_refineCellSolver(refineCellVec()),
403 m_removeChildsSolver(removeChildsVec()),
404 m_swapProxySolver(swapProxyVec()),
405 m_cellOutside(cellOutsideVec()),
406 m_resizeGridMapSolver(resizeGridMapVec()),
407 m_removeCellSolver(removeCellVec()),
408 m_couplers(couplers) {
409 if(m_grid == nullptr) {
410 return;
411 }
412
413 // Read properties for adaptive mesh refinement
414
428 m_adaptation = false;
429 m_adaptation = Context::getBasicProperty<MBool>("adaptation", AT_, &m_adaptation);
430
444 m_adaptationInterval = Context::getBasicProperty<MInt>("adaptationInterval", AT_, &m_adaptationInterval);
445
460 m_adaptationStart = Context::getBasicProperty<MInt>("adaptationStart", AT_, &m_adaptationStart);
461
475 m_adaptationStop = std::numeric_limits<MInt>::max();
476 m_adaptationStop = Context::getBasicProperty<MInt>("adaptationStop", AT_, &m_adaptationStop);
477
491 m_noMaxAdaptations = std::numeric_limits<MInt>::max();
492 m_noMaxAdaptations = Context::getBasicProperty<MInt>("noMaxAdaptations", AT_, &m_noMaxAdaptations);
493
496
497 // Read properties for write-restart-file:
498 const MInt maxNoCell = gridb().maxNoCells();
500 Context::getBasicProperty<MBool>("useNonSpecifiedRestartFile", AT_, &m_useNonSpecifiedRestartFile);
501
503 m_outputDir = Context::getBasicProperty<MString>("outputDir", AT_);
504
505 m_recalcIds = (MInt*)nullptr;
506 mAlloc(m_recalcIds, maxNoCell, "m_recalcIds", -1, AT_);
507 for(MInt i = 0; i < maxNoCell; i++) {
508 m_recalcIds[i] = i;
509 }
510
512
513 initTimers();
514
515 m_syncTimeSteps = Context::getBasicProperty<MBool>("syncTimeSteps", AT_, &m_syncTimeSteps);
516}
517
518
520template <MInt nDim>
522 m_loadBalancingInterval = 0;
523 if(Context::propertyExists("onlineRestartInterval", -1)) {
524 if(domainId() == 0) {
525 std::cerr << "Property 'onlineRestartInterval' is deprecated, please rename it to "
526 "'loadBalancingInterval'."
527 << std::endl;
528 }
529 m_loadBalancingInterval = Context::getBasicProperty<MInt>("onlineRestartInterval", AT_, &m_loadBalancingInterval);
530 }
531
544 m_loadBalancingInterval = Context::getBasicProperty<MInt>("loadBalancingInterval", AT_, &m_loadBalancingInterval);
545
559 m_balance = (m_loadBalancingInterval > 0);
560 m_balance = Context::getBasicProperty<MBool>("balance", AT_, &m_balance);
561
562 m_forceBalance = 0;
563 // 0: force never
564 // 1: force once
565 // 2: force always
566 m_forceBalance = Context::getBasicProperty<MInt>("forceBalance", AT_, &m_forceBalance);
567
579 m_balanceAfterAdaptation = (m_adaptation && m_balance);
580 m_balanceAfterAdaptation = Context::getBasicProperty<MBool>("balanceAfterAdaptation", AT_, &m_balanceAfterAdaptation);
581
590 m_balanceAdaptationInterval = 1;
591 m_balanceAdaptationInterval =
592 Context::getBasicProperty<MInt>("balanceAfterAdaptationInterval", AT_, &m_balanceAdaptationInterval);
593
594 m_loadBalancingOffset = 0;
595 m_loadBalancingOffset = Context::getBasicProperty<MInt>("loadBalancingOffset", AT_, &m_loadBalancingOffset);
596
597 const MBool balanceOnlyAfterAdapt = (m_balanceAfterAdaptation && m_loadBalancingInterval <= 0);
598 if(balanceOnlyAfterAdapt) {
599 m_loadBalancingInterval = std::numeric_limits<MInt>::max();
600 }
601 m_balance = m_balance || m_balanceAfterAdaptation;
602
603 // Return if not enabled
604 if(!m_balance) {
605 // Note: always display imbalance even though balancing is not enabled
606 m_performanceOutput = true;
607 m_performanceOutput = Context::getBasicProperty<MBool>("performanceOutput", AT_, &m_performanceOutput);
608 m_loadBalancingMode = 1;
609 // Determine interval for performance output as power of 10
610 MInt interval = std::max((MInt)pow(10.0, std::floor(std::log10((MFloat)g_timeSteps / 10.0))), 1);
611 if(g_timeSteps / interval > 20.0) { // Increase interval to limit total number of evaluations
612 interval = interval * 5; // Now interval is in {10,50,100,500,1000,...}
613 }
614
615 m_loadBalancingInterval = std::max(interval, 10);
616 // Return if balance and performanceOutput are both disabled
617 if(!m_performanceOutput) {
618 m_balance = false;
619 m_log << "Performance output: disabled." << std::endl;
620 return;
621 } else {
622 m_balance = true; // Set s.t. balance() does not immediately return
623 m_log << "Performance output: enabled - every " << m_loadBalancingInterval << " time steps." << std::endl;
624 }
625 m_log << "Dynamic load balancing disabled." << std::endl;
626 return;
627 }
628
642 m_loadBalancingMode = 0;
643 m_loadBalancingMode = Context::getBasicProperty<MInt>("loadBalancingMode", AT_, &m_loadBalancingMode);
644
658 m_loadBalancingTimerMode = 0;
659 m_loadBalancingTimerMode = Context::getBasicProperty<MInt>("loadBalancingTimerMode", AT_, &m_loadBalancingTimerMode);
660
673 m_loadBalancingStartTimeStep = 0;
674 m_loadBalancingStartTimeStep =
675 Context::getBasicProperty<MInt>("loadBalancingStartTimeStep", AT_, &m_loadBalancingStartTimeStep);
676
690 m_loadBalancingStopTimeStep = std::numeric_limits<MInt>::max();
691 m_loadBalancingStopTimeStep =
692 Context::getBasicProperty<MInt>("loadBalancingStopTimeStep", AT_, &m_loadBalancingStopTimeStep);
693
707 m_loadBalancingTimerStartOffset = std::floor(0.2 * m_loadBalancingInterval);
708 // Set default timer start offset if balance is used only after adaptation
709 // TODO labels:DLB case with balance interval + balance after adapt?
710 if(balanceOnlyAfterAdapt) {
711 if(m_loadBalancingOffset == 0) {
712 m_loadBalancingTimerStartOffset = std::floor(0.2 * m_adaptationInterval);
713 } else {
714 m_loadBalancingTimerStartOffset = std::floor(0.2 * m_loadBalancingOffset);
715 }
716 }
717 m_loadBalancingTimerStartOffset =
718 Context::getBasicProperty<MInt>("loadBalancingTimerStartOffset", AT_, &m_loadBalancingTimerStartOffset);
719
720 // TODO labels:DLB add checks for timerStartOffset for adaptation/+balancingoffset
721 if(m_loadBalancingTimerStartOffset >= m_loadBalancingInterval) {
722 TERMM(1, "DLB timerStartOffset = " + std::to_string(m_loadBalancingTimerStartOffset)
723 + " must be smaller than dlb-interval = " + std::to_string(m_loadBalancingInterval) + ".");
724 }
725
740 m_forceLoadBalancing = Context::getBasicProperty<MBool>("forceLoadBalancing", AT_, &m_forceLoadBalancing);
741
755 m_testDynamicLoadBalancing = false;
756 m_testDynamicLoadBalancing =
757 Context::getBasicProperty<MBool>("testDynamicLoadBalancing", AT_, &m_testDynamicLoadBalancing);
758
767 m_testUpdatePartitionCells = false;
768 m_testUpdatePartitionCells =
769 Context::getBasicProperty<MBool>("testUpdatePartitionCells", AT_, &m_testUpdatePartitionCells);
770
771 if(m_testUpdatePartitionCells) {
780 m_testUpdatePartCellsOffspringThreshold = 10;
781 m_testUpdatePartCellsOffspringThreshold = Context::getBasicProperty<MInt>(
782 "testUpdatePartCellsOffspringThreshold", AT_, &m_testUpdatePartCellsOffspringThreshold);
783
792 m_testUpdatePartCellsWorkloadThreshold = 100.0;
793 m_testUpdatePartCellsWorkloadThreshold = Context::getBasicProperty<MFloat>(
794 "testUpdatePartCellsWorkloadThreshold", AT_, &m_testUpdatePartCellsWorkloadThreshold);
795
796 m_log << "Testing partition cell update before DLB: offspringThreshold = "
797 << m_testUpdatePartCellsOffspringThreshold
798 << "; workloadThreshold = " << m_testUpdatePartCellsWorkloadThreshold << std::endl;
799 }
800
814 m_debugBalance = false;
815 m_debugBalance = Context::getBasicProperty<MBool>("debugBalance", AT_, &m_debugBalance);
816
817 if(m_testDynamicLoadBalancing) {
818 // Force DLB for testcases and enable additional debug output
819 m_forceLoadBalancing = true;
820 m_debugBalance = true;
821 }
822
836 m_outputDlbTimings = false;
837 m_outputDlbTimings = Context::getBasicProperty<MBool>("outputDlbTimings", AT_, &m_outputDlbTimings);
838
839 m_log << "Dynamic load balancing activated (mode = " << m_loadBalancingMode
840 << ", interval = " << m_loadBalancingInterval << ", startTimeStep = " << m_loadBalancingStartTimeStep
841 << ", stopTimeStep = " << m_loadBalancingStopTimeStep
842 << ", timerStartOffset = " << m_loadBalancingTimerStartOffset << ", force = " << m_forceLoadBalancing
843 << ", test = " << m_testDynamicLoadBalancing << ")" << std::endl;
844
861 if(Context::propertyExists("dlbPartitionMethod", -1)) {
862 const MString dlbPartitionMethod = Context::getBasicProperty<MString>("dlbPartitionMethod", AT_);
863 m_dlbPartitionMethod = string2enum(dlbPartitionMethod);
864 }
865
866 // Use weighting method for testing DLB
867 if(m_testDynamicLoadBalancing) {
868 m_dlbPartitionMethod = DLB_PARTITION_WEIGHT;
869 }
870
879 m_dlbUpdatePartitionCells = false;
880 m_dlbUpdatePartitionCells =
881 Context::getBasicProperty<MBool>("dlbUpdatePartitionCells", AT_, &m_dlbUpdatePartitionCells);
882
883 m_useDomainFactor = false;
884 m_useDomainFactor = Context::getBasicProperty<MBool>("dlbPartitionDomainFactor", AT_, &m_useDomainFactor);
885
886 m_dlbSmoothGlobalShifts = true;
887 m_dlbSmoothGlobalShifts = Context::getBasicProperty<MBool>("dlbSmoothGlobalShifts", AT_, &m_dlbSmoothGlobalShifts);
888
889 m_dlbNoLocalShifts = 0;
890 m_dlbNoLocalShifts = Context::getBasicProperty<MInt>("dlbNoLocalShifts", AT_, &m_dlbNoLocalShifts);
891
892 m_dlbNoFinalLocalShifts = 0;
893 m_dlbNoFinalLocalShifts = Context::getBasicProperty<MInt>("dlbNoFinalLocalShifts", AT_, &m_dlbNoFinalLocalShifts);
894
895 // Note: use 0 or negative to disable max workload limitation
896 m_dlbMaxWorkloadLimit = 1.5;
897 m_dlbMaxWorkloadLimit = Context::getBasicProperty<MFloat>("dlbMaxWorkloadLimit", AT_, &m_dlbMaxWorkloadLimit);
898 if(m_dlbMaxWorkloadLimit > 0.0 && m_dlbMaxWorkloadLimit < 1.0) {
899 TERMM(1, "DLB: maximum workload limit needs to be > 1.0, is " + std::to_string(m_dlbMaxWorkloadLimit)
900 + "; or set to <= 0.0 to disable this feature.");
901 }
902
919 m_dlbImbalanceThreshold = 0.05;
920 m_dlbImbalanceThreshold = Context::getBasicProperty<MFloat>("dlbImbalanceThreshold", AT_, &m_dlbImbalanceThreshold);
921
933 m_maxPerformanceVarThreshold = 0.15;
934 m_maxPerformanceVarThreshold =
935 Context::getBasicProperty<MFloat>("maxPerformanceVarThreshold", AT_, &m_maxPerformanceVarThreshold);
936
937 m_lastLoadBalancingTimeStep = m_loadBalancingStartTimeStep;
938
939 if(m_loadBalancingMode == 1) {
940 m_log << "Dynamic load balancing: using partition method #" << m_dlbPartitionMethod
941 << "; imbalance percentage threshold: " << m_dlbImbalanceThreshold * 100.0
942 << "%; maxPerfVarThreshold: " << m_maxPerformanceVarThreshold << std::endl;
943 m_log << "DLB settings: domainFactor=" << m_useDomainFactor << "; noLocalShifts=" << m_dlbNoLocalShifts
944 << "; noFinalLocalShifts=" << m_dlbNoFinalLocalShifts << "; maxWorkloadLimit=" << m_dlbMaxWorkloadLimit
945 << "; smooth shifts=" << m_dlbSmoothGlobalShifts << std::endl;
946 }
947
948 m_dlbRestartWeights = false;
949 if(m_loadBalancingMode == 0 && m_dlbPartitionMethod == DLB_PARTITION_WEIGHT) {
950 m_dlbRestartWeights = true;
951 }
952 m_dlbRestartWeights = Context::getBasicProperty<MBool>("dlbRestartWeights", AT_, &m_dlbRestartWeights);
953
954 m_dlbStaticWeights = nullptr;
955 //-1: never apply static weights
956 // 0: always apply static weights
957 //>0: apply static weights after n-adaptations
958 m_dlbStaticWeightMode = -1;
959 if(Context::propertyExists("dlbStaticWeights", 0)) {
960 m_dlbStaticWeightMode = 0;
961 if(Context::propertyLength("dlbStaticWeights", 0) != globalNoLoadTypes()) {
962 cerr0 << "Dlb static Load size does not match!" << globalNoLoadTypes() << " "
963 << Context::propertyLength("dlbStaticWeights", 0) << std::endl;
964 } else {
965 mAlloc(m_dlbStaticWeights, globalNoLoadTypes(), "m_dlbStaticWeight", AT_);
966 for(MInt i = 0; i < globalNoLoadTypes(); i++) {
967 m_dlbStaticWeights[i] = Context::getBasicProperty<MFloat>("dlbStaticWeights", AT_, i);
968 }
969 }
970 m_dlbStaticWeightMode = Context::getBasicProperty<MInt>("dlbStaticWeightMode", AT_, &m_dlbStaticWeightMode);
971 }
972}
973
974
976template <MInt nDim>
978 NEW_TIMER_GROUP_NOCREATE(m_timerGroup, "GridController (noSolvers = " + std::to_string(noSolvers()) + ")");
979 m_timers.fill(-1);
980 NEW_TIMER_NOCREATE(m_timers[Timers::Controller], "total object lifetime", m_timerGroup);
981 RECORD_TIMER_START(m_timers[Timers::Controller]);
982
983 // Balancing timers
984 NEW_SUB_TIMER_NOCREATE(m_timers[Timers::DLB], "DLB", m_timers[Timers::Controller]);
985 NEW_SUB_TIMER_NOCREATE(m_timers[Timers::Trigger], "Trigger", m_timers[Timers::DLB]);
986 NEW_SUB_TIMER_NOCREATE(m_timers[Timers::Partition], "Partition", m_timers[Timers::DLB]);
987 NEW_SUB_TIMER_NOCREATE(m_timers[Timers::Prepare], "Prepare", m_timers[Timers::DLB]);
988 NEW_SUB_TIMER_NOCREATE(m_timers[Timers::BalanceGrid], "Balance grid", m_timers[Timers::DLB]);
989 NEW_SUB_TIMER_NOCREATE(m_timers[Timers::BalanceSolvers], "Balance solvers", m_timers[Timers::DLB]);
990 NEW_SUB_TIMER_NOCREATE(m_timers[Timers::BalanceCouplers], "Balance couplers", m_timers[Timers::DLB]);
991 NEW_SUB_TIMER_NOCREATE(m_timers[Timers::IO], "I/O", m_timers[Timers::DLB]);
992
993 // Adaptation timers
994 NEW_SUB_TIMER_NOCREATE(m_timers[Timers::Adaptation], "Adaptation", m_timers[Timers::Controller]);
995 NEW_SUB_TIMER_NOCREATE(m_timers[Timers::MeshAdaptation], "Mesh Adaptation", m_timers[Timers::Adaptation]);
996
997 const MInt noSolversAndCouplers = noSolvers() + noCouplers();
998 m_solverTimerGroups.resize(noSolversAndCouplers);
999 m_solverTimers.resize(noSolversAndCouplers);
1000
1001 for(MInt b = 0; b < noSolversAndCouplers; b++) {
1002 m_solverTimerGroups[b] = -1;
1003 m_solverTimers[b].fill(-1);
1004
1005 const MBool isSolver = (b < noSolvers());
1006 const MString groupName = (isSolver) ? "solverId = " + std::to_string(solver(b).solverId())
1007 : "couplerId = " + std::to_string(coupler(b - noSolvers()).couplerId());
1008
1009 NEW_TIMER_GROUP_NOCREATE(m_solverTimerGroups[b], "GridController: " + groupName);
1010 NEW_TIMER_NOCREATE(m_solverTimers[b][SolverTimers::Total], "total", m_solverTimerGroups[b]);
1011 const MInt solverTimer = m_solverTimers[b][SolverTimers::Total];
1012
1013 // Balancing timers
1014 NEW_SUB_TIMER_NOCREATE(m_solverTimers[b][SolverTimers::DLB], "Dynamic load balancing", solverTimer);
1015 const MInt dlbTimer = m_solverTimers[b][SolverTimers::DLB];
1016
1017 NEW_SUB_TIMER_NOCREATE(m_solverTimers[b][SolverTimers::Reset], "Reset solver", dlbTimer);
1018 NEW_SUB_TIMER_NOCREATE(m_solverTimers[b][SolverTimers::CalcDataSizes], "Determine data sizes", dlbTimer);
1019 const MInt dataSizeTimer = m_solverTimers[b][SolverTimers::CalcDataSizes];
1020
1021 NEW_SUB_TIMER_NOCREATE(m_solverTimers[b][SolverTimers::CalcDataSizesMpiBlocking], "initial blocking MPI",
1022 dataSizeTimer);
1023 NEW_SUB_TIMER_NOCREATE(m_solverTimers[b][SolverTimers::CalcDataSizesMpi], "remaining MPI", dataSizeTimer);
1024
1025 NEW_SUB_TIMER_NOCREATE(m_solverTimers[b][SolverTimers::Balance], "Balance", dlbTimer);
1026 const MInt balanceTimer = m_solverTimers[b][SolverTimers::Balance];
1027
1028 NEW_SUB_TIMER_NOCREATE(m_solverTimers[b][SolverTimers::Redistribute], "Redistribute data", balanceTimer);
1029 const MInt redistTimer = m_solverTimers[b][SolverTimers::Redistribute];
1030
1031 NEW_SUB_TIMER_NOCREATE(m_solverTimers[b][SolverTimers::RedistributeMpiBlocking], "initial blocking MPI",
1032 redistTimer);
1033 NEW_SUB_TIMER_NOCREATE(m_solverTimers[b][SolverTimers::RedistributeMpi], "remaining MPI", redistTimer);
1034
1035 NEW_SUB_TIMER_NOCREATE(m_solverTimers[b][SolverTimers::BalancePre], "Balance pre", balanceTimer);
1036
1037 NEW_SUB_TIMER_NOCREATE(m_solverTimers[b][SolverTimers::SetData], "Set data", balanceTimer);
1038 const MInt setDataTimer = m_solverTimers[b][SolverTimers::SetData];
1039
1040 NEW_SUB_TIMER_NOCREATE(m_solverTimers[b][SolverTimers::SetDataMpiBlocking], "initial blocking MPI", setDataTimer);
1041 NEW_SUB_TIMER_NOCREATE(m_solverTimers[b][SolverTimers::SetDataMpi], "remaining MPI", setDataTimer);
1042
1043 NEW_SUB_TIMER_NOCREATE(m_solverTimers[b][SolverTimers::BalancePost], "Balance post", balanceTimer);
1044
1045 NEW_SUB_TIMER_NOCREATE(m_solverTimers[b][SolverTimers::DlbOther], "Other", dlbTimer);
1046
1047 // Adaptation timers
1048 NEW_SUB_TIMER_NOCREATE(m_solverTimers[b][SolverTimers::Adaptation], "Adaptation", solverTimer);
1049 }
1050}
1051
1052
1056template <MInt nDim>
1057MBool Controller<nDim>::balance(const MBool force, const MBool finalTimeStep, const MBool adaptation) {
1058 // Return if not enabled or in serial
1059 if(!m_balance || noDomains() == 1) {
1060 return false;
1061 }
1062
1063 // Reset balance status
1064 gridb().m_wasBalanced = false;
1065
1066 std::vector<std::pair<MFloat, MString>> durations{};
1067 auto logDuration = [&durations](const MFloat time, const MString comment, const MBool fromTimer = false) {
1068 const MFloat duration = (fromTimer) ? time : wallTime() - time;
1069 durations.push_back(std::make_pair(duration, comment));
1070 };
1071
1072 RECORD_TIMER_START(m_timers[Timers::DLB]);
1073
1074 const MFloat dlbStartTime = wallTime();
1075 const MInt oldAllocatedBytes = allocatedBytes();
1076
1077 const MInt noDlbTimers = maia::dlb::g_dlbTimerController.noDlbTimers();
1078 if(noDlbTimers == 0 && m_loadBalancingMode == 1 && !m_testDynamicLoadBalancing) {
1079 std::cerr << "There are no DLB timers, but loadBalancingMode is 1 and testing is off; "
1080 "switching to loadBalancingMode = 0."
1081 << std::endl;
1082 m_loadBalancingMode = 0;
1083 }
1084
1085 // Accumulate timer records of all dlb timers
1086 MFloat localRunTime = 0.0;
1087 MFloat localIdleTime = 0.0;
1088
1089 for(MInt i = 0; i < noDlbTimers; i++) {
1090 const MFloat loadRecord = maia::dlb::g_dlbTimerController.returnLoadRecord(i, m_loadBalancingTimerMode);
1091 const MFloat idleRecord = maia::dlb::g_dlbTimerController.returnIdleRecord(i, m_loadBalancingTimerMode);
1092
1093 if(m_loadBalancingMode == 1 && !m_testDynamicLoadBalancing && (loadRecord < 0.0 || idleRecord < 0.0)) {
1094 TERMM(1, "Load/Idle record for dlb timer #" + std::to_string(i) + " is less than zero on global domain #"
1095 + std::to_string(domainId()) + ": " + std::to_string(loadRecord) + ", "
1096 + std::to_string(idleRecord));
1097 }
1098
1099 localRunTime += loadRecord;
1100 localIdleTime += idleRecord;
1101 }
1102
1103 const MInt timeStep = globalTimeStep;
1104 if(!adaptation) { // Skip storing timings for call from adaptation
1105 // Runtime and idle-time of last step
1106 const MFloat localRunTimeLastStep = localRunTime - m_dlbPreviousLocalRunTime;
1107 const MFloat localIdleTimeLastStep = localIdleTime - m_dlbPreviousLocalIdleTime;
1108
1109 // Store current timings for next evaluation
1110 m_dlbPreviousLocalRunTime = localRunTime;
1111 m_dlbPreviousLocalIdleTime = localIdleTime;
1112
1113 // Store timing for imbalance evaluation
1114 m_dlbTimings.push_back(localRunTimeLastStep);
1115
1116 // Store runtime and idle time for performance evaluations
1117 if(m_outputDlbTimings) {
1118 m_dlbTimeStepsAll.push_back(timeStep);
1119 m_dlbRunTimeAll.push_back(localRunTimeLastStep);
1120 m_dlbIdleTimeAll.push_back(localIdleTimeLastStep);
1121
1122 // Write timings to disk at final time step
1123 if(finalTimeStep) {
1124 storeTimings();
1125 }
1126 }
1127 }
1128
1129 // Check if this is a timer reset time step
1130 MBool resetTimeStep = (m_lastLoadBalancingTimeStep + m_loadBalancingTimerStartOffset == timeStep
1131 || timeStep == m_loadBalancingStartTimeStep + m_loadBalancingTimerStartOffset);
1132
1133 // TODO labels:DLB @ansgar FIXME
1134 if(m_balanceAfterAdaptation) {
1135 // resetTimeStep = ((timeStep - m_loadBalancingTimerStartOffset) % m_loadBalancingInterval == 0
1136 resetTimeStep = resetTimeStep || (timeStep - m_lastAdaptationTimeStep == m_loadBalancingTimerStartOffset);
1137 // Reset timer at adaptation time step (if this is no DLB step, see below)
1138 resetTimeStep = resetTimeStep || (timeStep - m_lastAdaptationTimeStep == 0);
1139 }
1140
1141 MBool dlbTimeStep = ((timeStep - m_loadBalancingOffset) % m_loadBalancingInterval == 0);
1142 if(m_balanceAfterAdaptation) {
1143 dlbTimeStep = dlbTimeStep
1144 || (((timeStep - m_lastAdaptationTimeStep) == m_loadBalancingOffset)
1145 && (m_nAdaptationsSinceBalance >= m_balanceAdaptationInterval || m_dlbStep < 2)
1146 && (m_loadBalancingStopTimeStep < 0 || globalTimeStep < m_loadBalancingStopTimeStep));
1147 }
1148
1149 if(resetTimeStep && !dlbTimeStep) {
1150 // reset all timer
1151 resetAllTimer();
1152
1153 // ... and return
1154 RECORD_TIMER_STOP(m_timers[Timers::DLB]);
1155 return false;
1156 }
1157
1158 // Quit early if balancing is not forced and interval does not match
1159 if(!force && !finalTimeStep && (m_loadBalancingInterval <= 0 || !dlbTimeStep)) {
1160 RECORD_TIMER_STOP(m_timers[Timers::DLB]);
1161 return false;
1162 }
1163
1164 // const MBool atDlbInterval = (timeStep % m_loadBalancingInterval == 0);
1165 const MBool initialBalance = (timeStep == -1 && force);
1166 const MBool initialAdaptationOnly = (timeStep == -1 && m_performanceOutput);
1167 const MBool afterLastDlbStep = (timeStep > m_loadBalancingStopTimeStep && m_loadBalancingStopTimeStep != -1);
1168 const MBool beforeFirstDlbStep = (timeStep <= m_loadBalancingStartTimeStep);
1169 // Performance output step without load balancing
1170 const MBool performanceOutput =
1171 (m_performanceOutput || finalTimeStep || afterLastDlbStep || beforeFirstDlbStep) && !initialBalance;
1172
1173 // Return if this is not a DLB time step and not the initial balancing
1174 if(!dlbTimeStep && (!initialBalance || initialAdaptationOnly)) {
1175 // Continue if forced or at final time step to determine imbalance before returning
1176 if((!force && !finalTimeStep) || initialAdaptationOnly) {
1177 RECORD_TIMER_STOP(m_timers[Timers::DLB]);
1178 return false;
1179 }
1180 }
1181
1182 if(!performanceOutput) {
1183 m_log << "Dynamic load balancing at time step " << timeStep << std::endl;
1184 cerr0 << "=== Dynamic load balancing at time step " << timeStep << std::endl;
1185 }
1186
1187 logTimerStatistics("before balance");
1188
1189 // Determine if this is the last DLB step
1190 const MBool lastDlbStep =
1191 (timeStep + m_loadBalancingInterval > m_loadBalancingStopTimeStep && m_loadBalancingStopTimeStep != -1)
1192 && !m_balanceAfterAdaptation;
1193 // Determine if this is a DLB step at which the partitioning should be reverted to the best so far
1194 // to improve it by using local shifts only
1195 const MInt isDlbRevertStep =
1196 (m_loadBalancingMode == 1 && !lastDlbStep && m_loadBalancingStopTimeStep != -1 && m_dlbNoFinalLocalShifts > 0
1197 && timeStep == m_loadBalancingStopTimeStep - m_loadBalancingInterval * (m_dlbNoFinalLocalShifts + 1));
1198
1199
1200 // Determine the number of timesteps that are timed
1201 // MInt noTimeSteps = timeStep - m_lastLoadBalancingTimeStep - m_loadBalancingTimerStartOffset;
1202 MInt noTimeSteps = timeStep - m_dlbLastResetTimeStep;
1203 if(timeStep < 0) noTimeSteps = 0;
1204
1205 if(m_loadBalancingMode == 1) {
1206 ASSERT(noTimeSteps > 0, "ERROR: noTimeSteps = " + std::to_string(noTimeSteps));
1207 }
1208
1209 if(noTimeSteps != (MInt)m_dlbTimings.size()) {
1210 m_log << "DLB: Number of timings does not match: " + std::to_string(m_dlbTimings.size()) + " "
1211 + std::to_string(noTimeSteps) + " (beforeFirstStep = " + std::to_string(beforeFirstDlbStep)
1212 + "; afterLastDlbStep = " + std::to_string(afterLastDlbStep) + ")"
1213 << std::endl;
1214 std::cerr << "WARNING: number of timings mismatch! " << noTimeSteps << " " << m_dlbTimings.size() << std::endl;
1215 noTimeSteps = m_dlbTimings.size();
1216 }
1217
1218
1219 std::vector<MFloat> timings(m_dlbTimings.begin(), m_dlbTimings.end());
1220 // Sort timings for evaluation
1221 std::sort(timings.begin(), timings.end());
1222 const MInt noSamples = timings.size();
1223 if(noSamples < 1 && !initialBalance) {
1224 TERMM(1, "ERROR: number of samples is < 1");
1225 }
1226
1227 const MBool truncatedMean = !m_balanceAfterAdaptation;
1228 // Compute 25% truncated mean for computing loads for static grids
1229 const MFloat trim = 0.25;
1230 const MInt lowerBound = std::floor(trim * noSamples);
1231 const MInt upperBound = std::max(MInt(std::floor((1 - trim) * noSamples)), std::min(noSamples, 1));
1232 const MInt noSamplesTruncated = upperBound - lowerBound;
1233
1234 if(m_loadBalancingMode == 1 && noSamplesTruncated < 1 && !initialBalance) {
1235 std::cerr << "ERROR no samples truncated " << noSamplesTruncated << std::endl;
1236 /* return 0; */
1237 }
1238
1239 const MFloat truncatedMeanRunTime =
1240 std::accumulate(&timings[lowerBound], &timings[upperBound], 0.0) / noSamplesTruncated;
1241 const MFloat meanRunTime = std::accumulate(timings.begin(), timings.end(), 0.0) / noSamples;
1242 const MFloat meanRunTimeTrigger = (truncatedMean) ? truncatedMeanRunTime : meanRunTime;
1243
1244 /* const MFloat maxLocalRunTime = *std::max_element(timings.begin(), timings.end()); */
1245 /* const MFloat minLocalRunTime = *std::min_element(timings.begin(), timings.end()); */
1246
1247 // Compute average time per step (all timings)
1248 MFloat timePerStep = (localRunTime + localIdleTime) / noTimeSteps;
1249 MFloat maxRunTime = localRunTime / noTimeSteps;
1250 const MFloat localTimePerStep = timePerStep;
1251 // Communicate and take maximum value -> assure same value on all domains
1252 MPI_Allreduce(MPI_IN_PLACE, &timePerStep, 1, MPI_DOUBLE, MPI_MAX, gridb().mpiComm(), AT_, "MPI_IN_PLACE",
1253 "timePerStep");
1254 MPI_Allreduce(MPI_IN_PLACE, &maxRunTime, 1, MPI_DOUBLE, MPI_MAX, gridb().mpiComm(), AT_, "MPI_IN_PLACE",
1255 "maxRunTime");
1256
1257 MFloat maxIdleTime = localIdleTime / noTimeSteps;
1258 MFloat minIdleTime = localIdleTime / noTimeSteps;
1259 MPI_Allreduce(MPI_IN_PLACE, &maxIdleTime, 1, MPI_DOUBLE, MPI_MAX, gridb().mpiComm(), AT_, "MPI_IN_PLACE",
1260 "maxIdleTime");
1261 MPI_Allreduce(MPI_IN_PLACE, &minIdleTime, 1, MPI_DOUBLE, MPI_MIN, gridb().mpiComm(), AT_, "MPI_IN_PLACE",
1262 "minIdleTime");
1263
1264 const MFloat minIdleTimeRel = minIdleTime / timePerStep;
1265
1266 m_log << timeStep << " * Average time per step: " << timePerStep << " ; local: " << localTimePerStep
1267 << ", idle/comp = " << localIdleTime / localRunTime
1268 << ", idle/timePerStep = " << localIdleTime / (noTimeSteps * timePerStep) << std::endl;
1269 m_log << timeStep << " * Relative idle time: max = " << maxIdleTime / timePerStep << ", min "
1270 << minIdleTime / timePerStep << std::endl;
1271 m_log << timeStep << " * maxRunTime " << maxRunTime << std::endl;
1272
1273 logDuration(dlbStartTime, "DLB preparation/timers");
1274
1275 const MFloat imbalanceStartTime = wallTime();
1276 RECORD_TIMER_START(m_timers[Timers::Trigger]);
1277 MFloat imbalance = -1.0;
1278 ScratchSpace<MFloat> loads(noDomains(), FUN_, "loads");
1279 MBool loadBalance = initialBalance;
1280 if(!loadBalance) { // Skip this for initial balance since there are no timings
1281 loadBalance = needLoadBalancing(meanRunTimeTrigger, &loads[0], imbalance) || force || (m_forceBalance > 0);
1282 }
1283 if(m_forceBalance == 1) {
1284 m_forceBalance = 0;
1285 }
1286 RECORD_TIMER_STOP(m_timers[Timers::Trigger]);
1287 logDuration(imbalanceStartTime, "Imbalance evaluation");
1288
1289 // Determine variance of timings to detect performance variations
1290 // TODO labels:CONTROLLER,DLB exclude ranks with low load?
1291 MFloat localRunTimeVariance = 0.0;
1292 for(MInt i = 0; i < noSamples; i++) {
1293 localRunTimeVariance += POW2(timings[i] - meanRunTime);
1294 }
1295 localRunTimeVariance /= noSamples;
1296 const MFloat localRunTimeStdev = std::sqrt(localRunTimeVariance);
1297
1298 const MFloat performanceVariation = localRunTimeStdev / meanRunTime;
1299 MFloat perfVarMax[2];
1300 perfVarMax[0] = performanceVariation;
1301 // Note: use only ranks above a specific load
1302 perfVarMax[1] = (loads[domainId()] > 0.85) ? performanceVariation : 0.0;
1303
1304 MPI_Allreduce(MPI_IN_PLACE, &perfVarMax, 2, type_traits<MFloat>::mpiType(), MPI_MAX, gridb().mpiComm(), AT_,
1305 "MPI_IN_PLACE", "perfVarMax");
1306 const MFloat maxPerformanceVariation = perfVarMax[1];
1307
1308 const MBool performanceVariationCheck = (maxPerformanceVariation > m_maxPerformanceVarThreshold && !m_adaptation);
1309
1310 m_log << timeStep << " * DLB: Performance variation: load>0.85 " << maxPerformanceVariation << "; all "
1311 << perfVarMax[0] << std::endl;
1312
1313 m_log << timeStep << " * Average timings: avgRunTime = " << localRunTime / noTimeSteps
1314 << ", truncated = " << truncatedMean << " " << truncatedMeanRunTime << ", diff "
1315 << meanRunTime - truncatedMeanRunTime << ", noTimeSteps = " << noTimeSteps << std::endl;
1316
1317 m_log << timeStep << " * timePerStep = " << timePerStep << "; imbalance = " << imbalance
1318 << "; maxRunTime = " << maxRunTime << "; minIdleTimeRel = " << minIdleTimeRel << std::endl;
1319
1320 if(m_loadBalancingMode == 1 && performanceVariationCheck && !lastDlbStep && !performanceOutput
1321 && !m_testDynamicLoadBalancing) {
1322 /* std::cerr << domainId() << " performance variation " << minLocalRunTime << " " */
1323 /* << maxLocalRunTime << " " << localRunTimeVariance << " " << localRunTimeStdev */
1324 /* << " " << localRunTimeStdev/meanRunTime << std::endl; */
1325
1326 std::stringstream perfMessage;
1327 perfMessage << "DLB: Performance variation " << maxPerformanceVariation << ", skip DLB step." << std::endl;
1328 m_log << perfMessage.str();
1329 if(domainId() == 0) {
1330 std::cout << perfMessage.str();
1331 }
1332
1333 resetAllTimer();
1334
1335 m_lastLoadBalancingTimeStep = timeStep;
1336 m_nAdaptationsSinceBalance = 0;
1337 RECORD_TIMER_STOP(m_timers[Timers::DLB]);
1338 return false;
1339 }
1340
1341 if(m_loadBalancingMode == 0) {
1342 m_lastLoadBalancingTimeStep = timeStep;
1343 m_nAdaptationsSinceBalance = 0;
1344 }
1345
1346 // Check if the current time per timestep is the best one so far. If so, store the new best
1347 // time/timestep and the corresponding local domain offset. (Note: includes idle times, also the
1348 // imbalance should not be too large, accept also better imbalance with slightly increased time
1349 // per step as the latter might be due e.g. network latencies and slower communication)
1350 MBool newBestTimePerStep = false;
1351 if(m_loadBalancingMode == 1 && !performanceOutput && !performanceVariationCheck && !m_testDynamicLoadBalancing
1352 && ((timePerStep < m_timePerStepTotal && imbalance < 1.05 * m_imbalance)
1353 || (imbalance < m_imbalance && timePerStep < 1.05 * m_timePerStepTotal))) {
1354 if(m_optPartitionCellOffsetTotal == -1) {
1355 m_timePerStepTotal = -1.0;
1356 m_imbalance = -1.0;
1357 }
1358 m_log << timeStep << " * Storing new best domain partitioning: timePerStep = " << timePerStep << " ("
1359 << m_timePerStepTotal << "); imbalance = " << imbalance << " (" << m_imbalance << "), maxRunTime "
1360 << maxRunTime << " minIdleTimeRel " << minIdleTimeRel << std::endl;
1361 newBestTimePerStep = true;
1362 m_timePerStepTotal = timePerStep;
1363 m_imbalance = imbalance;
1364 m_optPartitionCellOffsetTotal = gridb().m_localPartitionCellOffsets[0];
1365
1366 RECORD_TIMER_START(m_timers[Timers::IO]);
1367 // Store partition file for restarting with the same domain offsets on the
1368 // same number of domains
1369 // This will overwrite the partition_n[noDomains].[ext] file every time a
1370 // better partitioning is found.
1371 std::stringstream partitionFileName;
1372 partitionFileName << "partition_n" << noDomains();
1373 gridb().savePartitionFile(partitionFileName.str(), m_optPartitionCellOffsetTotal);
1374 RECORD_TIMER_STOP(m_timers[Timers::IO]);
1375 } else if(m_testDynamicLoadBalancing && finalTimeStep) {
1376 // Always write partition file at final time step for testing
1377 std::stringstream partitionFileName;
1378 partitionFileName << "partition_n" << noDomains();
1379 gridb().savePartitionFile(partitionFileName.str(), gridb().m_localPartitionCellOffsets[0]);
1380 }
1381
1382 // Return if nothing needs to be done
1383 if(!loadBalance && !m_forceLoadBalancing && (isDlbRevertStep == 0)) {
1384 m_log << " * no load imbalance detected at timestep " << timeStep << "!" << std::endl;
1385 // Continue if this is the last DLB step and the time per step is worse than
1386 // the best one, else return here
1387 if(!(lastDlbStep && !newBestTimePerStep)) {
1388 RECORD_TIMER_STOP(m_timers[Timers::DLB]);
1389 return false;
1390 }
1391 } else {
1392 if(!performanceOutput) {
1393 m_log << " * load imbalance detected at timestep " << timeStep << "!" << std::endl;
1394 }
1395 }
1396
1397 // Return here for final time step/or performance output after determining imbalance etc.
1398 if(performanceOutput) {
1399 // clear timings such that only the next x timesteps are used for the following performance
1400 // output
1401 m_log << "Performance evaluation: clear timings at timestep " << timeStep << "!" << std::endl;
1402
1403 resetAllTimer();
1404
1405 RECORD_TIMER_STOP(m_timers[Timers::DLB]);
1406 return false;
1407 }
1408
1409 // Determine imbalance statistics *BEFORE* online restart
1410 printDomainStatistics("before load balancing");
1411
1412 // reset newMinLeven for balancing, otherwise cells with zero noOffsprings occour!
1413 MInt backupLevel = -1;
1414 if(gridb().m_newMinLevel > 0) {
1415 backupLevel = gridb().m_newMinLevel;
1416 gridb().m_newMinLevel = -1;
1417 }
1418
1419 gridb().computeGlobalIds();
1420 gridb().storeMinLevelCells();
1421
1422 // Cancel any open MPI (receive) requests already opened to allow for interleaved execution, since
1423 // these might lead to conflicting messages
1424 for(MInt i = 0; i < noSolvers(); i++) {
1425 RECORD_TIMER_START(m_solverTimers[i][SolverTimers::Total]);
1426 RECORD_TIMER_START(m_solverTimers[i][SolverTimers::DLB]);
1427
1428 RECORD_TIMER_START(m_solverTimers[i][SolverTimers::DlbOther]);
1429 solver(i).cancelMpiRequests();
1430 RECORD_TIMER_STOP(m_solverTimers[i][SolverTimers::DlbOther]);
1431
1432 RECORD_TIMER_STOP(m_solverTimers[i][SolverTimers::DLB]);
1433 RECORD_TIMER_STOP(m_solverTimers[i][SolverTimers::Total]);
1434 }
1435
1436 MBool partitionCellChange = false;
1437 const MFloat updatePartCellsStartTime = wallTime();
1438 // If enabled: Check if the partition cells need to be updated/changed, i.e., decrease/increase
1439 // the partition level shift
1440 //
1441 // Note: in case of a change in the partition cells the balancing needs to be performed (even if
1442 // the determined new partitioning is the same as after the partition cell update)
1443 // TODO labels:CONTROLLER,DLB last DLB or revert step!
1444 if(m_dlbUpdatePartitionCells && !lastDlbStep) {
1445 MInt offspringThreshold = gridb().m_partitionCellOffspringThreshold;
1446 MFloat workloadThreshold = gridb().m_partitionCellWorkloadThreshold;
1447
1448 // For testing: set different thresholds to force partition cell changes
1449 if(m_testUpdatePartitionCells && m_dlbStep % 2 == 0) {
1450 offspringThreshold = m_testUpdatePartCellsOffspringThreshold;
1451 workloadThreshold = m_testUpdatePartCellsWorkloadThreshold;
1452 }
1453
1454 // Update partition cells
1455 partitionCellChange = gridb().updatePartitionCells(offspringThreshold, workloadThreshold);
1456
1457 if(partitionCellChange) {
1458 // Grid with new partition cells will be written with next restart files
1459 m_saveGridNewPartitionCells = true;
1460
1461 // Reset best found partitioning since not useful anymore with changed partition cells
1462 m_optPartitionCellOffsetTotal = -1;
1463 }
1464 }
1465 logDuration(updatePartCellsStartTime, "Update partition cells");
1466
1467 // Storage for new partitioning
1468 MLongScratchSpace partitionCellOffsets(noDomains() + 1, AT_, "partitionCellOffsets");
1469 MLongScratchSpace globalIdOffsets(noDomains() + 1, AT_, "globalIdOffsets");
1470
1471 const MFloat partitionStartTime = wallTime();
1472 // Determine new partitioning based on load balancing mode
1473 if(m_loadBalancingMode == 0 || timeStep == -1) { // Use mode 0 always for initial balance
1474 RECORD_TIMER_START(m_timers[Timers::Partition]);
1475 accumulateCellWeights();
1476
1477 // Compute new domain decomposition
1478 partition(&partitionCellOffsets[0], &globalIdOffsets[0], false);
1479
1480 // Increase DLB step
1481 m_dlbStep++;
1482
1483 RECORD_TIMER_STOP(m_timers[Timers::Partition]);
1484 } else {
1485 // Static mode (i.e. fixed number of DLB steps)
1486 RECORD_TIMER_START(m_timers[Timers::Partition]);
1487
1488 MBool newPartition = false;
1489 // Check if this is the last DLB step
1490 if((!lastDlbStep || m_testDynamicLoadBalancing) && ((isDlbRevertStep == 0) || newBestTimePerStep)) {
1491 // Determine new partitioning, i.e. new partition cell offsets and domain offsets
1492 newPartition = loadBalancingPartition(&loads[0], imbalance, &partitionCellOffsets[0], &globalIdOffsets[0]);
1493 } else {
1494 // Check if the current configuration is not the best one (wrt. time per timestep)
1495 if(((!newBestTimePerStep && !m_adaptation) || (isDlbRevertStep != 0)) && m_optPartitionCellOffsetTotal != -1) {
1496 m_log << timeStep << " * DLB reverting to best configuration." << std::endl;
1497 // Last DLB step: since there is still some load imbalance, revert to the best partitioning
1498 // found
1499
1500 // Gather current partition cell offsets
1501 MLongScratchSpace localPartitionCellOffsets(noDomains() + 1, AT_, "localPartitionCellOffsets");
1502 MPI_Allgather(&gridb().m_localPartitionCellOffsets[0], 1, type_traits<MLong>::mpiType(),
1503 &localPartitionCellOffsets[0], 1, type_traits<MLong>::mpiType(), gridb().mpiComm(), AT_,
1504 "gridb().m_localPartitionCellOffsets[0]", "localPartitionCellOffsets[0]");
1505 localPartitionCellOffsets[noDomains()] = gridb().m_noPartitionCellsGlobal;
1506
1507 // Gather 'optimal' partition cell offsets
1508 MPI_Allgather(&m_optPartitionCellOffsetTotal, 1, type_traits<MLong>::mpiType(), &partitionCellOffsets[0], 1,
1509 type_traits<MLong>::mpiType(), gridb().mpiComm(), AT_, "m_optPartitionCellOffsetTotal",
1510 "partitionCellOffsets[0]");
1511 partitionCellOffsets[noDomains()] = gridb().m_noPartitionCellsGlobal;
1512
1513 // Check if this is a new partitioning, i.e. different from the old one
1514 newPartition =
1515 (std::mismatch(partitionCellOffsets.begin(), partitionCellOffsets.end(), &localPartitionCellOffsets[0]))
1516 .first
1517 != partitionCellOffsets.end();
1518
1519 loadBalancingCalcNewGlobalOffsets(&localPartitionCellOffsets[0], &partitionCellOffsets[0], &globalIdOffsets[0]);
1520 } else {
1521 // Current partitioning is accepted
1522 // Note: this will not work if m_forceLoadBalancing is activated, since
1523 // the number of cells to send, etc. are not determined.
1524 newPartition = false;
1525 }
1526 }
1527
1528 RECORD_TIMER_STOP(m_timers[Timers::Partition]);
1529
1530 // Return here if partitioning did not change
1531 if(!newPartition && !partitionCellChange && !m_forceLoadBalancing) {
1532 m_log << "Dynamic load balancing: load imbalance detected but partition did not change at "
1533 "timestep "
1534 << timeStep << "!" << std::endl;
1535 RECORD_TIMER_STOP(m_timers[Timers::DLB]);
1536 return false;
1537 }
1538 }
1539 logDuration(partitionStartTime, "New partitioning");
1540
1541 RECORD_TIMER_START(m_timers[Timers::Prepare]);
1542 const MFloat prepareStartTime = wallTime();
1543
1544 // Disable all running DLB timers and store status
1545 ScratchSpace<MBool> dlbTimersStatus(std::max(noDlbTimers, 1), AT_, "dlbTimersStatus");
1547
1548 // Reset solvers before load balancing
1549 for(MInt i = 0; i < noSolvers(); i++) {
1550 const MFloat resetStartTime = wallTime();
1551 RECORD_TIMER_START(m_solverTimers[i][SolverTimers::Total]);
1552 RECORD_TIMER_START(m_solverTimers[i][SolverTimers::DLB]);
1553
1554 RECORD_TIMER_START(m_solverTimers[i][SolverTimers::Reset]);
1555 solver(i).resetSolver();
1556 RECORD_TIMER_STOP(m_solverTimers[i][SolverTimers::Reset]);
1557
1558
1559 RECORD_TIMER_START(m_solverTimers[i][SolverTimers::DlbOther]);
1560 // Communicate global solver variables from current local rank 0 to all domains
1561 communicateGlobalSolverVars(&solver(i));
1562 RECORD_TIMER_STOP(m_solverTimers[i][SolverTimers::DlbOther]);
1563
1564 RECORD_TIMER_STOP(m_solverTimers[i][SolverTimers::DLB]);
1565 RECORD_TIMER_STOP(m_solverTimers[i][SolverTimers::Total]);
1566 logDuration(resetStartTime, " Reset solver #" + std::to_string(i));
1567 }
1568
1569 const MFloat prepareGridStartTime = wallTime();
1570 gridb().deletePeriodicConnection(false);
1571
1572 std::vector<MInt>().swap(gridb().m_minLevelCells);
1573
1574 gridb().localToGlobalIds();
1575
1576 const MInt oldNoCells = gridb().treeb().size();
1577
1578 MIntScratchSpace noCellsToReceiveByDomain(noDomains() + 1, AT_, "noCellsToReceiveByDomain");
1579 MIntScratchSpace noCellsToSendByDomain(noDomains() + 1, AT_, "noCellsToSendByDomain");
1580 // Mapping: gridCellId -> sort index in buffer
1581 MIntScratchSpace sortedCellId(oldNoCells, AT_, "sortedCellId");
1582 // Reverse mapping: sort index in buffer -> gridCellId
1583 MIntScratchSpace bufferIdToCellId(oldNoCells, AT_, "bufferIdToCellId");
1584 bufferIdToCellId.fill(-1);
1585
1586 loadBalancingCalcNoCellsToSend(&globalIdOffsets[0], &noCellsToSendByDomain[0], &noCellsToReceiveByDomain[0],
1587 &sortedCellId[0], &bufferIdToCellId[0]);
1588 logDuration(prepareGridStartTime, " Prepare grid");
1589
1590 std::vector<std::vector<MInt>> dataSendSize{};
1591 std::vector<std::vector<MInt>> dataRecvSize{};
1592
1593 // Determine communication data sizes for each solver
1594 for(MInt i = 0; i < noSolvers(); i++) {
1595 const MFloat dataStartTime = wallTime();
1596 RECORD_TIMER_START(m_solverTimers[i][SolverTimers::Total]);
1597 RECORD_TIMER_START(m_solverTimers[i][SolverTimers::DLB]);
1598
1599 RECORD_TIMER_START(m_solverTimers[i][SolverTimers::CalcDataSizes]);
1600 determineDataSizesDlb(i, 0, &noCellsToSendByDomain[0], &bufferIdToCellId[0], dataSendSize, dataRecvSize);
1601 RECORD_TIMER_STOP(m_solverTimers[i][SolverTimers::CalcDataSizes]);
1602
1603 RECORD_TIMER_START(m_solverTimers[i][SolverTimers::DlbOther]);
1604 // Change local to global ids here, since local ids previously needed to determine data sizes
1605 solver(i).localToGlobalIds();
1606 RECORD_TIMER_STOP(m_solverTimers[i][SolverTimers::DlbOther]);
1607
1608 RECORD_TIMER_STOP(m_solverTimers[i][SolverTimers::DLB]);
1609 RECORD_TIMER_STOP(m_solverTimers[i][SolverTimers::Total]);
1610 logDuration(dataStartTime, " Data sizes solver #" + std::to_string(i));
1611 }
1612
1613
1614 std::vector<std::vector<MInt>> dataSendSizeCoupler{};
1615 std::vector<std::vector<MInt>> dataRecvSizeCoupler{};
1616
1617 // Determine communication data sizes for each coupler
1618 for(MInt i = 0; i < noCouplers(); i++) {
1619 const MFloat dataStartTime = wallTime();
1620 const MInt timerId = noSolvers() + i;
1621 RECORD_TIMER_START(m_solverTimers[timerId][SolverTimers::Total]);
1622 RECORD_TIMER_START(m_solverTimers[timerId][SolverTimers::DLB]);
1623
1624 RECORD_TIMER_START(m_solverTimers[timerId][SolverTimers::CalcDataSizes]);
1625 determineDataSizesDlb(i, 1, &noCellsToSendByDomain[0], &bufferIdToCellId[0], dataSendSizeCoupler,
1626 dataRecvSizeCoupler);
1627 RECORD_TIMER_STOP(m_solverTimers[timerId][SolverTimers::CalcDataSizes]);
1628
1629 RECORD_TIMER_STOP(m_solverTimers[timerId][SolverTimers::DLB]);
1630 RECORD_TIMER_STOP(m_solverTimers[timerId][SolverTimers::Total]);
1631 logDuration(dataStartTime, " Data sizes coupler #" + std::to_string(i));
1632 }
1633
1634
1635 RECORD_TIMER_STOP(m_timers[Timers::Prepare]);
1636 logDuration(prepareStartTime, "Prepare balancing");
1637
1638 const MFloat balanceGridStartTime = wallTime();
1639 RECORD_TIMER_START(m_timers[Timers::BalanceGrid]);
1640 // Rebalance grid
1641 gridb().balance(&noCellsToReceiveByDomain[0], &noCellsToSendByDomain[0], &sortedCellId[0], &partitionCellOffsets[0],
1642 &globalIdOffsets[0]);
1643 RECORD_TIMER_STOP(m_timers[Timers::BalanceGrid]);
1644 logDuration(balanceGridStartTime, "Balance grid");
1645
1646 RECORD_TIMER_START(m_timers[Timers::BalanceSolvers]);
1647 const MFloat balanceSolversStartTime = wallTime();
1648 // Rebalance solvers
1649 for(MInt i = 0; i < noSolvers(); i++) {
1650 RECORD_TIMER_START(m_solverTimers[i][SolverTimers::Total]);
1651 RECORD_TIMER_START(m_solverTimers[i][SolverTimers::DLB]);
1652
1653 RECORD_TIMER_START(m_solverTimers[i][SolverTimers::Balance]);
1654 const MFloat balanceSolverStartTime = wallTime();
1655
1656 if(!solver(i).hasSplitBalancing()) {
1657 solver(i).balance(&noCellsToReceiveByDomain[0], &noCellsToSendByDomain[0], &sortedCellId[0], oldNoCells);
1658 } else {
1659 // Variables to store pointers to allocated memory
1660 std::vector<MInt*> intDataRecv{};
1661 std::vector<MLong*> longDataRecv{};
1662 std::vector<MFloat*> floatDataRecv{};
1663 std::vector<MInt> dataTypes{};
1664
1665 // TODO labels:CONTROLLER,DLB,TIMERS create shorthand for: timer start, function call, timer stop, logDuration?
1666 RECORD_TIMER_START(m_solverTimers[i][SolverTimers::Redistribute]);
1667 // Communicate all solver data
1668 redistributeDataDlb(i, 0, dataSendSize[i], dataRecvSize[i], &bufferIdToCellId[0], oldNoCells, intDataRecv,
1669 longDataRecv, floatDataRecv, dataTypes);
1670 RECORD_TIMER_STOP(m_solverTimers[i][SolverTimers::Redistribute]);
1671 logDuration(RETURN_TIMER(m_solverTimers[i][SolverTimers::Redistribute]),
1672 "Redistribute solver #" + std::to_string(i), true);
1673
1674 RECORD_TIMER_START(m_solverTimers[i][SolverTimers::BalancePre]);
1675 // Perform reinitialization steps prior to setting solver data
1676 // i.e. apply balance to the proxy!
1677 solver(i).balancePre();
1678 RECORD_TIMER_STOP(m_solverTimers[i][SolverTimers::BalancePre]);
1679 logDuration(RETURN_TIMER(m_solverTimers[i][SolverTimers::BalancePre]), "BalancePre solver #" + std::to_string(i),
1680 true);
1681
1682 RECORD_TIMER_START(m_solverTimers[i][SolverTimers::SetData]);
1683 setDataDlb(i, 0, intDataRecv, longDataRecv, floatDataRecv, dataTypes, false);
1684 RECORD_TIMER_STOP(m_solverTimers[i][SolverTimers::SetData]);
1685 logDuration(RETURN_TIMER(m_solverTimers[i][SolverTimers::SetData]), "SetData1 solver #" + std::to_string(i),
1686 true);
1687
1688 RECORD_TIMER_START(m_solverTimers[i][SolverTimers::DlbOther]);
1689 // Change global to local ids in the solver
1690 solver(i).globalToLocalIds();
1691 RECORD_TIMER_STOP(m_solverTimers[i][SolverTimers::DlbOther]);
1692
1693 RECORD_TIMER_START(m_solverTimers[i][SolverTimers::BalancePost]);
1694 // Perform reinitialization steps after setting solver data the first time
1695 solver(i).balancePost();
1696 RECORD_TIMER_STOP(m_solverTimers[i][SolverTimers::BalancePost]);
1697 logDuration(RETURN_TIMER(m_solverTimers[i][SolverTimers::BalancePost]),
1698 "BalancePost solver #" + std::to_string(i), true);
1699
1700 RECORD_TIMER_START(m_solverTimers[i][SolverTimers::SetData]);
1701 // Set solver data again (if required by solver) and deallocate buffers
1702 setDataDlb(i, 0, intDataRecv, longDataRecv, floatDataRecv, dataTypes, true);
1703 RECORD_TIMER_STOP(m_solverTimers[i][SolverTimers::SetData]);
1704 logDuration(RETURN_TIMER(m_solverTimers[i][SolverTimers::SetData]), "SetData2 solver #" + std::to_string(i),
1705 true);
1706
1707 intDataRecv.clear();
1708 longDataRecv.clear();
1709 floatDataRecv.clear();
1710 }
1711
1712 RECORD_TIMER_STOP(m_solverTimers[i][SolverTimers::Balance]);
1713 logDuration(balanceSolverStartTime, "Balance solver #" + std::to_string(i));
1714
1715 RECORD_TIMER_STOP(m_solverTimers[i][SolverTimers::DLB]);
1716 RECORD_TIMER_STOP(m_solverTimers[i][SolverTimers::Total]);
1717 }
1718 RECORD_TIMER_STOP(m_timers[Timers::BalanceSolvers]);
1719 logDuration(balanceSolversStartTime, "Balance solvers");
1720
1721 RECORD_TIMER_START(m_timers[Timers::BalanceCouplers]);
1722 const MFloat balanceCouplersStartTime = wallTime();
1723 // Rebalance couplers (if required)
1724 for(MInt i = 0; i < noCouplers(); i++) {
1725 const MInt timerId = noSolvers() + i;
1726 RECORD_TIMER_START(m_solverTimers[timerId][SolverTimers::Total]);
1727 RECORD_TIMER_START(m_solverTimers[timerId][SolverTimers::DLB]);
1728
1729 RECORD_TIMER_START(m_solverTimers[timerId][SolverTimers::Balance]);
1730
1731 // Variables to store pointers to allocated memory
1732 std::vector<MInt*> intDataRecv{};
1733 std::vector<MLong*> longDataRecv{};
1734 std::vector<MFloat*> floatDataRecv{};
1735 std::vector<MInt> dataTypes{};
1736
1737 RECORD_TIMER_START(m_solverTimers[timerId][SolverTimers::Redistribute]);
1738 // Communicate all coupler data
1739 redistributeDataDlb(i, 1, dataSendSizeCoupler[i], dataRecvSizeCoupler[i], &bufferIdToCellId[0], oldNoCells,
1740 intDataRecv, longDataRecv, floatDataRecv, dataTypes);
1741 RECORD_TIMER_STOP(m_solverTimers[timerId][SolverTimers::Redistribute]);
1742
1743 RECORD_TIMER_START(m_solverTimers[timerId][SolverTimers::BalancePre]);
1744 // Perform reinitialization steps prior to setting solver data
1745 coupler(i).balancePre();
1746 RECORD_TIMER_STOP(m_solverTimers[timerId][SolverTimers::BalancePre]);
1747
1748 RECORD_TIMER_START(m_solverTimers[timerId][SolverTimers::SetData]);
1749 setDataDlb(i, 1, intDataRecv, longDataRecv, floatDataRecv, dataTypes, false);
1750 RECORD_TIMER_STOP(m_solverTimers[timerId][SolverTimers::SetData]);
1751
1752 RECORD_TIMER_START(m_solverTimers[timerId][SolverTimers::BalancePost]);
1753 // Perform reinitialization steps after setting solver data the first time
1754 coupler(i).balancePost();
1755 RECORD_TIMER_STOP(m_solverTimers[timerId][SolverTimers::BalancePost]);
1756
1757 RECORD_TIMER_START(m_solverTimers[timerId][SolverTimers::SetData]);
1758 // Set coupler data again (if required by coupler) and deallocate buffers
1759 setDataDlb(i, 1, intDataRecv, longDataRecv, floatDataRecv, dataTypes, true);
1760 RECORD_TIMER_STOP(m_solverTimers[timerId][SolverTimers::SetData]);
1761
1762 intDataRecv.clear();
1763 longDataRecv.clear();
1764 floatDataRecv.clear();
1765
1766 RECORD_TIMER_STOP(m_solverTimers[timerId][SolverTimers::Balance]);
1767
1768 RECORD_TIMER_STOP(m_solverTimers[timerId][SolverTimers::DLB]);
1769 RECORD_TIMER_STOP(m_solverTimers[timerId][SolverTimers::Total]);
1770 }
1771 RECORD_TIMER_STOP(m_timers[Timers::BalanceCouplers]);
1772 logDuration(balanceCouplersStartTime, "Balance couplers");
1773
1774 const MFloat finalizeBalanceStartTime = wallTime();
1775 // finalize balance for solvers and couplers
1776 for(MInt i = 0; i < noSolvers(); i++) {
1777 const MFloat finalizeSolverStartTime = wallTime();
1778 solver(i).finalizeBalance();
1779 for(MInt j = 0; j < noCouplers(); j++) {
1780 const MFloat finalizeCouplerStartTime = wallTime();
1781 coupler(j).finalizeBalance(i);
1782 logDuration(finalizeCouplerStartTime,
1783 "Finalize balance #" + std::to_string(i) + " coupler #" + std::to_string(j));
1784 }
1785 logDuration(finalizeSolverStartTime, "Finalize balance solver #" + std::to_string(i));
1786 }
1787 logDuration(finalizeBalanceStartTime, "Finalize balance total");
1788
1789 // Determine imbalance statistics *AFTER* online restart
1790 printDomainStatistics("after load balancing");
1791
1792 // Enable all previously active DLB timers again
1794
1795 // Reset DLB timer records
1797
1798 m_dlbPreviousLocalRunTime = 0.0;
1799 m_dlbPreviousLocalIdleTime = 0.0;
1800
1801 // ... reset timings ...
1802 resetAllTimer();
1803
1804 m_lastLoadBalancingTimeStep = timeStep;
1805 m_nAdaptationsSinceBalance = 0;
1806
1807 // Write timings to file
1808 if(m_outputDlbTimings) {
1809 storeTimings();
1810 }
1811
1812 logDuration(dlbStartTime, "Balance total");
1813 logDurations(durations, "DLB", gridb().mpiComm(), globalDomainId(), globalNoDomains());
1814
1815 const MFloat dlbTimeTotal = wallTime() - dlbStartTime;
1816 std::stringstream dlbMessage;
1817 dlbMessage << "=== Dynamic load balancing performed at timestep " << timeStep << "! Duration: " << dlbTimeTotal
1818 << " s" << std::endl;
1819 m_log << dlbMessage.str();
1820 cerr0 << dlbMessage.str();
1821
1822 printAllocatedMemory(oldAllocatedBytes, "gridcontroller::balance()", gridb().mpiComm());
1823
1824 // restore backup value for next restartFile!
1825 if(backupLevel > 0) {
1826 gridb().m_newMinLevel = backupLevel;
1827 }
1828
1829 RECORD_TIMER_STOP(m_timers[Timers::DLB]);
1830 return true;
1831}
1832
1833
1835template <MInt nDim>
1836void Controller<nDim>::partition(MLong* partitionCellOffsets,
1837 MLong* globalIdOffsets,
1838 const MBool onlyPartitionOffsets) {
1839 TRACE();
1840 if(gridb().m_maxPartitionLevelShift > 0) {
1841 cerr0 << "WARNING: Load balancing with partition level shifts might not fully work!" << std::endl;
1842 }
1843
1844 // Note: noPartitionCells = 0 allowed after updatePartitionCells()!
1845 const MLong noPartitionCells = gridb().m_noPartitionCells;
1846
1847 // Update noOffsprings and workLoad for all cells and calculate accumulated workload
1848 MFloat totalWorkload = 0.0;
1849 MFloatScratchSpace partitionCellsWorkload(std::max(noPartitionCells, 1L), AT_, "partitionCellsWorkload");
1850 MLongScratchSpace partitionCellsGlobalId(std::max(noPartitionCells, 1L), AT_, "partitionCellsGlobalId");
1851
1852 gridb().calculateNoOffspringsAndWorkload(static_cast<Collector<void>*>(nullptr), gridb().treeb().size());
1853
1854 for(MUint i = 0; i < noPartitionCells; i++) {
1855 const MLong globalCellId = gridb().m_localPartitionCellGlobalIds[i]; // sorted by global id
1856 const MInt cellId = gridb().globalIdToLocalId(globalCellId, true);
1857
1858 partitionCellsWorkload(i) = gridb().a_workload(cellId);
1859 partitionCellsGlobalId(i) = gridb().a_globalId(cellId);
1860 ASSERT(globalCellId == partitionCellsGlobalId(i),
1861 "global cell id mismatch! " + std::to_string(globalCellId) + " " + std::to_string(partitionCellsGlobalId(i))
1862 + " " + std::to_string(cellId));
1863
1864 ASSERT(i == 0 || partitionCellsGlobalId(i) > partitionCellsGlobalId(i - 1),
1865 "Partition cells not sorted by global id: " + std::to_string(partitionCellsGlobalId(i))
1866 + " <= " + std::to_string(partitionCellsGlobalId(i - 1)));
1867 totalWorkload += gridb().a_workload(cellId);
1868 }
1869
1870 MPI_Allreduce(MPI_IN_PLACE, &totalWorkload, 1, MPI_DOUBLE, MPI_SUM, gridb().mpiComm(), AT_, "MPI_IN_PLACE",
1871 "totalWorkload");
1872
1873 MBool calcGlobalOffsets = false;
1874
1875 if(gridb().m_partitionParallelSplit) {
1876 maia::grid::partitionParallelSplit(&partitionCellsWorkload[0], noPartitionCells,
1877 gridb().m_localPartitionCellOffsets[0], static_cast<MLong>(noDomains()),
1878 static_cast<MLong>(domainId()), gridb().mpiComm(), &partitionCellOffsets[0]);
1879 partitionCellOffsets[noDomains()] = gridb().m_noPartitionCellsGlobal;
1880
1881 if(!onlyPartitionOffsets) {
1882 // Determine the globalIdOffsets later (required for load balancing mode 0)
1883 calcGlobalOffsets = true;
1884 }
1885 } else {
1886 gridb().partitionParallel(gridb().m_noPartitionCells, gridb().m_localPartitionCellOffsets[0],
1887 &partitionCellsWorkload[0], &partitionCellsGlobalId[0], totalWorkload,
1888 partitionCellOffsets, globalIdOffsets, true);
1889
1890 if(!onlyPartitionOffsets && gridb().m_maxPartitionLevelShift > 0) {
1891 // Redetermine the global offsets below in case of a partition level shift since
1892 // partitionParallel() does not include the correction of the offsets as done in
1893 // correctDomainOffsetsAtPartitionLevelShifts()
1894 calcGlobalOffsets = true;
1895 }
1896 }
1897
1898 // Determine the globalIdOffsets
1899 if(calcGlobalOffsets) {
1900 MLongScratchSpace localPartitionCellCounts(noDomains(), AT_, "localPartitionCellCounts");
1901 MLongScratchSpace localPartitionCellOffsets(noDomains() + 1, AT_, "localPartitionCellOffsets");
1902 // Determine the number of partition cells on all domains and the partition cell offsets
1903 gridb().determineNoPartitionCellsAndOffsets(&localPartitionCellCounts[0], &localPartitionCellOffsets[0]);
1904
1905 // Calculate new global domain offsets from the new partition cell offsets
1906 loadBalancingCalcNewGlobalOffsets(&localPartitionCellOffsets[0], &partitionCellOffsets[0], &globalIdOffsets[0]);
1907 }
1908
1909 if(domainId() == 0) std::cerr << std::endl;
1910}
1911
1913template <MInt nDim>
1915 TRACE();
1916
1917 MIntScratchSpace minNoSolverCells(noSolvers(), FUN_, "minNoSolverCells");
1918 MIntScratchSpace maxNoSolverCells(noSolvers(), FUN_, "maxNoSolverCells");
1919 MIntScratchSpace avgNoSolverCells(noSolvers(), FUN_, "avgNoSolverCells");
1920 MLongScratchSpace globalNoSolverCells(noSolvers(), FUN_, "globalNoSolverCells");
1921
1922 MLong noGridLeafCells = 0;
1923 minNoSolverCells.fill(0);
1924 maxNoSolverCells.fill(0);
1925 avgNoSolverCells.fill(0);
1926 globalNoSolverCells.fill(0);
1927
1928 for(MInt cellId = 0; cellId < gridb().treeb().size(); cellId++) {
1929 if(!gridb().a_hasProperty(cellId, Cell::IsHalo) && gridb().a_noChildren(cellId) == 0) {
1930 noGridLeafCells++;
1931 }
1932 }
1933
1934 MLong globalGridNoLeafCells = noGridLeafCells;
1935 MLong globalGridNoCells = (MLong)gridb().noInternalCells();
1936 MInt maxGridNoCells = gridb().noInternalCells();
1937 MInt minGridNoCells = gridb().noInternalCells();
1938
1939 for(MInt i = 0; i < noSolvers(); i++) {
1940 minNoSolverCells(i) = solver(i).noInternalCells();
1941 maxNoSolverCells(i) = solver(i).noInternalCells();
1942 globalNoSolverCells(i) = solver(i).noInternalCells();
1943 }
1944
1945 MPI_Allreduce(MPI_IN_PLACE, &globalGridNoLeafCells, 1, MPI_LONG, MPI_SUM, gridb().mpiComm(), AT_, "MPI_IN_PLACE",
1946 "globalGridNoLeafCells");
1947 MPI_Allreduce(MPI_IN_PLACE, &globalGridNoCells, 1, MPI_LONG, MPI_SUM, gridb().mpiComm(), AT_, "MPI_IN_PLACE",
1948 "globalGridNoCells");
1949 MPI_Allreduce(MPI_IN_PLACE, &maxGridNoCells, 1, MPI_INT, MPI_MAX, gridb().mpiComm(), AT_, "MPI_IN_PLACE",
1950 "maxGridNoCells");
1951 MPI_Allreduce(MPI_IN_PLACE, &minGridNoCells, 1, MPI_INT, MPI_MIN, gridb().mpiComm(), AT_, "MPI_IN_PLACE",
1952 "minGridNoCells");
1953 MPI_Allreduce(MPI_IN_PLACE, &maxNoSolverCells[0], noSolvers(), MPI_INT, MPI_MAX, gridb().mpiComm(), AT_,
1954 "MPI_IN_PLACE", "maxNoSolverCells[0]");
1955 MPI_Allreduce(MPI_IN_PLACE, &minNoSolverCells[0], noSolvers(), MPI_INT, MPI_MIN, gridb().mpiComm(), AT_,
1956 "MPI_IN_PLACE", "minNoSolverCells[0]");
1957 MPI_Allreduce(MPI_IN_PLACE, &globalNoSolverCells[0], noSolvers(), MPI_LONG, MPI_SUM, gridb().mpiComm(), AT_,
1958 "MPI_IN_PLACE", "globalNoSolverCells[0]");
1959
1960 for(MInt i = 0; i < noSolvers(); i++) {
1961 avgNoSolverCells(i) = (MInt)(((MFloat)globalNoSolverCells(i)) / ((MFloat)noDomains()));
1962 }
1963
1964 const MInt avgGridNoLeafCells = (MInt)(((MFloat)globalGridNoLeafCells) / ((MFloat)noDomains()));
1965 const MInt avgGridNoCells = (MInt)(((MFloat)globalGridNoCells) / ((MFloat)noDomains()));
1966 MInt devNoGridCells = std::abs(gridb().noInternalCells() - avgGridNoCells);
1967 MInt devNoGridLeafCells = std::abs(noGridLeafCells - avgGridNoLeafCells);
1968
1969
1970 MPI_Allreduce(MPI_IN_PLACE, &devNoGridCells, 1, MPI_INT, MPI_MAX, gridb().mpiComm(), AT_, "MPI_IN_PLACE",
1971 "devNoGridCells");
1972 MPI_Allreduce(MPI_IN_PLACE, &devNoGridLeafCells, 1, MPI_INT, MPI_MAX, gridb().mpiComm(), AT_, "MPI_IN_PLACE",
1973 "devNoGridLeafCells");
1974 if(domainId() == 0) {
1975 std::cerr << "Domain statistics ";
1976 if(!status.empty()) std::cerr << status << " ";
1977 std::cerr << "at global time step " << globalTimeStep << ": avg no cells=" << avgGridNoCells
1978 << ", min=" << minGridNoCells << ", max=" << maxGridNoCells << ", max deviation=" << devNoGridCells
1979 << ", max deviation leaf=" << devNoGridLeafCells << ", total=" << globalGridNoCells << std::endl;
1980 for(MInt i = 0; i < noSolvers(); i++) {
1981 std::cerr << "Solver-statistics for solver " << i << " min= " << minNoSolverCells(i)
1982 << " max= " << maxNoSolverCells(i) << " total= " << globalNoSolverCells(i)
1983 << " avg= " << avgNoSolverCells(i) << std::endl;
1984 }
1985 }
1986}
1987
1988
1993template <MInt nDim>
1995 if(m_grid == nullptr) {
1996 return false;
1997 }
1998
1999 // Reset adaptation status
2000 gridb().m_wasAdapted = false;
2001
2002 if(!m_adaptation
2003 || (!force
2004 && ((m_adaptationInterval <= 0 || globalTimeStep % m_adaptationInterval != 0)
2005 || globalTimeStep < m_adaptationStart))) {
2006 return false;
2007 }
2008
2009 RECORD_TIMER_START(m_timers[Timers::Adaptation]);
2010
2011 cerr0 << "=== Initialising solver-adaptation at time-step " << globalTimeStep << std::endl;
2012 // TODO labels:TIMERS,ADAPTATION adaptation timers for new split version -> initTimers()
2013 logTimerStatistics("before adaptation");
2014
2015 // Disable all DLB timers
2016 const MInt noDlbTimers = maia::dlb::g_dlbTimerController.noDlbTimers();
2017 ScratchSpace<MBool> dlbTimersStatus(std::max(noDlbTimers, 1), AT_, "dlbTimersStatus");
2019
2020 // TODO labels:CONTROLLER remove if all solvers have splitAdaptation
2021 MBool splitAdaptation = true;
2022 for(MInt i = 0; i < noSolvers(); i++) {
2023 if(!solver(i).m_splitAdaptation) {
2024 std::cerr << "Update Adaptation to splitAdaptation! solver " << i << std::endl;
2025 splitAdaptation = false;
2026 }
2027 }
2028
2029 // Cancel any open MPI requests in the solver since these might conflict with communication
2030 // during adaptation
2031 for(MInt i = 0; i < noSolvers(); i++) {
2032 solver(i).cancelMpiRequests();
2033 }
2034
2035 if(splitAdaptation) { // new split-Adaptation version
2036
2037 // prepare Adaptations
2038 for(MInt i = 0; i < noSolvers(); i++) {
2039 RECORD_TIMER_START(m_solverTimers[i][SolverTimers::Total]);
2040 RECORD_TIMER_START(m_solverTimers[i][SolverTimers::Adaptation]);
2041 solver(i).prepareAdaptation();
2042 RECORD_TIMER_STOP(m_solverTimers[i][SolverTimers::Adaptation]);
2043 RECORD_TIMER_STOP(m_solverTimers[i][SolverTimers::Total]);
2044 }
2045
2046 for(MInt i = 0; i < noCouplers(); i++) {
2047 const MInt timerId = noSolvers() + i;
2048 RECORD_TIMER_START(m_solverTimers[timerId][SolverTimers::Total]);
2049 RECORD_TIMER_START(m_solverTimers[timerId][SolverTimers::Adaptation]);
2050 coupler(i).prepareAdaptation();
2051 RECORD_TIMER_STOP(m_solverTimers[timerId][SolverTimers::Adaptation]);
2052 RECORD_TIMER_STOP(m_solverTimers[timerId][SolverTimers::Total]);
2053 }
2054
2055 // loop over all available levels (and a specified additional number of steps)!
2056 // prefered over while-loop!
2057 MInt addedAdaptationSteps = 0;
2058 addedAdaptationSteps = Context::getBasicProperty<MInt>("addedAdaptationSteps", AT_, &addedAdaptationSteps);
2059 for(MInt level = gridb().maxUniformRefinementLevel(); level < gridb().maxRefinementLevel() + addedAdaptationSteps;
2060 level++) {
2061 const MLong oldLocalCnt = gridb().noInternalCells();
2062 const MLong oldCnt = gridb().noCellsGlobal();
2063 std::vector<std::vector<MFloat>> sensors;
2064 std::vector<std::bitset<CartesianGrid<nDim>::m_maxNoSensors>> sensorCellFlag(gridb().noInternalCells());
2065 std::vector<MFloat> sensorWeight;
2066 std::vector<MInt> sensorSolverId;
2067
2068 // set sensors
2069 for(MInt i = 0; i < noSolvers(); i++) {
2070 solver(i).setSensors(sensors, sensorWeight, sensorCellFlag, sensorSolverId);
2071 }
2072
2073 ASSERT(sensors.size() < CartesianGrid<nDim>::m_maxNoSensors, "Increase bitsetsize!");
2074
2075 // saveSensorData
2076 // Determine of at least one solver wants to save its sensor data
2077 MBool saveSensorData = false;
2078 for(MInt i = 0; i < noSolvers(); i++) {
2079 saveSensorData = saveSensorData || solver(i).m_saveSensorData;
2080 }
2081 if(saveSensorData) {
2082 // Write grid file
2083 std::stringstream gridFileName;
2084 gridFileName << "sensorDataGrid_" << std::to_string(level) << "_" << globalTimeStep << ParallelIo::fileExt();
2085 gridb().saveGrid((m_outputDir + gridFileName.str()).c_str(), m_recalcIds);
2086 // Write sensor data
2087 for(MInt i = 0; i < noSolvers(); i++) {
2088 if(solver(i).m_saveSensorData) {
2089 solver(i).saveSensorData(sensors, level, gridFileName.str(), m_recalcIds);
2090 }
2091 }
2092 }
2093
2094 RECORD_TIMER_START(m_timers[Timers::MeshAdaptation]);
2095 gridb().meshAdaptation(sensors, sensorWeight, sensorCellFlag, sensorSolverId, m_refineCellSolver,
2096 m_removeChildsSolver, m_removeCellSolver, m_swapProxySolver, m_cellOutside,
2097 m_resizeGridMapSolver);
2098 RECORD_TIMER_STOP(m_timers[Timers::MeshAdaptation]);
2099
2100 // compacts cells, and emidiate after adaptation
2101 for(MInt i = 0; i < noSolvers(); i++) {
2102 RECORD_TIMER_START(m_solverTimers[i][SolverTimers::Total]);
2103 RECORD_TIMER_START(m_solverTimers[i][SolverTimers::Adaptation]);
2104 solver(i).postAdaptation();
2105 RECORD_TIMER_STOP(m_solverTimers[i][SolverTimers::Adaptation]);
2106 RECORD_TIMER_STOP(m_solverTimers[i][SolverTimers::Total]);
2107 }
2108
2109 // call couplers only after all solvers have rebild their grid-structure!
2110 for(MInt j = 0; j < noCouplers(); j++) {
2111 const MInt timerId = noSolvers() + j;
2112 RECORD_TIMER_START(m_solverTimers[timerId][SolverTimers::Total]);
2113 RECORD_TIMER_START(m_solverTimers[timerId][SolverTimers::Adaptation]);
2114 coupler(j).postAdaptation();
2115 RECORD_TIMER_STOP(m_solverTimers[timerId][SolverTimers::Adaptation]);
2116 RECORD_TIMER_STOP(m_solverTimers[timerId][SolverTimers::Total]);
2117 }
2118
2119 m_log << "Mesh adaptation: " << gridb().noCellsGlobal() - oldCnt << " cells generated"
2120 << " (before: " << oldCnt << ", now: " << gridb().noCellsGlobal() << ")." << std::endl;
2121
2122 MBool skipLoop = false;
2123 for(MInt i = 0; i < noSolvers(); i++) {
2124 if(solver(i).m_singleAdaptation) {
2125 skipLoop = true;
2126 }
2127 }
2128
2129 if(skipLoop && globalTimeStep > 0) break;
2130
2131 if(globalTimeStep < 0 && m_loadBalancingInterval > 0) {
2132 MInt balanceTrigger = 0;
2133 MInt deltaCells = gridb().noInternalCells() - oldLocalCnt;
2134 if(gridb().noInternalCells() + deltaCells * gridb().m_maxNoChilds > gridb().maxNoCells()) {
2135 balanceTrigger = 1;
2136 }
2137 MPI_Allreduce(MPI_IN_PLACE, &balanceTrigger, 1, MPI_INT, MPI_MAX, gridb().mpiComm(), AT_, "MPI_IN_PLACE",
2138 "balanceTrigger");
2139 if(balanceTrigger || level == (gridb().maxRefinementLevel() - 1)) {
2140 cerr0 << "Performing intermediate load balancing step." << std::endl;
2141#ifdef MAIA_GRID_SANITY_CHECKS
2142 gridb().gridSanityChecks();
2143 gridb().checkWindowHaloConsistency(true);
2144#endif
2145 // preserve loadBalancing mode and
2146 // switch to weight based mode for the balance during initial adaptation!
2147 const MInt backUp = m_loadBalancingMode;
2148 m_loadBalancingMode = 0;
2149 balance(true, false, true);
2150 m_loadBalancingMode = backUp;
2151 }
2152 }
2153 }
2154
2155 // reinit solver after adaptation
2156 for(MInt i = 0; i < noSolvers(); i++) {
2157 RECORD_TIMER_START(m_solverTimers[i][SolverTimers::Total]);
2158 RECORD_TIMER_START(m_solverTimers[i][SolverTimers::Adaptation]);
2159 solver(i).finalizeAdaptation();
2160 for(MInt j = 0; j < noCouplers(); j++) {
2161 coupler(j).finalizeAdaptation(i);
2162 }
2163 RECORD_TIMER_STOP(m_solverTimers[i][SolverTimers::Adaptation]);
2164 RECORD_TIMER_STOP(m_solverTimers[i][SolverTimers::Total]);
2165 }
2166
2167 } else { // TODO labels:CONTROLLER delete below
2168 const MLong oldCnt = gridb().noCellsGlobal();
2169 std::vector<std::vector<MFloat>> sensors;
2170 std::vector<std::bitset<CartesianGrid<nDim>::m_maxNoSensors>> sensorCellFlag(gridb().noInternalCells());
2171 std::vector<MFloat> sensorWeight;
2172 std::vector<MInt> sensorSolverId;
2173
2174 for(MInt i = 0; i < noSolvers(); i++) {
2175 std::cerr << "preparing adaptation for solver " << i << std::endl;
2176 solver(i).prepareAdaptation(sensors, sensorWeight, sensorCellFlag, sensorSolverId);
2177 }
2178 ASSERT(sensors.size() < CartesianGrid<nDim>::m_maxNoSensors, "Increase bitset size!");
2179
2180 gridb().meshAdaptation(sensors, sensorWeight, sensorCellFlag, sensorSolverId, m_refineCellSolver,
2181 m_removeChildsSolver, m_removeCellSolver, m_swapProxySolver, m_cellOutside,
2182 m_resizeGridMapSolver);
2183
2184 for(MInt i = 0; i < noSolvers(); i++) {
2185 // in multisolver case: translate tree ids back to solver-local ids
2186 solver(i).reinitAfterAdaptation();
2187 }
2188
2189 m_log << "Mesh adaptation: " << gridb().noCellsGlobal() - oldCnt << " cells generated"
2190 << " (before: " << oldCnt << ", now: " << gridb().noCellsGlobal() << ")." << std::endl;
2191 }
2192
2193 // Enable all previously active DLB timers again
2195
2196 // reset all timers after an adaptation:
2197 // Tim version
2198 // resetAllTimer();
2199
2200 // set the last adaptation timeStep
2201 m_lastAdaptationTimeStep = globalTimeStep;
2202 m_nAdaptationsSinceBalance = m_nAdaptationsSinceBalance + 1;
2203
2204 printDomainStatistics("after adaptation");
2205
2206 cerr0 << "=== Finished adaptation" << std::endl;
2207 m_log << "Finished adaptation at time-step " << globalTimeStep << std::endl;
2208 RECORD_TIMER_STOP(m_timers[Timers::Adaptation]);
2209
2210 return true;
2211}
2212
2218template <MInt nDim>
2220 TRACE();
2221
2222 const MInt noGridCells = gridb().treeb().size();
2223
2224 // Reset cell weights
2225 for(MInt gridCellId = 0; gridCellId < noGridCells; gridCellId++) {
2226 gridb().treeb().weight(gridCellId) = 0.0;
2227 }
2228
2229 if(!m_dlbRestartWeights || m_dlbLastWeights == nullptr) {
2230 // use static cell weights
2231
2232 // TODO labels:CONTROLLER why not use an array of size noGridCells, set to zero
2233 // and add cell weights of each solver?
2234 const MInt sizeTimesSolver = noGridCells * noSolvers();
2235 MFloatScratchSpace solverweight(sizeTimesSolver, FUN_, "solverweight");
2236 // might be necessary for dimensionless weight-comparison between different-solvers,
2237 // to devide the weight of ech solver-cell by its maximum-possible-value!
2238 // MFloatScratchSpace solvermaxweights(noSolvers(),FUN_, "solvermaxweights");
2239 solverweight.fill(F0);
2240
2241 for(MInt i = 0; i < noSolvers(); i++) {
2242 if(solver(i).isActive()) { // Only for solvers active on this ranks
2243 solver(i).setCellWeights(&solverweight[0]);
2244 }
2245 }
2246
2247 // Accumulate weights of all solvers
2248 for(MInt i = 0; i < noSolvers(); i++) {
2249 for(MInt gridCellId = 0; gridCellId < noGridCells; gridCellId++) {
2250 const MInt id = gridCellId + noGridCells * i;
2251 gridb().treeb().weight(gridCellId) += solverweight[id];
2252 }
2253 }
2254
2255
2256 } else {
2257 // use weights from last DLB
2258 MInt weightOffset = 0;
2259 for(MInt solverId = 0; solverId < noSolvers(); solverId++) {
2260 if(solver(solverId).isActive()) {
2261 for(MInt cellId = 0; cellId < noGridCells; cellId++) {
2262 if(gridb().a_hasProperty(cellId, Cell::IsHalo)) {
2263 continue;
2264 }
2265 gridb().a_weight(cellId) += solver(solverId).getCellLoad(cellId, &m_dlbLastWeights[weightOffset]);
2266 }
2267 }
2268 weightOffset += solver(solverId).noLoadTypes();
2269 }
2270 }
2271}
2272
2273
2276template <MInt nDim>
2277void Controller<nDim>::updateWeightsAndWorkloads(const std::vector<MFloat>& weights,
2278 const MBool restoreDefaultWeights) {
2279 TRACE();
2280
2281 const MInt noGridCells = gridb().treeb().size();
2282
2283 // Restore default cell weights: each cell has weight 1.
2284 // else reset all weights to 0
2285 const MFloat initWeight = (restoreDefaultWeights) ? 1.0 : 0.0;
2286 for(MInt gridCellId = 0; gridCellId < noGridCells; gridCellId++) {
2287 gridb().a_weight(gridCellId) = initWeight;
2288 }
2289
2290 if(!restoreDefaultWeights) {
2291 // Update the cell weights
2292 MInt weightOffset = 0;
2293 for(MInt solverId = 0; solverId < noSolvers(); solverId++) {
2294 if(solver(solverId).isActive()) { // Only add cell loads for active solvers
2295 for(MInt cellId = 0; cellId < noGridCells; cellId++) {
2296 if(gridb().a_hasProperty(cellId, Cell::IsHalo)) continue;
2297 gridb().a_weight(cellId) += solver(solverId).getCellLoad(cellId, &weights[weightOffset]);
2298 }
2299 }
2300 weightOffset += solver(solverId).noLoadTypes();
2301 }
2302 }
2303
2304 // Calculate the partition-cell workloads
2305 gridb().calculateNoOffspringsAndWorkload(static_cast<Collector<void>*>(nullptr), noGridCells);
2306}
2307
2308
2311template <MInt nDim>
2313 TRACE();
2314
2315 // Check if update of partition workloads is enabled
2316 MBool updateGridPartitionWorkloads = false;
2317 updateGridPartitionWorkloads =
2318 Context::getBasicProperty<MBool>("updateGridPartitionWorkloads", AT_, &updateGridPartitionWorkloads);
2319 if(!updateGridPartitionWorkloads) {
2320 return false;
2321 }
2322
2323 m_log << "Updating partition cell workloads and save them to grid... " << std::endl;
2324
2325 MBool restore = false;
2326 restore = Context::getBasicProperty<MBool>("restoreDefaultWorkloads", AT_, &restore);
2327
2328 // Number of load quantities (should be the same for every rank)
2329 const MInt noLoadTypes = globalNoLoadTypes();
2330 std::vector<MFloat> weights{};
2331
2332 if(noLoadTypes < 1 || restore) {
2333 restore = true;
2334 m_log << "Restoring default weights" << std::endl;
2335 } else {
2336 m_log << "Using specified solver weights" << std::endl;
2337 // Get the weights for all solvers
2338 getSpecifiedSolverWeights(weights);
2339 }
2340
2341 // Use the determined weights to update the partition workloads (or restore default weighting)
2342 updateWeightsAndWorkloads(weights, restore);
2343 // Save new partition cell workloads to grid file
2344 gridb().savePartitionCellWorkloadsGridFile();
2345 m_log << "done" << std::endl;
2346
2347 if(domainId() == 0) {
2348 std::cout << "Updated cell weights. Restart solver with 'updateGridPartitionWorkloads = false'" << std::endl;
2349 }
2350 return true;
2351}
2352
2354template <MInt nDim>
2355void Controller<nDim>::getSpecifiedSolverWeights(std::vector<MFloat>& weights) {
2356 TRACE();
2357
2358 // Determine the weights for all solvers
2359 const MInt noLoadTypes = globalNoLoadTypes();
2360 weights.resize(noLoadTypes);
2361 std::fill(weights.begin(), weights.end(), 0.0);
2362
2363 MInt offset = 0;
2364 for(MInt i = 0; i < noSolvers(); i++) {
2365 const MInt solverCount = solver(i).noLoadTypes();
2366 std::vector<MString> names(solverCount);
2367
2368 // Get default weights (+names) for this solver
2369 solver(i).getDefaultWeights(&weights[offset], names);
2370
2371 // Check for user specified solver weights
2372 const MString propName = "solverWeights_" + std::to_string(i);
2373 if(Context::propertyExists(propName)) {
2374 if(Context::propertyLength(propName) != solverCount) {
2375 TERMM(1, "wrong length of property '" + propName + "', should be of length " + std::to_string(solverCount));
2376 }
2377 for(MInt j = 0; j < solverCount; j++) {
2378 weights[offset + j] = Context::getBasicProperty<MFloat>(propName, AT_, j);
2379 }
2380 }
2381
2382 for(MInt j = 0; j < solverCount; j++) {
2383 m_log << "Solver #" << i << ", weight #" << j << ": " << names[j] << ", " << weights[offset + j] << std::endl;
2384 }
2385
2386 offset += solverCount;
2387 }
2388}
2389
2390
2391template <MInt nDim>
2392void Controller<nDim>::writeRestartFile(const MBool forceRestart, const MBool writeBackup) {
2393 if(m_grid == nullptr) {
2394 return;
2395 }
2396 MBoolScratchSpace writeRestart(noSolvers(), FUN_, "writeRestart");
2397 MBoolScratchSpace writeGridRestart(noSolvers(), FUN_, "writeGridRestart");
2398
2399 // disable load timers:
2400 std::vector<MInt> dlbTimersStatus(noSolvers());
2401
2402 for(MInt i = 0; i < noSolvers(); i++) {
2403 dlbTimersStatus[i] = solver(i).dlbTimersEnabled();
2404 if(dlbTimersStatus[i] != 0) {
2405 solver(i).disableDlbTimers(); // Disable DLB timers if active
2406 }
2407 }
2408
2409 for(MInt i = 0; i < noSolvers(); i++) {
2410 // prepareRestart() and reInitAfterRestart must be called for all ranks!
2411 writeRestart[i] = solver(i).prepareRestart(forceRestart, writeGridRestart[i]);
2412 }
2413
2414 MBool allsolversrestart = false;
2415 MBool gridrestart = false;
2416
2417 for(MInt i = 0; i < noSolvers(); i++) {
2418 if(writeRestart[i]) allsolversrestart = true;
2419 if(writeRestart[i] && writeGridRestart[i]) gridrestart = true;
2420 }
2421
2422 gridb().deletePeriodicConnection(true);
2423
2424 if(gridrestart || (allsolversrestart && m_saveGridNewPartitionCells)) {
2425 accumulateCellWeights();
2426
2427 {
2428 std::stringstream s;
2429 s << "restartGrid";
2430 if(writeBackup) s << "Backup";
2431 if(!m_useNonSpecifiedRestartFile || writeBackup) s << "_00" << globalTimeStep;
2432 // TODO labels:CONTROLLER,IO,totest change to the version below, fix testcases
2433 // if (!m_useNonSpecifiedRestartFile || writeBackup) {
2434 // s << "_" << setw(8) << setfill('0') << globalTimeStep;
2435 //}
2436 s << ParallelIo::fileExt();
2437 m_currentGridFileName = s.str();
2438
2439 // Cancel any open MPI requests in the solver since these might conflict with communication
2440 // necessary when writing the grid file
2441 for(MInt i = 0; i < noSolvers(); i++) {
2442 solver(i).cancelMpiRequests();
2443 }
2444
2445 cerr0 << "Saving adapted grid file for all solvers " << s.str() << "... ";
2446
2447 gridb().saveGrid((m_outputDir + m_currentGridFileName).c_str(), m_recalcIds);
2448
2449 cerr0 << "ok." << std::endl;
2450
2451 m_saveGridNewPartitionCells = false;
2452
2453 }
2454 }
2455
2456 for(MInt c = 0; c < noCouplers(); c++) {
2457 coupler(c).writeRestartFile(allsolversrestart);
2458 }
2459
2460 if(allsolversrestart) {
2461 cerr0 << "Writing restart files at time step " << globalTimeStep << " :" << std::endl;
2462
2463 for(MInt i = 0; i < noSolvers(); i++) {
2464 if(solver(i).isActive()) {
2465 solver(i).writeRestartFile(writeRestart[i], writeBackup, m_currentGridFileName, m_recalcIds);
2466 }
2467 }
2468 // savePartitionFile();
2469 }
2470
2471 for(MInt i = 0; i < noSolvers(); i++) {
2472 solver(i).reIntAfterRestart(writeRestart[i]);
2473 }
2474
2475 gridb().restorePeriodicConnection();
2476
2477 // reanable all previously running timers:
2478 for(MInt i = 0; i < noSolvers(); i++) {
2479 if(dlbTimersStatus[i] != 0) {
2480 solver(i).enableDlbTimers(); // Reenable previously active DLB timers
2481 }
2482 }
2483}
2484
2485
2487template <MInt nDim>
2489 MInt noLoadTypes = 0;
2490 for(MInt i = 0; i < noSolvers(); i++) {
2491 noLoadTypes += solver(i).noLoadTypes();
2492 }
2493 return noLoadTypes;
2494}
2495
2496
2498template <MInt nDim>
2499void Controller<nDim>::getLoadQuantities(MInt* const loadQuantities) {
2500 // Fill with zeros since inactive solvers will not write anything
2501 std::fill_n(&loadQuantities[0], globalNoLoadTypes(), 0);
2502
2503 MInt offset = 0;
2504 for(MInt i = 0; i < noSolvers(); i++) {
2505 solver(i).getLoadQuantities(&loadQuantities[offset]);
2506 offset += solver(i).noLoadTypes();
2507 }
2508}
2509
2510
2518template <MInt nDim>
2519MBool Controller<nDim>::needLoadBalancing(const MFloat localRunTime, MFloat* const loads, MFloat& imbalance) {
2520 TRACE();
2521
2522 // Nothing to do in serial
2523 if(noDomains() == 1) {
2524 return false;
2525 }
2526
2527 MFloatScratchSpace localRunTimes(noDomains(), FUN_, "localRunTimes");
2528 MFloatScratchSpace localIdleTimes(noDomains(), FUN_, "localIdleTimes");
2529
2530 // Gather the local runtimes on all ranks
2531 MPI_Allgather(&localRunTime, 1, type_traits<MFloat>::mpiType(), &localRunTimes[0], 1, type_traits<MFloat>::mpiType(),
2532 gridb().mpiComm(), AT_, "localRunTime", "localRunTimes[0]");
2533
2534 // Calculate average and maximum runtime of all ranks
2535 const MFloat averageRunTime = std::accumulate(localRunTimes.begin(), localRunTimes.end(), 0.0) / noDomains();
2536 const MFloat maxRunTime = *std::max_element(localRunTimes.begin(), localRunTimes.end());
2537
2538 // Imbalance percentage: I_% = (t_max - t_avg)/t_max * N/(N-1)
2539 // (DeRose 2007, Detecting Application Load Imbalance on High End Massively Parallel Systems)
2540 imbalance = (maxRunTime - averageRunTime) / maxRunTime * noDomains() / (noDomains() - 1.0);
2541 const MBool loadBalance = (imbalance > m_dlbImbalanceThreshold);
2542
2543 // Compute domain loads as fraction of local run time to average run time
2544 for(MInt i = 0; i < noDomains(); i++) {
2545 const MFloat load = localRunTimes[i] / averageRunTime;
2546 loads[i] = load;
2547 }
2548
2549 const MFloat maxLoad = *std::max_element(&loads[0], &loads[0] + noDomains());
2550 const MFloat minLoad = *std::min_element(&loads[0], &loads[0] + noDomains());
2551
2552 char imb[10];
2553 std::sprintf(imb, "%.2f", imbalance * 100.0);
2554 std::stringstream message;
2555 message << globalTimeStep << " * Imbalance percentage: " << imb << "%, t_avg = " << averageRunTime
2556 << ", t_max = " << maxRunTime << std::endl;
2557 message << globalTimeStep << " * Loads: max = " << maxLoad << ", min = " << minLoad << std::endl;
2558 m_log << message.str();
2559 cerr0 << message.str();
2560
2561 // TODO labels:CONTROLLER,DLB generalize histogram output for other purposes?
2562 const MFloat binWidth = 0.05;
2563 const MInt noBins = std::ceil(maxLoad / binWidth) + 1;
2564 std::vector<MInt> loadBins(noBins);
2565 std::fill(loadBins.begin(), loadBins.end(), 0);
2566 for(MInt i = 0; i < noDomains(); i++) {
2567 const MInt binId = std::floor(loads[i] / binWidth);
2568 loadBins[binId] += 1;
2569 }
2570
2571 const MInt sumBins = std::accumulate(loadBins.begin(), loadBins.end(), 0);
2572 if(sumBins != noDomains()) {
2573 m_log << "ERROR in load bins, count " << sumBins << " != " << noDomains() << std::endl;
2574 }
2575
2576 const MInt noBinsPrev = m_previousLoadBins.size();
2577 const MInt maxNoBins = std::max(noBins, noBinsPrev);
2578 const MInt maxBinCountCurr = *std::max_element(loadBins.begin(), loadBins.end());
2579 const MInt maxBinCountPrev =
2580 (noBinsPrev > 0) ? *std::max_element(m_previousLoadBins.begin(), m_previousLoadBins.end()) : 0;
2581 const MInt maxBinCount = std::max(maxBinCountCurr, maxBinCountPrev);
2582
2583 const MInt firstBin =
2584 (std::find_if(loadBins.begin(), loadBins.end(), [](MInt i) { return i > 0; }) - loadBins.begin()) - 1;
2585 const MInt firstBinPrev =
2586 (std::find_if(m_previousLoadBins.begin(), m_previousLoadBins.end(), [](MInt i) { return i > 0; })
2587 - m_previousLoadBins.begin())
2588 - 1;
2589
2590 const MInt minFirstBin = (noBinsPrev > 0) ? std::max(std::min(firstBin, firstBinPrev), 0) : std::max(firstBin, 0);
2591
2592 // Set maximum line length
2593 const MInt maxLineLength = 256;
2594 const MString dashes(maxLineLength, '-');
2595 // Create sprintf buffer and string stream
2596 MChar b[maxLineLength];
2597 std::stringstream hist;
2598
2599 snprintf(b, maxLineLength, " |%.126s|\n", dashes.c_str());
2600 MString separatorTop(b);
2601
2602 snprintf(b, maxLineLength, " |%.18s|%.42s|%.42s|%.21s|\n", dashes.c_str(), dashes.c_str(), dashes.c_str(),
2603 dashes.c_str());
2604 MString separator(b);
2605
2606 // Create header
2607 snprintf(b, maxLineLength,
2608 " | Load distribution at global timestep %-8d%*s - imbalance %5.2f%% - t_avg %5.3e - t_max %5.3e - loads "
2609 "[%5.3f,%5.3f] |\n",
2610 globalTimeStep, 2, " ", imbalance * 100.0, averageRunTime, maxRunTime, minLoad, maxLoad);
2611 hist << "\n" << separatorTop << b;
2612
2613 if(noBinsPrev > 0) {
2614 snprintf(b, maxLineLength,
2615 " | Load distribution at global timestep %-8d%*s - imbalance %5.2f%% - t_avg %5.3e - t_max %5.3e - loads "
2616 "[%5.3f,%5.3f] |\n",
2617 m_previousLoadInfoStep, 2, " ", m_previousLoadInfo[0], m_previousLoadInfo[1], m_previousLoadInfo[2],
2618 m_previousLoadInfo[3], m_previousLoadInfo[4]);
2619 hist << separatorTop << b;
2620 }
2621
2622 snprintf(b, maxLineLength, " | load bin | %-32s%8d | %-32s%8d | curr/prev count |\n",
2623 "current distribution - timestep", globalTimeStep, "previous distribution - timestep",
2624 m_previousLoadInfoStep);
2625 hist << separator << b << separator;
2626
2627 MInt checksum = 0, checksumPrev = 0;
2628 // Output horizontal histogram
2629 for(MInt i = maxNoBins - 1; i >= minFirstBin; i--) {
2630 const MInt maxBarWidth = 40;
2631 const MInt count = (i < noBins) ? loadBins[i] : 0;
2632 const MInt countPrev = (i < noBinsPrev) ? m_previousLoadBins[i] : 0;
2633 // Display at least one char if the count is > 0
2634 const MInt width = (count > 0) ? std::max((MInt)(maxBarWidth * (MFloat)count / (MFloat)maxBinCount), 1) : 0;
2635 const MInt widthPrev =
2636 (countPrev > 0) ? std::max((MInt)(maxBarWidth * (MFloat)countPrev / (MFloat)maxBinCount), 1) : 0;
2637 const MString bar(width, '#');
2638 const MString barPrev(widthPrev, '@');
2639
2640 if(approx(i * binWidth, 1.0, 0.0001)) {
2641 // Add horizontal separator marks between overloaded/underloaded ranks
2642 snprintf(b, maxLineLength, " | [%6.3f, %-6.3f) |_%-40s_|_%-40s_| %8d | %8d |\n", i * binWidth, (i + 1) * binWidth,
2643 bar.c_str(), barPrev.c_str(), count, countPrev);
2644 } else {
2645 snprintf(b, maxLineLength, " | [%6.3f, %-6.3f) | %-40s | %-40s | %8d | %8d |\n", i * binWidth, (i + 1) * binWidth,
2646 bar.c_str(), barPrev.c_str(), count, countPrev);
2647 }
2648 hist << b;
2649
2650 // Make sure all ranks appear in the histogram
2651 checksum += count;
2652 checksumPrev += countPrev;
2653 }
2654 hist << separator;
2655
2656 if(checksum != noDomains() || (checksumPrev != noDomains() && noBinsPrev > 0)) {
2657 m_log << "ERROR in load histogram" << std::endl;
2658 cerr0 << "ERROR in load histogram" << std::endl;
2659 } else {
2660 m_log << hist.str() << std::endl;
2661 cerr0 << hist.str() << std::endl;
2662 }
2663
2664 // Store current information for next output
2665 m_previousLoadBins = loadBins;
2666 m_previousLoadInfoStep = globalTimeStep;
2667 m_previousLoadInfo[0] = imbalance * 100.0;
2668 m_previousLoadInfo[1] = averageRunTime;
2669 m_previousLoadInfo[2] = maxRunTime;
2670 m_previousLoadInfo[3] = minLoad;
2671 m_previousLoadInfo[4] = maxLoad;
2672
2673 return loadBalance;
2674}
2675
2676
2686template <MInt nDim>
2687void Controller<nDim>::loadBalancingCalcNewGlobalOffsets(const MLong* const oldPartitionCellOffsets,
2688 const MLong* const newPartitionCellOffsets,
2689 MLong* const globalOffsets) {
2690 TRACE();
2691
2692 const MInt oldNoPartitionCells = gridb().m_noPartitionCells;
2693 MLongScratchSpace partitionCellsGlobalId(oldNoPartitionCells, AT_, "partitionCellsGlobalId");
2694
2695 for(MInt i = 0; i < oldNoPartitionCells; i++) {
2696 const MLong globalCellId = gridb().m_localPartitionCellGlobalIds[i]; // sorted by global id
2697
2698 partitionCellsGlobalId(i) = globalCellId;
2699 ASSERT(i == 0 || partitionCellsGlobalId(i) > partitionCellsGlobalId(i - 1),
2700 "Partition cell global ids not in ascending order.");
2701 }
2702
2703 // Fill with zeros
2704 std::fill_n(&globalOffsets[0], noDomains() + 1, 0);
2705
2706 // Calculate global offsets for the given new partition cell offsets
2707 for(MInt i = 1; i < noDomains(); i++) {
2708 const MLong partitionCellId = newPartitionCellOffsets[i];
2709 // Determine if this partition cell is currently on this domain
2710 const MBool hasPartitionCell = (oldPartitionCellOffsets[domainId()] <= partitionCellId
2711 && partitionCellId < oldPartitionCellOffsets[domainId() + 1]);
2712
2713 // Set global domain offset if the partition cell is present, else it remains zero
2714 if(hasPartitionCell) {
2715 // Determine local partition cell index and the global cell id
2716 const MLong partitionCellLocalId = partitionCellId - oldPartitionCellOffsets[domainId()];
2717 globalOffsets[i] = partitionCellsGlobalId[partitionCellLocalId];
2718
2719 // Local cell id of the partition cell and its parent
2720 MInt currentId = gridb().m_localPartitionCellLocalIds[partitionCellLocalId];
2721 MInt parentId = gridb().a_parentId(currentId);
2722
2723 MInt shift = 0;
2724 // Partition level shift: if the partition cell is not on the min-level check the global id of
2725 // its parent, if 'globalId(parent) == globalId(partitionCell) - 1' the parent is a partition
2726 // level ancestor that belongs to this domain, i.e., it has no offspring cells on the previous
2727 // domain. Continue to loop up the parent cells and check this condition to find the shift
2728 // that yields the correct domain offset.
2729 while(gridb().a_level(currentId) != gridb().minLevel()
2730 && gridb().a_globalId(parentId) == gridb().a_globalId(currentId) - 1) {
2731 shift++;
2732 currentId = parentId;
2733 parentId = gridb().a_parentId(currentId);
2734 }
2735
2736 TERMM_IF_COND(shift > 0 && gridb().m_maxPartitionLevelShift == 0,
2737 "Error: domain offset has a shift but the maximum partition level shift is 0.");
2738
2739 // Correct global domain offset
2740 globalOffsets[i] -= shift;
2741 }
2742 }
2743
2744 // Set only once since distributed via allreduce with sum to all domains
2745 if(domainId() == 0) {
2746 globalOffsets[noDomains()] = gridb().domainOffset(noDomains());
2747 }
2748
2749 // Combine the new domain offset information of all ranks, i.e. each new
2750 // global domain offset is determined by the domain where the corresponding
2751 // partition cell is present, other domains will add a value of zero to this offset
2752 MPI_Allreduce(MPI_IN_PLACE, &globalOffsets[0], noDomains() + 1, type_traits<MLong>::mpiType(), MPI_SUM,
2753 gridb().mpiComm(), AT_, "MPI_IN_PLACE", "globalOffsets[0]");
2754}
2755
2756
2767template <MInt nDim>
2769 MInt* const noCellsToSendByDomain,
2770 MInt* const noCellsToReceiveByDomain,
2771 MInt* const sortedCellId,
2772 MInt* const bufferIdToCellId) {
2773 TRACE();
2774
2775 const MInt noCells = gridb().treeb().size();
2776
2777 MIntScratchSpace targetDomainsByCell(noCells, AT_, "targetDomainsByCell");
2778 std::fill_n(&targetDomainsByCell[0], noCells, -1);
2779 std::fill_n(&noCellsToSendByDomain[0], noDomains() + 1, 0);
2780
2781 // Loop over all cells and determine to which domain they belong under the new domain
2782 // decomposition
2783 for(MInt cellId = 0; cellId < noCells; cellId++) {
2784 // Skip halo cells
2785 if(gridb().a_hasProperty(cellId, Cell::IsHalo)) {
2786 continue;
2787 }
2788
2789 const MLong globalCellId = gridb().a_globalId(cellId);
2790 // Search for global cell id in domain offsets array
2791 auto lowerBound = std::lower_bound(&offsets[0], &offsets[0] + noDomains(), globalCellId);
2792 const MInt dist = std::distance(&offsets[0], lowerBound);
2793 // Check if this cell is a domain offset (i.e. in the offsets list)
2794 const MBool isDomainOffset = (*lowerBound == globalCellId);
2795 // Determine target domain id
2796 const MInt globalDomain = (isDomainOffset) ? dist : dist - 1;
2797
2798 // Tag cell with target domain
2799 targetDomainsByCell[cellId] = globalDomain;
2800 // Increase number of cells to send to this domain
2801 noCellsToSendByDomain[globalDomain]++;
2802 }
2803
2804 // Use all-to-all communication to determine how many cells to send/receive to/from each domain
2805 MPI_Alltoall(&noCellsToSendByDomain[0], 1, type_traits<MInt>::mpiType(), &noCellsToReceiveByDomain[0], 1,
2806 type_traits<MInt>::mpiType(), gridb().mpiComm(), AT_, "noCellsToSendByDomain[0]",
2807 "noCellsToReceiveByDomain[0]");
2808
2809 // Determine total number of cells to send/recv (store as last entry)
2810 noCellsToSendByDomain[noDomains()] =
2811 std::accumulate(&noCellsToSendByDomain[0], &noCellsToSendByDomain[0] + noDomains(), 0);
2812 noCellsToReceiveByDomain[noDomains()] =
2813 std::accumulate(&noCellsToReceiveByDomain[0], &noCellsToReceiveByDomain[0] + noDomains(), 0);
2814
2815 if(noCellsToSendByDomain[noDomains()] < 1) {
2816 TERMM(1, std::to_string(domainId()) + " noCellsToSend = " + std::to_string(noCellsToSendByDomain[noDomains()]));
2817 }
2818 if(noCellsToReceiveByDomain[noDomains()] < 1) {
2819 TERMM(1, std::to_string(domainId()) + " noCellsToRecv = " + std::to_string(noCellsToReceiveByDomain[noDomains()]));
2820 }
2821
2822 // Sort cells depending on the domain to send to
2823 std::fill_n(&sortedCellId[0], noCells, -1);
2824
2825 // @ansgar TODO labels:CONTROLLER,DLB this might be very inefficient for large number of cores
2826 MInt currentBufferId = 0;
2827 for(MInt dom = 0; dom < noDomains(); ++dom) {
2828 // Map from global cell id to local cell id for cells that will belong to the current domain
2829 std::map<MLong, MInt> cellMap;
2830 for(MInt cellId = 0; cellId < noCells; ++cellId) {
2831 // Halo cells are automatically skipped as they have tag[] of -1.
2832 // Halo cells will have a sortedCellId[] of -1.
2833 if(targetDomainsByCell[cellId] == dom) {
2834 cellMap[gridb().a_globalId(cellId)] = cellId;
2835 }
2836 }
2837
2838 // Set buffer location for all cells in global id order -> received data is stored in
2839 // global id order and does not need to be resorted
2840 for(auto&& cell : cellMap) {
2841 const MInt cellId = cell.second;
2842 sortedCellId[cellId] = currentBufferId;
2843 bufferIdToCellId[currentBufferId] = cellId; // map from buffer id to cell id
2844 ++currentBufferId;
2845 }
2846 }
2847}
2848
2849
2853template <MInt nDim>
2855 TRACE();
2856
2857 // Determine solver local root domain
2858 const MInt localRootGlobalDomain = solverLocalRootDomain(solver);
2859
2860 std::vector<MInt> globalIdVars(0);
2861 std::vector<MFloat> globalFloatVars(0);
2862
2863 // Request global solver variables
2864 solver->getGlobalSolverVars(globalFloatVars, globalIdVars);
2865
2866 const MInt noIdVars = globalIdVars.size();
2867 const MInt noFloatVars = globalFloatVars.size();
2868
2869 // Distribute all global variables from local rank 0 to all domains
2870 MPI_Bcast(&globalIdVars[0], noIdVars, type_traits<MInt>::mpiType(), localRootGlobalDomain, gridb().mpiComm(), AT_,
2871 "globalIdVars[0]");
2872 MPI_Bcast(&globalFloatVars[0], noFloatVars, type_traits<MFloat>::mpiType(), localRootGlobalDomain, gridb().mpiComm(),
2873 AT_, "globalFloatVars[0]");
2874
2875 // Set variables in solver
2876 solver->setGlobalSolverVars(globalFloatVars, globalIdVars);
2877}
2878
2879
2881template <MInt nDim>
2882void Controller<nDim>::determineDataSizesDlb(const MInt id, const MInt mode, const MInt* const noCellsToSend,
2883 const MInt* const bufferIdToCellId,
2884 std::vector<std::vector<MInt>>& sendSizeVector,
2885 std::vector<std::vector<MInt>>& recvSizeVector) {
2886 TRACE();
2887
2888 const MBool isSolver = (mode == 0); // mode 0: solver; mode 1: coupler
2889 const MInt timerId = (isSolver) ? id : noSolvers() + id;
2890 // Get the number of quantities that need to be communicated
2891 MInt dataCount = (isSolver) ? solver(id).noCellDataDlb() : coupler(id).noCellDataDlb();
2892
2893 RECORD_TIMER_START(m_solverTimers[timerId][SolverTimers::CalcDataSizesMpiBlocking]);
2894 // Note: since inactive ranks might not return the correct count, take the max among all domains
2895 MPI_Allreduce(MPI_IN_PLACE, &dataCount, 1, type_traits<MInt>::mpiType(), MPI_MAX, gridb().mpiComm(), AT_,
2896 "MPI_IN_PLACE", "dataCount");
2897 RECORD_TIMER_STOP(m_solverTimers[timerId][SolverTimers::CalcDataSizesMpiBlocking]);
2898
2899 std::vector<MInt> dataSendSize(dataCount * (globalNoDomains() + 1));
2900 std::vector<MInt> dataRecvSize(dataCount * (globalNoDomains() + 1));
2901
2902 // Determine data size to send/receive to/from each domain for all quantities
2903 // last entry holds the sum
2904 for(MInt dataId = 0; dataId < dataCount; dataId++) {
2905 const MInt offset = dataId * (globalNoDomains() + 1);
2906 MInt bufferId = 0;
2907
2908 // Loop over all domains and determine the data send size for this domain
2909 for(MInt domain = 0; domain < globalNoDomains(); domain++) {
2910 MInt domainDataSize = 0;
2911
2912 // Loop over all grid cells that are send to this domain
2913 for(MInt i = 0; i < noCellsToSend[domain]; i++) {
2914 // Inquire the data size for this cell
2915 // TODO labels:DLB @ansgar inquire all data sizes at once?
2916 const MInt cellId = bufferIdToCellId[bufferId];
2917 ASSERT(cellId > -1, "");
2918 const MInt dataSize =
2919 (isSolver) ? solver(id).cellDataSizeDlb(dataId, cellId) : coupler(id).cellDataSizeDlb(dataId, cellId);
2920 domainDataSize += dataSize;
2921 bufferId++;
2922 }
2923
2924 // Store total data size to send to this domain for this quantity
2925 dataSendSize[offset + domain] = domainDataSize;
2926 }
2927
2928 // TODO labels:DLB @ansgar perform just one alltoall for all data ids?
2929 RECORD_TIMER_START(m_solverTimers[timerId][SolverTimers::CalcDataSizesMpi]);
2930 // Exchange data send size with all other domains and store as receive size
2931 MPI_Alltoall(&dataSendSize[offset], 1, type_traits<MInt>::mpiType(), &dataRecvSize[offset], 1,
2932 type_traits<MInt>::mpiType(), gridb().mpiComm(), AT_, "dataSendSize[offset]", "dataRecvSize[offset]");
2933 RECORD_TIMER_STOP(m_solverTimers[timerId][SolverTimers::CalcDataSizesMpi]);
2934
2935 // Compute the total data send size and store as last entry
2936 dataSendSize[offset + globalNoDomains()] =
2937 std::accumulate(&dataSendSize[offset], &dataSendSize[offset + globalNoDomains()], 0);
2938
2939 // Compute the total data receive size and store as last entry
2940 dataRecvSize[offset + globalNoDomains()] =
2941 std::accumulate(&dataRecvSize[offset], &dataRecvSize[offset + globalNoDomains()], 0);
2942 }
2943
2944 sendSizeVector.push_back(dataSendSize);
2945 recvSizeVector.push_back(dataRecvSize);
2946}
2947
2948
2950template <MInt nDim>
2951void Controller<nDim>::redistributeDataDlb(const MInt id, const MInt mode, std::vector<MInt>& dataSendSize,
2952 std::vector<MInt>& dataRecvSize, const MInt* const bufferIdToCellId,
2953 const MInt oldNoCells, std::vector<MInt*>& intDataRecv,
2954 std::vector<MLong*>& longDataRecv, std::vector<MFloat*>& floatDataRecv,
2955 std::vector<MInt>& dataTypes) {
2956 TRACE();
2957
2958 const MBool isSolver = (mode == 0); // mode 0: solver; mode 1: coupler
2959 const MInt timerId = (isSolver) ? id : noSolvers() + id;
2960 // Solver pointer for easier access
2961 Solver* const solverP = (isSolver) ? &solver(id) : nullptr;
2962 Coupling* const couplerP = (isSolver) ? nullptr : &coupler(id);
2963
2964 RECORD_TIMER_START(m_solverTimers[timerId][SolverTimers::RedistributeMpiBlocking]);
2965
2966 // Determine solver local root domain
2967 // TODO labels:CONTROLLER,DLB @ansgar_coupler check this!
2968 const MInt localRootGlobalDomain = (isSolver) ? solverLocalRootDomain(solverP) : 0;
2969
2970 // Get the number of quantities that need to be communicated
2971 MInt dataCount = (isSolver) ? solverP->noCellDataDlb() : couplerP->noCellDataDlb();
2972
2973 // Note: since inactive ranks might not return the correct count, take the max among all domains
2974 MPI_Allreduce(MPI_IN_PLACE, &dataCount, 1, type_traits<MInt>::mpiType(), MPI_MAX, gridb().mpiComm(), AT_,
2975 "MPI_IN_PLACE", "dataCount");
2976 RECORD_TIMER_STOP(m_solverTimers[timerId][SolverTimers::RedistributeMpiBlocking]);
2977
2978 MInt intDataCount = 0;
2979 MInt longDataCount = 0;
2980 MInt floatDataCount = 0;
2981
2982 // Get solver/solver data and exchange
2983 for(MInt dataId = 0; dataId < dataCount; dataId++) {
2984 const MInt offset = dataId * (globalNoDomains() + 1);
2985 // Inquire data type (communicate from local rank 0 to avoid errors with inactive ranks) and
2986 // total data send size for this quantity
2987 // MInt dataType = (solverP->domainId() == 0) ? solverP->cellDataTypeDlb(dataId) : -1;
2988 MInt dataType = -1;
2989 if(isSolver) {
2990 dataType = (solverP->domainId() == 0) ? solverP->cellDataTypeDlb(dataId) : -1;
2991 } else {
2992 // TODO labels:CONTROLLER,DLB @ansgar_coupler
2993 /* dataType = (couplerP->domainId() == 0) ? couplerP->cellDataTypeDlb(dataId) : -1; */
2994 dataType = couplerP->cellDataTypeDlb(dataId);
2995 }
2996
2997 RECORD_TIMER_START(m_solverTimers[timerId][SolverTimers::RedistributeMpi]);
2998 MPI_Bcast(&dataType, 1, type_traits<MInt>::mpiType(), localRootGlobalDomain, gridb().mpiComm(), AT_, "dataType");
2999 RECORD_TIMER_STOP(m_solverTimers[timerId][SolverTimers::RedistributeMpi]);
3000
3001 const MInt dataSize = dataSendSize[offset + globalNoDomains()];
3002 const MInt recvSize = dataRecvSize[offset + globalNoDomains()];
3003
3004 // Check data type (MInt, MLong or MFloat for now)
3005 switch(dataType) {
3006 case MINT: {
3007 dataTypes.push_back(MINT);
3008 // Allocate (persistent) data receive buffer and store pointer
3009 MInt* newIntData = nullptr;
3010 mAlloc(newIntData, std::max(recvSize, 1), "newIntData", AT_);
3011 intDataRecv.push_back(newIntData);
3012
3013 // Data send buffer, temporary since not needed anymore after exchange
3014 ScratchSpace<MInt> intDataSend(std::max(dataSize, 1), AT_, "intDataSend");
3015
3016 if(dataSize > 0) {
3017 // Get the solver/solver data and store in send buffer (sorted according to buffer mapping)
3018 if(isSolver) {
3019 solverP->getCellDataDlb(dataId, oldNoCells, &bufferIdToCellId[0], &intDataSend[0]);
3020 } else {
3021 couplerP->getCellDataDlb(dataId, oldNoCells, &bufferIdToCellId[0], &intDataSend[0]);
3022 }
3023 }
3024
3025 RECORD_TIMER_START(m_solverTimers[timerId][SolverTimers::RedistributeMpi]);
3026 // Exchange
3027 maia::mpi::exchangeData(&intDataSend[0], globalDomainId(), globalNoDomains(), gridb().mpiComm(), 1,
3028 &dataSendSize[offset], &dataRecvSize[offset], &intDataRecv[intDataCount][0]);
3029 RECORD_TIMER_STOP(m_solverTimers[timerId][SolverTimers::RedistributeMpi]);
3030
3031 intDataCount++;
3032 break;
3033 }
3034 case MLONG: {
3035 dataTypes.push_back(MLONG);
3036 // Allocate (persistent) data receive buffer and store pointer
3037 MLong* newLongData = nullptr;
3038 mAlloc(newLongData, std::max(recvSize, 1), "newLongData", AT_);
3039 longDataRecv.push_back(newLongData);
3040
3041 // Data send buffer, temporary since not needed anymore after exchange
3042 ScratchSpace<MLong> longDataSend(std::max(dataSize, 1), AT_, "longDataSend");
3043
3044 if(dataSize > 0) {
3045 // Get the solver/solver data and store in send buffer (sorted according to buffer mapping)
3046 if(isSolver) {
3047 solverP->getCellDataDlb(dataId, oldNoCells, &bufferIdToCellId[0], &longDataSend[0]);
3048 } else {
3049 couplerP->getCellDataDlb(dataId, oldNoCells, &bufferIdToCellId[0], &longDataSend[0]);
3050 }
3051 }
3052
3053 RECORD_TIMER_START(m_solverTimers[timerId][SolverTimers::RedistributeMpi]);
3054 // Exchange
3055 maia::mpi::exchangeData(&longDataSend[0], globalDomainId(), globalNoDomains(), gridb().mpiComm(), 1,
3056 &dataSendSize[offset], &dataRecvSize[offset], &longDataRecv[longDataCount][0]);
3057 RECORD_TIMER_STOP(m_solverTimers[timerId][SolverTimers::RedistributeMpi]);
3058
3059 longDataCount++;
3060 break;
3061 }
3062 case MFLOAT: {
3063 dataTypes.push_back(MFLOAT);
3064 // Allocate (persistent) data receive buffer and store pointer
3065 MFloat* newFloatData = nullptr;
3066 mAlloc(newFloatData, std::max(recvSize, 1), "newFloatData", AT_);
3067 floatDataRecv.push_back(newFloatData);
3068
3069 // Data send buffer, temporary since not needed anymore after exchange
3070 ScratchSpace<MFloat> floatDataSend(std::max(dataSize, 1), AT_, "floatDataSend");
3071
3072 if(dataSize > 0) {
3073 // Get the solver/solver data and store in send buffer
3074 if(isSolver) {
3075 solverP->getCellDataDlb(dataId, oldNoCells, &bufferIdToCellId[0], &floatDataSend[0]);
3076 } else {
3077 couplerP->getCellDataDlb(dataId, oldNoCells, &bufferIdToCellId[0], &floatDataSend[0]);
3078 }
3079 }
3080
3081 RECORD_TIMER_START(m_solverTimers[timerId][SolverTimers::RedistributeMpi]);
3082 // Exchange
3083 maia::mpi::exchangeData(&floatDataSend[0], globalDomainId(), globalNoDomains(), gridb().mpiComm(), 1,
3084 &dataSendSize[offset], &dataRecvSize[offset], &floatDataRecv[floatDataCount][0]);
3085 RECORD_TIMER_STOP(m_solverTimers[timerId][SolverTimers::RedistributeMpi]);
3086
3087 floatDataCount++;
3088 break;
3089 }
3090 default:
3091 TERMM(1, "Unknown data type: " + std::to_string(dataType) + ".");
3092 break;
3093 }
3094 }
3095}
3096
3097
3099template <MInt nDim>
3100void Controller<nDim>::setDataDlb(const MInt id, const MInt mode, std::vector<MInt*>& intDataRecv,
3101 std::vector<MLong*>& longDataRecv, std::vector<MFloat*>& floatDataRecv,
3102 std::vector<MInt>& dataTypes, const MBool freeMemory) {
3103 TRACE();
3104
3105 const MBool isSolver = (mode == 0); // mode 0: solver; mode 1: coupler
3106 const MInt timerId = (isSolver) ? id : noSolvers() + id;
3107 // Pointer for easier access
3108 Solver* const solverP = (isSolver) ? &solver(id) : nullptr;
3109 Coupling* const couplerP = (isSolver) ? nullptr : &coupler(id);
3110
3111 // Get the number of quantities that need to be communicated
3112 MInt dataCount = (isSolver) ? solverP->noCellDataDlb() : couplerP->noCellDataDlb();
3113
3114 RECORD_TIMER_START(m_solverTimers[timerId][SolverTimers::SetDataMpiBlocking]);
3115 // Note: since inactive ranks might return the correct count, take the max among all domains
3116 MPI_Allreduce(MPI_IN_PLACE, &dataCount, 1, type_traits<MInt>::mpiType(), MPI_MAX, gridb().mpiComm(), AT_,
3117 "MPI_IN_PLACE", "dataCount");
3118 RECORD_TIMER_STOP(m_solverTimers[timerId][SolverTimers::SetDataMpiBlocking]);
3119
3120 MInt intDataCount = 0;
3121 MInt longDataCount = 0;
3122 MInt floatDataCount = 0;
3123
3124 // Set solver/solver data
3125 for(MInt dataId = 0; dataId < dataCount; dataId++) {
3126 const MInt dataType = dataTypes[dataId];
3127
3128 switch(dataType) {
3129 case MINT: {
3130 if(isSolver) {
3131 solverP->setCellDataDlb(dataId, &intDataRecv[intDataCount][0]);
3132 } else {
3133 couplerP->setCellDataDlb(dataId, &intDataRecv[intDataCount][0]);
3134 }
3135 if(freeMemory) {
3136 mDeallocate(intDataRecv[intDataCount]);
3137 }
3138 intDataCount++;
3139 break;
3140 }
3141 case MLONG: {
3142 if(isSolver) {
3143 solverP->setCellDataDlb(dataId, &longDataRecv[longDataCount][0]);
3144 } else {
3145 couplerP->setCellDataDlb(dataId, &longDataRecv[longDataCount][0]);
3146 }
3147 if(freeMemory) {
3148 mDeallocate(longDataRecv[longDataCount]);
3149 }
3150 longDataCount++;
3151 break;
3152 }
3153 case MFLOAT: {
3154 if(isSolver) {
3155 solverP->setCellDataDlb(dataId, &floatDataRecv[floatDataCount][0]);
3156 } else {
3157 couplerP->setCellDataDlb(dataId, &floatDataRecv[floatDataCount][0]);
3158 }
3159 if(freeMemory) {
3160 mDeallocate(floatDataRecv[floatDataCount]);
3161 }
3162 floatDataCount++;
3163 break;
3164 }
3165 default:
3166 TERMM(1, "Unknown data type.");
3167 break;
3168 }
3169 }
3170}
3171
3172
3177template <MInt nDim>
3179 TRACE();
3180 using namespace maia::parallel_io;
3181
3182 RECORD_TIMER_START(m_timers[Timers::IO]);
3183
3184 // Number of timings
3185 const MInt noTimings = m_dlbTimeStepsAll.size();
3186 const MInt noValues = 2; // runTime and idleTime
3187
3188 if(noTimings == 0) {
3189 RECORD_TIMER_STOP(m_timers[Timers::IO]);
3190 return;
3191 }
3192
3193 // Timings output file name
3194 std::stringstream fileName;
3195 fileName << m_outputDir << "timings_" << noDomains() << "_" << globalTimeStep << ParallelIo::fileExt();
3196
3197 m_log << "Write timings to file: " << fileName.str() << std::endl;
3198
3199 ParallelIo file(fileName.str(), PIO_REPLACE, gridb().mpiComm());
3200
3201 file.defineArray(PIO_INT, "timeStep", noTimings);
3202
3203 // Dimensions of timings: noDomains x noTimings x noValues
3204 ParallelIo::size_type dimSizes[] = {noDomains(), noTimings, noValues};
3205
3206 file.defineArray(PIO_FLOAT, "timings", 3, &dimSizes[0]);
3207
3208 file.setAttribute("domain index", "dim_0", "timings");
3209 file.setAttribute("time step index", "dim_1", "timings");
3210 file.setAttribute("timings index", "dim_2", "timings");
3211
3212 file.setAttribute("run time", "var_0", "timings");
3213 file.setAttribute("idle time", "var_1", "timings");
3214
3215 // Assemble timesteps and timings
3216 MIntScratchSpace timeSteps(noTimings, AT_, "timeSteps");
3217 MFloatScratchSpace data(noTimings, noValues, AT_, "data");
3218 for(MInt i = 0; i < noTimings; i++) {
3219 timeSteps[i] = m_dlbTimeStepsAll[i];
3220 data(i, 0) = m_dlbRunTimeAll[i];
3221 data(i, 1) = m_dlbIdleTimeAll[i];
3222 }
3223
3224 // root writes timestep data
3225 if(domainId() == 0) {
3226 file.setOffset(noTimings, 0);
3227 } else {
3228 file.setOffset(0, 0);
3229 }
3230 file.writeArray(&timeSteps[0], "timeStep");
3231
3232 // Write timings of all domains
3233 file.setOffset(1, domainId(), 3);
3234 file.writeArray(&data[0], "timings");
3235
3236 RECORD_TIMER_STOP(m_timers[Timers::IO]);
3237}
3238
3239
3241template <MInt nDim>
3242void Controller<nDim>::storeLoadsAndWeights(const MFloat* const loads, const MInt noLoadTypes,
3243 const MInt* const loadQuantities, const MFloat domainWeight,
3244 const MFloat* const weights) {
3245 TRACE();
3246 using namespace maia::parallel_io;
3247
3248 RECORD_TIMER_START(m_timers[Timers::IO]);
3249
3250 if(domainId() == 0) {
3251 std::cerr << "Store loads and weights" << std::endl;
3252 }
3253
3254 std::stringstream fileName;
3255 fileName << m_outputDir << "loads_" << noDomains() << "_" << std::setw(8) << std::setfill('0') << globalTimeStep;
3256 fileName << ParallelIo::fileExt();
3257
3258 ParallelIo file(fileName.str(), PIO_REPLACE, gridb().mpiComm());
3259
3260 file.defineArray(PIO_FLOAT, "weights", noLoadTypes);
3261 file.defineArray(PIO_FLOAT, "loads", noDomains());
3262 file.defineArray(PIO_FLOAT, "domainWeights", noDomains());
3263
3264 // Dimensions of load quantities: noDomains x noLoadTypes
3265 ParallelIo::size_type dimSizes[] = {noDomains(), noLoadTypes};
3266 file.defineArray(PIO_INT, "loadQuantities", 2, &dimSizes[0]);
3267
3268 // root writes estimated weights
3269 if(domainId() == 0) {
3270 file.setOffset(noLoadTypes, 0);
3271 } else {
3272 file.setOffset(0, 0);
3273 }
3274 file.writeArray(&weights[0], "weights");
3275
3276 file.setOffset(1, domainId());
3277 file.writeArray(&loads[domainId()], "loads");
3278 file.writeArray(&domainWeight, "domainWeights");
3279
3280 // Write load quantities of all domains
3281 file.setOffset(1, domainId(), 2);
3282 file.writeArray(&loadQuantities[0], "loadQuantities");
3283
3284 RECORD_TIMER_STOP(m_timers[Timers::IO]);
3285}
3286
3288template <MInt nDim>
3290 MInt localRootGlobalDomain = (solver->domainId() == 0) ? globalDomainId() : 0;
3291 MPI_Allreduce(MPI_IN_PLACE, &localRootGlobalDomain, 1, type_traits<MInt>::mpiType(), MPI_SUM, gridb().mpiComm(), AT_,
3292 "MPI_IN_PLACE", "localRootGlobalDomain");
3293 return localRootGlobalDomain;
3294}
3295
3296/* brief: Cast adaptation interval to multiple to of timestep on coarsest level
3297 details: Needed for mesh adaptation with LB in case of Dupuis initialization of
3298 newly created cells.
3299 author: Philipp Brokof, Moritz Waldmann
3300*/
3301template <MInt nDim>
3303 MInt maxUniformRefinementLevel) {
3304 this->m_adaptationInterval = this->m_adaptationInterval / IPOW2(maxLevel - maxUniformRefinementLevel)
3305 * IPOW2(maxLevel - maxUniformRefinementLevel);
3306 this->m_adaptationStart = this->m_adaptationStart / IPOW2(maxLevel - maxUniformRefinementLevel)
3307 * IPOW2(maxLevel - maxUniformRefinementLevel);
3308 std::cout << "Set adaptationStart to: " << this->m_adaptationStart
3309 << " and adaptationInterval to: " << this->m_adaptationInterval << "\n";
3310}
3311
3312template <MInt nDim>
3314 // Reset timer records
3316
3317 m_dlbPreviousLocalRunTime = 0.0;
3318 m_dlbPreviousLocalIdleTime = 0.0;
3319
3320 // ... clear timings ...
3321 std::vector<MFloat>().swap(m_dlbTimings);
3322
3323 m_dlbLastResetTimeStep = globalTimeStep;
3324
3325 m_log << "Resetting DLB timers at timestep " << globalTimeStep << std::endl;
3326 cerr0 << "Resetting DLB timers at timestep " << globalTimeStep << std::endl;
3327}
3328
3329template <MInt nDim>
3331 m_log << "DLB: Timer statistics ";
3332 if(!status.empty()) m_log << status << " ";
3333 m_log << "at global time step " << globalTimeStep << std::endl;
3334
3335 // Accumulate timer records of all dlb timers
3336 MFloat localRunTime = 0.0;
3337 MFloat localIdleTime = 0.0;
3338 const MInt noDlbTimers = maia::dlb::g_dlbTimerController.noDlbTimers();
3339 for(MInt i = 0; i < noDlbTimers; i++) {
3340 const MFloat loadRecord = maia::dlb::g_dlbTimerController.returnLoadRecord(i, m_loadBalancingTimerMode);
3341 const MFloat idleRecord = maia::dlb::g_dlbTimerController.returnIdleRecord(i, m_loadBalancingTimerMode);
3342
3343 if(m_loadBalancingMode == 1 && !m_testDynamicLoadBalancing && (loadRecord < 0.0 || idleRecord < 0.0)) {
3344 TERMM(1, "Load/Idle record for dlb timer #" + std::to_string(i) + " is less than zero on global domain #"
3345 + std::to_string(domainId()));
3346 }
3347
3348 localRunTime += loadRecord;
3349 localIdleTime += idleRecord;
3350 }
3351
3352 const MInt noTimeSteps = globalTimeStep - m_dlbLastResetTimeStep;
3353
3354 MFloat timePerStep = (localRunTime + localIdleTime) / noTimeSteps;
3355 MFloat maxRunTime = localRunTime / noTimeSteps;
3356 const MFloat localTimePerStep = timePerStep;
3357
3358 // Communicate and take maximum value -> assure same value on all domains
3359 MPI_Allreduce(MPI_IN_PLACE, &timePerStep, 1, MPI_DOUBLE, MPI_MAX, gridb().mpiComm(), AT_, "MPI_IN_PLACE",
3360 "timePerStep");
3361 MPI_Allreduce(MPI_IN_PLACE, &maxRunTime, 1, MPI_DOUBLE, MPI_MAX, gridb().mpiComm(), AT_, "MPI_IN_PLACE",
3362 "maxRunTime");
3363
3364 MFloat maxIdleTime = localIdleTime / noTimeSteps;
3365 MFloat minIdleTime = localIdleTime / noTimeSteps;
3366 MPI_Allreduce(MPI_IN_PLACE, &maxIdleTime, 1, MPI_DOUBLE, MPI_MAX, gridb().mpiComm(), AT_, "MPI_IN_PLACE",
3367 "maxIdleTime");
3368 MPI_Allreduce(MPI_IN_PLACE, &minIdleTime, 1, MPI_DOUBLE, MPI_MIN, gridb().mpiComm(), AT_, "MPI_IN_PLACE",
3369 "minIdleTime");
3370
3371
3372 m_log << globalTimeStep << " Average time per step: " << timePerStep << " ; local: " << localTimePerStep
3373 << ", idle/comp = " << localIdleTime / localRunTime
3374 << ", idle/timePerStep = " << localIdleTime / (noTimeSteps * timePerStep) << std::endl;
3375 m_log << globalTimeStep << " Relative idle time: max = " << maxIdleTime / timePerStep << ", min "
3376 << minIdleTime / timePerStep << std::endl;
3377 m_log << globalTimeStep << " maxRunTime " << maxRunTime << std::endl;
3378}
3379
3380template <MInt nDim>
3382 TRACE();
3383 MBool dlbTimeStep = ((globalTimeStep - m_loadBalancingOffset) % m_loadBalancingInterval == 0)
3384 && (m_loadBalancingStopTimeStep < 0 || globalTimeStep < m_loadBalancingStopTimeStep);
3385 if(m_balanceAfterAdaptation) {
3386 dlbTimeStep = dlbTimeStep
3387 || (((globalTimeStep - m_lastAdaptationTimeStep) == m_loadBalancingOffset)
3388 && (m_nAdaptationsSinceBalance >= m_balanceAdaptationInterval || m_dlbStep < 2)
3389 && (m_loadBalancingStopTimeStep < 0 || globalTimeStep < m_loadBalancingStopTimeStep));
3390
3391 if((m_syncTimerSteps && ((globalTimeStep - m_lastAdaptationTimeStep) < m_loadBalancingOffset)) || m_syncTimeSteps) {
3392 cerr0 << "Applying Barrier for correct timer!" << std::endl;
3393 if(m_syncTimeSteps && !((globalTimeStep - m_lastAdaptationTimeStep) < m_loadBalancingOffset)) {
3394 solver(0).startLoadTimer(AT_);
3395 }
3396 MPI_Barrier(gridb().mpiComm(), AT_);
3397 if(m_syncTimeSteps && !((globalTimeStep - m_lastAdaptationTimeStep) < m_loadBalancingOffset)) {
3398 solver(0).stopLoadTimer(AT_);
3399 }
3400 }
3401 }
3402
3403 return dlbTimeStep;
3404}
3405template <MInt nDim>
3407 TRACE();
3408
3409 return (m_adaptationInterval > 0 && globalTimeStep % m_adaptationInterval == 0 && globalTimeStep > m_adaptationStart);
3410}
3411
3412
3413} // namespace grid
3414} // namespace maia
3415
3416
3417#endif // ifndef GRIDCONTROLLER_H_
MLong allocatedBytes()
Return the number of allocated bytes.
Definition: alloc.cpp:121
void mAlloc(T *&a, const MLong N, const MString &objectName, MString function)
allocates memory for one-dimensional array 'a' of size N
Definition: alloc.h:173
MBool mDeallocate(T *&a)
deallocates the memory previously allocated for element 'a'
Definition: alloc.h:544
MInt noDomains() const
MString m_gridInputFileName
MInt maxNoCells() const
void savePartitionFile()
Save current grid partitioning to file.
typename Tree::Cell Cell
Definition: cartesiangrid.h:80
MInt domainId() const
Return the domainId (rank)
static MInt propertyLength(const MString &name, MInt solverId=m_noSolvers)
Returns the number of elements of a property.
Definition: context.cpp:538
static MBool propertyExists(const MString &name, MInt solver=m_noSolvers)
This function checks if a property exists in general.
Definition: context.cpp:494
virtual void setCellDataDlb(const MInt NotUsed(dataId), const MInt *const NotUsed(data))
Definition: coupling.h:117
virtual MInt cellDataTypeDlb(const MInt NotUsed(dataId)) const
Definition: coupling.h:103
virtual MInt noCellDataDlb() const
Methods to inquire coupler data during balancing.
Definition: coupling.h:102
virtual void getCellDataDlb(const MInt NotUsed(dataId), const MInt NotUsed(oldNoCells), const MInt *const NotUsed(bufferIdToCellId), MInt *const NotUsed(data))
Definition: coupling.h:105
MInt noDlbTimers() const
Return the number of DLB timers.
Definition: dlbtimer.h:484
MFloat returnLoadRecord(const MInt dlbTimerId, const MInt mode=0)
Return the load record of a DLB timer.
Definition: dlbtimer.h:464
void enableAllDlbTimers(const MBool *const wasEnabled=nullptr)
Enable all DLB timers (or those given by the array wasEnabled)
Definition: dlbtimer.h:265
void resetRecords()
Reset the records of all DLB timers.
Definition: dlbtimer.h:453
MFloat returnIdleRecord(const MInt dlbTimerId, const MInt mode=0)
Return the idle record of a DLB timer.
Definition: dlbtimer.h:474
void disableAllDlbTimers(MBool *const wasEnabled=nullptr)
Disable all (enabled) DLB timers.
Definition: dlbtimer.h:300
This class is a ScratchSpace.
Definition: scratch.h:758
void fill(T val)
fill the scratch with a given value
Definition: scratch.h:311
iterator end()
Definition: scratch.h:281
iterator begin()
Definition: scratch.h:273
Parent class of all solvers This class is the base for all solvers. I.e. all solver class (e....
Definition: solver.h:29
virtual void removeCell(const MInt)
Remove the given cell.
Definition: solver.h:359
virtual void setGlobalSolverVars(std::vector< MFloat > &NotUsed(globalFloatVars), std::vector< MInt > &NotUsed(globalIdVars))
Definition: solver.h:272
virtual MInt domainId() const
Return the domainId (rank)
Definition: solver.h:383
virtual void removeChilds(const MInt)
Coarsen the given cell.
Definition: solver.h:356
virtual void resizeGridMap()
Swap the given cells.
Definition: solver.h:371
virtual void getGlobalSolverVars(std::vector< MFloat > &NotUsed(globalFloatVars), std::vector< MInt > &NotUsed(globalIntVars))
Definition: solver.h:264
virtual void getCellDataDlb(const MInt NotUsed(dataId), const MInt NotUsed(oldNoCells), const MInt *const NotUsed(bufferIdToCellId), MInt *const NotUsed(data))
Definition: solver.h:241
virtual MInt cellDataTypeDlb(const MInt NotUsed(dataId)) const
Definition: solver.h:236
virtual MInt noCellDataDlb() const
Methods to inquire solver data information.
Definition: solver.h:235
virtual void setCellDataDlb(const MInt NotUsed(dataId), const MInt *const NotUsed(data))
Definition: solver.h:253
virtual void refineCell(const MInt)
Refine the given cell.
Definition: solver.h:353
virtual void swapProxy(const MInt, const MInt)
Swap the given cells.
Definition: solver.h:365
virtual MInt cellOutside(const MFloat *, const MInt, const MInt)
Check whether cell is outside the fluid domain.
Definition: solver.h:368
Grid controller manages adaptation and load balancing in multi-solver environment.
std::vector< MFloat > m_domainWeights
void writeRestartFile(const MBool, const MBool)
MInt solverLocalRootDomain(Solver *const solver)
Determine the global domain id of the solver local root domain.
std::vector< MInt > m_lastOffsetShiftDirection
std::vector< std::function< MInt(const MFloat *, const MInt, const MInt)> > cellOutsideVec()
void printDomainStatistics(const MString &status="")
Print statistics regarding the cell distribution among domains.
void logTimerStatistics(const MString &)
void setDataDlb(const MInt solverId, const MInt mode, std::vector< MInt * > &intDataRecv, std::vector< MLong * > &longDataRecv, std::vector< MFloat * > &floatDataRecv, std::vector< MInt > &dataTypes, const MBool freeMemory)
Set the solver/coupler data for load balancing.
Solver & solver(const MInt solverId)
void partition(MLong *partitionCellOffsets, MLong *globalIdOffsets, const MBool onlyPartitionOffsets)
Compute new domain decomposition.
void getSpecifiedSolverWeights(std::vector< MFloat > &weights)
Return the specified (or default) solver weights for all solvers.
const std::vector< std::function< void(const MInt)> > m_refineCellSolver
const std::vector< std::function< void(const MInt)> > m_removeCellSolver
MInt globalNoLoadTypes()
Return global number of load types of all solvers.
typename CartesianGrid< nDim >::Cell Cell
MBool adaptation(const MBool force=false)
performs mesh adaptation
void computeWeights(const MFloat *loads, const MFloat domainWeight, std::vector< MFloat > &weights)
Compute computational weights for different components in the simulation based on the current distrib...
std::array< MInt, Timers::_count > m_timers
const std::vector< std::unique_ptr< Solver > > * m_solvers
void initTimers()
Initialize timers.
void determineDataSizesDlb(const MInt solverId, const MInt mode, const MInt *const noCellsToSend, const MInt *const bufferIdToCellId, std::vector< std::vector< MInt > > &sendSizeVector, std::vector< std::vector< MInt > > &recvSizeVector)
Determine the data sizes for a solver/coupler that need to be communicated during load balancing.
const std::vector< std::function< void()> > m_resizeGridMapSolver
const std::vector< std::unique_ptr< Coupling > > * m_couplers
const Coupling & coupler(const MInt couplerId) const
void redistributeDataDlb(const MInt id, const MInt mode, std::vector< MInt > &sendSizeVector, std::vector< MInt > &recvSizeVector, const MInt *const bufferIdToCellId, const MInt noCells, std::vector< MInt * > &intDataRecv, std::vector< MLong * > &longDataRecv, std::vector< MFloat * > &floatDataRecv, std::vector< MInt > &dataTypes)
Communicate all solver data for load balancing according to the send/recv sizes.
std::vector< MFloat > m_dlbRunTimeAll
typename maia::grid::Proxy< nDim > GridProxy
void communicateGlobalSolverVars(Solver *const solver)
MBool needLoadBalancing(const MFloat localRunTime, MFloat *const loads, MFloat &imbalance)
Return if dynamic load balancing is needed and compute domain loads.
void loadBalancingCalcNewGlobalOffsets(const MLong *const oldPartitionCellOffsets, const MLong *const newPartitionCellOffsets, MLong *const globalOffsets)
Calculate new global domain offsets given the current and new global partition cell offsets.
void initDlbProperties()
Read Dynamic Load Balancing properties.
const std::vector< std::function< MInt(const MFloat *, const MInt, const MInt)> > m_cellOutside
MBool loadBalancingPartition(const MFloat *loads, const MFloat imbalance, MLong *const partitionCellOffsets, MLong *const globalIdOffsets)
Determine new partitioning for dynamic load balancing.
std::vector< std::function< void(const MInt)> > refineCellVec()
std::vector< MInt > m_dlbTimeStepsAll
std::vector< MFloat > m_dlbIdleTimeAll
std::vector< std::function< void()> > resizeGridMapVec()
std::vector< MInt > m_solverTimerGroups
MBool balance(const MBool force=false, const MBool finalTimeStep=false, const MBool adaptation=false)
void loadBalancingCalcNoCellsToSend(const MLong *const offsets, MInt *const noCellsToSendByDomain, MInt *const noCellsToReceiveByDomain, MInt *const sortedCellId, MInt *const bufferIdToCellId)
Based on new domain offsets calculate the number of cells to send/receive to/from each domain.
void storeLoadsAndWeights(const MFloat *const loads, const MInt noLoadTypes, const MInt *const loadQuantities, const MFloat domainWeight, const MFloat *const weights)
Store domain loads and additional information to file.
void accumulateCellWeights()
This function handels the setCellWeights-functionality of all solvers and writes the accumulated-cell...
void castAdaptationIntervalToMultipleOfCoarsestTimeStep(MInt maxLevel, MInt maxUniformRefinementLevel)
std::vector< std::function< void(const MInt)> > removeCellVec()
void updateWeightsAndWorkloads(const std::vector< MFloat > &weights, const MBool restoreDefaultWeights)
Determine the cell weights using the given weighting factors for the different solver load quantities...
Controller(Grid *grid_, std::vector< std::unique_ptr< Solver > > *solvers, std::vector< std::unique_ptr< Coupling > > *couplers)
std::array< MFloat, 5 > m_previousLoadInfo
const std::vector< std::function< void(const MInt)> > m_removeChildsSolver
void storeTimings()
Store timings of all timesteps for all domains for performance evaluations (i.e. runtime and idle tim...
void getLoadQuantities(MInt *const loadQuantities)
Return load quantities of all solvers on this domain.
std::vector< std::array< MInt, SolverTimers::_count > > m_solverTimers
std::vector< MFloat > m_dlbTimings
Coupling & coupler(const MInt couplerId)
MBool updateGridPartitionWorkloads()
Update the partition cell workloads in the grid file using user specified or default weights for the ...
std::vector< MInt > m_previousLoadBins
std::vector< std::function< void(const MInt)> > removeChildsVec()
const std::vector< std::function< void(const MInt, const MInt)> > m_swapProxySolver
const Solver & solver(const MInt solverId) const
std::vector< std::function< void(const MInt, const MInt)> > swapProxyVec()
void estimateParameters(MInt m, MInt n, const MFloat *const A, const MFloat *const b, MFloat *const x)
Solve the parameter estimation problem A*x=b.
MInt string2enum(MString theString)
This global function translates strings in their corresponding enum values (integer values)....
Definition: enums.cpp:20
@ DLB_PARTITION_DEFAULT
Definition: enums.h:369
@ DLB_PARTITION_WEIGHT
Definition: enums.h:370
@ MINT
Definition: enums.h:269
@ MFLOAT
Definition: enums.h:269
@ MLONG
Definition: enums.h:269
const MString const MString & message
Definition: functions.h:37
constexpr Real POW2(const Real x)
Definition: functions.h:119
MBool approx(const T &, const U &, const T)
Definition: functions.h:272
MFloat wallTime()
Definition: functions.h:80
MInt globalTimeStep
void printAllocatedMemory(const MLong oldAllocatedBytes, const MString &solverName, const MPI_Comm comm)
Prints currently allocated memory.
MInt globalNoDomains()
Return global number of domains.
MInt globalDomainId()
Return global domain id.
MInt g_timeSteps
MInt g_restartTimeStep
InfoOutFile m_log
std::ostream cerr0
constexpr MLong IPOW2(MInt x)
int32_t MInt
Definition: maiatypes.h:62
uint32_t MUint
Definition: maiatypes.h:63
std::basic_string< char > MString
Definition: maiatypes.h:55
MInt noSolvers
Definition: maiatypes.h:73
double MFloat
Definition: maiatypes.h:52
MInt * solvers
Definition: maiatypes.h:72
int64_t MLong
Definition: maiatypes.h:64
bool MBool
Definition: maiatypes.h:58
char MChar
Definition: maiatypes.h:56
MInt id
Definition: maiatypes.h:71
int MPI_Barrier(MPI_Comm comm, const MString &name)
same as MPI_Barrier
int MPI_Allreduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, const MString &name, const MString &sndvarname, const MString &rcvvarname)
same as MPI_Allreduce
int MPI_Alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm, const MString &name, const MString &sndvarname, const MString &rcvvarname)
same as MPI_Alltoall
int MPI_Bcast(void *buffer, int count, MPI_Datatype datatype, int root, MPI_Comm comm, const MString &name, const MString &varname)
same as MPI_Bcast
int MPI_Allgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm, const MString &name, const MString &sndvarname, const MString &rcvvarname)
same as MPI_Allgather
DlbTimerController g_dlbTimerController
void partitionParallelSplit(const WeightType *const localWeights, const IdType noLocalWeights, const IdType localWeightOffset, const IdType noDomains, const IdType domainId, const MPI_Comm mpiComm, IdType *const offsets)
Definition: partition.h:325
void exchangeData(const MInt noNghbrDomains, const MInt *const nghbrDomains, const MInt *const noHaloCells, const MInt **const, const MInt *const noWindowCells, const MInt **const windowCells, const MPI_Comm comm, const U *const data, U *const haloBuffer, const MInt noDat=1)
Generic exchange of data.
Definition: mpiexchange.h:295
Namespace for auxiliary functions/classes.
PARALLELIO_DEFAULT_BACKEND ParallelIo
Definition: parallelio.h:292
MFloat dist(const Point< DIM > &p, const Point< DIM > &q)
Definition: pointbox.h:54
void logDurations(std::vector< std::pair< MFloat, MString > > &durations, const MString module, const MPI_Comm comm, const MInt domainId, const MInt noDomains)
Output the min/max/average durations of provided timed code sections over the ranks in a communicator...
Definition: timer.cpp:182