9#ifdef MAIA_GCC_COMPILER
10#pragma GCC diagnostic push
11#pragma GCC diagnostic ignored "-Wduplicated-branches"
12#pragma GCC diagnostic ignored "-Wfloat-equal"
13#pragma GCC diagnostic ignored "-Wuseless-cast"
14#pragma GCC diagnostic ignored "-Wdeprecated-copy-dtor"
15#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
16#pragma GCC diagnostic ignored "-Wunused-result"
18#ifdef MAIA_CLANG_COMPILER
19#pragma clang diagnostic push
20#pragma clang diagnostic ignored "-Wc99-extensions"
21#pragma clang diagnostic ignored "-Wextra-semi-stmt"
22#pragma clang diagnostic ignored "-Wfloat-equal"
24#ifdef MAIA_GCC_COMPILER
25#pragma GCC diagnostic pop
27#ifdef MAIA_CLANG_COMPILER
28#pragma clang diagnostic pop
41 std::vector<MFloat>& weights) {
46 const MInt noLoadTypes = globalNoLoadTypes();
49 "There are no load quantities present in the solver(s), but needed to determine a new "
50 "partition for dynamic load balancing!");
52 weights.resize(noLoadTypes);
53 std::fill(weights.begin(), weights.end(), 1.0);
57 MFloatScratchSpace globalLoadQuantities(noDomains(), noLoadTypes, AT_,
"globalLoadQuantities");
60 getLoadQuantities(&loadQuantities[0]);
63 for(
MInt i = 0; i < noLoadTypes; i++) {
64 loadQuantitiesScaled[i] = domainWeight * loadQuantities[i];
70 "globalLoadQuantities[0]");
74 estimateParameters(noDomains(), noLoadTypes, &globalLoadQuantities[0], &loads[0], &weights[0]);
76 if(!m_testDynamicLoadBalancing) {
79 solver(i).limitWeights(&weights[offset]);
80 offset += solver(i).noLoadTypes();
83 const MFloat norm = std::accumulate(&weights[0], &weights[0] + noLoadTypes, 0.0);
84 for(
MInt i = 0; i < noLoadTypes; i++) {
96 m_log <<
" * computed weights:";
99 const MInt solverCount = solver(i).noLoadTypes();
100 std::vector<MString> names(solverCount);
101 std::vector<MFloat> weightsTmp(solverCount);
103 solver(i).getDefaultWeights(&weightsTmp[0], names);
104 for(
MInt j = 0; j < solverCount; j++) {
105 m_log <<
" s" << i <<
"_" << names[j] <<
":" << count;
110 m_log << std::endl <<
" * computed weights:";
111 for(
MInt i = 0; i < noLoadTypes; i++) {
112 TERMM_IF_COND(std::isnan(weights[i]),
"computed weight is NaN");
113 m_log <<
" " << std::scientific << weights[i];
119 if(m_dlbStaticWeights !=
nullptr && m_dlbStaticWeightMode > -1) {
120 if(m_dlbStaticWeightMode == 0
121 || (m_dlbStaticWeightMode > 0 && m_dlbStep < m_dlbStaticWeightMode && m_dlbStaticWeightMode != 98)
122 || (m_dlbStaticWeightMode == 98 && m_dlbStep % 2 == 0)) {
123 m_log <<
"Using static weights at dlb-step " << m_dlbStep <<
" :" << std::endl;
124 for(
MInt i = 0; i < noLoadTypes; i++) {
125 weights[i] = m_dlbStaticWeights[i];
126 m_log <<
" " << std::scientific << weights[i];
130 if(m_dlbStaticWeightMode > 0 && m_dlbStaticWeightMode < 90 && m_dlbStep == m_dlbStaticWeightMode) {
131 m_dlbStaticWeightMode = -1;
132 m_log <<
"Switching to computed weights from now on!" << std::endl;
138 if(m_dlbLastWeights ==
nullptr) {
139 mAlloc(m_dlbLastWeights, noLoadTypes,
"dlbLastWeights", AT_);
141 for(
MInt i = 0; i < noLoadTypes; i++) {
142 m_dlbLastWeights[i] = weights[i];
164 if(m_testDynamicLoadBalancing) {
166 std::cerr <<
"estimateParameters: n = " << n <<
", setting weights to 1.0" << std::endl;
167 std::fill_n(x, n, 1.0);
168 if(n > 1 && m_dlbStep % 2 == 0) {
170 std::vector<MFloat> weights;
171 getSpecifiedSolverWeights(weights);
172 std::copy(weights.begin(), weights.end(), &x[0]);
173 std::cerr <<
"estimateParameters: using specified solver weights" << std::endl;
175 std::cerr <<
"estimateParameters: n = " << n <<
", setting w[n-1] = 5.0" << std::endl;
191 const MInt maxNoIt = 30;
195 for(
MInt j = 0; j < n; j++) {
197 for(
MInt i = 0; i < m; i++) {
198 sum_j[j] += A[i * n + j];
201 m_log <<
" * estimateParameters regularize " << j <<
" proportional to " << sum_j[j] << std::endl;
209 while(it < maxNoIt) {
216 std::copy_n(&A[0], m * n, &A_work[0]);
217 std::fill_n(&A_work[m * n], n * n, 0.0);
220 for(
MInt i = 0; i < n; i++) {
222 const MFloat reg = (sum_j[i] > 0) ? alpha * sum_j[i] / noDomains() : 1.0;
223 A_work[m * n + i * (n + 1)] = reg;
227 std::copy_n(&
b[0], m, &b_work[0]);
228 std::fill_n(&b_work[m], n, 0.0);
240 const MFloat cond_2 = singularValues[0] / singularValues[std::min(m, n) - 1];
248 for(
MInt i = 0; i < m; i++) {
250 for(
MInt j = 0; j < n; j++) {
251 mxv[i] += A[i * n + j] * x[j];
253 residual +=
POW2(mxv[i] -
b[i]);
254 maxDiff = std::max(maxDiff, mxv[i] -
b[i]);
258 m_log <<
" * estimateParameters iteration " << it <<
":";
259 for(
MInt i = 0; i < n; i++) {
260 m_log << std::scientific <<
" " << x[i];
262 m_log <<
"; residual " << residual <<
"; maxDiff " << maxDiff << std::endl;
271 if(it == 0 || residual <= oldResidual) {
275 oldResidual = residual;
276 std::copy_n(&x[0], n, &oldParam[0]);
280 m_log <<
" * estimateParameters max iterations reached " << it <<
", alpha " << alpha
281 <<
", condition number " << cond_2 << std::endl;
283 m_log <<
" * estimateParameters increase alpha to " << alpha <<
", condition number " << cond_2
284 <<
", residual " << residual << std::endl;
289 std::copy_n(&oldParam[0], n, &x[0]);
292 m_log <<
" * estimateParameters found solution! iteration " << it <<
", condition number " << cond_2
293 <<
", residual " << residual <<
", previous " << oldResidual <<
", maxDiff " << maxDiff;
294 for(
MInt i = 0; i < n; i++) {
295 m_log << std::scientific <<
" " << x[i];
305 for(
MInt i = 0; i < n; i++) {
306 if(x[i] < 0.0 || std::isnan(x[i])) {
312 const MFloat norm = std::accumulate(&x[0], &x[0] + n, 0.0);
313 for(
MInt i = 0; i < n; i++) {
346 MLong*
const partitionCellOffsets,
347 MLong*
const globalIdOffsets) {
350 using namespace maia;
352 m_log <<
" * determine new partition" << std::endl;
354 MLongScratchSpace localPartitionCellCounts(noDomains(), AT_,
"localPartitionCellCounts");
355 MLongScratchSpace localPartitionCellOffsets(noDomains() + 1, AT_,
"localPartitionCellOffsets");
357 gridb().determineNoPartitionCellsAndOffsets(&localPartitionCellCounts[0], &localPartitionCellOffsets[0]);
359 const MInt noLocalPartitionCells = localPartitionCellCounts[domainId()];
362 MInt partitionMethod = m_dlbPartitionMethod;
365 switch(m_dlbPartitionMethod) {
369 const MInt noWeightPartitionSteps = 3;
370 if(m_dlbStep < noWeightPartitionSteps) {
383 TERMM(1,
"Unknown DLB partition method.");
389 m_domainWeights.assign(noDomains(), 1.0);
390 m_lastOffsetShiftDirection.assign(noDomains() + 1, 0);
397 switch(partitionMethod) {
399 m_log <<
"Partition method #0 (compute weights, determine min cells workload and use the "
400 "partition() algorithm)"
404 std::vector<MFloat> weights;
405 computeWeights(loads, 1.0, weights);
407 updateWeightsAndWorkloads(weights,
false);
411 partition(&partitionCellOffsets[0], &globalIdOffsets[0],
true);
416 for(
MInt i = 0; i < noDomains() + 1; i++) {
417 partitionCellOffsets[i] = localPartitionCellOffsets[i];
419 if(domainId() == 0) {
420 std::cerr <<
"Using original partition!" << std::endl;
425 m_log <<
"Partition method #3 (compute weights and domain weights iteratively, determine "
426 "summed load error at each domain offset and vary min cell offset such that "
427 "predicted error is zero)"
430 std::vector<MFloat> weights;
431 std::vector<MFloat> oldWeights;
432 std::vector<MFloat> oldDomainWeights;
433 oldDomainWeights = m_domainWeights;
434 std::fill(oldDomainWeights.begin(), oldDomainWeights.end(),
438 MFloat averageWorkLoad = -1.0;
440 MFloatScratchSpace localPartitionCellsWorkload(noLocalPartitionCells, AT_,
"localPartitionCellsWorkload");
441 MFloatScratchSpace partitionCellsWorkload(gridb().m_noPartitionCellsGlobal, AT_,
"partitionCellsWorkload");
443 const MInt maxNoIt = 20;
445 for(
MInt it = 0; it < maxNoIt; it++) {
447 computeWeights(loads, m_domainWeights[domainId()], weights);
450 updateWeightsAndWorkloads(weights,
false);
453 for(
MInt i = 0; i < noLocalPartitionCells; i++) {
454 const MLong globalPartitionCellId = gridb().m_localPartitionCellGlobalIds[i];
455 const MInt partitionCellId = gridb().globalIdToLocalId(globalPartitionCellId,
true);
456 TERMM_IF_NOT_COND(gridb().a_hasProperty(partitionCellId, Cell::IsPartitionCell),
457 "Error: cell is not a partition cell.");
459 const MFloat workload = gridb().a_workload(partitionCellId);
460 TERMM_IF_NOT_COND(workload > 0.0,
"Error: partition cell workload needs to be > 0.0");
461 localPartitionCellsWorkload[i] = workload;
465 MIntScratchSpace localPartitionCellCounts_(noDomains(), AT_,
"localPartitionCellCounts_");
466 MIntScratchSpace localPartitionCellOffsets_(noDomains() + 1, AT_,
"localPartitionCellOffsets_");
467 for(
MInt i = 0; i < noDomains(); i++) {
468 localPartitionCellCounts_[i] = (
MInt)localPartitionCellCounts[i];
469 localPartitionCellOffsets_[i] = (
MInt)localPartitionCellOffsets[i];
471 localPartitionCellOffsets_[noDomains()] = localPartitionCellOffsets[noDomains()];
475 &partitionCellsWorkload[0], &localPartitionCellCounts_[0], &localPartitionCellOffsets_[0],
477 "partitionCellsWorkload[0]");
479 if(domainId() == 0) {
481 const MFloat maxPartitionWorkload =
482 *std::max_element(partitionCellsWorkload.
begin(), partitionCellsWorkload.
end());
483 const MFloat minPartitionWorkload =
484 *std::min_element(partitionCellsWorkload.
begin(), partitionCellsWorkload.
end());
485 const MFloat avgPartitionWorkload =
486 std::accumulate(partitionCellsWorkload.
begin(), partitionCellsWorkload.
end(), 0.0)
487 /
static_cast<MFloat>(gridb().m_noPartitionCellsGlobal);
488 m_log <<
" * Maximum/minimum/average partition cell workload: " << maxPartitionWorkload <<
", "
489 << minPartitionWorkload <<
", " << avgPartitionWorkload << std::endl;
490 if(minPartitionWorkload <= 0) {
491 m_log <<
globalTimeStep <<
" ERROR: minimum partition cell workload is " << minPartitionWorkload
493 cerr0 <<
globalTimeStep <<
" ERROR: minimum partition cell workload is " << minPartitionWorkload
499 for(
MInt i = 0; i < noDomains(); i++) {
500 const MInt offset = localPartitionCellOffsets[i];
501 const MInt count = localPartitionCellCounts[i];
503 std::accumulate(&partitionCellsWorkload[offset], &partitionCellsWorkload[offset] + count, 0.0);
506 averageWorkLoad = std::accumulate(domainWorkLoad.
begin(), domainWorkLoad.
end(), 0.0) / noDomains();
508 m_log <<
" * Average domain workload: " << averageWorkLoad << std::endl;
511 for(
MInt i = 0; i < noDomains(); i++) {
512 m_domainWeights[i] = (m_useDomainFactor) ? loads[i] * averageWorkLoad / domainWorkLoad[i] : 1.0;
515 const MFloat domainWeightMinLoadThreshold = 0.2;
517 MFloat averageDomainWeight = 0.0;
518 MInt countDomainWeights = 0;
519 for(
MInt i = 0; i < noDomains(); i++) {
520 if(loads[i] >= domainWeightMinLoadThreshold) {
521 averageDomainWeight += m_domainWeights[i];
522 countDomainWeights++;
525 averageDomainWeight /= (
MFloat)countDomainWeights;
527 for(
MInt i = 0; i < noDomains(); i++) {
529 if(loads[i] < domainWeightMinLoadThreshold) {
530 m_domainWeights[i] = 1.0;
533 m_domainWeights[i] /= averageDomainWeight;
536 m_domainWeights[i] = 0.5 * (m_domainWeights[i] + oldDomainWeights[i]);
540 const MFloat maxDomainWeight = *std::max_element(m_domainWeights.begin(), m_domainWeights.end());
541 const MFloat minDomainWeight = *std::min_element(m_domainWeights.begin(), m_domainWeights.end());
542 m_log <<
" * Domain weights: max = " << maxDomainWeight <<
", min = " << minDomainWeight << std::endl;
553 "m_domainWeights[0]");
557 const MInt noWeights = weights.size();
558 MFloat weightResidual = 0.0;
559 for(
MInt i = 0; i < noWeights; i++) {
560 weightResidual +=
ABS(weights[i] - oldWeights[i]);
562 weightResidual /= noWeights;
564 MFloat domainWeightResidual = 0.0;
565 for(
MInt i = 0; i < noDomains(); i++) {
566 domainWeightResidual +=
ABS(m_domainWeights[i] - oldDomainWeights[i]);
568 domainWeightResidual /= noDomains();
570 const MFloat weightResThreshold = 1e-3;
571 const MFloat domainWeightResThreshold = 1e-2;
572 if(weightResidual < weightResThreshold && domainWeightResidual < domainWeightResThreshold) {
573 m_log <<
" * weights and domain weights converged, residuals: " << weightResidual <<
", "
574 << domainWeightResidual << std::endl;
576 }
else if(it == maxNoIt - 1) {
577 m_log <<
" * WARNING: Weights and domain weights not converged, but "
578 "maximum number of iterations reached. "
579 << weightResidual <<
" " << domainWeightResidual << std::endl;
581 m_log <<
" * Weights and domain weights iteration " << it <<
" " << weightResidual <<
" "
582 << domainWeightResidual << std::endl;
585 oldWeights = weights;
586 oldDomainWeights = m_domainWeights;
590 const MInt noLoadTypes = globalNoLoadTypes();
592 getLoadQuantities(&loadQuantities[0]);
593 storeLoadsAndWeights(&loads[0], noLoadTypes, &loadQuantities[0], m_domainWeights[domainId()], &weights[0]);
598 if(domainId() == 0) {
605 for(
MInt i = 0; i < noDomains(); i++) {
606 errors[i] = loads[i] - 1.0;
609 summedErrors[0] = 0.0;
610 summedErrAbs[0] = 0.0;
612 for(
MInt i = 1; i < noDomains() + 1; i++) {
613 summedErrors[i] = summedErrors[i - 1] + errors[i - 1];
614 summedErrAbs[i] =
ABS(summedErrors[i]);
615 summedErrDir[i] = -
signum(summedErrors[i]);
618 const MFloat maxSummedErr = *std::max_element(summedErrors.
begin(), summedErrors.
end());
619 const MFloat minSummedErr = *std::min_element(summedErrors.
begin(), summedErrors.
end());
620 m_log <<
globalTimeStep <<
" * Summed load deviations: max = " << maxSummedErr <<
", min = " << minSummedErr
624 const MFloat errorThreshold = m_dlbImbalanceThreshold;
628 const MFloat restrictionFactor = 1.0 - std::min(0.5, 0.005 * noDomains());
630 MFloat loadVariance = 0.0;
631 for(
MInt i = 0; i < noDomains(); i++) {
632 loadVariance +=
POW2(loads[i] - 1.0);
634 loadVariance /= noDomains();
635 const MFloat loadStd = std::sqrt(loadVariance);
636 m_log <<
globalTimeStep <<
" * load distribution: variance = " << loadVariance <<
", stdev = " << loadStd
639 const MFloat imbalancePenaltyThreshold = 2.0 * errorThreshold;
640 const MFloat imbPenaltyFactor =
641 (imbalance < imbalancePenaltyThreshold)
642 ? (1.0 + (imbalancePenaltyThreshold - imbalance) / imbalancePenaltyThreshold)
644 m_log <<
globalTimeStep <<
" * Imbalance penalty factor " << imbPenaltyFactor <<
" (imbalance " << imbalance
645 <<
"); restriction factor " << restrictionFactor << std::endl;
648 const MBool finalLocalShift =
649 m_dlbNoFinalLocalShifts > 0
650 &&
globalTimeStep >= m_loadBalancingStopTimeStep - m_loadBalancingInterval * (m_dlbNoFinalLocalShifts + 1);
652 const MBool intermediateLocalShift =
653 !finalLocalShift && (m_dlbNoLocalShifts > 0 && (m_dlbStep % (1 + m_dlbNoLocalShifts) != 1));
655 const MBool globalShiftStep = !finalLocalShift && !intermediateLocalShift;
657 m_log <<
globalTimeStep <<
" * Shift step: global " << globalShiftStep <<
"; intermediate local "
658 << intermediateLocalShift <<
"; final local " << finalLocalShift << std::endl;
662 globalShiftFlag.
fill(0);
665 minGlobalDist.
fill(noDomains());
669 globalShiftFlagTmp.
fill(0);
671 MInt noGlobalShifts = 0;
674 for(
MInt i = 1; i < noDomains(); i++) {
676 const MInt firstOffset = std::max(0, i -
dist);
677 const MInt lastOffset = std::min(noDomains(), i +
dist);
679 const MFloat summedErrorDiff =
ABS(summedErrors[lastOffset] - summedErrors[firstOffset]);
680 const MBool sameSign = (summedErrDir[lastOffset] == summedErrDir[firstOffset]);
681 const MBool absErrCondition = (summedErrAbs[i] > errorThreshold);
683 const MFloat smoothShiftThreshold = (lastOffset - firstOffset) * errorThreshold;
684 const MBool smoothShiftCondition =
685 (absErrCondition && (sameSign && summedErrorDiff > smoothShiftThreshold));
687 MBool globalShiftCondition =
false;
688 if(globalShiftStep) {
689 globalShiftCondition = (m_dlbSmoothGlobalShifts) ? smoothShiftCondition : absErrCondition;
692 globalShiftFlagTmp[i] = (globalShiftCondition) ? summedErrDir[i] : 0;
693 if(globalShiftFlagTmp[i] != 0) {
697 <<
" tr=" << smoothShiftThreshold <<
" s_first=" << summedErrors[firstOffset]
698 <<
" s_last=" << summedErrors[lastOffset] <<
" s_abs=" << summedErrAbs[i] << std::endl;
709 for(
MInt i = 1; i < noDomains(); i++) {
711 if(globalShiftFlagTmp[i] != 0) {
712 minGlobalDist[i] = 0;
713 globalShiftFlag[i] = globalShiftFlagTmp[i];
716 const MInt dist = std::ceil(summedErrAbs[i] / errorThreshold);
719 << summedErrAbs[i] << std::endl;
723 for(
MInt nId = i - 1; nId >= std::max(i -
dist, 1); nId--) {
724 const MInt distance =
ABS(nId - i);
729 if(summedErrDir[nId] != summedErrDir[i] || errorThreshold * distance > summedErrAbs[nId]) {
733 minGlobalDist[nId] = std::min(minGlobalDist[nId], distance);
734 globalShiftFlag[nId] = summedErrDir[nId];
737 for(
MInt nId = i + 1; nId <= std::min(i +
dist, noDomains() - 1); nId++) {
738 const MInt distance =
ABS(nId - i);
740 if(summedErrDir[nId] != summedErrDir[i] || errorThreshold * distance > summedErrAbs[nId]) {
744 minGlobalDist[nId] = std::min(minGlobalDist[nId], distance);
745 globalShiftFlag[nId] = summedErrDir[nId];
750 MInt noGlobalShiftsTotal = 0;
751 for(
MInt i = 1; i < noDomains(); i++) {
752 if(globalShiftFlag[i] != 0) {
753 noGlobalShiftsTotal++;
756 m_log <<
globalTimeStep <<
" * number of total global shifts " << noGlobalShiftsTotal << std::endl;
761 MInt noShiftsWithoutChange = 0;
768 shiftedWorkload.
fill(0.0);
770 partitionCellOffsets[0] = 0;
772 for(
MInt offsetId = 1; offsetId < noDomains(); offsetId++) {
773 MFloat summedWorkload = 0.0;
775 for(
MInt i = partitionCellOffsets[offsetId - 1]; i < localPartitionCellOffsets[offsetId]; i++) {
776 summedWorkload += (m_useDomainFactor) ? m_domainWeights[offsetId - 1] * partitionCellsWorkload[i]
777 : partitionCellsWorkload[i];
781 const MBool globalShiftCondition = (globalShiftFlag[offsetId] != 0);
782 MBool localShiftCondition =
false;
783 MInt localShiftDir = 0;
784 MFloat localShiftDiff = 0.0;
788 if(!globalShiftCondition && globalShiftFlag[offsetId - 1] == 0 && globalShiftFlag[offsetId + 1] == 0) {
789 const MFloat errLeft = errors[offsetId - 1];
790 const MFloat errRight = errors[offsetId];
791 localShiftDiff = 0.5 * (errLeft - errRight);
793 if(errLeft > errorThreshold && errRight < errLeft) {
796 }
else if(errRight > errorThreshold && errRight > errLeft) {
799 }
else if(errLeft < -errorThreshold && errLeft < errRight) {
802 }
else if(errRight < -errorThreshold && errRight < errLeft) {
806 localShiftCondition = (localShiftDir != 0);
809 const MBool shift = globalShiftCondition || localShiftCondition;
813 <<
" g=" << globalShiftCondition <<
" l=" << localShiftCondition <<
" err=" << summedErrors[offsetId]
814 <<
" minDist=" << minGlobalDist[offsetId] <<
" lDir=" << localShiftDir <<
" diff=" << localShiftDiff
815 <<
" eL=" << errors[offsetId - 1] <<
" eR=" << errors[offsetId] << std::endl;
820 const MInt dir = (globalShiftCondition) ? summedErrDir[offsetId] : localShiftDir;
821 MFloat penaltyFactor = 1.25;
823 const MInt lastDir = (globalShiftCondition) ? m_lastOffsetShiftDirection[offsetId] : 0;
825 if(lastDir != dir && lastDir != 0) {
828 m_log <<
"Opposite shift " << offsetId <<
", lastDir " << lastDir <<
", dir " << dir << std::endl;
831 penaltyFactor *= imbPenaltyFactor;
835 const MInt domainOld = (dir < 0) ? offsetId - 1 : offsetId;
836 const MInt domainNew = (dir < 0) ? offsetId : offsetId - 1;
842 const MFloat summedErrorTmp =
843 restrictionFactor * (summedErrors[offsetId] + dir * errorThreshold * minGlobalDist[offsetId]);
845 const MFloat summedError = summedErrorTmp;
846 MFloat oldEstimate = (globalShiftCondition) ? summedError : localShiftDiff;
847 const MFloat initialEstimate = oldEstimate;
852 MInt partitionCellDomain = domainOld;
855 m_log <<
"DLB_DEBUG: Start offset shift " << offsetId <<
", domainOld " << domainOld <<
", domainNew "
856 << domainNew <<
", oldEstimate " << oldEstimate <<
", summedError " << summedErrors[offsetId]
857 <<
", penaltyFactor " << penaltyFactor << std::endl;
862 for(
MInt i = 1;; i++) {
865 const MInt newPartitionCellId = localPartitionCellOffsets[offsetId] + dir * i;
868 if(dir > 0 && newPartitionCellId == localPartitionCellOffsets[partitionCellDomain + 1]) {
870 partitionCellDomain++;
871 }
else if(dir < 0 && newPartitionCellId == localPartitionCellOffsets[partitionCellDomain] - 1) {
873 partitionCellDomain--;
876 const MBool domainFactorShift =
true;
878 const MFloat domainFactor = (m_useDomainFactor && domainFactorShift)
879 ? (m_domainWeights[domainNew] / m_domainWeights[partitionCellDomain])
883 const MFloat estimate = oldEstimate
884 + dir * penaltyFactor * penalty * domainFactor * loads[partitionCellDomain]
885 * partitionCellsWorkload[newPartitionCellId]
886 / domainWorkLoad[partitionCellDomain];
887 const MFloat diff = estimate - oldEstimate;
891 if(newPartitionCellId <= partitionCellOffsets[offsetId - 1]) {
894 const MFloat workloadIncrement =
895 (m_useDomainFactor) ? m_domainWeights[offsetId - 1] * partitionCellsWorkload[newPartitionCellId]
896 : partitionCellsWorkload[newPartitionCellId];
897 const MFloat summedWorkloadNew = summedWorkload + dir * workloadIncrement;
898 const MFloat summedWorkloadRel = summedWorkload / averageWorkLoad;
903 m_log <<
"DLB_DEBUG: offset " << offsetId <<
", partCellId " << newPartitionCellId
904 <<
", partCellDomain " << partitionCellDomain <<
", domainFactor " << domainFactor
905 <<
", oldEstimate " << oldEstimate <<
", estimate " << estimate <<
", diff " << diff
906 <<
", penalty " << penalty <<
", partCellWorkload " << partitionCellsWorkload[newPartitionCellId]
907 <<
", domainWorkload " << domainWorkLoad[partitionCellDomain] <<
", load "
908 << loads[partitionCellDomain] << std::endl;
915 const MBool prevOffsetReached = (dir < 0 && newPartitionCellId == partitionCellOffsets[offsetId - 1]);
916 const MBool lastCellReached = (newPartitionCellId == gridb().m_noPartitionCellsGlobal - 1);
919 const MBool workloadCheck = (m_dlbMaxWorkloadLimit > 1.0 && globalShiftCondition)
920 ? (summedWorkloadRel < m_dlbMaxWorkloadLimit)
923 if(prevOffsetReached || lastCellReached || (
ABS(estimate) >=
ABS(oldEstimate) && workloadCheck)
924 || (dir == 1 && !workloadCheck)) {
926 partitionCellOffsets[offsetId] = localPartitionCellOffsets[offsetId] + dir * (i - 1);
930 << localPartitionCellOffsets[offsetId] <<
" " << partitionCellOffsets[offsetId] <<
" "
931 << summedErrors[offsetId] <<
" " << estimate <<
" " << oldEstimate <<
" " << penalty <<
" "
932 << summedWorkloadRel << std::endl;
934 shifts[offsetId] = dir * (i - 1);
937 m_lastOffsetShiftDirection[offsetId] = (i > 1 && globalShiftCondition) ? dir : 0;
941 if(partitionCellOffsets[offsetId] <= partitionCellOffsets[offsetId - 1]) {
942 partitionCellOffsets[offsetId] = partitionCellOffsets[offsetId - 1] + 1;
943 shifts[offsetId] = partitionCellOffsets[offsetId] - localPartitionCellOffsets[offsetId];
944 m_log <<
"WARNING: setting domain offset #" << offsetId
945 <<
" with just a single partition cell on this domain. " << partitionCellOffsets[offsetId - 1]
946 <<
" " << partitionCellOffsets[offsetId] << std::endl;
947 }
else if(partitionCellOffsets[offsetId] == partitionCellOffsets[offsetId - 1] + 1) {
948 m_log <<
"WARNING: domain #" << offsetId - 1 <<
" has just a single partition cell. "
949 << partitionCellOffsets[offsetId - 1] <<
" " << partitionCellOffsets[offsetId] << std::endl;
952 if(shifts[offsetId] != 0) {
955 noShiftsWithoutChange++;
962 oldEstimate = estimate;
964 summedWorkload = summedWorkloadNew;
967 maxDiff = std::max(maxDiff,
ABS(diff));
968 maxWeight = std::max(maxWeight, partitionCellsWorkload[newPartitionCellId]);
970 shiftedWorkload[offsetId] += dir * partitionCellsWorkload[newPartitionCellId];
975 const MFloat s = initialEstimate;
977 penalty = 1.0 / cos(M_PI / 2.0 * x);
982 partitionCellOffsets[offsetId] = localPartitionCellOffsets[offsetId];
983 m_lastOffsetShiftDirection[offsetId] = 0;
984 shifts[offsetId] = 0;
987 m_log <<
globalTimeStep <<
" * keep offset " << offsetId <<
" " << summedErrors[offsetId] << std::endl;
993 for(
MInt offsetId = 1; offsetId < noDomains(); offsetId++) {
994 if(partitionCellOffsets[offsetId] <= partitionCellOffsets[offsetId - 1]) {
995 partitionCellOffsets[offsetId] = partitionCellOffsets[offsetId - 1] + 1;
996 shifts[offsetId] = partitionCellOffsets[offsetId] - localPartitionCellOffsets[offsetId];
997 m_log <<
"WARNING: correcting domain offset #" << offsetId
998 <<
" with now just a single partition cell on this domain. " << partitionCellOffsets[offsetId - 1]
999 <<
" " << partitionCellOffsets[offsetId] << std::endl;
1003 m_log <<
" * number of shifted offsets: " << noShifts
1004 <<
"; offsets to shift without change: " << noShiftsWithoutChange << std::endl;
1005 m_log <<
" * max weight during shifting: " << maxWeight <<
"; max difference: " << maxDiff << std::endl;
1008 if(m_debugBalance) {
1009 FILE* shiftsFile =
nullptr;
1011 shiftsFile = fopen(fileName.c_str(),
"w");
1013 fprintf(shiftsFile,
"# 1:offsetId 2:partitionCellShift 3:shiftedWorkload "
1014 "4:summedImbalance 5:load 6:domainWorkload 7:domainWeight\n");
1015 for(
MInt i = 0; i < noDomains(); i++) {
1016 fprintf(shiftsFile,
"%d %d %.8e %.8e %.8e %.8e %.8e\n", i, shifts[i], shiftedWorkload[i], summedErrors[i],
1017 loads[i], domainWorkLoad[i], m_domainWeights[i]);
1025 "partitionCellOffsets[0]");
1026 partitionCellOffsets[noDomains()] = gridb().m_noPartitionCellsGlobal;
1031 TERMM(1,
"Partition method not known.");
1037 const MBool newPartition =
1038 !std::equal(&localPartitionCellOffsets[0], &localPartitionCellOffsets[0] + noDomains(), &partitionCellOffsets[0]);
1040 m_log <<
" * checking new partition" << std::endl;
1041 MBool validPartition =
1042 (partitionCellOffsets[0] == 0) && (partitionCellOffsets[noDomains() - 1] < gridb().m_noPartitionCellsGlobal);
1043 if(!validPartition) {
1044 m_log <<
" * invalid first/last domain offset:" << partitionCellOffsets[0] <<
" "
1045 << partitionCellOffsets[noDomains() - 1] <<
"; noPartitionCellsGlobal = " << gridb().m_noPartitionCellsGlobal
1049 for(
MInt i = 1; i < noDomains(); i++) {
1050 if(partitionCellOffsets[i] <= partitionCellOffsets[i - 1]) {
1051 validPartition =
false;
1052 m_log <<
" * invalid offsets " << i - 1 <<
" " << partitionCellOffsets[i - 1] <<
" and " << i <<
" "
1053 << partitionCellOffsets[i] << std::endl;
1059 if(!validPartition) {
1060 m_log <<
" * new partition is not valid!" << std::endl;
1065 if(!newPartition && !m_forceLoadBalancing) {
1066 m_log <<
" * partitioning did not change, return." << std::endl;
1069 m_log <<
" * new partition (" << newPartition <<
"), forced balance (" << m_forceLoadBalancing <<
")" << std::endl;
1073 loadBalancingCalcNewGlobalOffsets(&localPartitionCellOffsets[0], &partitionCellOffsets[0], &globalIdOffsets[0]);
void mAlloc(T *&a, const MLong N, const MString &objectName, MString function)
allocates memory for one-dimensional array 'a' of size N
static MBool propertyExists(const MString &name, MInt solver=m_noSolvers)
This function checks if a property exists in general.
This class is a ScratchSpace.
void fill(T val)
fill the scratch with a given value
Grid controller manages adaptation and load balancing in multi-solver environment.
void computeWeights(const MFloat *loads, const MFloat domainWeight, std::vector< MFloat > &weights)
Compute computational weights for different components in the simulation based on the current distrib...
MBool loadBalancingPartition(const MFloat *loads, const MFloat imbalance, MLong *const partitionCellOffsets, MLong *const globalIdOffsets)
Determine new partitioning for dynamic load balancing.
void estimateParameters(MInt m, MInt n, const MFloat *const A, const MFloat *const b, MFloat *const x)
Solve the parameter estimation problem A*x=b.
@ DLB_PARTITION_SHIFT_OFFSETS
std::enable_if< std::is_unsigned< T >::value, int >::type constexpr signum(T x)
constexpr Real POW2(const Real x)
std::basic_string< char > MString
int MPI_Gatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, int root, MPI_Comm comm, const MString &name, const MString &sndvarname, const MString &rcvvarname)
same as MPI_Gatherv
int MPI_Bcast(void *buffer, int count, MPI_Datatype datatype, int root, MPI_Comm comm, const MString &name, const MString &varname)
same as MPI_Bcast
int MPI_Gather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm, const MString &name, const MString &sndvarname, const MString &rcvvarname)
same as MPI_Gather
std::vector< MFloat > svd(MFloat *const A, MFloat *const b, const MInt m, const MInt n, MFloat *const x)
Namespace for auxiliary functions/classes.
MFloat dist(const Point< DIM > &p, const Point< DIM > &q)