Based on the domain loads and current domain decomposition determine a new partitioning that is expected to reduce imbalances among domains.
347 {
348 TRACE();
349
350 using namespace maia;
351
352 m_log <<
" * determine new partition" << std::endl;
353
356
358
359 const MInt noLocalPartitionCells = localPartitionCellCounts[
domainId()];
360
361
363
364
367
368
369 const MInt noWeightPartitionSteps = 3;
372 } else {
374 }
375 break;
376 }
377
381 break;
382 default:
383 TERMM(1, "Unknown DLB partition method.");
384 break;
385 }
386
387
391 }
392
393
395
396
397 switch(partitionMethod) {
399 m_log <<
"Partition method #0 (compute weights, determine min cells workload and use the "
400 "partition() algorithm)"
401 << std::endl;
402
403
404 std::vector<MFloat> weights;
406
408
409
410
411 partition(&partitionCellOffsets[0], &globalIdOffsets[0],
true);
412 break;
413 }
415
417 partitionCellOffsets[i] = localPartitionCellOffsets[i];
418 }
420 std::cerr << "Using original partition!" << std::endl;
421 }
422 break;
423 }
425 m_log <<
"Partition method #3 (compute weights and domain weights iteratively, determine "
426 "summed load error at each domain offset and vary min cell offset such that "
427 "predicted error is zero)"
428 << std::endl;
429
430 std::vector<MFloat> weights;
431 std::vector<MFloat> oldWeights;
432 std::vector<MFloat> oldDomainWeights;
434 std::fill(oldDomainWeights.begin(), oldDomainWeights.end(),
435 1.0);
436
438 MFloat averageWorkLoad = -1.0;
439
440 MFloatScratchSpace localPartitionCellsWorkload(noLocalPartitionCells, AT_,
"localPartitionCellsWorkload");
442
443 const MInt maxNoIt = 20;
444
445 for(
MInt it = 0; it < maxNoIt; it++) {
446
448
449
451
452
453 for(
MInt i = 0; i < noLocalPartitionCells; i++) {
456 TERMM_IF_NOT_COND(
gridb().a_hasProperty(partitionCellId, Cell::IsPartitionCell),
457 "Error: cell is not a partition cell.");
458
460 TERMM_IF_NOT_COND(workload > 0.0, "Error: partition cell workload needs to be > 0.0");
461 localPartitionCellsWorkload[i] = workload;
462 }
463
464
468 localPartitionCellCounts_[i] = (
MInt)localPartitionCellCounts[i];
469 localPartitionCellOffsets_[i] = (
MInt)localPartitionCellOffsets[i];
470 }
472
473
475 &partitionCellsWorkload[0], &localPartitionCellCounts_[0], &localPartitionCellOffsets_[0],
477 "partitionCellsWorkload[0]");
480
481 const MFloat maxPartitionWorkload =
482 *std::max_element(partitionCellsWorkload.begin(), partitionCellsWorkload.end());
483 const MFloat minPartitionWorkload =
484 *std::min_element(partitionCellsWorkload.begin(), partitionCellsWorkload.end());
485 const MFloat avgPartitionWorkload =
486 std::accumulate(partitionCellsWorkload.begin(), partitionCellsWorkload.end(), 0.0)
488 m_log <<
" * Maximum/minimum/average partition cell workload: " << maxPartitionWorkload <<
", "
489 << minPartitionWorkload << ", " << avgPartitionWorkload << std::endl;
490 if(minPartitionWorkload <= 0) {
491 m_log <<
globalTimeStep <<
" ERROR: minimum partition cell workload is " << minPartitionWorkload
492 << std::endl;
493 cerr0 <<
globalTimeStep <<
" ERROR: minimum partition cell workload is " << minPartitionWorkload
494 << std::endl;
495 error = 1;
496 }
497
498
500 const MInt offset = localPartitionCellOffsets[i];
501 const MInt count = localPartitionCellCounts[i];
502 domainWorkLoad[i] =
503 std::accumulate(&partitionCellsWorkload[offset], &partitionCellsWorkload[offset] + count, 0.0);
504 }
505
506 averageWorkLoad = std::accumulate(domainWorkLoad.begin(), domainWorkLoad.end(), 0.0) /
noDomains();
507
508 m_log <<
" * Average domain workload: " << averageWorkLoad << std::endl;
509
510
513 }
514
515 const MFloat domainWeightMinLoadThreshold = 0.2;
516
517 MFloat averageDomainWeight = 0.0;
518 MInt countDomainWeights = 0;
520 if(loads[i] >= domainWeightMinLoadThreshold) {
522 countDomainWeights++;
523 }
524 }
525 averageDomainWeight /= (
MFloat)countDomainWeights;
526
528
529 if(loads[i] < domainWeightMinLoadThreshold) {
531 } else {
532
534
535
537 }
538 }
539
542 m_log <<
" * Domain weights: max = " << maxDomainWeight <<
", min = " << minDomainWeight << std::endl;
543 }
544
545
547 if(error > 0) {
548 return false;
549 }
550
551
553 "m_domainWeights[0]");
554
555
556 if(it > 0) {
557 const MInt noWeights = weights.size();
558 MFloat weightResidual = 0.0;
559 for(
MInt i = 0; i < noWeights; i++) {
560 weightResidual +=
ABS(weights[i] - oldWeights[i]);
561 }
562 weightResidual /= noWeights;
563
564 MFloat domainWeightResidual = 0.0;
567 }
569
570 const MFloat weightResThreshold = 1e-3;
571 const MFloat domainWeightResThreshold = 1e-2;
572 if(weightResidual < weightResThreshold && domainWeightResidual < domainWeightResThreshold) {
573 m_log <<
" * weights and domain weights converged, residuals: " << weightResidual <<
", "
574 << domainWeightResidual << std::endl;
575 break;
576 } else if(it == maxNoIt - 1) {
577 m_log <<
" * WARNING: Weights and domain weights not converged, but "
578 "maximum number of iterations reached. "
579 << weightResidual << " " << domainWeightResidual << std::endl;
580 } else {
581 m_log <<
" * Weights and domain weights iteration " << it <<
" " << weightResidual <<
" "
582 << domainWeightResidual << std::endl;
583 }
584 }
585 oldWeights = weights;
587 }
588
594 }
595
596
597
603
604
606 errors[i] = loads[i] - 1.0;
607 }
608
609 summedErrors[0] = 0.0;
610 summedErrAbs[0] = 0.0;
611 summedErrDir[0] = 0;
613 summedErrors[i] = summedErrors[i - 1] + errors[i - 1];
614 summedErrAbs[i] =
ABS(summedErrors[i]);
615 summedErrDir[i] = -
signum(summedErrors[i]);
616 }
617
618 const MFloat maxSummedErr = *std::max_element(summedErrors.begin(), summedErrors.end());
619 const MFloat minSummedErr = *std::min_element(summedErrors.begin(), summedErrors.end());
620 m_log <<
globalTimeStep <<
" * Summed load deviations: max = " << maxSummedErr <<
", min = " << minSummedErr
621 << std::endl;
622
623
625
626
627
628 const MFloat restrictionFactor = 1.0 - std::min(0.5, 0.005 *
noDomains());
629
630 MFloat loadVariance = 0.0;
632 loadVariance +=
POW2(loads[i] - 1.0);
633 }
635 const MFloat loadStd = std::sqrt(loadVariance);
636 m_log <<
globalTimeStep <<
" * load distribution: variance = " << loadVariance <<
", stdev = " << loadStd
637 << std::endl;
638
639 const MFloat imbalancePenaltyThreshold = 2.0 * errorThreshold;
640 const MFloat imbPenaltyFactor =
641 (imbalance < imbalancePenaltyThreshold)
642 ? (1.0 + (imbalancePenaltyThreshold - imbalance) / imbalancePenaltyThreshold)
643 : 1.0;
644 m_log <<
globalTimeStep <<
" * Imbalance penalty factor " << imbPenaltyFactor <<
" (imbalance " << imbalance
645 << "); restriction factor " << restrictionFactor << std::endl;
646
647
648 const MBool finalLocalShift =
651
652 const MBool intermediateLocalShift =
654
655 const MBool globalShiftStep = !finalLocalShift && !intermediateLocalShift;
656
657 m_log <<
globalTimeStep <<
" * Shift step: global " << globalShiftStep <<
"; intermediate local "
658 << intermediateLocalShift << "; final local " << finalLocalShift << std::endl;
659
660
662 globalShiftFlag.fill(0);
663
666
667 {
669 globalShiftFlagTmp.fill(0);
670
671 MInt noGlobalShifts = 0;
672
673
676 const MInt firstOffset = std::max(0, i -
dist);
678
679 const MFloat summedErrorDiff =
ABS(summedErrors[lastOffset] - summedErrors[firstOffset]);
680 const MBool sameSign = (summedErrDir[lastOffset] == summedErrDir[firstOffset]);
681 const MBool absErrCondition = (summedErrAbs[i] > errorThreshold);
682
683 const MFloat smoothShiftThreshold = (lastOffset - firstOffset) * errorThreshold;
684 const MBool smoothShiftCondition =
685 (absErrCondition && (sameSign && summedErrorDiff > smoothShiftThreshold));
686
687 MBool globalShiftCondition =
false;
688 if(globalShiftStep) {
690 }
691
692 globalShiftFlagTmp[i] = (globalShiftCondition) ? summedErrDir[i] : 0;
693 if(globalShiftFlagTmp[i] != 0) {
694 noGlobalShifts++;
697 << " tr=" << smoothShiftThreshold << " s_first=" << summedErrors[firstOffset]
698 << " s_last=" << summedErrors[lastOffset] << " s_abs=" << summedErrAbs[i] << std::endl;
699 }
700 }
701 }
703
704
705
706
707
708
710
711 if(globalShiftFlagTmp[i] != 0) {
712 minGlobalDist[i] = 0;
713 globalShiftFlag[i] = globalShiftFlagTmp[i];
714
715
716 const MInt dist = std::ceil(summedErrAbs[i] / errorThreshold);
719 << summedErrAbs[i] << std::endl;
720 }
721
722
723 for(
MInt nId = i - 1; nId >= std::max(i -
dist, 1); nId--) {
725
726
727
728
729 if(summedErrDir[nId] != summedErrDir[i] || errorThreshold * distance > summedErrAbs[nId]) {
730 break;
731 }
732
733 minGlobalDist[nId] = std::min(minGlobalDist[nId], distance);
734 globalShiftFlag[nId] = summedErrDir[nId];
735 }
736
739
740 if(summedErrDir[nId] != summedErrDir[i] || errorThreshold * distance > summedErrAbs[nId]) {
741 break;
742 }
743
744 minGlobalDist[nId] = std::min(minGlobalDist[nId], distance);
745 globalShiftFlag[nId] = summedErrDir[nId];
746 }
747 }
748 }
749
750 MInt noGlobalShiftsTotal = 0;
752 if(globalShiftFlag[i] != 0) {
753 noGlobalShiftsTotal++;
754 }
755 }
756 m_log <<
globalTimeStep <<
" * number of total global shifts " << noGlobalShiftsTotal << std::endl;
757 }
758
759
761 MInt noShiftsWithoutChange = 0;
764
766 shifts.fill(0);
768 shiftedWorkload.fill(0.0);
769
770 partitionCellOffsets[0] = 0;
771
773 MFloat summedWorkload = 0.0;
774
775 for(
MInt i = partitionCellOffsets[offsetId - 1]; i < localPartitionCellOffsets[offsetId]; i++) {
777 : partitionCellsWorkload[i];
778 }
779
780
781 const MBool globalShiftCondition = (globalShiftFlag[offsetId] != 0);
782 MBool localShiftCondition =
false;
783 MInt localShiftDir = 0;
784 MFloat localShiftDiff = 0.0;
785
786
787
788 if(!globalShiftCondition && globalShiftFlag[offsetId - 1] == 0 && globalShiftFlag[offsetId + 1] == 0) {
789 const MFloat errLeft = errors[offsetId - 1];
790 const MFloat errRight = errors[offsetId];
791 localShiftDiff = 0.5 * (errLeft - errRight);
792
793 if(errLeft > errorThreshold && errRight < errLeft) {
794
795 localShiftDir = -1;
796 } else if(errRight > errorThreshold && errRight > errLeft) {
797
798 localShiftDir = 1;
799 } else if(errLeft < -errorThreshold && errLeft < errRight) {
800
801 localShiftDir = 1;
802 } else if(errRight < -errorThreshold && errRight < errLeft) {
803
804 localShiftDir = -1;
805 }
806 localShiftCondition = (localShiftDir != 0);
807 }
808
809 const MBool shift = globalShiftCondition || localShiftCondition;
810
813 << " g=" << globalShiftCondition << " l=" << localShiftCondition << " err=" << summedErrors[offsetId]
814 << " minDist=" << minGlobalDist[offsetId] << " lDir=" << localShiftDir << " diff=" << localShiftDiff
815 << " eL=" << errors[offsetId - 1] << " eR=" << errors[offsetId] << std::endl;
816 }
817
818 if(shift) {
819
820 const MInt dir = (globalShiftCondition) ? summedErrDir[offsetId] : localShiftDir;
821 MFloat penaltyFactor = 1.25;
822
824
825 if(lastDir != dir && lastDir != 0) {
826 penaltyFactor = 2.0;
828 m_log <<
"Opposite shift " << offsetId <<
", lastDir " << lastDir <<
", dir " << dir << std::endl;
829 }
830 }
831 penaltyFactor *= imbPenaltyFactor;
832
833
834
835 const MInt domainOld = (dir < 0) ? offsetId - 1 : offsetId;
836 const MInt domainNew = (dir < 0) ? offsetId : offsetId - 1;
837
838
839
840
841
842 const MFloat summedErrorTmp =
843 restrictionFactor * (summedErrors[offsetId] + dir * errorThreshold * minGlobalDist[offsetId]);
844
845 const MFloat summedError = summedErrorTmp;
846 MFloat oldEstimate = (globalShiftCondition) ? summedError : localShiftDiff;
847 const MFloat initialEstimate = oldEstimate;
848
850
851
852 MInt partitionCellDomain = domainOld;
853
855 m_log <<
"DLB_DEBUG: Start offset shift " << offsetId <<
", domainOld " << domainOld <<
", domainNew "
856 << domainNew << ", oldEstimate " << oldEstimate << ", summedError " << summedErrors[offsetId]
857 << ", penaltyFactor " << penaltyFactor << std::endl;
858 }
859
860
861
862 for(
MInt i = 1;; i++) {
863
864
865 const MInt newPartitionCellId = localPartitionCellOffsets[offsetId] + dir * i;
866
867
868 if(dir > 0 && newPartitionCellId == localPartitionCellOffsets[partitionCellDomain + 1]) {
869
870 partitionCellDomain++;
871 } else if(dir < 0 && newPartitionCellId == localPartitionCellOffsets[partitionCellDomain] - 1) {
872
873 partitionCellDomain--;
874 }
875
876 const MBool domainFactorShift =
true;
877
880 : 1.0;
881
882
883 const MFloat estimate = oldEstimate
884 + dir * penaltyFactor * penalty * domainFactor * loads[partitionCellDomain]
885 * partitionCellsWorkload[newPartitionCellId]
886 / domainWorkLoad[partitionCellDomain];
887 const MFloat diff = estimate - oldEstimate;
888
889
890
891 if(newPartitionCellId <= partitionCellOffsets[offsetId - 1]) {
892 summedWorkload = 0;
893 }
894 const MFloat workloadIncrement =
896 : partitionCellsWorkload[newPartitionCellId];
897 const MFloat summedWorkloadNew = summedWorkload + dir * workloadIncrement;
898 const MFloat summedWorkloadRel = summedWorkload / averageWorkLoad;
899
900#ifndef NDEBUG
901
903 m_log <<
"DLB_DEBUG: offset " << offsetId <<
", partCellId " << newPartitionCellId
904 << ", partCellDomain " << partitionCellDomain << ", domainFactor " << domainFactor
905 << ", oldEstimate " << oldEstimate << ", estimate " << estimate << ", diff " << diff
906 << ", penalty " << penalty << ", partCellWorkload " << partitionCellsWorkload[newPartitionCellId]
907 << ", domainWorkload " << domainWorkLoad[partitionCellDomain] << ", load "
908 << loads[partitionCellDomain] << std::endl;
909 }
910#endif
911
912
913
914
915 const MBool prevOffsetReached = (dir < 0 && newPartitionCellId == partitionCellOffsets[offsetId - 1]);
917
918
921 : true;
922
923 if(prevOffsetReached || lastCellReached || (
ABS(estimate) >=
ABS(oldEstimate) && workloadCheck)
924 || (dir == 1 && !workloadCheck)) {
925
926 partitionCellOffsets[offsetId] = localPartitionCellOffsets[offsetId] + dir * (i - 1);
927
930 << localPartitionCellOffsets[offsetId] << " " << partitionCellOffsets[offsetId] << " "
931 << summedErrors[offsetId] << " " << estimate << " " << oldEstimate << " " << penalty << " "
932 << summedWorkloadRel << std::endl;
933 }
934 shifts[offsetId] = dir * (i - 1);
935
936
938
939
940
941 if(partitionCellOffsets[offsetId] <= partitionCellOffsets[offsetId - 1]) {
942 partitionCellOffsets[offsetId] = partitionCellOffsets[offsetId - 1] + 1;
943 shifts[offsetId] = partitionCellOffsets[offsetId] - localPartitionCellOffsets[offsetId];
944 m_log <<
"WARNING: setting domain offset #" << offsetId
945 << " with just a single partition cell on this domain. " << partitionCellOffsets[offsetId - 1]
946 << " " << partitionCellOffsets[offsetId] << std::endl;
947 } else if(partitionCellOffsets[offsetId] == partitionCellOffsets[offsetId - 1] + 1) {
948 m_log <<
"WARNING: domain #" << offsetId - 1 <<
" has just a single partition cell. "
949 << partitionCellOffsets[offsetId - 1] << " " << partitionCellOffsets[offsetId] << std::endl;
950 }
951
952 if(shifts[offsetId] != 0) {
953 noShifts++;
954 } else {
955 noShiftsWithoutChange++;
956 }
957
958
959 break;
960 } else {
961
962 oldEstimate = estimate;
963
964 summedWorkload = summedWorkloadNew;
965
966
967 maxDiff = std::max(maxDiff,
ABS(diff));
968 maxWeight = std::max(maxWeight, partitionCellsWorkload[newPartitionCellId]);
969
970 shiftedWorkload[offsetId] += dir * partitionCellsWorkload[newPartitionCellId];
971
972
973
974
975 const MFloat s = initialEstimate;
977 penalty = 1.0 /
cos(M_PI / 2.0 * x);
978 }
979 }
980 } else {
981
982 partitionCellOffsets[offsetId] = localPartitionCellOffsets[offsetId];
984 shifts[offsetId] = 0;
985
987 m_log <<
globalTimeStep <<
" * keep offset " << offsetId <<
" " << summedErrors[offsetId] << std::endl;
988 }
989 }
990 }
991
992
994 if(partitionCellOffsets[offsetId] <= partitionCellOffsets[offsetId - 1]) {
995 partitionCellOffsets[offsetId] = partitionCellOffsets[offsetId - 1] + 1;
996 shifts[offsetId] = partitionCellOffsets[offsetId] - localPartitionCellOffsets[offsetId];
997 m_log <<
"WARNING: correcting domain offset #" << offsetId
998 << " with now just a single partition cell on this domain. " << partitionCellOffsets[offsetId - 1]
999 << " " << partitionCellOffsets[offsetId] << std::endl;
1000 }
1001 }
1002
1003 m_log <<
" * number of shifted offsets: " << noShifts
1004 << "; offsets to shift without change: " << noShiftsWithoutChange << std::endl;
1005 m_log <<
" * max weight during shifting: " << maxWeight <<
"; max difference: " << maxDiff << std::endl;
1006
1007
1009 FILE* shiftsFile = nullptr;
1011 shiftsFile = fopen(fileName.c_str(), "w");
1012
1013 fprintf(shiftsFile, "# 1:offsetId 2:partitionCellShift 3:shiftedWorkload "
1014 "4:summedImbalance 5:load 6:domainWorkload 7:domainWeight\n");
1016 fprintf(shiftsFile, "%d %d %.8e %.8e %.8e %.8e %.8e\n", i, shifts[i], shiftedWorkload[i], summedErrors[i],
1018 }
1019 fclose(shiftsFile);
1020 }
1021 }
1022
1023
1025 "partitionCellOffsets[0]");
1027
1028 break;
1029 }
1030 default: {
1031 TERMM(1, "Partition method not known.");
1032 break;
1033 }
1034 }
1035
1036
1037 const MBool newPartition =
1038 !std::equal(&localPartitionCellOffsets[0], &localPartitionCellOffsets[0] +
noDomains(), &partitionCellOffsets[0]);
1039
1040 m_log <<
" * checking new partition" << std::endl;
1041 MBool validPartition =
1043 if(!validPartition) {
1044 m_log <<
" * invalid first/last domain offset:" << partitionCellOffsets[0] <<
" "
1046 << std::endl;
1047 }
1048
1050 if(partitionCellOffsets[i] <= partitionCellOffsets[i - 1]) {
1051 validPartition = false;
1052 m_log <<
" * invalid offsets " << i - 1 <<
" " << partitionCellOffsets[i - 1] <<
" and " << i <<
" "
1053 << partitionCellOffsets[i] << std::endl;
1054 }
1055 }
1056
1057
1058
1059 if(!validPartition) {
1060 m_log <<
" * new partition is not valid!" << std::endl;
1061 return false;
1062 }
1063
1064
1066 m_log <<
" * partitioning did not change, return." << std::endl;
1067 return false;
1068 } else {
1070 }
1071
1072
1074
1075 return true;
1076}
void determineNoPartitionCellsAndOffsets(MLong *const noPartitionCells, MLong *const partitionCellOffsets)
Determine the number of partition cells on each domain and the corresponding offsets.
MInt globalIdToLocalId(const MLong &globalId, const MBool termIfNotExisting=false)
Global to local id mapping.
MFloat a_workload(const MInt cellId) const
Returns the workload of the cell cellId.
std::vector< MFloat > m_domainWeights
std::vector< MInt > m_lastOffsetShiftDirection
MFloat m_dlbImbalanceThreshold
void computeWeights(const MFloat *loads, const MFloat domainWeight, std::vector< MFloat > &weights)
Compute computational weights for different components in the simulation based on the current distrib...
MFloat m_dlbMaxWorkloadLimit
MBool m_dlbSmoothGlobalShifts
MInt m_dlbPartitionMethod
void storeLoadsAndWeights(const MFloat *const loads, const MInt noLoadTypes, const MInt *const loadQuantities, const MFloat domainWeight, const MFloat *const weights)
Store domain loads and additional information to file.
void updateWeightsAndWorkloads(const std::vector< MFloat > &weights, const MBool restoreDefaultWeights)
Determine the cell weights using the given weighting factors for the different solver load quantities...
@ DLB_PARTITION_SHIFT_OFFSETS
std::enable_if< std::is_unsigned< T >::value, int >::type constexpr signum(T x)
int MPI_Gatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, int root, MPI_Comm comm, const MString &name, const MString &sndvarname, const MString &rcvvarname)
same as MPI_Gatherv
T cos(const T a, const T b, const T x)
Cosine slope filter.
MFloat distance(const MFloat *a, const MFloat *b)