Creates a rough estimate of the domain boundaries based on heuristics. This is used to produce a coarse partitioning into several groups; within each group, a more accurate partitioning algorithm is executed at a later stage. Note that for noGroups=1 one obtains a domain decomposition similar to the previous single-level scheme. For details on the overall approach see 'Scalable high-quality 1D partitioning' by M. Lieber and W. E. Nagel (HPCS 2014). A small standalone sketch of the boundary heuristic is given after the listing below.
{
  // Assign this rank to a group and determine its rank within the group
  const IdType noRanksPerGroup = noGlobalPartitions / noGroups;
  groupId = (globalPartitionId >= noGroups * noRanksPerGroup) ? noGroups - 1 : globalPartitionId / noRanksPerGroup;
  groupLocalId = globalPartitionId - groupId * noRanksPerGroup;
  MBool isGroupMaster = (groupLocalId == 0);
  IdType masterTag = isGroupMaster ? 0 : MPI_UNDEFINED;

  // Split the global communicator into one communicator per group and one communicator for the group masters
  MPI_Comm_split(comm, groupId, globalPartitionId, &commGroup, AT_, "commGroup");
  MPI_Comm_split(comm, masterTag, globalPartitionId, &commMasters, AT_, "commMaster");

  // Exclusive prefix sum of the per-rank workloads yields this rank's global workload offset
  WeightType localOffset0 = F0;
  WeightType localWorkload = F0;
  for(IdType i = 0; i < noLocalWeights; i++) {
    localWorkload += localWeights[i];
  }
  MPI_Exscan(&localWorkload, &localOffset0, 1, MPI_DOUBLE, MPI_SUM, comm, AT_, "localWorkload", "localOffset0");

  // Target relative workload at which each group should start
  for(IdType i = 0; i < noGroups; i++) {
    groupWeight[i] = ((WeightType)(i * noRanksPerGroup)) / ((WeightType)noGlobalPartitions);
  }

  // Find the local weights at which the accumulated relative workload crosses a group boundary;
  // localOffset is the global index of this rank's first local weight, so localOffset + i is a global offset.
  // The boundary is rounded to the nearer side of the crossing.
  std::vector<std::pair<IdType, MLong>> localPartitionCellOffsets;
  localWorkload = localOffset0;
  for(IdType i = 0; i < noLocalWeights; i++) {
    WeightType f0 = localWorkload / totalWorkload;
    WeightType f1 = (localWorkload + localWeights[i]) / totalWorkload;
    for(IdType g = 1; g < noGroups; g++) {
      if(f0 <= groupWeight(g) && f1 > groupWeight(g)) {
        if(fabs(groupWeight(g) - f0) < fabs(f1 - groupWeight(g))) {
          localPartitionCellOffsets.push_back(std::make_pair(g, localOffset + i));
        } else {
          localPartitionCellOffsets.push_back(std::make_pair(g, localOffset + i + 1));
        }
      }
    }
    localWorkload += localWeights[i];
  }

  // Gather the group offsets found by the individual ranks on the global root rank
  IdType recvCount = 0;
  IdType noOffsetsFound = (signed)localPartitionCellOffsets.size();
  if(noOffsetsFound > 0) {
    ScratchSpace<MLong> sendBuf(noOffsetsFound, 2, AT_, "sendBuf"); // 2-D buffer of (group id, offset) pairs; declaration assumed, not part of the excerpt
    for(IdType i = 0; i < noOffsetsFound; i++) {
      sendBuf(i, 0) = localPartitionCellOffsets[i].first;
      sendBuf(i, 1) = localPartitionCellOffsets[i].second;
    }
    if(globalPartitionId == 0) {
      for(IdType i = 0; i < noOffsetsFound; i++) {
        groupOffsets[sendBuf(i, 0)] = sendBuf(i, 1);
        recvCount++;
      }
    } else {
      MPI_Send(&sendBuf[0], 2 * noOffsetsFound, MPI_LONG, 0, 100, comm, AT_, "sendBuf[0]");
    }
  }

  // The global root collects the remaining group offsets from all other ranks
  if(globalPartitionId == 0) {
    const WeightType time0 = MPI_Wtime();
    WeightType time1 = MPI_Wtime();
    groupOffsets[0] = 0;
    groupOffsets[noGroups] = noGlobalWeights;
    while(recvCount < noGroups - 1) {
      MPI_Status status;
      MInt flag = 0;
      MPI_Iprobe(MPI_ANY_SOURCE, 100, comm, &flag, &status);
      if(flag) {
        MInt recvSize = 0;
        MPI_Get_count(&status, MPI_LONG, &recvSize, AT_); // determine message size before receiving; call assumed, not part of the excerpt
        MInt source = status.MPI_SOURCE;
        MInt recvOffsets = recvSize / 2;
        ScratchSpace<MLong> recvBuf(recvOffsets, 2, AT_, "recvBuf"); // 2-D receive buffer; declaration assumed, not part of the excerpt
        MPI_Recv(&recvBuf[0], recvSize, MPI_LONG, source, 100, comm, &status, AT_, "recvBuf[0]");
        for(IdType i = 0; i < recvOffsets; i++) {
          groupOffsets[recvBuf(i, 0)] = recvBuf(i, 1);
        }
        recvCount += recvOffsets;
      }
      // Print a warning roughly every 10 seconds while still waiting for incoming group offsets
      if((MInt)((MPI_Wtime() - time0) / 10) > (MInt)((time1 - time0) / 10)) {
        std::cerr << "Rank " << globalPartitionId << " already waiting " << MPI_Wtime() - time0
                  << " seconds for incoming group offsets..." << std::endl;
      }
      time1 = MPI_Wtime();
    }
  }

  // Make the final group offsets known to all ranks
  MPI_Bcast(&groupOffsets[0], noGroups + 1, MPI_LONG, 0, comm, AT_, "groupOffsets[0]");
}
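
To illustrate the boundary heuristic used in the loop over the local weights, the following is a minimal serial sketch (no MPI, single rank). It reuses the names noGroups, noRanksPerGroup, noGlobalPartitions, and groupOffsets from the listing; the weight values and the main() driver are made-up examples, not part of the original code. For each interior group boundary g it picks the weight index at which the accumulated workload fraction crosses the target fraction g * noRanksPerGroup / noGlobalPartitions, rounded to the nearer side of the crossing.

// Serial sketch of the group-boundary heuristic (illustrative only).
#include <cmath>
#include <cstdio>
#include <vector>

int main() {
  const std::vector<double> weights = {1, 2, 1, 3, 2, 1, 4, 2}; // per-cell workloads (made up)
  const int noGroups = 2;
  const int noGlobalPartitions = 4; // total number of ranks
  const int noRanksPerGroup = noGlobalPartitions / noGroups;

  double totalWorkload = 0.0;
  for(const double w : weights) {
    totalWorkload += w;
  }

  std::vector<long> groupOffsets(noGroups + 1, 0);
  groupOffsets[noGroups] = (long)weights.size();

  double accumulated = 0.0;
  for(std::size_t i = 0; i < weights.size(); i++) {
    const double f0 = accumulated / totalWorkload;
    const double f1 = (accumulated + weights[i]) / totalWorkload;
    for(int g = 1; g < noGroups; g++) {
      const double target = (double)(g * noRanksPerGroup) / (double)noGlobalPartitions;
      if(f0 <= target && f1 > target) {
        // Round the boundary to whichever side of weight i lies closer to the target fraction
        groupOffsets[g] = (std::fabs(target - f0) < std::fabs(f1 - target)) ? (long)i : (long)(i + 1);
      }
    }
    accumulated += weights[i];
  }

  for(int g = 0; g <= noGroups; g++) {
    std::printf("groupOffsets[%d] = %ld\n", g, groupOffsets[g]);
  }
  return 0;
}

For the example weights above (total workload 16, target fraction 0.5 for the single interior boundary), the crossing occurs at weight index 4 and is rounded up, so the sketch prints groupOffsets = {0, 5, 8}. In the parallel code, each rank runs the same test only on its own weight range and sends any boundaries it finds to rank 0.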
The listing above uses the following class and MPI wrapper functions:

ScratchSpace
  Scratch-space memory buffer class (used here for the temporary send and receive buffers).

int MPI_Recv(void *buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Status *status, const MString &name, const MString &varname)
  Same as MPI_Recv.

int MPI_Get_count(const MPI_Status *status, MPI_Datatype datatype, int *count, const MString &name)
  Same as MPI_Get_count.

int MPI_Exscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, const MString &name, const MString &sndvarname, const MString &rcvvarname)
  Same as MPI_Exscan.

int MPI_Comm_split(MPI_Comm comm, int color, int key, MPI_Comm *newcomm, const MString &name, const MString &varname)
  Same as MPI_Comm_split, but additionally updates the number of open MPI communicators.

int MPI_Iprobe(int source, int tag, MPI_Comm comm, int *flag, MPI_Status *status, const MString &name)
  Same as MPI_Iprobe; checks for an incoming message and returns its status without actually receiving it.

int MPI_Bcast(void *buffer, int count, MPI_Datatype datatype, int root, MPI_Comm comm, const MString &name, const MString &varname)
  Same as MPI_Bcast.

int MPI_Send(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, const MString &name, const MString &varname)
  Same as MPI_Send.
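
For reference, the probe-then-receive pattern behind the MPI_Iprobe / MPI_Get_count / MPI_Recv wrappers (as used in the root rank's collection loop above) looks as follows when written with the plain MPI API; the wrappers only add the name and variable-name bookkeeping arguments. The function name receiveOneMessage is hypothetical and only serves this sketch.

// Minimal probe-then-receive sketch using the standard MPI API (no wrappers).
#include <mpi.h>
#include <vector>

void receiveOneMessage(MPI_Comm comm, int tag) {
  MPI_Status status;
  int flag = 0;
  // Non-blocking check whether a message with the given tag is pending
  MPI_Iprobe(MPI_ANY_SOURCE, tag, comm, &flag, &status);
  if(flag) {
    int count = 0;
    MPI_Get_count(&status, MPI_LONG, &count); // message size in MPI_LONG elements
    std::vector<long> buf(count);
    MPI_Recv(buf.data(), count, MPI_LONG, status.MPI_SOURCE, tag, comm, &status);
    // ... process buf ...
  }
}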