MAIA bb96820c
Multiphysics at AIA
Loading...
Searching...
No Matches
globalmpiinfo.h
Go to the documentation of this file.
1// Copyright (C) 2024 The m-AIA AUTHORS
2//
3// This file is part of m-AIA (https://git.rwth-aachen.de/aia/m-AIA/m-AIA)
4//
5// SPDX-License-Identifier: LGPL-3.0-only
6
7#ifndef MAIA_GLOBALMPIINFO_H
8#define MAIA_GLOBALMPIINFO_H
9
10#include <iostream>
11#include "INCLUDE/maiamacro.h"
12#include "INCLUDE/maiatypes.h"
13#include "mpioverride.h"
14
16void printMpiInfo(MPI_Info& mpiInfo);
17
19// Accessors and storage for global MPI information
20
23 public:
// Record the global domain id and the global number of domains in the corresponding members.
// NOTE(review): listing line 28 is absent from this extract, so init() may perform a further
// step at that position -- confirm against the original header.
24 void init(const MInt domainId, const MInt noDomains) {
25 m_globalDomainId = domainId;
26 m_globalNoDomains = noDomains;
27
29 }
30
31 private:
34
35 // Set header align size to 10KB for netCDF files. This allows appending header data without the need
36 // to move all variable data if the header size is exceeded (which may cause MPI I/O errors).
37 // Source: https://trac.mcs.anl.gov/projects/parallel-netcdf/wiki/VariableAlignment
38 MPI_Info_set(m_mpiInfo, "nc_header_align_size", "10240");
39 // Note: possibility to set variable align size
40 /* MPI_Info_set(m_mpiInfo, "nc_var_align_size", "4194304"); */
41
42#if !defined(WITH_HDF5) && defined(MPI_IO_OPT) && defined(HOST_HAZELHEN)
43 // taken from Cray Wiki: https://wickie.hlrs.de/platforms/index.php/MPI-IO,
44 // see also PNetcdf documentation: http://trac.mcs.anl.gov/projects/parallel-netcdf/wiki/HintsForPnetcdf
45 if(m_globalNoDomains > 256) {
46 MPI_Info_set(m_mpiInfo, (char*)"cb_align", (char*)"2"); /* Default: OMPI: none, CrayMPT: 2 */
47 MPI_Info_set(m_mpiInfo, (char*)"cb_nodes_list", (char*)"*:*"); /* Default: OMPI: *:1, CrayMPT: *:* */
48 MPI_Info_set(m_mpiInfo, (char*)"direct_io", (char*)"false"); /* Default: OMPI: none, CrayMPT: false */
49 MPI_Info_set(m_mpiInfo, (char*)"romio_ds_read", (char*)"disable"); /* Default: OMPI: none, CrayMPT: disable */
50 MPI_Info_set(m_mpiInfo, (char*)"romio_ds_write", (char*)"disable"); /* Default: OMPI: none, CrayMPT: disable */
51 /* Let's reduce the number of aggregators, should be roughly 2 to 4 times the stripe-factor */
52 // MPI_Info_set (m_mpiInfo, (char*)"cb_nodes", (char*)"8");
53 /* Default: OMPI: set automatically to the number of distinct nodes; However TOO High */
54
55 MPI_Info_set(m_mpiInfo, (char*)"ind_wr_buffer_size", (char*)"16777216");
56 /* proposed by PNetcdf documentation */
57 MPI_Info_set(m_mpiInfo, (char*)"striping_factor", (char*)"64");
58 /* no. of I/O devices across which the file should be striped */
59 MPI_Info_set(m_mpiInfo, (char*)"cb_nodes", (char*)"128");
60 }
61#endif
62
63#if defined(MPI_IO_OPT) && defined(HOST_Hawk)
64 if(m_globalNoDomains > 10000) { // TODO labels:HAWK,IO
65 // NOTE: PNetcdf memory issue for large scale simulations. During the pnetcdf write call a significant amount of
66 // memory is allocated (at least on Hawk; scales linearly with noDomains), which is not freed thereafter.
67 // Setting these romio hints solves the memory allocation problem; however, it is not clear whether this was responsible
68 // for some incompletely written data files.
69 // To be able to check for erroneous files you can enable the fill mode for PNetcdf in config.h with
70 // MAIA_NCMPI_FILL_VARIABLES = true
71 // and check your files for fill values in the data (which should have been overwritten).
72 MPI_Info_set(m_mpiInfo, (char*)"romio_cb_read", (char*)"disable");
73 MPI_Info_set(m_mpiInfo, (char*)"romio_cb_write", (char*)"disable");
74 if(m_globalDomainId == 0) {
75 std::cerr << std::endl
76 << std::endl
77 << "NOTE: disabling ROMIO hints romio_cb_read/write to avoid PNetcdf/Hdf5 memory allocation issues "
78 "on HAWK... "
79 << std::endl
80 << "NOTE: see comment at " << AT_ << std::endl
81 << "NOTE: undefine MPI_IO_OPT to turn off the ROMIO hint changes." << std::endl
82 << std::endl
83 << std::endl;
84 }
85
86 // TODO labels:HAWK,IO check if it makes sense to disable these
87 // MPI_Info_set(m_mpiInfo, (char*)"romio_ds_read", (char*)"disable");
88 // MPI_Info_set(m_mpiInfo, (char*)"romio_ds_write", (char*)"disable");
89 }
90#endif
91
92#ifdef MPI_IO_PRINT_INFO
93 // Print MPI information on global rank 0
94 if(m_globalDomainId == 0) {
95 std::cerr << std::endl << "Global MPI information" << std::endl;
97 }
98#endif
99 }
100
// Accessor functions declared as friends, giving them access to this class's non-public members.
101 friend MInt globalDomainId();
102 friend MInt globalNoDomains();
103 friend const MPI_Info& globalMpiInfo();
104
// MPI_Info handle, default-initialized to MPI_INFO_NULL; the MPI_Info_set calls in this class
// attach the I/O hints to it.
107 MPI_Info m_mpiInfo = MPI_INFO_NULL;
108};
109
111
// Return a const reference to the MPI_Info object stored in g_mpiInformation.
117inline const MPI_Info& globalMpiInfo() { return g_mpiInformation.m_mpiInfo; }
118
119#endif // MAIA_GLOBALMPIINFO_H
Class to store global MPI information and to prevent accidental changes.
Definition: globalmpiinfo.h:22
friend MInt globalNoDomains()
Return global number of domains.
friend MInt globalDomainId()
Return global domain id.
friend const MPI_Info & globalMpiInfo()
Return global MPI information.
void init(const MInt domainId, const MInt noDomains)
Definition: globalmpiinfo.h:24
MInt globalNoDomains()
Return global number of domains.
MInt globalDomainId()
Return global domain id.
const MPI_Info & globalMpiInfo()
Return global MPI information.
void printMpiInfo(MPI_Info &mpiInfo)
Print all information of given MPI_Info object.
GlobalMpiInformation g_mpiInformation
int32_t MInt
Definition: maiatypes.h:62
int MPI_Info_create(MPI_Info *info, const MString &name)
same as MPI_Info_create