// Static timer for measuring communication activity.
// Starts out unset (nullptr).
35 std::unique_ptr<Foam::cpuTime> Foam::profilingPstream::timer_(
nullptr);
// Static suspend flag, initially false.
37 bool Foam::profilingPstream::suspend_(
false);
51 counts_ = uint64_t(0);
59 timer_.reset(
nullptr);
67 counts_ = uint64_t(0);
74 for (
const double val : times_)
102 const label numProc = result.
size();
104 const label stride = allValues.
size() / numProc;
112 for (label proci = 0; proci < numProc; ++proci,
values += stride)
114 result[proci] =
values[index];
121 template<
class Type,
class Extract>
125 const UList<Type>& allValues,
134 const label numProc = result.size();
135 const Type*
values = allValues.cbegin();
136 const label stride = allValues.size() / numProc;
144 for (label proci = 0; proci < numProc; ++proci,
values += stride)
153 const label numProc =
values.size();
159 for (label proci = 0; proci < numProc; ++proci)
161 if (proci)
Info<<
' ';
172 const label numProc =
values.size();
181 Info<<
indent <<
" counts " << numProc <<
'(';
183 for (label proci = 0; proci < numProc; ++proci)
185 if (proci)
os <<
' ';
214 List<double> allTimes;
215 List<uint64_t> allCounts;
218 const bool oldSuspend = suspend();
222 const auto& procValues = times_;
226 allTimes.resize(numProc * procValues.size());
231 procValues.cdata_bytes(),
232 allTimes.data_bytes(),
233 procValues.size_bytes(),
241 const auto& procValues = counts_;
245 allCounts.resize(numProc * procValues.size());
250 procValues.cdata_bytes(),
251 allCounts.data_bytes(),
252 procValues.size_bytes(),
265 typedef FixedList<Tuple2<double, int>, 3> statData;
268 auto calcStats = [](
const UList<double>& data) -> statData
271 stats = Tuple2<double, int>((data.empty() ? 0 : data[0]), 0);
273 const label np = data.size();
274 for (label proci = 1; proci < np; ++proci)
276 Tuple2<double, int> tup(data[proci], proci);
279 if (stats[0].first() > tup.first()) stats[0] = tup;
280 if (stats[1].first() < tup.first()) stats[1] = tup;
281 stats[2].first() += tup.first();
285 if (np) { stats[2].first() /= np; }
291 const auto printTimingStats =
292 [&](Ostream&
os,
const char* tag,
const statData& stats)
294 os <<
indent << tag <<
": avg = " << stats[2].first()
295 <<
", min = " << stats[0].first()
296 <<
" (proc " << stats[0].second() <<
')' 297 <<
", max = " << stats[1].first()
298 <<
" (proc " << stats[1].second() <<
')' 305 Info<<
"profiling(parallel):" <<
nl 309 List<double> extractedTimes(numProc);
310 List<uint64_t> extractedCounts;
314 extractedCounts.resize(numProc);
323 [=](
const double values[])
326 for (
unsigned i = 0; i < timingType::nCategories; ++i)
333 stats = calcStats(extractedTimes);
335 printTimingStats(
Info(),
"total ", stats);
341 const int index = int(timingType::ALL_TO_ALL);
345 stats = calcStats(extractedTimes);
347 printTimingStats(
Info(),
"all-all ", stats);
354 const int index = int(timingType::BROADCAST);
358 stats = calcStats(extractedTimes);
360 printTimingStats(
Info(),
"broadcast ", stats);
367 const int index = int(timingType::PROBE);
371 stats = calcStats(extractedTimes);
373 printTimingStats(
Info(),
"probe ", stats);
386 [=](
const double values[])
390 values[timingType::REDUCE]
391 +
values[timingType::GATHER]
392 +
values[timingType::SCATTER]
400 [=](
const uint64_t
values[])
404 values[timingType::REDUCE]
405 +
values[timingType::GATHER]
406 +
values[timingType::SCATTER]
410 stats = calcStats(extractedTimes);
412 printTimingStats(
Info(),
"reduce ", stats);
426 [=](
const double values[])
431 +
values[timingType::SEND]
439 [=](
const uint64_t
values[])
444 +
values[timingType::SEND]
448 stats = calcStats(extractedTimes);
450 printTimingStats(
Info(),
"send/recv ", stats);
458 const int index = int(timingType::REQUEST);
462 stats = calcStats(extractedTimes);
464 printTimingStats(
Info(),
"request ", stats);
472 const int index = int(timingType::WAIT);
476 stats = calcStats(extractedTimes);
478 printTimingStats(
Info(),
"wait ", stats);
void size(const label n)
Older name for setAddressableSize.
Ostream & indent(Ostream &os)
Indent stream.
A 1D vector of objects of type <T> with a fixed length <N>.
constexpr char nl
The newline '\n' character (0x0a)
bool empty() const noexcept
True if List is empty (ie, size() is zero)
std::ostream & stdStream()
Return std::ostream for output operations.
static bool & parRun() noexcept
Test if this is a parallel run.
static List< T > extract(const word &key, const UPtrList< entry > &entries, const T &initValue)
List< T > values(const HashTable< T, Key, Hash > &tbl, const bool doSort=false)
List of values from HashTable, optionally sorted.
static label nProcs(const label communicator=worldComm)
Number of ranks in parallel run (for given communicator). It is 1 for serial run. ...
static double elapsedTime()
The total of times.
static void mpiGather(const char *sendData, char *recvData, int count, const label communicator=worldComm)
Receive identically-sized char data from all ranks.
OBJstream os(runTime.globalPath()/outputName)
Ostream & decrIndent(Ostream &os)
Decrement the indent level.
static label commWorld() noexcept
Communicator for all ranks (respecting any local worlds)
static void reset()
Reset times/counts. Does not affect the timer itself.
static void disable() noexcept
Remove timer for measuring communication activity. Does not affect times/counts.
static bool master(const label communicator=worldComm)
True if process corresponds to the master rank in the communicator.
messageStream Info
Information stream (stdout output on master, null elsewhere)
const_iterator cbegin() const noexcept
Return const_iterator to begin traversing the constant UList.
static void printTimingDetail(const UList< double > &values)
Ostream & incrIndent(Ostream &os)
Increment the indent level.
static void report(const int reportLevel=0)
Report current information. Uses parallel communication!
static void enable()
Create timer for measuring communication or un-suspend existing.
Starts timing CPU usage and returns the elapsed time from start.
static void extractValues(UList< Type > &result, const int index, const UList< Type > &allValues)