profilingPstream.C
Go to the documentation of this file.
1 /*---------------------------------------------------------------------------*\
2  ========= |
3  \\ / F ield | OpenFOAM: The Open Source CFD Toolbox
4  \\ / O peration |
5  \\ / A nd | www.openfoam.com
6  \\/ M anipulation |
7 -------------------------------------------------------------------------------
8  Copyright (C) 2019-2023 OpenCFD Ltd.
9 -------------------------------------------------------------------------------
10 License
11  This file is part of OpenFOAM.
12 
13  OpenFOAM is free software: you can redistribute it and/or modify it
14  under the terms of the GNU General Public License as published by
15  the Free Software Foundation, either version 3 of the License, or
16  (at your option) any later version.
17 
18  OpenFOAM is distributed in the hope that it will be useful, but WITHOUT
19  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
20  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
21  for more details.
22 
23  You should have received a copy of the GNU General Public License
24  along with OpenFOAM. If not, see <http://www.gnu.org/licenses/>.
25 
26 \*---------------------------------------------------------------------------*/
27 
28 #include "profilingPstream.H"
29 #include "List.H"
30 #include "Tuple2.H"
31 #include "UPstream.H"
32 
33 // * * * * * * * * * * * * * * Static Data Members * * * * * * * * * * * * * //
34 
// Timer used when measuring communication activity. Starts out unset
// (nullptr); the enable logic below allocates it on first use.
35 std::unique_ptr<Foam::cpuTime> Foam::profilingPstream::timer_(nullptr);
36 
// Suspension flag. Starts out false and is cleared again whenever the
// timer is created or removed in this file.
37 bool Foam::profilingPstream::suspend_(false);
38 
// Accumulated times and counts, constructed from a zero value.
// NOTE(review): presumably one entry per timingType category - confirm
// against the timingList/countList typedefs in the header.
39 Foam::profilingPstream::timingList Foam::profilingPstream::times_(double(0));
40 Foam::profilingPstream::countList Foam::profilingPstream::counts_(uint64_t(0));
41 
42 
43 // * * * * * * * * * * * * * Static Member Functions * * * * * * * * * * * * //
44 
46 {
47  if (!timer_)
48  {
49  timer_.reset(new cpuTime);
50  times_ = double(0);
51  counts_ = uint64_t(0);
52  }
53  suspend_ = false;
54 }
55 
56 
58 {
59  timer_.reset(nullptr);
60  suspend_ = false;
61 }
62 
63 
65 {
66  times_ = double(0);
67  counts_ = uint64_t(0);
68 }
69 
70 
72 {
73  double total = 0;
74  for (const double val : times_)
75  {
76  total += val;
77  }
78 
79  return total;
80 }
81 
82 
83 // * * * * * * * * * * * * * * * Local Functions * * * * * * * * * * * * * * //
84 
85 namespace Foam
86 {
87 
88 // Loop over all values (with striding) and extract the value at given index
89 template<class Type>
90 inline static void extractValues
91 (
92  UList<Type>& result,
93  const int index,
94  const UList<Type>& allValues
95 )
96 {
97  if (result.empty())
98  {
99  return;
100  }
101 
102  const label numProc = result.size();
103  const Type* values = allValues.cbegin();
104  const label stride = allValues.size() / numProc;
105 
106  if (!values || !stride)
107  {
108  result = Type(0);
109  return;
110  }
111 
112  for (label proci = 0; proci < numProc; ++proci, values += stride)
113  {
114  result[proci] = values[index];
115  }
116 }
117 
118 
119 // Loop over all values (with striding) and extract combined value
120 // using the given unary function
121 template<class Type, class Extract>
122 inline static void extractValues
123 (
124  UList<Type>& result,
125  const UList<Type>& allValues,
126  const Extract& extract
127 )
128 {
129  if (result.empty())
130  {
131  return;
132  }
133 
134  const label numProc = result.size();
135  const Type* values = allValues.cbegin();
136  const label stride = allValues.size() / numProc;
137 
138  if (!values || !stride)
139  {
140  result = Type(0);
141  return;
142  }
143 
144  for (label proci = 0; proci < numProc; ++proci, values += stride)
145  {
146  result[proci] = extract(values);
147  }
148 }
149 
150 
151 inline static void printTimingDetail(const UList<double>& values)
152 {
153  const label numProc = values.size();
154 
155  if (numProc)
156  {
157  Info<< indent << " times " << numProc << '(';
158 
159  for (label proci = 0; proci < numProc; ++proci)
160  {
161  if (proci) Info<< ' ';
162  Info<< values[proci];
163  }
164 
165  Info<< ')' << nl;
166  }
167 }
168 
169 
170 inline static void printTimingDetail(const UList<uint64_t>& values)
171 {
172  const label numProc = values.size();
173 
174  if (numProc)
175  {
176  // Output via std::ostream to avoid conversion to Foam::label
177  // that Ostream performs
178 
179  auto& os = Info.stdStream();
180 
181  Info<< indent << " counts " << numProc << '(';
182 
183  for (label proci = 0; proci < numProc; ++proci)
184  {
185  if (proci) os << ' ';
186  os << values[proci];
187  }
188 
189  Info<< ')' << nl;
190  }
191 }
192 
193 } // End namespace Foam
194 
195 
196 // * * * * * * * * * * * * * * * Member Functions * * * * * * * * * * * * * //
197 
198 void Foam::profilingPstream::report(const int reportLevel)
199 {
200  const label numProc = (UPstream::parRun() ? UPstream::nProcs() : 1);
201 
202  if (numProc < 2)
203  {
204  return;
205  }
206 
207  // Use mpiGather on all values and perform the combinations
208  // and statistics locally. This reduces the overall number of MPI
209  // calls. For detailed output we need this information anyhow.
210 
211  // NB: profilingPstream uses a FixedList for timings(), counts()
212  // so sizes are guaranteed to be consistent and identical everywhere.
213 
214  List<double> allTimes;
215  List<uint64_t> allCounts;
216 
217  // Avoid disturbing any information
218  const bool oldSuspend = suspend();
219 
220  {
221  // The timings
222  const auto& procValues = times_;
223 
224  if (UPstream::master())
225  {
226  allTimes.resize(numProc * procValues.size());
227  }
228 
230  (
231  procValues.cdata_bytes(), // Send
232  allTimes.data_bytes(), // Recv
233  procValues.size_bytes(), // Num send/recv data per rank
235  );
236  }
237 
238  if (reportLevel > 1)
239  {
240  // The counts
241  const auto& procValues = counts_;
242 
243  if (UPstream::master())
244  {
245  allCounts.resize(numProc * procValues.size());
246  }
247 
249  (
250  procValues.cdata_bytes(), // Send
251  allCounts.data_bytes(), // Recv
252  procValues.size_bytes(), // Num send/recv data per rank
254  );
255  }
256 
257  // Resume if not previously suspended
258  if (!oldSuspend)
259  {
260  resume();
261  }
262 
263 
264  // (Time, Processor) for each of: min/max/sum(avg)
265  typedef FixedList<Tuple2<double, int>, 3> statData;
266 
267  // Extract min/max/average
268  auto calcStats = [](const UList<double>& data) -> statData
269  {
270  statData stats;
271  stats = Tuple2<double, int>((data.empty() ? 0 : data[0]), 0);
272 
273  const label np = data.size();
274  for (label proci = 1; proci < np; ++proci)
275  {
276  Tuple2<double, int> tup(data[proci], proci);
277 
278  // 0: min, 1: max, 2: total(avg)
279  if (stats[0].first() > tup.first()) stats[0] = tup;
280  if (stats[1].first() < tup.first()) stats[1] = tup;
281  stats[2].first() += tup.first();
282  }
283 
284  // From total -> average value
285  if (np) { stats[2].first() /= np; }
286 
287  return stats;
288  };
289 
290 
291  const auto printTimingStats =
292  [&](Ostream& os, const char* tag, const statData& stats)
293  {
294  os << indent << tag << ": avg = " << stats[2].first()
295  << ", min = " << stats[0].first()
296  << " (proc " << stats[0].second() << ')'
297  << ", max = " << stats[1].first()
298  << " (proc " << stats[1].second() << ')'
299  << nl;
300  };
301 
302 
303  if (UPstream::master())
304  {
305  Info<< "profiling(parallel):" << nl
306  << incrIndent;
307 
308  statData stats;
309  List<double> extractedTimes(numProc);
310  List<uint64_t> extractedCounts;
311 
312  if (reportLevel > 1)
313  {
314  extractedCounts.resize(numProc);
315  }
316 
317  // Total times
318  {
320  (
321  extractedTimes,
322  allTimes,
323  [=](const double values[])
324  {
325  double total = 0;
326  for (unsigned i = 0; i < timingType::nCategories; ++i)
327  {
328  total += values[i];
329  }
330  return total;
331  }
332  );
333  stats = calcStats(extractedTimes);
334 
335  printTimingStats(Info(), "total ", stats);
336  if (reportLevel > 0) printTimingDetail(extractedTimes);
337  }
338 
339  // all-all
340  {
341  const int index = int(timingType::ALL_TO_ALL);
342 
343  extractValues(extractedTimes, index, allTimes);
344  extractValues(extractedCounts, index, allCounts);
345  stats = calcStats(extractedTimes);
346 
347  printTimingStats(Info(), "all-all ", stats);
348  if (reportLevel > 0) printTimingDetail(extractedTimes);
349  if (reportLevel > 1) printTimingDetail(extractedCounts);
350  }
351 
352  // broadcast
353  {
354  const int index = int(timingType::BROADCAST);
355 
356  extractValues(extractedTimes, index, allTimes);
357  extractValues(extractedCounts, index, allCounts);
358  stats = calcStats(extractedTimes);
359 
360  printTimingStats(Info(), "broadcast ", stats);
361  if (reportLevel > 0) printTimingDetail(extractedTimes);
362  if (reportLevel > 1) printTimingDetail(extractedCounts);
363  }
364 
365  // probe
366  {
367  const int index = int(timingType::PROBE);
368 
369  extractValues(extractedTimes, index, allTimes);
370  extractValues(extractedCounts, index, allCounts);
371  stats = calcStats(extractedTimes);
372 
373  printTimingStats(Info(), "probe ", stats);
374  if (reportLevel > 0) printTimingDetail(extractedTimes);
375  if (reportLevel > 1) printTimingDetail(extractedCounts);
376  }
377 
378  // Reduce/scatter times
379  {
380  // const int index = int(timingType::REDUCE);
381 
383  (
384  extractedTimes,
385  allTimes,
386  [=](const double values[])
387  {
388  return
389  (
390  values[timingType::REDUCE]
391  + values[timingType::GATHER]
392  + values[timingType::SCATTER]
393  );
394  }
395  );
397  (
398  extractedCounts,
399  allCounts,
400  [=](const uint64_t values[])
401  {
402  return
403  (
404  values[timingType::REDUCE]
405  + values[timingType::GATHER]
406  + values[timingType::SCATTER]
407  );
408  }
409  );
410  stats = calcStats(extractedTimes);
411 
412  printTimingStats(Info(), "reduce ", stats);
413  if (reportLevel > 0) printTimingDetail(extractedTimes);
414  if (reportLevel > 1) printTimingDetail(extractedCounts);
415  }
416 
417  // Recv/send times
418  #if 0 // FUTURE?
419  {
420  // const int index = int(timingType::RECV);
421 
423  (
424  extractedTimes,
425  allTimes,
426  [=](const double values[])
427  {
428  return
429  (
430  values[timingType::RECV]
431  + values[timingType::SEND]
432  );
433  }
434  );
436  (
437  extractedCounts,
438  allCounts,
439  [=](const uint64_t values[])
440  {
441  return
442  (
443  values[timingType::RECV]
444  + values[timingType::SEND]
445  );
446  }
447  );
448  stats = calcStats(extractedTimes);
449 
450  printTimingStats(Info(), "send/recv ", stats);
451  if (reportLevel > 0) printTimingDetail(extractedTimes);
452  if (reportLevel > 1) printTimingDetail(extractedCounts);
453  }
454  #endif
455 
456  // request
457  {
458  const int index = int(timingType::REQUEST);
459 
460  extractValues(extractedTimes, index, allTimes);
461  extractValues(extractedCounts, index, allCounts);
462  stats = calcStats(extractedTimes);
463 
464  printTimingStats(Info(), "request ", stats);
465 
466  if (reportLevel > 0) printTimingDetail(extractedTimes);
467  if (reportLevel > 1) printTimingDetail(extractedCounts);
468  }
469 
470  // wait
471  {
472  const int index = int(timingType::WAIT);
473 
474  extractValues(extractedTimes, index, allTimes);
475  extractValues(extractedCounts, index, allCounts);
476  stats = calcStats(extractedTimes);
477 
478  printTimingStats(Info(), "wait ", stats);
479 
480  if (reportLevel > 0) printTimingDetail(extractedTimes);
481  if (reportLevel > 1) printTimingDetail(extractedCounts);
482  }
483 
484  Info<< decrIndent;
485  }
486 }
487 
488 
489 // ************************************************************************* //
void size(const label n)
Older name for setAddressableSize.
Definition: UList.H:116
Ostream & indent(Ostream &os)
Indent stream.
Definition: Ostream.H:493
A 1D vector of objects of type <T> with a fixed length <N>.
Definition: HashTable.H:107
constexpr char nl
The newline '\n' character (0x0a)
Definition: Ostream.H:50
bool empty() const noexcept
True if List is empty (ie, size() is zero)
Definition: UList.H:666
std::ostream & stdStream()
Return std::ostream for output operations.
static bool & parRun() noexcept
Test if this is a parallel run.
Definition: UPstream.H:1049
static List< T > extract(const word &key, const UPtrList< entry > &entries, const T &initValue)
List< T > values(const HashTable< T, Key, Hash > &tbl, const bool doSort=false)
List of values from HashTable, optionally sorted.
Definition: HashOps.H:164
static label nProcs(const label communicator=worldComm)
Number of ranks in parallel run (for given communicator). It is 1 for serial run. ...
Definition: UPstream.H:1065
static double elapsedTime()
The total of times.
static void mpiGather(const char *sendData, char *recvData, int count, const label communicator=worldComm)
Receive identically-sized char data from all ranks.
const direction noexcept
Definition: Scalar.H:258
OBJstream os(runTime.globalPath()/outputName)
Ostream & decrIndent(Ostream &os)
Decrement the indent level.
Definition: Ostream.H:511
static label commWorld() noexcept
Communicator for all ranks (respecting any local worlds)
Definition: UPstream.H:429
static void reset()
Reset times/counts. Does not affect the timer itself.
static void disable() noexcept
Remove timer for measuring communication activity. Does not affect times/counts.
static bool master(const label communicator=worldComm)
True if process corresponds to the master rank in the communicator.
Definition: UPstream.H:1082
messageStream Info
Information stream (stdout output on master, null elsewhere)
const_iterator cbegin() const noexcept
Return const_iterator to begin traversing the constant UList.
Definition: UListI.H:405
static void printTimingDetail(const UList< double > &values)
Ostream & incrIndent(Ostream &os)
Increment the indent level.
Definition: Ostream.H:502
static void report(const int reportLevel=0)
Report current information. Uses parallel communication!
static void enable()
Create timer for measuring communication or un-suspend existing.
Namespace for OpenFOAM.
Starts timing CPU usage and return elapsed time from start.
Definition: cpuTimePosix.H:52
static void extractValues(UList< Type > &result, const int index, const UList< Type > &allValues)