% IPDPS 2011 paper: combines sampling with direct instrumentation for call-path profiling of MPI programs.
@INPROCEEDINGS{szebenyi11_ea:2011:hybrid_sampling,
     author = {Szebenyi, Zolt{\'{a}}n and Gamblin, Todd and Schulz, Martin and de Supinski, Bronis R. and Wolf, Felix and Wylie, Brian J. N.},
      month = may,
      title = {Reconciling Sampling and Direct Instrumentation for Unintrusive Call-Path Profiling of {MPI} Programs},
  booktitle = {Proc. of the 25th IEEE International Parallel \& Distributed Processing Symposium (IPDPS), Anchorage, AK, USA},
       year = {2011},
      pages = {640--648},
  publisher = {IEEE Computer Society},
       isbn = {978-0-7695-4385-7},
        doi = {10.1109/IPDPS.2011.67},
   abstract = {We can profile the performance behavior of parallel
programs at the level of individual call paths through sampling or
direct instrumentation. While we can easily control measurement
dilation by adjusting the sampling frequency, the statistical
nature of sampling and the difficulty of accessing the parameters
of sampled events make it unsuitable for obtaining certain
communication metrics, such as the size of message payloads.
Alternatively, direct instrumentation, which is preferable for
capturing message-passing events, can excessively dilate measurements,
particularly for C++ programs, which often have many
short but frequently called class member functions. Thus, we
combine these techniques in a unified framework that exploits
the strengths of each approach while avoiding their weaknesses:
We use direct instrumentation to intercept MPI routines while we
record the execution of the remaining code through low-overhead
sampling. One of the main technical hurdles mastered was the
inexpensive and portable determination of call-path information
during the invocation of MPI routines. We show that the overhead
of our implementation is sufficiently low to support substantial
performance improvement of a C++ fluid-dynamics code.}
}