User API

Defines

OMNITRACE_USER_CALLBACKS_INIT

Typedefs

typedef int (*omnitrace_trace_func_t)(void)
typedef int (*omnitrace_region_func_t)(const char*)
typedef int (*omnitrace_annotated_region_func_t)(const char*, omnitrace_annotation*, unsigned long)
typedef struct omnitrace_user_callbacks omnitrace_user_callbacks_t
typedef enum OMNITRACE_USER_CONFIGURE_MODE omnitrace_user_configure_mode_t
typedef enum OMNITRACE_USER_ERROR omnitrace_user_error_t

Enums

enum OMNITRACE_USER_CONFIGURE_MODE

Identifier for errors.

Values:

enumerator OMNITRACE_USER_UNION_CONFIG

Replace the callbacks in the current config with the non-null callbacks in the provided config.

enumerator OMNITRACE_USER_REPLACE_CONFIG

Replace the entire config even if the provided config has null callbacks.

enumerator OMNITRACE_USER_INTERSECT_CONFIG

Produce a config which is the intersection of the current config and the provided config.

enumerator OMNITRACE_USER_CONFIGURE_MODE_LAST
enum OMNITRACE_USER_ERROR

Identifier for errors.

Values:

enumerator OMNITRACE_USER_SUCCESS

No error.

enumerator OMNITRACE_USER_ERROR_NO_BINDING

Function pointer was not assigned.

enumerator OMNITRACE_USER_ERROR_BAD_VALUE

Provided value was invalid.

enumerator OMNITRACE_USER_ERROR_INVALID_CATEGORY

Invalid user binding category.

enumerator OMNITRACE_USER_ERROR_INTERNAL

Internal error occurred within libomnitrace.

enumerator OMNITRACE_USER_ERROR_LAST
struct omnitrace_user_callbacks
#include “omnitrace/types.h”

Struct containing the callbacks for the user API.

Public Members

omnitrace_trace_func_t start_trace

callback for enabling tracing globally

omnitrace_trace_func_t stop_trace

callback for disabling tracing globally

omnitrace_trace_func_t start_thread_trace

callback for enabling tracing on current thread

omnitrace_trace_func_t stop_thread_trace

callback for disabling tracing on current thread

omnitrace_region_func_t push_region

callback for starting a trace region

omnitrace_region_func_t pop_region

callback for ending a trace region

omnitrace_region_func_t progress

callback for marking an causal profiling event

omnitrace_annotated_region_func_t push_annotated_region

callback for starting a trace region + annotations

omnitrace_annotated_region_func_t pop_annotated_region

callback for ending a trace region + annotations

omnitrace_annotated_region_func_t annotated_progress

callback for marking an causal profiling event + annotations

Typedefs

typedef enum OMNITRACE_CATEGORIES omnitrace_category_t

Identifier for categories.

typedef enum OMNITRACE_ANNOTATION_TYPE omnitrace_annotation_type_t
typedef struct omnitrace_annotation omnitrace_annotation_t

Enums

enum OMNITRACE_CATEGORIES

Values:

enumerator OMNITRACE_CATEGORY_NONE
enumerator OMNITRACE_CATEGORY_PYTHON
enumerator OMNITRACE_CATEGORY_USER
enumerator OMNITRACE_CATEGORY_HOST
enumerator OMNITRACE_CATEGORY_DEVICE_HIP
enumerator OMNITRACE_CATEGORY_DEVICE_HSA
enumerator OMNITRACE_CATEGORY_ROCM_HIP
enumerator OMNITRACE_CATEGORY_ROCM_HSA
enumerator OMNITRACE_CATEGORY_ROCM_ROCTX
enumerator OMNITRACE_CATEGORY_ROCM_SMI
enumerator OMNITRACE_CATEGORY_ROCM_SMI_BUSY
enumerator OMNITRACE_CATEGORY_ROCM_SMI_TEMP
enumerator OMNITRACE_CATEGORY_ROCM_SMI_POWER
enumerator OMNITRACE_CATEGORY_ROCM_SMI_MEMORY_USAGE
enumerator OMNITRACE_CATEGORY_ROCM_RCCL
enumerator OMNITRACE_CATEGORY_ROCTRACER
enumerator OMNITRACE_CATEGORY_ROCPROFILER
enumerator OMNITRACE_CATEGORY_SAMPLING
enumerator OMNITRACE_CATEGORY_PTHREAD
enumerator OMNITRACE_CATEGORY_KOKKOS
enumerator OMNITRACE_CATEGORY_MPI
enumerator OMNITRACE_CATEGORY_OMPT
enumerator OMNITRACE_CATEGORY_PROCESS_SAMPLING
enumerator OMNITRACE_CATEGORY_COMM_DATA
enumerator OMNITRACE_CATEGORY_CAUSAL
enumerator OMNITRACE_CATEGORY_CPU_FREQ
enumerator OMNITRACE_CATEGORY_PROCESS_PAGE
enumerator OMNITRACE_CATEGORY_PROCESS_VIRT
enumerator OMNITRACE_CATEGORY_PROCESS_PEAK
enumerator OMNITRACE_CATEGORY_PROCESS_CONTEXT_SWITCH
enumerator OMNITRACE_CATEGORY_PROCESS_PAGE_FAULT
enumerator OMNITRACE_CATEGORY_PROCESS_USER_MODE_TIME
enumerator OMNITRACE_CATEGORY_PROCESS_KERNEL_MODE_TIME
enumerator OMNITRACE_CATEGORY_THREAD_WALL_TIME
enumerator OMNITRACE_CATEGORY_THREAD_CPU_TIME
enumerator OMNITRACE_CATEGORY_THREAD_PAGE_FAULT
enumerator OMNITRACE_CATEGORY_THREAD_PEAK_MEMORY
enumerator OMNITRACE_CATEGORY_THREAD_CONTEXT_SWITCH
enumerator OMNITRACE_CATEGORY_THREAD_HARDWARE_COUNTER
enumerator OMNITRACE_CATEGORY_KERNEL_HARDWARE_COUNTER
enumerator OMNITRACE_CATEGORY_NUMA
enumerator OMNITRACE_CATEGORY_TIMER_SAMPLING
enumerator OMNITRACE_CATEGORY_OVERFLOW_SAMPLING
enumerator OMNITRACE_CATEGORY_LAST
enum OMNITRACE_ANNOTATION_TYPE

Identifier for the data type of the annotation. if the data type is not a pointer, pass the address of data.

Values:

enumerator OMNITRACE_VALUE_NONE
enumerator OMNITRACE_VALUE_CSTR
enumerator OMNITRACE_STRING
enumerator OMNITRACE_VALUE_SIZE_T
enumerator OMNITRACE_SIZE_T
enumerator OMNITRACE_VALUE_INT64
enumerator OMNITRACE_INT64
enumerator OMNITRACE_I64
enumerator OMNITRACE_VALUE_UINT64
enumerator OMNITRACE_UINT64
enumerator OMNITRACE_U64
enumerator OMNITRACE_VALUE_FLOAT64
enumerator OMNITRACE_FLOAT64
enumerator OMNITRACE_FP64
enumerator OMNITRACE_VALUE_VOID_P
enumerator OMNITRACE_VOID_P
enumerator OMNITRACE_PTR
enumerator OMNITRACE_VALUE_INT32
enumerator OMNITRACE_INT32
enumerator OMNITRACE_I32
enumerator OMNITRACE_VALUE_UINT32
enumerator OMNITRACE_UINT32
enumerator OMNITRACE_U32
enumerator OMNITRACE_VALUE_FLOAT32
enumerator OMNITRACE_FLOAT32
enumerator OMNITRACE_FP32
enumerator OMNITRACE_VALUE_INT16
enumerator OMNITRACE_INT16
enumerator OMNITRACE_I16
enumerator OMNITRACE_VALUE_UINT16
enumerator OMNITRACE_UINT16
enumerator OMNITRACE_U16
enumerator OMNITRACE_VALUE_LAST
struct omnitrace_annotation
#include “omnitrace/categories.h”

A struct containing annotation data to be included in the perfetto trace.

#include <cstddef>
#include <cstdint>

double
compute_residual(size_t n, double* data);

double
compute(size_t n, double* data, size_t nitr, double tolerance)
{
    omnitrace_annotation_t _annotations[] = {
        { "iteration", OMNITRACE_VALUE_SIZE_T, nullptr },
        { "residual", OMNITRACE_VALUE_FLOAT64, nullptr },
        { "data", OMNITRACE_VALUE_PTR, data },
        { "size", OMNITRACE_VALUE_SIZE_T, &n },
        { "tolerance", OMNITRACE_VALUE_FLOAT64, &tolerance },
        nullptr
    };

    double residual = tolerance;
    for(size_t i = 0; i < nitr; ++i)
    {
        omnitrace_user_push_annotated_region("compute", &_annotations);

        residual = compute_residual(n, data);

        _annotations[0].value = &i;
        _annotations[1].value = &residual;
        omnitrace_user_pop_annotated_region("compute", &_annotations);
    }

    return residual;
}

Public Members

const char *name

label for annotation

unsigned long type

omnitrace_annotation_type_t

void *value

data to annotate

Functions

int omnitrace_user_start_trace(void)

Enable tracing on this thread and all subsequently created threads.

Returns

omnitrace_user_error_t value

int omnitrace_user_stop_trace(void)

Disable tracing on this thread and all subsequently created threads.

Returns

omnitrace_user_error_t value

int omnitrace_user_start_thread_trace(void)

Enable tracing on this specific thread. Does not apply to subsequently created threads.

Returns

omnitrace_user_error_t value

int omnitrace_user_stop_thread_trace(void)

Disable tracing on this specific thread. Does not apply to subsequently created threads.

Returns

omnitrace_user_error_t value

int omnitrace_user_push_region(const char*)

Start a user defined region.

Parameters

id – The string identifier for the region

Returns

omnitrace_user_error_t value

int omnitrace_user_pop_region(const char*)

End a user defined region. In general, user regions should be popped in the inverse order that they were pushed, i.e. first-in, last-out (FILO). The timemory backend was designed to accommodate asynchronous tasking, where FILO may be violated, and will thus compenstate for out-of-order popping, however, the perfetto backend will not; thus, out-of-order popping will result in different results in timemory vs. perfetto.

Parameters

id – The string identifier for the region

Returns

omnitrace_user_error_t value

int omnitrace_user_push_annotated_region(const char*, omnitrace_annotation_t*, unsigned long)
int omnitrace_user_pop_annotated_region(const char*, omnitrace_annotation_t*, unsigned long)
int omnitrace_user_progress(const char*)

mark causal progress

int omnitrace_user_annotated_progress(const char*, omnitrace_annotation_t*, unsigned long)

mark causal progress with annotations

int omnitrace_user_configure(omnitrace_user_configure_mode_t mode, omnitrace_user_callbacks_t inp, omnitrace_user_callbacks_t *out)

Configure the function pointers invoked by the omnitrace user API. The initial callbacks are set via the omnitrace-dl library when it is loaded but the user can user this feature to turn on/off the user API or customize how the the user callbacks occur. For example, the user could maintain one set of callbacks which discard any annotation data or redirect all unannotated user regions to the annotated user regions with annotations about some global state. Changing the callbacks is thread-safe but not thread-local.

Parameters
  • mode[in] Specifies how the new callbacks are merged with the old callbacks

  • inp[in] An omnitrace_user_callbacks instance specifying the callbacks which should be invoked by the user API.

  • out[out] Pointer to omnitrace_user_callbacks which, when non-NULL, will be assigned the former callbacks.

Returns

omnitrace_user_error_t value

int omnitrace_user_get_callbacks(omnitrace_user_callbacks_t*)
const char *omnitrace_user_error_string(int)

Return a descriptor for the provided error code.

Parameters

error_category – OMNITRACE_USER_ERROR value

Returns

String descripting the error code

Defines

OMNITRACE_CAUSAL_ENABLED

Preprocessor switch to enable/disable instrumentation for causal profiling

OMNITRACE_CAUSAL_LABEL

Default label for a causal progress point

OMNITRACE_CAUSAL_PROGRESS

Adds a throughput progress point with label <file>:<line>

OMNITRACE_CAUSAL_PROGRESS_NAMED(LABEL)

Adds a throughput progress point with user defined label. Each instance should use a unique label.

OMNITRACE_CAUSAL_BEGIN(LABEL)

Starts a latency progress point (region of interest) with user defined label. Each instance should use a unique label.

OMNITRACE_CAUSAL_END(LABEL)

End the latency progress point (region of interest) for the matching user defined label.

By default, when omnitrace detects any omnitrace_user_start_* or omnitrace_user_stop_* function, instrumentation is disabled at start-up – thus, omnitrace_user_stop_trace() is not required at the beginning of main. This is can be manually controlled via the OMNITRACE_INIT_ENABLED environment variable. User-defined regions are always recorded, regardless of whether whether omnitrace_user_start_* or omnitrace_user_stop_* has been called.

Example

Compilation

CMake

find_package(omnitrace REQUIRED COMPONENTS user)

add_executable(foo foo.cpp)

target_link_libraries(foo PRIVATE omnitrace::omnitrace-user-library)

General

Assuming omnitrace installed in /opt/omnitrace:

g++ -I/opt/omnitrace foo.cpp -o foo -lomnitrace-user

User API Implementation

#include <omnitrace/categories.h>
#include <omnitrace/types.h>
#include <omnitrace/user.h>

#include <atomic>
#include <cassert>
#include <cerrno>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <sstream>
#include <thread>
#include <vector>

std::atomic<long> total{ 0 };

long
fib(long n) __attribute__((noinline));

void
run(size_t nitr, long) __attribute__((noinline));

int
custom_push_region(const char* name);

namespace
{
omnitrace_user_callbacks_t custom_callbacks   = OMNITRACE_USER_CALLBACKS_INIT;
omnitrace_user_callbacks_t original_callbacks = OMNITRACE_USER_CALLBACKS_INIT;
}  // namespace

int
main(int argc, char** argv)
{
    custom_callbacks.push_region = &custom_push_region;
    omnitrace_user_configure(OMNITRACE_USER_UNION_CONFIG, custom_callbacks,
                             &original_callbacks);

    omnitrace_user_push_region(argv[0]);
    omnitrace_user_push_region("initialization");
    size_t nthread = std::min<size_t>(16, std::thread::hardware_concurrency());
    size_t nitr    = 50000;
    long   nfib    = 10;
    if(argc > 1) nfib = atol(argv[1]);
    if(argc > 2) nthread = atol(argv[2]);
    if(argc > 3) nitr = atol(argv[3]);
    omnitrace_user_pop_region("initialization");

    printf("[%s] Threads: %zu\n[%s] Iterations: %zu\n[%s] fibonacci(%li)...\n", argv[0],
           nthread, argv[0], nitr, argv[0], nfib);

    omnitrace_user_push_region("thread_creation");
    std::vector<std::thread> threads{};
    threads.reserve(nthread);
    // disable instrumentation for child threads
    omnitrace_user_stop_thread_trace();
    for(size_t i = 0; i < nthread; ++i)
    {
        threads.emplace_back(&run, nitr, nfib);
    }
    // re-enable instrumentation
    omnitrace_user_start_thread_trace();
    omnitrace_user_pop_region("thread_creation");

    omnitrace_user_push_region("thread_wait");
    for(auto& itr : threads)
        itr.join();
    omnitrace_user_pop_region("thread_wait");

    run(nitr, nfib);

    printf("[%s] fibonacci(%li) x %lu = %li\n", argv[0], nfib, nthread, total.load());
    omnitrace_user_pop_region(argv[0]);

    return 0;
}

long
fib(long n)
{
    return (n < 2) ? n : fib(n - 1) + fib(n - 2);
}

#define RUN_LABEL                                                                        \
    std::string{ std::string{ __FUNCTION__ } + "(" + std::to_string(n) + ") x " +        \
                 std::to_string(nitr) }                                                  \
        .c_str()

void
run(size_t nitr, long n)
{
    omnitrace_user_push_region(RUN_LABEL);
    long local = 0;
    for(size_t i = 0; i < nitr; ++i)
        local += fib(n);
    total += local;
    omnitrace_user_pop_region(RUN_LABEL);
}

int
custom_push_region(const char* name)
{
    if(!original_callbacks.push_region || !original_callbacks.push_annotated_region)
        return OMNITRACE_USER_ERROR_NO_BINDING;

    printf("Pushing custom region :: %s\n", name);

    if(original_callbacks.push_annotated_region)
    {
        int32_t _err = errno;
        char*   _msg = nullptr;
        char    _buff[1024];
        if(_err != 0) _msg = strerror_r(_err, _buff, sizeof(_buff));

        omnitrace_annotation_t _annotations[] = {
            { "errno", OMNITRACE_INT32, &_err }, { "strerror", OMNITRACE_STRING, _msg }
        };

        errno = 0;  // reset errno
        return (*original_callbacks.push_annotated_region)(
            name, _annotations, sizeof(_annotations) / sizeof(omnitrace_annotation_t));
    }

    return (*original_callbacks.push_region)(name);
}

User API Output

$ omnitrace-instrument -l --min-instructions=8 -E custom_push_region -o -- ./user-api
...
$ omnitrace-run --profile --use-pid off --time-output off -- ./user-api.inst 20 4 100
Pushing custom region :: ./user-api.inst
[omnitrace][omnitrace_init_tooling] Instrumentation mode: Trace


      ______   .___  ___. .__   __.  __  .___________..______          ___       ______  _______
     /  __  \  |   \/   | |  \ |  | |  | |           ||   _  \        /   \     /      ||   ____|
    |  |  |  | |  \  /  | |   \|  | |  | `---|  |----`|  |_)  |      /  ^  \   |  ,----'|  |__
    |  |  |  | |  |\/|  | |  . `  | |  |     |  |     |      /      /  /_\  \  |  |     |   __|
    |  `--'  | |  |  |  | |  |\   | |  |     |  |     |  |\  \----./  _____  \ |  `----.|  |____
     \______/  |__|  |__| |__| \__| |__|     |__|     | _| `._____/__/     \__\ \______||_______|



Pushing custom region :: initialization
[./user-api.inst] Threads: 4
[./user-api.inst] Iterations: 100
[./user-api.inst] fibonacci(20)...
Pushing custom region :: thread_creation
Pushing custom region :: thread_wait
Pushing custom region :: run(20) x 100
Pushing custom region :: run(20) x 100
Pushing custom region :: run(20) x 100
Pushing custom region :: run(20) x 100
Pushing custom region :: run(20) x 100
[./user-api.inst] fibonacci(20) x 4 = 3382500
[omnitrace][86267][0][omnitrace_finalize] finalizing...


[omnitrace][86267][0] omnitrace : 5.190895 sec wall_clock,    2.748 mb peak_rss, 6.330000 sec cpu_clock,  121.9 % cpu_util [laps: 1]
[omnitrace][86267][0] user-api.inst/thread-0 : 5.078713 sec wall_clock, 4.722415 sec thread_cpu_clock,   93.0 % thread_cpu_util,    1.276 mb peak_rss [laps: 1]
[omnitrace][86267][0] user-api.inst/thread-1 : 0.322248 sec wall_clock, 0.322191 sec thread_cpu_clock,  100.0 % thread_cpu_util,    1.000 mb peak_rss [laps: 1]
[omnitrace][86267][0] user-api.inst/thread-2 : 0.323255 sec wall_clock, 0.323194 sec thread_cpu_clock,  100.0 % thread_cpu_util,    0.000 mb peak_rss [laps: 1]
[omnitrace][86267][0] user-api.inst/thread-3 : 0.323569 sec wall_clock, 0.323484 sec thread_cpu_clock,  100.0 % thread_cpu_util,    1.092 mb peak_rss [laps: 1]
[omnitrace][86267][0] user-api.inst/thread-4 : 0.324178 sec wall_clock, 0.324057 sec thread_cpu_clock,  100.0 % thread_cpu_util,    1.184 mb peak_rss [laps: 1]
[omnitrace][86267][0] Post-processing 51 cpu frequency and memory usage entries...

[omnitrace][wall_clock]|0> Outputting 'omnitrace-user-api.inst-output/wall_clock.json'...
[omnitrace][wall_clock]|0> Outputting 'omnitrace-user-api.inst-output/wall_clock.tree.json'...
[omnitrace][wall_clock]|0> Outputting 'omnitrace-user-api.inst-output/wall_clock.txt'...

[omnitrace][manager::finalize][metadata]> Outputting 'omnitrace-user-api.inst-output/metadata.json' and 'omnitrace-user-api.inst-output/functions.json'...
[omnitrace][86267][0][omnitrace_finalize] Finalized
$ cat omnitrace-example-output/wall_clock.txt
|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|                                                                              REAL-CLOCK TIMER (I.E. WALL-CLOCK TIMER)                                                                              |
|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|                                     LABEL                                       | COUNT  | DEPTH  |   METRIC   | UNITS  |   SUM    |   MEAN   |   MIN    |   MAX    |   VAR    | STDDEV   | % SELF |
|---------------------------------------------------------------------------------|--------|--------|------------|--------|----------|----------|----------|----------|----------|----------|--------|
| |0>>> ./user-api.inst                                                           |      1 |      0 | wall_clock | sec    | 5.078521 | 5.078521 | 5.078521 | 5.078521 | 0.000000 | 0.000000 |    0.0 |
| |0>>> |_initialization                                                          |      1 |      1 | wall_clock | sec    | 0.000004 | 0.000004 | 0.000004 | 0.000004 | 0.000000 | 0.000000 |  100.0 |
| |0>>> |_thread_creation                                                         |      1 |      1 | wall_clock | sec    | 0.000159 | 0.000159 | 0.000159 | 0.000159 | 0.000000 | 0.000000 |  100.0 |
| |0>>> |_thread_wait                                                             |      1 |      1 | wall_clock | sec    | 0.355307 | 0.355307 | 0.355307 | 0.355307 | 0.000000 | 0.000000 |    0.0 |
| |0>>>   |_std::vector<std::thread, std::allocator<std::thread> >::begin         |      1 |      2 | wall_clock | sec    | 0.000001 | 0.000001 | 0.000001 | 0.000001 | 0.000000 | 0.000000 |  100.0 |
| |0>>>   |_std::vector<std::thread, std::allocator<std::thread> >::end           |      1 |      2 | wall_clock | sec    | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |  100.0 |
| |0>>>   |_pthread_join                                                          |      4 |      2 | wall_clock | sec    | 0.355257 | 0.088814 | 0.000001 | 0.333144 | 0.026559 | 0.162970 |  100.0 |
| |2>>>     |_start_thread                                                        |      1 |      3 | wall_clock | sec    | 0.000032 | 0.000032 | 0.000032 | 0.000032 | 0.000000 | 0.000000 |  100.0 |
| |1>>>     |_start_thread                                                        |      1 |      3 | wall_clock | sec    | 0.000036 | 0.000036 | 0.000036 | 0.000036 | 0.000000 | 0.000000 |  100.0 |
| |3>>>     |_start_thread                                                        |      1 |      3 | wall_clock | sec    | 0.000034 | 0.000034 | 0.000034 | 0.000034 | 0.000000 | 0.000000 |  100.0 |
| |4>>>     |_start_thread                                                        |      1 |      3 | wall_clock | sec    | 0.000039 | 0.000039 | 0.000039 | 0.000039 | 0.000000 | 0.000000 |  100.0 |
| |0>>> |_run                                                                     |      1 |      1 | wall_clock | sec    | 4.722993 | 4.722993 | 4.722993 | 4.722993 | 0.000000 | 0.000000 |    0.0 |
| |0>>>   |_std::char_traits<char>::length                                        |      1 |      2 | wall_clock | sec    | 0.000001 | 0.000001 | 0.000001 | 0.000001 | 0.000000 | 0.000000 |  100.0 |
| |0>>>   |_std::distance<char const*>                                            |      1 |      2 | wall_clock | sec    | 0.000001 | 0.000001 | 0.000001 | 0.000001 | 0.000000 | 0.000000 |  100.0 |
| |0>>>   |_std::operator+<char, std::char_traits<char>, std::allocator<char> >   |      2 |      2 | wall_clock | sec    | 0.000002 | 0.000001 | 0.000001 | 0.000001 | 0.000000 | 0.000000 |  100.0 |
| |0>>>   |_run(20) x 100                                                         |      1 |      2 | wall_clock | sec    | 4.722951 | 4.722951 | 4.722951 | 4.722951 | 0.000000 | 0.000000 |    0.0 |
| |0>>>     |_run [{94,25}-{96,25}]                                               |      1 |      3 | wall_clock | sec    | 4.722925 | 4.722925 | 4.722925 | 4.722925 | 0.000000 | 0.000000 |    0.0 |
| |0>>>       |_fib                                                               |    100 |      4 | wall_clock | sec    | 4.722718 | 0.047227 | 0.046713 | 0.051987 | 0.000000 | 0.000625 |    0.0 |
| |0>>>         |_fib                                                             |    200 |      5 | wall_clock | sec    | 4.722302 | 0.023612 | 0.017827 | 0.034091 | 0.000032 | 0.005627 |    0.0 |
| |0>>>           |_fib                                                           |    400 |      6 | wall_clock | sec    | 4.721485 | 0.011804 | 0.006790 | 0.023003 | 0.000016 | 0.004024 |    0.0 |
| |0>>>             |_fib                                                         |    800 |      7 | wall_clock | sec    | 4.719858 | 0.005900 | 0.002564 | 0.016078 | 0.000006 | 0.002498 |    0.1 |
| |0>>>               |_fib                                                       |   1600 |      8 | wall_clock | sec    | 4.716572 | 0.002948 | 0.000977 | 0.011849 | 0.000002 | 0.001465 |    0.1 |
| |0>>>                 |_fib                                                     |   3200 |      9 | wall_clock | sec    | 4.709918 | 0.001472 | 0.000371 | 0.008246 | 0.000001 | 0.000831 |    0.3 |
| |0>>>                   |_fib                                                   |   6400 |     10 | wall_clock | sec    | 4.696775 | 0.000734 | 0.000140 | 0.005111 | 0.000000 | 0.000461 |    0.6 |
| |0>>>                     |_fib                                                 |  12800 |     11 | wall_clock | sec    | 4.670093 | 0.000365 | 0.000050 | 0.003166 | 0.000000 | 0.000253 |    1.1 |
| |0>>>                       |_fib                                               |  25600 |     12 | wall_clock | sec    | 4.617496 | 0.000180 | 0.000017 | 0.001959 | 0.000000 | 0.000137 |    2.3 |
| |0>>>                         |_fib                                             |  51200 |     13 | wall_clock | sec    | 4.512671 | 0.000088 | 0.000004 | 0.001212 | 0.000000 | 0.000074 |    4.6 |
| |0>>>                           |_fib                                           | 102400 |     14 | wall_clock | sec    | 4.304142 | 0.000042 | 0.000000 | 0.000752 | 0.000000 | 0.000039 |    9.6 |
| |0>>>                             |_fib                                         | 202600 |     15 | wall_clock | sec    | 3.892580 | 0.000019 | 0.000000 | 0.000469 | 0.000000 | 0.000021 |   19.0 |
| |0>>>                               |_fib                                       | 363200 |     16 | wall_clock | sec    | 3.151143 | 0.000009 | 0.000000 | 0.000293 | 0.000000 | 0.000011 |   33.2 |
| |0>>>                                 |_fib                                     | 502000 |     17 | wall_clock | sec    | 2.105217 | 0.000004 | 0.000000 | 0.000183 | 0.000000 | 0.000006 |   49.1 |
| |0>>>                                   |_fib                                   | 476000 |     18 | wall_clock | sec    | 1.071652 | 0.000002 | 0.000000 | 0.000114 | 0.000000 | 0.000004 |   63.6 |
| |0>>>                                     |_fib                                 | 294200 |     19 | wall_clock | sec    | 0.390193 | 0.000001 | 0.000000 | 0.000071 | 0.000000 | 0.000003 |   75.3 |
| |0>>>                                       |_fib                               | 115200 |     20 | wall_clock | sec    | 0.096190 | 0.000001 | 0.000000 | 0.000043 | 0.000000 | 0.000002 |   84.4 |
| |0>>>                                         |_fib                             |  27400 |     21 | wall_clock | sec    | 0.015020 | 0.000001 | 0.000000 | 0.000025 | 0.000000 | 0.000001 |   91.1 |
| |0>>>                                           |_fib                           |   3600 |     22 | wall_clock | sec    | 0.001336 | 0.000000 | 0.000000 | 0.000013 | 0.000000 | 0.000001 |   96.3 |
| |0>>>                                             |_fib                         |    200 |     23 | wall_clock | sec    | 0.000050 | 0.000000 | 0.000000 | 0.000001 | 0.000000 | 0.000000 |  100.0 |
| |0>>>     |_std::char_traits<char>::length                                      |      1 |      3 | wall_clock | sec    | 0.000001 | 0.000001 | 0.000001 | 0.000001 | 0.000000 | 0.000000 |  100.0 |
| |0>>>     |_std::distance<char const*>                                          |      1 |      3 | wall_clock | sec    | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |  100.0 |
| |0>>>     |_std::operator+<char, std::char_traits<char>, std::allocator<char> > |      2 |      3 | wall_clock | sec    | 0.000001 | 0.000001 | 0.000000 | 0.000001 | 0.000000 | 0.000000 |  100.0 |
| |0>>> |_std::operator&                                                          |      1 |      1 | wall_clock | sec    | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |  100.0 |
| |0>>> std::vector<std::thread, std::allocator<std::thread> >::~vector           |      1 |      0 | wall_clock | sec    | 0.000045 | 0.000045 | 0.000045 | 0.000045 | 0.000000 | 0.000000 |   32.7 |
| |0>>> |_std::thread::~thread                                                    |      4 |      1 | wall_clock | sec    | 0.000030 | 0.000007 | 0.000007 | 0.000009 | 0.000000 | 0.000001 |   31.2 |
| |0>>>   |_std::thread::joinable                                                 |      4 |      2 | wall_clock | sec    | 0.000021 | 0.000005 | 0.000005 | 0.000006 | 0.000000 | 0.000001 |   89.4 |
| |0>>>     |_std::thread::id::id                                                 |      4 |      3 | wall_clock | sec    | 0.000001 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |  100.0 |
| |0>>>     |_std::operator==                                                     |      4 |      3 | wall_clock | sec    | 0.000001 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |  100.0 |
| |0>>> |_std::allocator_traits<std::allocator<std::thread> >::deallocate         |      1 |      1 | wall_clock | sec    | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |  100.0 |
| |0>>> |_std::allocator<std::thread>::~allocator                                 |      1 |      1 | wall_clock | sec    | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |  100.0 |
|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|