diff options
Diffstat (limited to 'pkfuncinstr.h')
| -rw-r--r-- | pkfuncinstr.h | 311 |
1 files changed, 311 insertions, 0 deletions
diff --git a/pkfuncinstr.h b/pkfuncinstr.h new file mode 100644 index 0000000..0f76eed --- /dev/null +++ b/pkfuncinstr.h @@ -0,0 +1,311 @@ +#ifndef PK_PKFUNCINSTR_H +#define PK_PKFUNCINSTR_H + +#include "./pktmr.h" /* deleteme */ + +#define PK_FUNCINSTR_CHILDREN_INCREMENT_COUNT 8 + +struct pk_funcinstr; +struct pk_funcinstr { + void *fn; + struct pk_tmr tmr; + struct pk_funcinstr *parent; + struct pk_funcinstr *first_child; + struct pk_funcinstr **children; + size_t n_children; + size_t r_children; +}; + +void pk_funcinstr_init(); +void pk_funcinstr_teardown(); + +#if defined(__cplusplus) +extern "C" { +#endif + +#if defined(__clang__) +// clang +#elif defined(__GNUC__) || defined(__GNUG__) + +void __cyg_profile_func_enter(void* this_fn, void* call_site); +void __cyg_profile_func_exit(void* this_fn, void* call_site); + +#else +// other +#endif + +#if defined(__cplusplus) +} // extern "C" +#endif + +#endif /* PK_PKFUNCINSTR_H */ +#if defined(PK_IMPL_FUNCINSTR) + +#include <assert.h> +#include <stdio.h> +#include <threads.h> +#include <string.h> + +// TODO 2025-06-02 JCB +// There's some speed improvements that can be made here by growing faster. +// OR use some type of bucket +// - might be a good chance to isolate some of the pkmem logic + +#define PK_FUNCINSTR_BKT_START_COUNT 4 +#define PK_FUNCINSTR_BKT_GROW_AMOUNT 4 +#define PK_FUNCINSTR_BKT_DATA_COUNT 255 +struct pk_funcinstr_bkt { + uint8_t used_count; + uint8_t guard_enter; + uint8_t guard_exit; + struct timespec reset_time; + struct pk_funcinstr data[PK_FUNCINSTR_BKT_DATA_COUNT+1]; +}; +struct pk_funcinstr_mstr { + mtx_t mtx; + struct timespec reset_time; + struct pk_funcinstr_bkt **buckets; + size_t r_buckets; + size_t n_buckets; +}; +// if NULL, get a new bucket (or alloc if full). if !NULL, existing thread +static thread_local struct pk_funcinstr_bkt *pk_funcinstr_thrd_bkt = NULL; +// last function call (should be NULL or parent of current) +static thread_local struct pk_funcinstr *pk_funcinstr_thrd_instr = NULL; +static struct pk_funcinstr_mstr thrd_mstr; + +__attribute__((no_instrument_function)) +void pk_funcinstr_init() { + assert(thrd_mstr.reset_time.tv_sec == 0); + assert(thrd_mstr.reset_time.tv_nsec == 0); + assert(thrd_mstr.buckets == NULL); + assert(thrd_mstr.r_buckets == 0); + assert(thrd_mstr.n_buckets == 0); + mtx_init(&thrd_mstr.mtx, mtx_plain); + thrd_mstr.r_buckets = PK_FUNCINSTR_BKT_START_COUNT; + thrd_mstr.buckets = (struct pk_funcinstr_bkt**)aligned_alloc(alignof(struct pk_funcinstr_bkt *), (sizeof(struct pk_funcinstr_bkt *) * PK_FUNCINSTR_BKT_START_COUNT)); + memset(thrd_mstr.buckets, 0, (sizeof(struct pk_funcinstr_bkt *) * PK_FUNCINSTR_BKT_START_COUNT)); + clock_gettime(PK_TMR_CLOCK, &thrd_mstr.reset_time); +} + +__attribute__((no_instrument_function)) +void pk_funcinstr_teardown() { + size_t i, k; + mtx_lock(&thrd_mstr.mtx); + for (i = 0; i < thrd_mstr.n_buckets; ++i) { + struct pk_funcinstr_bkt *bkt = thrd_mstr.buckets[i]; + for (k = 0; k < bkt->used_count; ++k) { + free(bkt->data[k].children); + } + } + free(thrd_mstr.buckets); + thrd_mstr.reset_time.tv_sec = 0; + thrd_mstr.reset_time.tv_nsec = 0; + thrd_mstr.buckets = NULL; + thrd_mstr.r_buckets = 0; + thrd_mstr.n_buckets = 0; + mtx_unlock(&thrd_mstr.mtx); + mtx_destroy(&thrd_mstr.mtx); +} + +#if defined(__clang__) +// TODO clang XRay +// Come up with pk macros since XRay requires attributes to instrument? +#elif defined(__GNUC__) || defined(__GNUG__) + +#ifndef __USE_GNU + #define __USE_GNU +#endif +#if defined(__cplusplus) +#include <cxxabi.h> +#endif +#include <dlfcn.h> +#include <link.h> +#include <string.h> + +__attribute__((no_instrument_function)) +bool pk_funcinstr_detect_not_initialized() { + if (thrd_mstr.buckets == NULL) return true; + if (thrd_mstr.r_buckets == 0) return true; + return false; +} + +__attribute__((no_instrument_function)) +void pk_funcinstr_detect_and_handle_reset() { + bool should_hard_reset = false; + bool should_reset = pk_funcinstr_thrd_bkt == NULL; + if (pk_funcinstr_thrd_bkt != NULL) { + should_reset = pk_funcinstr_thrd_bkt->used_count == PK_FUNCINSTR_BKT_DATA_COUNT; + should_hard_reset = thrd_mstr.reset_time.tv_sec > pk_funcinstr_thrd_bkt->reset_time.tv_sec; + should_hard_reset = should_hard_reset || (thrd_mstr.reset_time.tv_sec == pk_funcinstr_thrd_bkt->reset_time.tv_sec && thrd_mstr.reset_time.tv_nsec > pk_funcinstr_thrd_bkt->reset_time.tv_nsec); + } + if (should_hard_reset) { + pk_funcinstr_thrd_bkt = NULL; + pk_funcinstr_thrd_instr = NULL; + should_reset = true; + } + if (should_reset) { + if (thrd_mstr.n_buckets == thrd_mstr.r_buckets) { + mtx_lock(&thrd_mstr.mtx); + thrd_mstr.r_buckets += PK_FUNCINSTR_BKT_GROW_AMOUNT; + struct pk_funcinstr_bkt **buckets = (struct pk_funcinstr_bkt**)aligned_alloc(alignof(void *), sizeof(void *) * thrd_mstr.r_buckets); + memcpy(buckets, thrd_mstr.buckets, sizeof(void *) * (thrd_mstr.n_buckets)); + memset((char*)buckets + (sizeof(void *) * (thrd_mstr.n_buckets)), 0, (sizeof(void *) * thrd_mstr.r_buckets) - sizeof(void *) * (thrd_mstr.n_buckets)); + free(thrd_mstr.buckets); + thrd_mstr.buckets = buckets; + mtx_unlock(&thrd_mstr.mtx); + } + struct pk_funcinstr_bkt *bkt = (struct pk_funcinstr_bkt *)aligned_alloc(alignof(struct pk_funcinstr_bkt), sizeof(struct pk_funcinstr_bkt)); + bkt->used_count = 0; + bkt->guard_enter = 0; + bkt->guard_exit = 0; + bkt->reset_time.tv_sec = 0; + bkt->reset_time.tv_nsec = 0; + if (pk_funcinstr_thrd_bkt != NULL) { + pk_funcinstr_thrd_bkt->guard_enter = 0; + pk_funcinstr_thrd_bkt->guard_exit = 0; + } + pk_funcinstr_thrd_bkt = bkt; + mtx_lock(&thrd_mstr.mtx); + thrd_mstr.buckets[thrd_mstr.n_buckets++] = bkt; + mtx_unlock(&thrd_mstr.mtx); + clock_gettime(PK_TMR_CLOCK, &pk_funcinstr_thrd_bkt->reset_time); + } +} + +__attribute__((no_instrument_function)) +bool pk_funcinstr_should_early_exit() { + if (pk_funcinstr_thrd_bkt->guard_enter != 0) return true; + if (pk_funcinstr_thrd_bkt->guard_exit != 0) return true; + return false; +} + +__attribute__((no_instrument_function)) +struct pk_funcinstr *pk_funcinstr_create_funcinstr(void *this_fn) { + struct pk_funcinstr *funcinstr = &pk_funcinstr_thrd_bkt->data[pk_funcinstr_thrd_bkt->used_count]; + pk_funcinstr_thrd_bkt->used_count++; + pk_tmr_start(funcinstr->tmr); + funcinstr->fn = this_fn; + funcinstr->parent = pk_funcinstr_thrd_instr; + funcinstr->children = NULL; + funcinstr->first_child = NULL; + funcinstr->n_children = 0; + funcinstr->r_children = 0; + + if (pk_funcinstr_thrd_instr != NULL) { + if (pk_funcinstr_thrd_instr->first_child == NULL) { + // avoid an malloc if n_children will only == 1 + pk_funcinstr_thrd_instr->first_child = funcinstr; + } else { + if (pk_funcinstr_thrd_instr->n_children == pk_funcinstr_thrd_instr->r_children) { + pk_funcinstr_thrd_instr->r_children += PK_FUNCINSTR_CHILDREN_INCREMENT_COUNT; + struct pk_funcinstr **children = (struct pk_funcinstr **)aligned_alloc(alignof(void *), sizeof(void *) * pk_funcinstr_thrd_instr->r_children); + memset((char*)children + (sizeof(void *) * (pk_funcinstr_thrd_instr->n_children)), 0, (sizeof(void *) * pk_funcinstr_thrd_instr->r_children) - sizeof(void *) * (pk_funcinstr_thrd_instr->n_children)); + if (pk_funcinstr_thrd_instr->children != NULL) { + memcpy(children, pk_funcinstr_thrd_instr->children, sizeof(void *) * pk_funcinstr_thrd_instr->n_children); + free(pk_funcinstr_thrd_instr->children); + } + pk_funcinstr_thrd_instr->children = children; + if (pk_funcinstr_thrd_instr->n_children == 0) { + pk_funcinstr_thrd_instr->children[0] = pk_funcinstr_thrd_instr->first_child; + pk_funcinstr_thrd_instr->n_children++; + } + } + pk_funcinstr_thrd_instr->children[pk_funcinstr_thrd_instr->n_children] = funcinstr; + pk_funcinstr_thrd_instr->n_children++; + } + } + return funcinstr; +} + +__attribute__((no_instrument_function)) +void __cyg_profile_func_enter(void* this_fn, void* call_site) { + (void)call_site; + if (pk_funcinstr_detect_not_initialized()) return; + pk_funcinstr_detect_and_handle_reset(); + if (pk_funcinstr_should_early_exit()) return; + pk_funcinstr_thrd_bkt->guard_enter++; + + pk_funcinstr_thrd_instr = pk_funcinstr_create_funcinstr(this_fn); + + pk_funcinstr_thrd_bkt->guard_enter = 0; +} + +__attribute__((no_instrument_function)) +void __cyg_profile_func_exit(void* this_fn, void* call_site) { + (void)call_site; + if (pk_funcinstr_detect_not_initialized()) return; + pk_funcinstr_detect_and_handle_reset(); + if (pk_funcinstr_should_early_exit()) return; + if (pk_funcinstr_thrd_instr == NULL) return; // exit called before enter? + pk_funcinstr_thrd_bkt->guard_exit++; + + Dl_info info; + + if (this_fn != pk_funcinstr_thrd_instr->fn) { + int64_t i = (int64_t)pk_funcinstr_thrd_bkt->used_count - 1; + for (; i > -1; --i) { + if (pk_funcinstr_thrd_bkt->data[i].fn == this_fn) { + if (pk_funcinstr_thrd_bkt->data[i].tmr.e.tv_sec == 0) { + pk_funcinstr_thrd_instr = &pk_funcinstr_thrd_bkt->data[i]; + break; + } + } + } + } + if (this_fn != pk_funcinstr_thrd_instr->fn) { + if (pk_funcinstr_thrd_instr->parent == NULL) { + struct pk_tmr tmr = pk_funcinstr_thrd_instr->tmr; + pk_funcinstr_thrd_instr = pk_funcinstr_create_funcinstr(this_fn); + pk_funcinstr_thrd_instr->tmr = tmr; + fprintf(stdout, "[pkfuncinstr] func mismatch; Parent func? Duration not accurate."); + } else { + fprintf(stderr, "[pkfuncinstr] func mismatch. Last: '"); + if (dladdr(pk_funcinstr_thrd_instr->fn, &info) != 0) { + fprintf(stderr, "%s", info.dli_sname); + } else { + fprintf(stderr, "(unknown)"); + } + fprintf(stderr, "'. Current: '"); + if (dladdr(this_fn, &info) != 0) { + fprintf(stderr, "%s'.\n", info.dli_sname); + } else { + fprintf(stderr, "(unknown)'.\n"); + } + pk_funcinstr_thrd_bkt->guard_exit=0; + return; + } + } + + pk_tmr_stop(pk_funcinstr_thrd_instr->tmr); + if (dladdr(this_fn, &info) != 0) { + int depth = 0; + // TODO track depth in a better way + struct pk_funcinstr *p = pk_funcinstr_thrd_instr->parent; + while (p != NULL) { + depth += 1; + p = p->parent; + } + char *demangled = NULL; + if (info.dli_sname != NULL) { +#if defined(__cplusplus) + demangled = abi::__cxa_demangle(info.dli_sname, NULL, NULL, NULL); +#endif + } + fprintf(stdout, "[pkfuncinstr] %p %*s %s took %.6f ms\n" + ,this_fn + ,depth, "" + ,demangled != NULL ? demangled : info.dli_sname != NULL ? info.dli_sname : "???" + ,pk_tmr_duration_dbl_mili(pk_funcinstr_thrd_instr->tmr) + ); + if (demangled != NULL) free(demangled); + } + pk_funcinstr_thrd_bkt->guard_exit=0; + pk_funcinstr_thrd_instr = pk_funcinstr_thrd_instr->parent; +} + +#else +// other +#endif + +#endif /* PK_IMPL_FUNCINSTR */ |
