#ifndef PK_PKFUNCINSTR_H #define PK_PKFUNCINSTR_H #include "./pktmr.h" /* deleteme */ #include struct pk_funcinstr; struct pk_funcinstr { void *fn; struct pk_tmr tmr; struct pk_funcinstr *parent; struct pk_funcinstr *first_child; struct pk_funcinstr **children; size_t n_children; size_t r_children; }; void pk_funcinstr_init(); void pk_funcinstr_set_ouputs(FILE *out, FILE *err); void pk_funcinstr_teardown(); #if defined(__cplusplus) extern "C" { #endif #if defined(__clang__) // clang #elif defined(__GNUC__) || defined(__GNUG__) #ifndef __USE_GNU #define __USE_GNU #endif #if defined(__cplusplus) #include #endif #include #include #include void __cyg_profile_func_enter(void* this_fn, void* call_site); void __cyg_profile_func_exit(void* this_fn, void* call_site); #else // other #endif #if defined(__cplusplus) } // extern "C" #endif #endif /* PK_PKFUNCINSTR_H */ #if defined(PK_IMPL_FUNCINSTR) #include #include #include #include #define PK_FUNCINSTR_CHILDREN_START_COUNT 8 #define PK_FUNCINSTR_CHILDREN_GROW_RATIO 2.0 #define PK_FUNCINSTR_BKT_START_COUNT 64 #define PK_FUNCINSTR_BKT_GROW_RATIO 2.0 #define PK_FUNCINSTR_BKT_DATA_COUNT 0xFFFF struct pk_funcinstr_bkt { uint16_t used_count; uint8_t guard_enter; uint8_t guard_exit; struct timespec reset_time; struct pk_funcinstr data[PK_FUNCINSTR_BKT_DATA_COUNT+1]; }; struct pk_funcinstr_mstr { mtx_t mtx; FILE *out; FILE *err; struct timespec reset_time; struct pk_funcinstr_bkt **buckets; size_t r_buckets; size_t n_buckets; }; // if NULL, get a new bucket (or alloc if full). if !NULL, existing thread static thread_local struct pk_funcinstr_bkt *pk_funcinstr_thrd_bkt = NULL; // last function call (should be NULL or parent of current) static thread_local struct pk_funcinstr *pk_funcinstr_thrd_instr = NULL; static struct pk_funcinstr_mstr thrd_mstr; __attribute__((no_instrument_function)) void pk_funcinstr_init() { assert(thrd_mstr.out == NULL); assert(thrd_mstr.err == NULL); assert(thrd_mstr.reset_time.tv_sec == 0); assert(thrd_mstr.reset_time.tv_nsec == 0); assert(thrd_mstr.buckets == NULL); assert(thrd_mstr.r_buckets == 0); assert(thrd_mstr.n_buckets == 0); mtx_init(&thrd_mstr.mtx, mtx_plain); thrd_mstr.out = stdout; thrd_mstr.err = stderr; thrd_mstr.r_buckets = PK_FUNCINSTR_BKT_START_COUNT; thrd_mstr.buckets = (struct pk_funcinstr_bkt**)aligned_alloc(alignof(struct pk_funcinstr_bkt *), (sizeof(struct pk_funcinstr_bkt *) * PK_FUNCINSTR_BKT_START_COUNT)); clock_gettime(PK_TMR_CLOCK, &thrd_mstr.reset_time); } __attribute__((no_instrument_function)) void pk_funcinstr_set_ouputs(FILE *out, FILE *err) { thrd_mstr.out = out; thrd_mstr.err = err; } __attribute__((no_instrument_function)) void pk_funcinstr_write(FILE *f) { int64_t i, k, s; struct pk_funcinstr_bkt *bkt = nullptr; struct pk_funcinstr *instr = nullptr; struct pk_tmr fake_tmr; Dl_info info; mtx_lock(&thrd_mstr.mtx); fake_tmr.b = thrd_mstr.reset_time; fprintf(f, "["); for (i = 0; i < (int64_t)thrd_mstr.n_buckets; ++i) { bkt = thrd_mstr.buckets[i]; for (k = 0; k < (int64_t)bkt->used_count; ++k) { instr = &bkt->data[k]; for (s = 0; s < 2; ++s) { if (i == 0 && k == 0 && s == 0) { fprintf(f, "{"); } else { fprintf(f, ",{"); } if (dladdr(instr->fn, &info) != 0) { fprintf(f, "\"name\": \"%s\",", info.dli_sname); } else { fprintf(f, "\"name\": \"unknown\","); } fprintf(f, "\"cat\": \"%s\",", "funcinstr"); if (s == 0) { fake_tmr.e = instr->tmr.b; fprintf(f, "\"ph\": \"%c\",", 'B'); } else { fake_tmr.e = instr->tmr.e; fprintf(f, "\"ph\": \"%c\",", 'E'); } fprintf(f, "\"ts\": %lli,", pk_tmr_duration_u64_nano(fake_tmr)); fprintf(f, "\"pid\": %i,", 69); fprintf(f, "\"tid\": %ld", thrd_current()); fprintf(f, "}"); } } } fprintf(f, "]"); mtx_unlock(&thrd_mstr.mtx); } __attribute__((no_instrument_function)) void pk_funcinstr_teardown() { int64_t i, k; mtx_lock(&thrd_mstr.mtx); for (i = ((int64_t)thrd_mstr.n_buckets)-1; i > -1; --i) { struct pk_funcinstr_bkt *bkt = thrd_mstr.buckets[i]; for (k = ((int64_t)bkt->used_count)-1; k > -1; --k) { free(bkt->data[k].children); } } free(thrd_mstr.buckets); thrd_mstr.out = NULL; thrd_mstr.err = NULL; thrd_mstr.reset_time.tv_sec = 0; thrd_mstr.reset_time.tv_nsec = 0; thrd_mstr.buckets = NULL; thrd_mstr.r_buckets = 0; thrd_mstr.n_buckets = 0; mtx_unlock(&thrd_mstr.mtx); mtx_destroy(&thrd_mstr.mtx); } #if defined(__clang__) // TODO clang XRay // Come up with pk macros since XRay requires attributes to instrument? #elif defined(__GNUC__) || defined(__GNUG__) __attribute__((no_instrument_function)) bool pk_funcinstr_detect_not_initialized() { if (thrd_mstr.buckets == NULL) return true; if (thrd_mstr.r_buckets == 0) return true; return false; } __attribute__((no_instrument_function)) void pk_funcinstr_detect_and_handle_reset() { bool should_hard_reset = false; bool should_reset = pk_funcinstr_thrd_bkt == NULL; if (pk_funcinstr_thrd_bkt != NULL) { should_reset = pk_funcinstr_thrd_bkt->used_count == PK_FUNCINSTR_BKT_DATA_COUNT; should_hard_reset = thrd_mstr.reset_time.tv_sec > pk_funcinstr_thrd_bkt->reset_time.tv_sec; should_hard_reset = should_hard_reset || (thrd_mstr.reset_time.tv_sec == pk_funcinstr_thrd_bkt->reset_time.tv_sec && thrd_mstr.reset_time.tv_nsec > pk_funcinstr_thrd_bkt->reset_time.tv_nsec); } if (should_hard_reset) { if (pk_funcinstr_thrd_bkt != NULL) free(pk_funcinstr_thrd_bkt); pk_funcinstr_thrd_bkt = NULL; pk_funcinstr_thrd_instr = NULL; should_reset = true; } if (should_reset) { if (thrd_mstr.n_buckets == thrd_mstr.r_buckets) { mtx_lock(&thrd_mstr.mtx); thrd_mstr.r_buckets *= PK_FUNCINSTR_BKT_GROW_RATIO; struct pk_funcinstr_bkt **buckets = (struct pk_funcinstr_bkt**)aligned_alloc(alignof(void *), sizeof(void *) * thrd_mstr.r_buckets); memcpy(buckets, thrd_mstr.buckets, sizeof(void *) * (thrd_mstr.n_buckets)); free(thrd_mstr.buckets); thrd_mstr.buckets = buckets; mtx_unlock(&thrd_mstr.mtx); } struct pk_funcinstr_bkt *bkt = (struct pk_funcinstr_bkt *)aligned_alloc(alignof(struct pk_funcinstr_bkt), sizeof(struct pk_funcinstr_bkt)); bkt->used_count = 0; bkt->guard_enter = 0; bkt->guard_exit = 0; bkt->reset_time.tv_sec = 0; bkt->reset_time.tv_nsec = 0; if (pk_funcinstr_thrd_bkt != NULL) { pk_funcinstr_thrd_bkt->guard_enter = 0; pk_funcinstr_thrd_bkt->guard_exit = 0; } pk_funcinstr_thrd_bkt = bkt; mtx_lock(&thrd_mstr.mtx); thrd_mstr.buckets[thrd_mstr.n_buckets++] = bkt; mtx_unlock(&thrd_mstr.mtx); clock_gettime(PK_TMR_CLOCK, &pk_funcinstr_thrd_bkt->reset_time); } } __attribute__((no_instrument_function)) bool pk_funcinstr_should_early_exit() { if (pk_funcinstr_thrd_bkt->guard_enter != 0) return true; if (pk_funcinstr_thrd_bkt->guard_exit != 0) return true; return false; } __attribute__((no_instrument_function)) struct pk_funcinstr *pk_funcinstr_create_funcinstr(void *this_fn) { struct pk_funcinstr *funcinstr = &pk_funcinstr_thrd_bkt->data[pk_funcinstr_thrd_bkt->used_count]; pk_funcinstr_thrd_bkt->used_count++; funcinstr->fn = this_fn; pk_tmr_start(funcinstr->tmr); funcinstr->parent = pk_funcinstr_thrd_instr; funcinstr->first_child = NULL; funcinstr->children = NULL; funcinstr->n_children = 0; funcinstr->r_children = 0; if (pk_funcinstr_thrd_instr != NULL) { if (pk_funcinstr_thrd_instr->first_child == NULL) { // avoid an malloc if n_children will only == 1 pk_funcinstr_thrd_instr->first_child = funcinstr; } else { if (pk_funcinstr_thrd_instr->n_children == pk_funcinstr_thrd_instr->r_children) { if (pk_funcinstr_thrd_instr->r_children == 0) { pk_funcinstr_thrd_instr->r_children = PK_FUNCINSTR_CHILDREN_START_COUNT; } else { pk_funcinstr_thrd_instr->r_children *= PK_FUNCINSTR_CHILDREN_GROW_RATIO; } struct pk_funcinstr **children = (struct pk_funcinstr **)aligned_alloc(alignof(void *), sizeof(void *) * pk_funcinstr_thrd_instr->r_children); if (pk_funcinstr_thrd_instr->children != NULL) { memcpy(children, pk_funcinstr_thrd_instr->children, sizeof(void *) * pk_funcinstr_thrd_instr->n_children); free(pk_funcinstr_thrd_instr->children); } pk_funcinstr_thrd_instr->children = children; if (pk_funcinstr_thrd_instr->n_children == 0) { pk_funcinstr_thrd_instr->children[0] = pk_funcinstr_thrd_instr->first_child; pk_funcinstr_thrd_instr->n_children++; } } pk_funcinstr_thrd_instr->children[pk_funcinstr_thrd_instr->n_children] = funcinstr; pk_funcinstr_thrd_instr->n_children++; } } return funcinstr; } __attribute__((no_instrument_function)) void __cyg_profile_func_enter(void* this_fn, void* call_site) { (void)call_site; if (pk_funcinstr_detect_not_initialized()) return; pk_funcinstr_detect_and_handle_reset(); if (pk_funcinstr_should_early_exit()) return; pk_funcinstr_thrd_bkt->guard_enter++; pk_funcinstr_thrd_instr = pk_funcinstr_create_funcinstr(this_fn); pk_funcinstr_thrd_bkt->guard_enter = 0; } __attribute__((no_instrument_function)) void __cyg_profile_func_exit(void* this_fn, void* call_site) { (void)call_site; if (pk_funcinstr_detect_not_initialized()) return; pk_funcinstr_detect_and_handle_reset(); if (pk_funcinstr_should_early_exit()) return; if (pk_funcinstr_thrd_instr == NULL) return; // exit called before enter? pk_funcinstr_thrd_bkt->guard_exit++; #ifdef PK_FUNCINSTR_PRINT Dl_info info; #endif /* PK_FUNCINSTR_PRINT */ if (this_fn != pk_funcinstr_thrd_instr->fn) { int64_t i = (int64_t)pk_funcinstr_thrd_bkt->used_count - 1; for (; i > -1; --i) { if (pk_funcinstr_thrd_bkt->data[i].fn == this_fn) { if (pk_funcinstr_thrd_bkt->data[i].tmr.e.tv_sec == 0) { pk_funcinstr_thrd_instr = &pk_funcinstr_thrd_bkt->data[i]; break; } } } } if (this_fn != pk_funcinstr_thrd_instr->fn) { if (pk_funcinstr_thrd_instr->parent == NULL) { struct pk_tmr tmr = pk_funcinstr_thrd_instr->tmr; pk_funcinstr_thrd_instr = pk_funcinstr_create_funcinstr(this_fn); pk_funcinstr_thrd_instr->tmr = tmr; #ifdef PK_FUNCINSTR_PRINT fprintf(thrd_mstr.out, "[pkfuncinstr] func mismatch; Parent func? Duration not accurate."); #endif /* PK_FUNCINSTR_PRINT */ } else { #ifdef PK_FUNCINSTR_PRINT fprintf(thrd_mstr.err, "[pkfuncinstr] func mismatch. Last: '"); if (dladdr(pk_funcinstr_thrd_instr->fn, &info) != 0) { fprintf(thrd_mstr.err, "%s", info.dli_sname); } else { fprintf(thrd_mstr.err, "(unknown)"); } fprintf(thrd_mstr.err, "'. Current: '"); if (dladdr(this_fn, &info) != 0) { fprintf(thrd_mstr.err, "%s'.\n", info.dli_sname); } else { fprintf(thrd_mstr.err, "(unknown)'.\n"); } #endif /* PK_FUNCINSTR_PRINT */ pk_funcinstr_thrd_bkt->guard_exit=0; return; } } pk_tmr_stop(pk_funcinstr_thrd_instr->tmr); #ifdef PK_FUNCINSTR_PRINT if (dladdr(this_fn, &info) != 0) { int depth = 0; // TODO track depth in a better way struct pk_funcinstr *p = pk_funcinstr_thrd_instr->parent; while (p != NULL) { depth += 1; p = p->parent; } char *demangled = NULL; if (info.dli_sname != NULL) { #if defined(__cplusplus) demangled = abi::__cxa_demangle(info.dli_sname, NULL, NULL, NULL); #endif } fprintf(thrd_mstr.out, "[pkfuncinstr] %p %*s %s took %.6f ms\n" ,this_fn ,depth, "" ,demangled != NULL ? demangled : info.dli_sname != NULL ? info.dli_sname : "???" ,pk_tmr_duration_dbl_mili(pk_funcinstr_thrd_instr->tmr) ); if (demangled != NULL) free(demangled); } #endif /* PK_FUNCINSTR_PRINT */ pk_funcinstr_thrd_bkt->guard_exit=0; pk_funcinstr_thrd_instr = pk_funcinstr_thrd_instr->parent; } #else // other #endif #endif /* PK_IMPL_FUNCINSTR */