summaryrefslogtreecommitdiff
path: root/pkfuncinstr.h
diff options
context:
space:
mode:
Diffstat (limited to 'pkfuncinstr.h')
-rw-r--r--pkfuncinstr.h311
1 files changed, 311 insertions, 0 deletions
diff --git a/pkfuncinstr.h b/pkfuncinstr.h
new file mode 100644
index 0000000..0f76eed
--- /dev/null
+++ b/pkfuncinstr.h
@@ -0,0 +1,311 @@
+#ifndef PK_PKFUNCINSTR_H
+#define PK_PKFUNCINSTR_H
+
+#include "./pktmr.h" /* deleteme */
+
+#define PK_FUNCINSTR_CHILDREN_INCREMENT_COUNT 8 /* slots added each time a record's children array has to grow */
+
+struct pk_funcinstr; /* forward declaration; the struct refers to itself through pointers */
+struct pk_funcinstr { /* one record per instrumented function call, linked into a per-thread call tree */
+ void *fn; /* address of the instrumented function (this_fn passed to the profiling hook) */
+ struct pk_tmr tmr; /* timer started when the record is created and stopped by the exit hook */
+ struct pk_funcinstr *parent; /* record that was current when this one was created; NULL for a top-level call */
+ struct pk_funcinstr *first_child; /* first child, kept inline so a single child needs no heap allocation */
+ struct pk_funcinstr **children; /* heap array of children, allocated when a second child appears; NULL before that */
+ size_t n_children; /* number of entries in use in children */
+ size_t r_children; /* number of entries reserved (capacity) in children */
+};
+
+void pk_funcinstr_init(); /* allocates the global bucket table and its mutex; the profiling hooks do nothing until this has run */
+void pk_funcinstr_teardown(); /* frees the bucket table and every record's children array, then destroys the mutex */
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#if defined(__clang__)
+// clang
+#elif defined(__GNUC__) || defined(__GNUG__)
+
+void __cyg_profile_func_enter(void* this_fn, void* call_site); /* entry hook: creates a timed record for this_fn; call_site is ignored */
+void __cyg_profile_func_exit(void* this_fn, void* call_site); /* exit hook: stops the record's timer and prints the duration; call_site is ignored */
+
+#else
+// other
+#endif
+
+#if defined(__cplusplus)
+} // extern "C"
+#endif
+
+#endif /* PK_PKFUNCINSTR_H */
+#if defined(PK_IMPL_FUNCINSTR)
+
+#include <assert.h>
+#include <stdio.h>
+#include <threads.h>
+#include <string.h>
+
+// TODO 2025-06-02 JCB
+// There are some speed improvements that can be made here by growing faster,
+// OR by using some type of bucket
+// - might be a good chance to isolate some of the pkmem logic
+
+#define PK_FUNCINSTR_BKT_START_COUNT 4 /* initial capacity of the global bucket-pointer table */
+#define PK_FUNCINSTR_BKT_GROW_AMOUNT 4 /* slots added to that table whenever it is full */
+#define PK_FUNCINSTR_BKT_DATA_COUNT 255 /* a thread switches to a fresh bucket once used_count reaches this value */
+struct pk_funcinstr_bkt { /* block of call records owned by one thread */
+ uint8_t used_count; /* number of data[] slots handed out so far */
+ uint8_t guard_enter; /* non-zero while the enter hook is executing on this thread */
+ uint8_t guard_exit; /* non-zero while the exit hook is executing on this thread */
+ struct timespec reset_time; /* when this bucket was created; compared with the master reset_time to detect a re-init */
+ struct pk_funcinstr data[PK_FUNCINSTR_BKT_DATA_COUNT+1]; /* record storage; one slot more than the switch-over threshold */
+};
+struct pk_funcinstr_mstr { /* process-wide registry of every bucket handed to a thread */
+ mtx_t mtx; /* held while the bucket table is grown, appended to, or torn down */
+ struct timespec reset_time; /* stamped by pk_funcinstr_init(); buckets older than this are abandoned by their thread */
+ struct pk_funcinstr_bkt **buckets; /* table of bucket pointers; NULL while uninitialized */
+ size_t r_buckets; /* reserved capacity of buckets; 0 while uninitialized */
+ size_t n_buckets; /* number of entries in use in buckets */
+};
+// This thread's current bucket. NULL until the thread's first hook call (which allocates one) and again after a hard reset.
+static thread_local struct pk_funcinstr_bkt *pk_funcinstr_thrd_bkt = NULL;
+// Most recently entered call on this thread that has not exited yet; it becomes the parent of the next record. NULL at top level.
+static thread_local struct pk_funcinstr *pk_funcinstr_thrd_instr = NULL;
+static struct pk_funcinstr_mstr thrd_mstr; /* global registry; static storage, so all-zero until pk_funcinstr_init() fills it */
+
+/**
+ * Initialize the global instrumentation state.
+ *
+ * Must be called while the state is still all-zero (fresh process or after
+ * pk_funcinstr_teardown()); the asserts enforce this in debug builds.
+ *
+ * On success the bucket-pointer table is allocated and zeroed, the mutex is
+ * ready, and reset_time is stamped so that buckets created before this call
+ * are recognised as stale by their threads.
+ *
+ * On failure a diagnostic is written to stderr and the state is left
+ * "not initialized" (buckets == NULL, r_buckets == 0), so the profiling
+ * hooks simply keep returning early instead of dereferencing NULL.
+ * If the table allocation fails the mutex stays initialized, so a later
+ * pk_funcinstr_teardown() is still safe. If mtx_init itself fails the mutex
+ * is unusable and teardown must not be called.
+ */
+__attribute__((no_instrument_function))
+void pk_funcinstr_init() {
+    assert(thrd_mstr.reset_time.tv_sec == 0);
+    assert(thrd_mstr.reset_time.tv_nsec == 0);
+    assert(thrd_mstr.buckets == NULL);
+    assert(thrd_mstr.r_buckets == 0);
+    assert(thrd_mstr.n_buckets == 0);
+
+    if (mtx_init(&thrd_mstr.mtx, mtx_plain) != thrd_success) {
+        fprintf(stderr, "[pkfuncinstr] init failed: could not create mutex; instrumentation disabled.\n");
+        return;
+    }
+
+    const size_t table_bytes = sizeof(struct pk_funcinstr_bkt *) * PK_FUNCINSTR_BKT_START_COUNT;
+    struct pk_funcinstr_bkt **buckets = (struct pk_funcinstr_bkt**)aligned_alloc(alignof(struct pk_funcinstr_bkt *), table_bytes);
+    if (buckets == NULL) {
+        // Leave buckets/r_buckets at zero: the hooks treat that as "not initialized".
+        fprintf(stderr, "[pkfuncinstr] init failed: out of memory; instrumentation disabled.\n");
+        return;
+    }
+    memset(buckets, 0, table_bytes);
+
+    // Stamp the reset time before exposing the table, then publish it.
+    clock_gettime(PK_TMR_CLOCK, &thrd_mstr.reset_time);
+    thrd_mstr.r_buckets = PK_FUNCINSTR_BKT_START_COUNT;
+    thrd_mstr.buckets = buckets;
+}
+
+/**
+ * Tear down the global instrumentation state.
+ *
+ * Under the master mutex: frees the children array of every record that was
+ * handed out from every registered bucket, frees the bucket-pointer table,
+ * and zeroes the master fields so the hooks see "not initialized" again.
+ * Finally the mutex is unlocked and destroyed.
+ *
+ * NOTE(review): the bucket structs themselves are not freed here. Threads may
+ * still hold thread-local pointers into them (the reset check reads the old
+ * bucket's reset_time), so this looks deliberate -- confirm before changing.
+ */
+__attribute__((no_instrument_function))
+void pk_funcinstr_teardown() {
+    mtx_lock(&thrd_mstr.mtx);
+
+    size_t bucket_idx = 0;
+    while (bucket_idx < thrd_mstr.n_buckets) {
+        struct pk_funcinstr_bkt *bucket = thrd_mstr.buckets[bucket_idx];
+        size_t record_idx = 0;
+        while (record_idx < bucket->used_count) {
+            // children is NULL for records with fewer than two children; free(NULL) is a no-op
+            free(bucket->data[record_idx].children);
+            record_idx += 1;
+        }
+        bucket_idx += 1;
+    }
+
+    free(thrd_mstr.buckets);
+    thrd_mstr.buckets = NULL;
+    thrd_mstr.n_buckets = 0;
+    thrd_mstr.r_buckets = 0;
+    thrd_mstr.reset_time.tv_nsec = 0;
+    thrd_mstr.reset_time.tv_sec = 0;
+
+    mtx_unlock(&thrd_mstr.mtx);
+    mtx_destroy(&thrd_mstr.mtx);
+}
+
+#if defined(__clang__)
+// TODO clang XRay
+// Come up with pk macros since XRay requires attributes to instrument?
+#elif defined(__GNUC__) || defined(__GNUG__)
+
+#ifndef __USE_GNU
+ #define __USE_GNU
+#endif
+#if defined(__cplusplus)
+#include <cxxabi.h>
+#endif
+#include <dlfcn.h>
+#include <link.h>
+#include <string.h>
+
+/**
+ * Report whether the global state is unusable by the hooks.
+ *
+ * @return true when the bucket table has not been allocated or has zero
+ *         reserved capacity (before init or after teardown), false otherwise.
+ */
+__attribute__((no_instrument_function))
+bool pk_funcinstr_detect_not_initialized() {
+    const bool has_table = thrd_mstr.buckets != NULL;
+    const bool has_capacity = thrd_mstr.r_buckets != 0;
+    return !(has_table && has_capacity);
+}
+
+/**
+ * Make sure the calling thread has a bucket with at least one free slot.
+ *
+ * A new bucket is allocated and registered in the master table when:
+ *  - the thread has no bucket yet,
+ *  - the thread's bucket has handed out PK_FUNCINSTR_BKT_DATA_COUNT records, or
+ *  - the master reset_time is newer than the bucket's ("hard reset"); in that
+ *    case the thread also forgets its current call record.
+ *
+ * The capacity check, the growth of the master table and the append of the
+ * new bucket all happen inside ONE critical section. Checking the capacity
+ * outside the lock lets two threads both decide "no growth needed" and then
+ * append past the end of the table.
+ *
+ * If an allocation fails the thread is left without a bucket
+ * (pk_funcinstr_thrd_bkt == NULL). pk_funcinstr_should_early_exit() treats
+ * that as "skip this hook", and the next hook call retries the allocation.
+ */
+__attribute__((no_instrument_function))
+void pk_funcinstr_detect_and_handle_reset() {
+    struct pk_funcinstr_bkt *old_bkt = pk_funcinstr_thrd_bkt;
+    bool should_reset = (old_bkt == NULL);
+
+    if (old_bkt != NULL) {
+        const bool is_stale =
+            thrd_mstr.reset_time.tv_sec > old_bkt->reset_time.tv_sec
+            || (thrd_mstr.reset_time.tv_sec == old_bkt->reset_time.tv_sec
+                && thrd_mstr.reset_time.tv_nsec > old_bkt->reset_time.tv_nsec);
+        if (is_stale) {
+            // Hard reset: the bucket predates the last init, abandon it and the call chain.
+            pk_funcinstr_thrd_bkt = NULL;
+            pk_funcinstr_thrd_instr = NULL;
+            old_bkt = NULL;
+            should_reset = true;
+        } else {
+            should_reset = old_bkt->used_count == PK_FUNCINSTR_BKT_DATA_COUNT;
+        }
+    }
+    if (!should_reset) return;
+
+    // Build the new bucket completely before any other thread can see it.
+    struct pk_funcinstr_bkt *bkt = (struct pk_funcinstr_bkt *)aligned_alloc(alignof(struct pk_funcinstr_bkt), sizeof(struct pk_funcinstr_bkt));
+    if (bkt == NULL) {
+        pk_funcinstr_thrd_bkt = NULL;
+        return;
+    }
+    bkt->used_count = 0;
+    bkt->guard_enter = 0;
+    bkt->guard_exit = 0;
+    clock_gettime(PK_TMR_CLOCK, &bkt->reset_time);
+
+    mtx_lock(&thrd_mstr.mtx);
+    if (thrd_mstr.n_buckets == thrd_mstr.r_buckets) {
+        const size_t new_cap = thrd_mstr.r_buckets + PK_FUNCINSTR_BKT_GROW_AMOUNT;
+        struct pk_funcinstr_bkt **grown = (struct pk_funcinstr_bkt**)aligned_alloc(alignof(void *), sizeof(void *) * new_cap);
+        if (grown == NULL) {
+            mtx_unlock(&thrd_mstr.mtx);
+            free(bkt);
+            pk_funcinstr_thrd_bkt = NULL;
+            return;
+        }
+        memcpy(grown, thrd_mstr.buckets, sizeof(void *) * thrd_mstr.n_buckets);
+        memset(grown + thrd_mstr.n_buckets, 0, sizeof(void *) * (new_cap - thrd_mstr.n_buckets));
+        free(thrd_mstr.buckets);
+        thrd_mstr.buckets = grown;
+        // Capacity is only committed once the larger table really exists.
+        thrd_mstr.r_buckets = new_cap;
+    }
+    thrd_mstr.buckets[thrd_mstr.n_buckets++] = bkt;
+    mtx_unlock(&thrd_mstr.mtx);
+
+    if (old_bkt != NULL) {
+        // Leaving a full bucket: clear its guards so it is not left marked busy.
+        old_bkt->guard_enter = 0;
+        old_bkt->guard_exit = 0;
+    }
+    pk_funcinstr_thrd_bkt = bkt;
+}
+
+/**
+ * Decide whether the current hook invocation must be skipped.
+ *
+ * @return true when the thread has no bucket (allocation failed) or when an
+ *         enter/exit hook is already running on this thread (re-entrancy
+ *         guard set); false when it is safe to record.
+ */
+__attribute__((no_instrument_function))
+bool pk_funcinstr_should_early_exit() {
+    if (pk_funcinstr_thrd_bkt == NULL) return true;
+    if (pk_funcinstr_thrd_bkt->guard_enter != 0) return true;
+    if (pk_funcinstr_thrd_bkt->guard_exit != 0) return true;
+    return false;
+}
+
+/**
+ * Take the next free record from the thread's bucket, start its timer and
+ * link it under the thread's current call record.
+ *
+ * The caller must guarantee that pk_funcinstr_thrd_bkt is non-NULL and has a
+ * free slot; no bounds check is done here.
+ *
+ * Child bookkeeping on the parent:
+ *  - the first child is stored only in parent->first_child (no allocation);
+ *  - from the second child on, parent->children is used. On its first
+ *    allocation first_child is copied into slot 0 so the array is complete.
+ * The array grows by PK_FUNCINSTR_CHILDREN_INCREMENT_COUNT slots at a time.
+ *
+ * If growing the children array fails, the new record is still returned with
+ * its parent pointer set, but it is not listed in the parent's children
+ * array; the parent's existing array and counters are left untouched.
+ *
+ * @param this_fn address of the instrumented function being recorded
+ * @return the newly initialised record (never NULL)
+ */
+__attribute__((no_instrument_function))
+struct pk_funcinstr *pk_funcinstr_create_funcinstr(void *this_fn) {
+    struct pk_funcinstr *parent = pk_funcinstr_thrd_instr;
+    struct pk_funcinstr *funcinstr = &pk_funcinstr_thrd_bkt->data[pk_funcinstr_thrd_bkt->used_count];
+    pk_funcinstr_thrd_bkt->used_count++;
+    pk_tmr_start(funcinstr->tmr);
+    funcinstr->fn = this_fn;
+    funcinstr->parent = parent;
+    funcinstr->children = NULL;
+    funcinstr->first_child = NULL;
+    funcinstr->n_children = 0;
+    funcinstr->r_children = 0;
+
+    if (parent == NULL) {
+        return funcinstr;
+    }
+    if (parent->first_child == NULL) {
+        // avoid a malloc if n_children will only == 1
+        parent->first_child = funcinstr;
+        return funcinstr;
+    }
+
+    if (parent->n_children == parent->r_children) {
+        const size_t new_cap = parent->r_children + PK_FUNCINSTR_CHILDREN_INCREMENT_COUNT;
+        struct pk_funcinstr **children = (struct pk_funcinstr **)aligned_alloc(alignof(void *), sizeof(void *) * new_cap);
+        if (children == NULL) {
+            // Out of memory: keep the parent's bookkeeping consistent and skip the link.
+            return funcinstr;
+        }
+        if (parent->children != NULL) {
+            memcpy(children, parent->children, sizeof(void *) * parent->n_children);
+        }
+        memset(children + parent->n_children, 0, sizeof(void *) * (new_cap - parent->n_children));
+        free(parent->children);
+        parent->children = children;
+        parent->r_children = new_cap;
+        if (parent->n_children == 0) {
+            // First use of the array: bring the inline first child into slot 0.
+            parent->children[0] = parent->first_child;
+            parent->n_children++;
+        }
+    }
+    parent->children[parent->n_children] = funcinstr;
+    parent->n_children++;
+    return funcinstr;
+}
+
+/**
+ * Function-entry hook.
+ *
+ * Does nothing while the library is uninitialized or while another hook is
+ * already running on this thread. Otherwise it creates a record for this_fn
+ * and makes it the thread's current call, with guard_enter raised for the
+ * duration so nested hook invocations are skipped.
+ *
+ * @param this_fn   address of the function being entered
+ * @param call_site unused
+ */
+__attribute__((no_instrument_function))
+void __cyg_profile_func_enter(void* this_fn, void* call_site) {
+    (void)call_site;
+
+    if (pk_funcinstr_detect_not_initialized()) {
+        return;
+    }
+    pk_funcinstr_detect_and_handle_reset();
+    if (pk_funcinstr_should_early_exit()) {
+        return;
+    }
+
+    pk_funcinstr_thrd_bkt->guard_enter += 1;
+    struct pk_funcinstr *entered = pk_funcinstr_create_funcinstr(this_fn);
+    pk_funcinstr_thrd_instr = entered;
+    pk_funcinstr_thrd_bkt->guard_enter = 0;
+}
+
+/* Symbol name for fn as reported by dladdr, or "(unknown)" when the lookup
+ * fails or finds no symbol (dli_sname may be NULL even on success). */
+__attribute__((no_instrument_function))
+static const char *pk_funcinstr_symbol_or_unknown(void *fn) {
+    Dl_info info;
+    if (dladdr(fn, &info) != 0 && info.dli_sname != NULL) {
+        return info.dli_sname;
+    }
+    return "(unknown)";
+}
+
+/**
+ * Function-exit hook.
+ *
+ * Does nothing while the library is uninitialized, while another hook is
+ * running on this thread, or when no call is currently recorded.
+ *
+ * Otherwise it matches this_fn to a record:
+ *  1. the thread's current record, if its fn matches;
+ *  2. else the newest record in the current bucket with the same fn whose
+ *     end time is still zero (not stopped yet);
+ *  3. else, if the current record has no parent, a new record is created for
+ *     this_fn that inherits the current record's timer (duration is then
+ *     only approximate) and a warning is printed;
+ *  4. else a mismatch diagnostic is printed and the hook gives up.
+ *
+ * For a matched record the timer is stopped and, when dladdr can resolve
+ * this_fn, a line with address, depth-indented name and duration in
+ * milliseconds is written to stdout. The thread's current record then moves
+ * to the matched record's parent.
+ *
+ * All diagnostics go to stderr and end with a newline, so they cannot run
+ * into the timing lines on stdout.
+ *
+ * @param this_fn   address of the function being left
+ * @param call_site unused
+ */
+__attribute__((no_instrument_function))
+void __cyg_profile_func_exit(void* this_fn, void* call_site) {
+    (void)call_site;
+    if (pk_funcinstr_detect_not_initialized()) return;
+    pk_funcinstr_detect_and_handle_reset();
+    if (pk_funcinstr_should_early_exit()) return;
+    if (pk_funcinstr_thrd_instr == NULL) return; // exit called before enter?
+    pk_funcinstr_thrd_bkt->guard_exit++;
+
+    if (this_fn != pk_funcinstr_thrd_instr->fn) {
+        // Search newest-to-oldest for an unfinished record of the same function.
+        int64_t i = (int64_t)pk_funcinstr_thrd_bkt->used_count - 1;
+        for (; i > -1; --i) {
+            struct pk_funcinstr *candidate = &pk_funcinstr_thrd_bkt->data[i];
+            if (candidate->fn == this_fn && candidate->tmr.e.tv_sec == 0) {
+                pk_funcinstr_thrd_instr = candidate;
+                break;
+            }
+        }
+    }
+    if (this_fn != pk_funcinstr_thrd_instr->fn) {
+        if (pk_funcinstr_thrd_instr->parent == NULL) {
+            struct pk_tmr tmr = pk_funcinstr_thrd_instr->tmr;
+            pk_funcinstr_thrd_instr = pk_funcinstr_create_funcinstr(this_fn);
+            pk_funcinstr_thrd_instr->tmr = tmr;
+            fprintf(stderr, "[pkfuncinstr] func mismatch; Parent func? Duration not accurate.\n");
+        } else {
+            fprintf(stderr, "[pkfuncinstr] func mismatch. Last: '%s'. Current: '%s'.\n"
+                ,pk_funcinstr_symbol_or_unknown(pk_funcinstr_thrd_instr->fn)
+                ,pk_funcinstr_symbol_or_unknown(this_fn)
+            );
+            pk_funcinstr_thrd_bkt->guard_exit=0;
+            return;
+        }
+    }
+
+    pk_tmr_stop(pk_funcinstr_thrd_instr->tmr);
+    Dl_info info;
+    if (dladdr(this_fn, &info) != 0) {
+        int depth = 0;
+        // TODO track depth in a better way
+        struct pk_funcinstr *p = pk_funcinstr_thrd_instr->parent;
+        while (p != NULL) {
+            depth += 1;
+            p = p->parent;
+        }
+        char *demangled = NULL;
+        if (info.dli_sname != NULL) {
+#if defined(__cplusplus)
+            demangled = abi::__cxa_demangle(info.dli_sname, NULL, NULL, NULL);
+#endif
+        }
+        // "%*s" with an empty string prints `depth` spaces as indentation.
+        fprintf(stdout, "[pkfuncinstr] %p %*s %s took %.6f ms\n"
+            ,this_fn
+            ,depth, ""
+            ,demangled != NULL ? demangled : info.dli_sname != NULL ? info.dli_sname : "???"
+            ,pk_tmr_duration_dbl_mili(pk_funcinstr_thrd_instr->tmr)
+        );
+        if (demangled != NULL) free(demangled);
+    }
+    pk_funcinstr_thrd_bkt->guard_exit=0;
+    pk_funcinstr_thrd_instr = pk_funcinstr_thrd_instr->parent;
+}
+
+#else
+// other
+#endif
+
+#endif /* PK_IMPL_FUNCINSTR */