diff options
| author | Jonathan Bradley <jcb@pikum.xyz> | 2025-08-27 15:10:45 -0400 |
|---|---|---|
| committer | Jonathan Bradley <jcb@pikum.xyz> | 2025-08-27 15:10:45 -0400 |
| commit | 9f84a84a33dc5a77ec32ff9ca9fbd84fab0f0ad0 (patch) | |
| tree | 4465851e8b4c07eb9679d6fa900c26939b315dc0 | |
| parent | c1c58188cbc1156f2b37d585fa7e7603b3d3acf4 (diff) | |
pkfuncinstr: perf: alloc, memset
pkbktarr: perf: unnecessary indirection
| -rw-r--r-- | pkbktarr.h | 28 | ||||
| -rw-r--r-- | pkfuncinstr.h | 28 |
2 files changed, 27 insertions, 29 deletions
@@ -144,10 +144,10 @@ void pk_bkt_arr_init(struct pk_bkt_arr *bkt_arr, unsigned long stride, unsigned bkt_arr->reserved_buckets = 1; bkt_arr->stride = stride; bkt_arr->alignment = alignment; - bkt_arr->idx_unused = (unsigned long long *)pk_new(sizeof(unsigned long long), alignof(unsigned long long), bkt_buckets); + bkt_arr->idx_unused = (unsigned long long *)pk_new_bkt(sizeof(unsigned long long), alignof(unsigned long long), bkt_buckets); bkt_arr->idx_unused[0] = PK_BKT_ARR_ALL_UNUSED_VAL; - bkt_arr->bucketed_data = (void **)pk_new(sizeof(void *), alignof(void *), bkt_buckets); - bkt_arr->bucketed_data[0] = pk_new(stride * limits.i, alignment, bkt_data); + bkt_arr->bucketed_data = (void **)pk_new_bkt(sizeof(void *), alignof(void *), bkt_buckets); + bkt_arr->bucketed_data[0] = pk_new_bkt(stride * limits.i, alignment, bkt_data); } void pk_bkt_arr_clear(struct pk_bkt_arr *bkt_arr) { @@ -164,8 +164,8 @@ void pk_bkt_arr_clear(struct pk_bkt_arr *bkt_arr) { void pk_bkt_arr_reserve(struct pk_bkt_arr *bkt_arr, size_t count) { size_t bucket_count = count / bkt_arr->limits.i; if (bkt_arr->reserved_buckets >= bucket_count) return; - unsigned long long *new_idx_unused = (unsigned long long *)pk_new(sizeof(unsigned long long) * bucket_count, alignof(uint64_t), bkt_arr->bkt_buckets); - void **new_bucketed_data = (void **)pk_new(sizeof(void *) * bucket_count, alignof(void *), bkt_arr->bkt_buckets); + unsigned long long *new_idx_unused = (unsigned long long *)pk_new_bkt(sizeof(unsigned long long) * bucket_count, alignof(uint64_t), bkt_arr->bkt_buckets); + void **new_bucketed_data = (void **)pk_new_bkt(sizeof(void *) * bucket_count, alignof(void *), bkt_arr->bkt_buckets); if (bkt_arr->reserved_buckets > 0) { memcpy(new_idx_unused, bkt_arr->idx_unused, sizeof(unsigned long long) * bkt_arr->reserved_buckets); memcpy(new_bucketed_data, bkt_arr->bucketed_data, sizeof(void *) * bkt_arr->reserved_buckets); @@ -174,7 +174,7 @@ void pk_bkt_arr_reserve(struct pk_bkt_arr *bkt_arr, size_t count) { } for (size_t i = bkt_arr->reserved_buckets; i < bucket_count; ++i) { new_idx_unused[i] = PK_BKT_ARR_ALL_UNUSED_VAL; - new_bucketed_data[i] = pk_new(bkt_arr->stride * bkt_arr->limits.i, bkt_arr->alignment, bkt_arr->bkt_data); + new_bucketed_data[i] = pk_new_bkt(bkt_arr->stride * bkt_arr->limits.i, bkt_arr->alignment, bkt_arr->bkt_data); } bkt_arr->idx_unused = new_idx_unused; bkt_arr->bucketed_data = new_bucketed_data; @@ -227,10 +227,10 @@ void pk_bkt_arr_teardown(struct pk_bkt_arr *bkt_arr) size_t sz = bkt_arr->limits.i * bkt_arr->stride; if (bkt_arr->idx_unused == nullptr && bkt_arr->bucketed_data == nullptr) return; for (b = bkt_arr->reserved_buckets - 1; b > -1; --b) { - pk_delete(bkt_arr->bucketed_data[b], sz, bkt_arr->bkt_data); + pk_delete_bkt(bkt_arr->bucketed_data[b], sz, bkt_arr->bkt_data); } - pk_delete((void *)bkt_arr->idx_unused, sizeof(unsigned long long) * (bkt_arr->reserved_buckets), bkt_arr->bkt_buckets); - pk_delete((void *)bkt_arr->bucketed_data, sizeof(void *) * (bkt_arr->reserved_buckets), bkt_arr->bkt_buckets); + pk_delete_bkt((void *)bkt_arr->idx_unused, sizeof(unsigned long long) * (bkt_arr->reserved_buckets), bkt_arr->bkt_buckets); + pk_delete_bkt((void *)bkt_arr->bucketed_data, sizeof(void *) * (bkt_arr->reserved_buckets), bkt_arr->bkt_buckets); memset(bkt_arr, 0, sizeof(struct pk_bkt_arr)); bkt_arr->bkt_buckets = NULL; bkt_arr->bkt_data = NULL; @@ -269,18 +269,18 @@ struct pk_bkt_arr_handle pk_bkt_arr_new_handle(struct pk_bkt_arr *bkt_arr) if (bkt_arr->head_r.b == bkt_arr->reserved_buckets && bkt_arr->head_r.i == 0) { bkt_arr->reserved_buckets += 1; - unsigned long long *new_idx_unused = (unsigned long long *)pk_new(sizeof(unsigned long long) * bkt_arr->reserved_buckets, alignof(unsigned long long), bkt_arr->bkt_buckets); - void **new_data_ptrs = (void **)pk_new(sizeof(void *) * bkt_arr->reserved_buckets, alignof(void *), bkt_arr->bkt_buckets); + unsigned long long *new_idx_unused = (unsigned long long *)pk_new_bkt(sizeof(unsigned long long) * bkt_arr->reserved_buckets, alignof(unsigned long long), bkt_arr->bkt_buckets); + void **new_data_ptrs = (void **)pk_new_bkt(sizeof(void *) * bkt_arr->reserved_buckets, alignof(void *), bkt_arr->bkt_buckets); for (b = 0; b < bkt_arr->reserved_buckets - 1; ++b) { new_idx_unused[b] = bkt_arr->idx_unused[b]; new_data_ptrs[b] = bkt_arr->bucketed_data[b]; } new_idx_unused[bkt_arr->reserved_buckets - 1] = PK_BKT_ARR_ALL_UNUSED_VAL; - new_data_ptrs[bkt_arr->reserved_buckets - 1] = pk_new(bkt_arr->stride * bkt_arr->limits.i, bkt_arr->alignment, bkt_arr->bkt_data); + new_data_ptrs[bkt_arr->reserved_buckets - 1] = pk_new_bkt(bkt_arr->stride * bkt_arr->limits.i, bkt_arr->alignment, bkt_arr->bkt_data); - pk_delete((void *)bkt_arr->idx_unused, sizeof(unsigned long long) * (bkt_arr->reserved_buckets - 1), bkt_arr->bkt_buckets); - pk_delete((void *)bkt_arr->bucketed_data, sizeof(void *) * (bkt_arr->reserved_buckets - 1), bkt_arr->bkt_buckets); + pk_delete_bkt((void *)bkt_arr->idx_unused, sizeof(unsigned long long) * (bkt_arr->reserved_buckets - 1), bkt_arr->bkt_buckets); + pk_delete_bkt((void *)bkt_arr->bucketed_data, sizeof(void *) * (bkt_arr->reserved_buckets - 1), bkt_arr->bkt_buckets); bkt_arr->idx_unused = new_idx_unused; bkt_arr->bucketed_data = new_data_ptrs; } diff --git a/pkfuncinstr.h b/pkfuncinstr.h index 151c60d..be7e59a 100644 --- a/pkfuncinstr.h +++ b/pkfuncinstr.h @@ -5,8 +5,6 @@ #include <stdio.h> -#define PK_FUNCINSTR_CHILDREN_INCREMENT_COUNT 8 - struct pk_funcinstr; struct pk_funcinstr { void *fn; @@ -60,11 +58,8 @@ void __cyg_profile_func_exit(void* this_fn, void* call_site); #include <threads.h> #include <string.h> -// TODO 2025-06-02 JCB -// There's some speed improvements that can be made here by growing faster. -// OR use some type of bucket -// - might be a good chance to isolate some of the pkmem logic - +#define PK_FUNCINSTR_CHILDREN_START_COUNT 8 +#define PK_FUNCINSTR_CHILDREN_GROW_RATIO 2.0 #define PK_FUNCINSTR_BKT_START_COUNT 64 #define PK_FUNCINSTR_BKT_GROW_RATIO 2.0 #define PK_FUNCINSTR_BKT_DATA_COUNT 0xFFFF @@ -104,7 +99,6 @@ void pk_funcinstr_init() { thrd_mstr.err = stderr; thrd_mstr.r_buckets = PK_FUNCINSTR_BKT_START_COUNT; thrd_mstr.buckets = (struct pk_funcinstr_bkt**)aligned_alloc(alignof(struct pk_funcinstr_bkt *), (sizeof(struct pk_funcinstr_bkt *) * PK_FUNCINSTR_BKT_START_COUNT)); - memset(thrd_mstr.buckets, 0, (sizeof(struct pk_funcinstr_bkt *) * PK_FUNCINSTR_BKT_START_COUNT)); clock_gettime(PK_TMR_CLOCK, &thrd_mstr.reset_time); } @@ -213,7 +207,6 @@ void pk_funcinstr_detect_and_handle_reset() { thrd_mstr.r_buckets *= PK_FUNCINSTR_BKT_GROW_RATIO; struct pk_funcinstr_bkt **buckets = (struct pk_funcinstr_bkt**)aligned_alloc(alignof(void *), sizeof(void *) * thrd_mstr.r_buckets); memcpy(buckets, thrd_mstr.buckets, sizeof(void *) * (thrd_mstr.n_buckets)); - memset((char*)buckets + (sizeof(void *) * (thrd_mstr.n_buckets)), 0, (sizeof(void *) * thrd_mstr.r_buckets) - sizeof(void *) * (thrd_mstr.n_buckets)); free(thrd_mstr.buckets); thrd_mstr.buckets = buckets; mtx_unlock(&thrd_mstr.mtx); @@ -247,11 +240,11 @@ __attribute__((no_instrument_function)) struct pk_funcinstr *pk_funcinstr_create_funcinstr(void *this_fn) { struct pk_funcinstr *funcinstr = &pk_funcinstr_thrd_bkt->data[pk_funcinstr_thrd_bkt->used_count]; pk_funcinstr_thrd_bkt->used_count++; - pk_tmr_start(funcinstr->tmr); funcinstr->fn = this_fn; + pk_tmr_start(funcinstr->tmr); funcinstr->parent = pk_funcinstr_thrd_instr; - funcinstr->children = NULL; funcinstr->first_child = NULL; + funcinstr->children = NULL; funcinstr->n_children = 0; funcinstr->r_children = 0; @@ -261,9 +254,12 @@ struct pk_funcinstr *pk_funcinstr_create_funcinstr(void *this_fn) { pk_funcinstr_thrd_instr->first_child = funcinstr; } else { if (pk_funcinstr_thrd_instr->n_children == pk_funcinstr_thrd_instr->r_children) { - pk_funcinstr_thrd_instr->r_children += PK_FUNCINSTR_CHILDREN_INCREMENT_COUNT; + if (pk_funcinstr_thrd_instr->r_children == 0) { + pk_funcinstr_thrd_instr->r_children = PK_FUNCINSTR_CHILDREN_START_COUNT; + } else { + pk_funcinstr_thrd_instr->r_children *= PK_FUNCINSTR_CHILDREN_GROW_RATIO; + } struct pk_funcinstr **children = (struct pk_funcinstr **)aligned_alloc(alignof(void *), sizeof(void *) * pk_funcinstr_thrd_instr->r_children); - memset((char*)children + (sizeof(void *) * (pk_funcinstr_thrd_instr->n_children)), 0, (sizeof(void *) * pk_funcinstr_thrd_instr->r_children) - sizeof(void *) * (pk_funcinstr_thrd_instr->n_children)); if (pk_funcinstr_thrd_instr->children != NULL) { memcpy(children, pk_funcinstr_thrd_instr->children, sizeof(void *) * pk_funcinstr_thrd_instr->n_children); free(pk_funcinstr_thrd_instr->children); @@ -303,7 +299,9 @@ void __cyg_profile_func_exit(void* this_fn, void* call_site) { if (pk_funcinstr_thrd_instr == NULL) return; // exit called before enter? pk_funcinstr_thrd_bkt->guard_exit++; +#ifdef PK_FUNCINSTR_PRINT Dl_info info; +#endif /* PK_FUNCINSTR_PRINT */ if (this_fn != pk_funcinstr_thrd_instr->fn) { int64_t i = (int64_t)pk_funcinstr_thrd_bkt->used_count - 1; @@ -345,6 +343,7 @@ void __cyg_profile_func_exit(void* this_fn, void* call_site) { } pk_tmr_stop(pk_funcinstr_thrd_instr->tmr); +#ifdef PK_FUNCINSTR_PRINT if (dladdr(this_fn, &info) != 0) { int depth = 0; // TODO track depth in a better way @@ -359,16 +358,15 @@ void __cyg_profile_func_exit(void* this_fn, void* call_site) { demangled = abi::__cxa_demangle(info.dli_sname, NULL, NULL, NULL); #endif } -#ifdef PK_FUNCINSTR_PRINT fprintf(thrd_mstr.out, "[pkfuncinstr] %p %*s %s took %.6f ms\n" ,this_fn ,depth, "" ,demangled != NULL ? demangled : info.dli_sname != NULL ? info.dli_sname : "???" ,pk_tmr_duration_dbl_mili(pk_funcinstr_thrd_instr->tmr) ); -#endif /* PK_FUNCINSTR_PRINT */ if (demangled != NULL) free(demangled); } +#endif /* PK_FUNCINSTR_PRINT */ pk_funcinstr_thrd_bkt->guard_exit=0; pk_funcinstr_thrd_instr = pk_funcinstr_thrd_instr->parent; } |
