summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jonathan Bradley <jcb@pikum.xyz> 2025-08-27 15:10:45 -0400
committer: Jonathan Bradley <jcb@pikum.xyz> 2025-08-27 15:10:45 -0400
commit: 9f84a84a33dc5a77ec32ff9ca9fbd84fab0f0ad0 (patch)
tree: 4465851e8b4c07eb9679d6fa900c26939b315dc0
parent: c1c58188cbc1156f2b37d585fa7e7603b3d3acf4 (diff)
pkfuncinstr: perf: alloc, memset
pkbktarr: perf: unnecessary indirection
-rw-r--r-- pkbktarr.h 28
-rw-r--r-- pkfuncinstr.h 28
2 files changed, 27 insertions, 29 deletions
diff --git a/pkbktarr.h b/pkbktarr.h
index 87e71ad..d0d2439 100644
--- a/pkbktarr.h
+++ b/pkbktarr.h
@@ -144,10 +144,10 @@ void pk_bkt_arr_init(struct pk_bkt_arr *bkt_arr, unsigned long stride, unsigned
bkt_arr->reserved_buckets = 1;
bkt_arr->stride = stride;
bkt_arr->alignment = alignment;
- bkt_arr->idx_unused = (unsigned long long *)pk_new(sizeof(unsigned long long), alignof(unsigned long long), bkt_buckets);
+ bkt_arr->idx_unused = (unsigned long long *)pk_new_bkt(sizeof(unsigned long long), alignof(unsigned long long), bkt_buckets);
bkt_arr->idx_unused[0] = PK_BKT_ARR_ALL_UNUSED_VAL;
- bkt_arr->bucketed_data = (void **)pk_new(sizeof(void *), alignof(void *), bkt_buckets);
- bkt_arr->bucketed_data[0] = pk_new(stride * limits.i, alignment, bkt_data);
+ bkt_arr->bucketed_data = (void **)pk_new_bkt(sizeof(void *), alignof(void *), bkt_buckets);
+ bkt_arr->bucketed_data[0] = pk_new_bkt(stride * limits.i, alignment, bkt_data);
}
void pk_bkt_arr_clear(struct pk_bkt_arr *bkt_arr) {
@@ -164,8 +164,8 @@ void pk_bkt_arr_clear(struct pk_bkt_arr *bkt_arr) {
void pk_bkt_arr_reserve(struct pk_bkt_arr *bkt_arr, size_t count) {
size_t bucket_count = count / bkt_arr->limits.i;
if (bkt_arr->reserved_buckets >= bucket_count) return;
- unsigned long long *new_idx_unused = (unsigned long long *)pk_new(sizeof(unsigned long long) * bucket_count, alignof(uint64_t), bkt_arr->bkt_buckets);
- void **new_bucketed_data = (void **)pk_new(sizeof(void *) * bucket_count, alignof(void *), bkt_arr->bkt_buckets);
+ unsigned long long *new_idx_unused = (unsigned long long *)pk_new_bkt(sizeof(unsigned long long) * bucket_count, alignof(uint64_t), bkt_arr->bkt_buckets);
+ void **new_bucketed_data = (void **)pk_new_bkt(sizeof(void *) * bucket_count, alignof(void *), bkt_arr->bkt_buckets);
if (bkt_arr->reserved_buckets > 0) {
memcpy(new_idx_unused, bkt_arr->idx_unused, sizeof(unsigned long long) * bkt_arr->reserved_buckets);
memcpy(new_bucketed_data, bkt_arr->bucketed_data, sizeof(void *) * bkt_arr->reserved_buckets);
@@ -174,7 +174,7 @@ void pk_bkt_arr_reserve(struct pk_bkt_arr *bkt_arr, size_t count) {
}
for (size_t i = bkt_arr->reserved_buckets; i < bucket_count; ++i) {
new_idx_unused[i] = PK_BKT_ARR_ALL_UNUSED_VAL;
- new_bucketed_data[i] = pk_new(bkt_arr->stride * bkt_arr->limits.i, bkt_arr->alignment, bkt_arr->bkt_data);
+ new_bucketed_data[i] = pk_new_bkt(bkt_arr->stride * bkt_arr->limits.i, bkt_arr->alignment, bkt_arr->bkt_data);
}
bkt_arr->idx_unused = new_idx_unused;
bkt_arr->bucketed_data = new_bucketed_data;
@@ -227,10 +227,10 @@ void pk_bkt_arr_teardown(struct pk_bkt_arr *bkt_arr)
size_t sz = bkt_arr->limits.i * bkt_arr->stride;
if (bkt_arr->idx_unused == nullptr && bkt_arr->bucketed_data == nullptr) return;
for (b = bkt_arr->reserved_buckets - 1; b > -1; --b) {
- pk_delete(bkt_arr->bucketed_data[b], sz, bkt_arr->bkt_data);
+ pk_delete_bkt(bkt_arr->bucketed_data[b], sz, bkt_arr->bkt_data);
}
- pk_delete((void *)bkt_arr->idx_unused, sizeof(unsigned long long) * (bkt_arr->reserved_buckets), bkt_arr->bkt_buckets);
- pk_delete((void *)bkt_arr->bucketed_data, sizeof(void *) * (bkt_arr->reserved_buckets), bkt_arr->bkt_buckets);
+ pk_delete_bkt((void *)bkt_arr->idx_unused, sizeof(unsigned long long) * (bkt_arr->reserved_buckets), bkt_arr->bkt_buckets);
+ pk_delete_bkt((void *)bkt_arr->bucketed_data, sizeof(void *) * (bkt_arr->reserved_buckets), bkt_arr->bkt_buckets);
memset(bkt_arr, 0, sizeof(struct pk_bkt_arr));
bkt_arr->bkt_buckets = NULL;
bkt_arr->bkt_data = NULL;
@@ -269,18 +269,18 @@ struct pk_bkt_arr_handle pk_bkt_arr_new_handle(struct pk_bkt_arr *bkt_arr)
if (bkt_arr->head_r.b == bkt_arr->reserved_buckets && bkt_arr->head_r.i == 0) {
bkt_arr->reserved_buckets += 1;
- unsigned long long *new_idx_unused = (unsigned long long *)pk_new(sizeof(unsigned long long) * bkt_arr->reserved_buckets, alignof(unsigned long long), bkt_arr->bkt_buckets);
- void **new_data_ptrs = (void **)pk_new(sizeof(void *) * bkt_arr->reserved_buckets, alignof(void *), bkt_arr->bkt_buckets);
+ unsigned long long *new_idx_unused = (unsigned long long *)pk_new_bkt(sizeof(unsigned long long) * bkt_arr->reserved_buckets, alignof(unsigned long long), bkt_arr->bkt_buckets);
+ void **new_data_ptrs = (void **)pk_new_bkt(sizeof(void *) * bkt_arr->reserved_buckets, alignof(void *), bkt_arr->bkt_buckets);
for (b = 0; b < bkt_arr->reserved_buckets - 1; ++b) {
new_idx_unused[b] = bkt_arr->idx_unused[b];
new_data_ptrs[b] = bkt_arr->bucketed_data[b];
}
new_idx_unused[bkt_arr->reserved_buckets - 1] = PK_BKT_ARR_ALL_UNUSED_VAL;
- new_data_ptrs[bkt_arr->reserved_buckets - 1] = pk_new(bkt_arr->stride * bkt_arr->limits.i, bkt_arr->alignment, bkt_arr->bkt_data);
+ new_data_ptrs[bkt_arr->reserved_buckets - 1] = pk_new_bkt(bkt_arr->stride * bkt_arr->limits.i, bkt_arr->alignment, bkt_arr->bkt_data);
- pk_delete((void *)bkt_arr->idx_unused, sizeof(unsigned long long) * (bkt_arr->reserved_buckets - 1), bkt_arr->bkt_buckets);
- pk_delete((void *)bkt_arr->bucketed_data, sizeof(void *) * (bkt_arr->reserved_buckets - 1), bkt_arr->bkt_buckets);
+ pk_delete_bkt((void *)bkt_arr->idx_unused, sizeof(unsigned long long) * (bkt_arr->reserved_buckets - 1), bkt_arr->bkt_buckets);
+ pk_delete_bkt((void *)bkt_arr->bucketed_data, sizeof(void *) * (bkt_arr->reserved_buckets - 1), bkt_arr->bkt_buckets);
bkt_arr->idx_unused = new_idx_unused;
bkt_arr->bucketed_data = new_data_ptrs;
}
diff --git a/pkfuncinstr.h b/pkfuncinstr.h
index 151c60d..be7e59a 100644
--- a/pkfuncinstr.h
+++ b/pkfuncinstr.h
@@ -5,8 +5,6 @@
#include <stdio.h>
-#define PK_FUNCINSTR_CHILDREN_INCREMENT_COUNT 8
-
struct pk_funcinstr;
struct pk_funcinstr {
void *fn;
@@ -60,11 +58,8 @@ void __cyg_profile_func_exit(void* this_fn, void* call_site);
#include <threads.h>
#include <string.h>
-// TODO 2025-06-02 JCB
-// There's some speed improvements that can be made here by growing faster.
-// OR use some type of bucket
-// - might be a good chance to isolate some of the pkmem logic
-
+#define PK_FUNCINSTR_CHILDREN_START_COUNT 8
+#define PK_FUNCINSTR_CHILDREN_GROW_RATIO 2.0
#define PK_FUNCINSTR_BKT_START_COUNT 64
#define PK_FUNCINSTR_BKT_GROW_RATIO 2.0
#define PK_FUNCINSTR_BKT_DATA_COUNT 0xFFFF
@@ -104,7 +99,6 @@ void pk_funcinstr_init() {
thrd_mstr.err = stderr;
thrd_mstr.r_buckets = PK_FUNCINSTR_BKT_START_COUNT;
thrd_mstr.buckets = (struct pk_funcinstr_bkt**)aligned_alloc(alignof(struct pk_funcinstr_bkt *), (sizeof(struct pk_funcinstr_bkt *) * PK_FUNCINSTR_BKT_START_COUNT));
- memset(thrd_mstr.buckets, 0, (sizeof(struct pk_funcinstr_bkt *) * PK_FUNCINSTR_BKT_START_COUNT));
clock_gettime(PK_TMR_CLOCK, &thrd_mstr.reset_time);
}
@@ -213,7 +207,6 @@ void pk_funcinstr_detect_and_handle_reset() {
thrd_mstr.r_buckets *= PK_FUNCINSTR_BKT_GROW_RATIO;
struct pk_funcinstr_bkt **buckets = (struct pk_funcinstr_bkt**)aligned_alloc(alignof(void *), sizeof(void *) * thrd_mstr.r_buckets);
memcpy(buckets, thrd_mstr.buckets, sizeof(void *) * (thrd_mstr.n_buckets));
- memset((char*)buckets + (sizeof(void *) * (thrd_mstr.n_buckets)), 0, (sizeof(void *) * thrd_mstr.r_buckets) - sizeof(void *) * (thrd_mstr.n_buckets));
free(thrd_mstr.buckets);
thrd_mstr.buckets = buckets;
mtx_unlock(&thrd_mstr.mtx);
@@ -247,11 +240,11 @@ __attribute__((no_instrument_function))
struct pk_funcinstr *pk_funcinstr_create_funcinstr(void *this_fn) {
struct pk_funcinstr *funcinstr = &pk_funcinstr_thrd_bkt->data[pk_funcinstr_thrd_bkt->used_count];
pk_funcinstr_thrd_bkt->used_count++;
- pk_tmr_start(funcinstr->tmr);
funcinstr->fn = this_fn;
+ pk_tmr_start(funcinstr->tmr);
funcinstr->parent = pk_funcinstr_thrd_instr;
- funcinstr->children = NULL;
funcinstr->first_child = NULL;
+ funcinstr->children = NULL;
funcinstr->n_children = 0;
funcinstr->r_children = 0;
@@ -261,9 +254,12 @@ struct pk_funcinstr *pk_funcinstr_create_funcinstr(void *this_fn) {
pk_funcinstr_thrd_instr->first_child = funcinstr;
} else {
if (pk_funcinstr_thrd_instr->n_children == pk_funcinstr_thrd_instr->r_children) {
- pk_funcinstr_thrd_instr->r_children += PK_FUNCINSTR_CHILDREN_INCREMENT_COUNT;
+ if (pk_funcinstr_thrd_instr->r_children == 0) {
+ pk_funcinstr_thrd_instr->r_children = PK_FUNCINSTR_CHILDREN_START_COUNT;
+ } else {
+ pk_funcinstr_thrd_instr->r_children *= PK_FUNCINSTR_CHILDREN_GROW_RATIO;
+ }
struct pk_funcinstr **children = (struct pk_funcinstr **)aligned_alloc(alignof(void *), sizeof(void *) * pk_funcinstr_thrd_instr->r_children);
- memset((char*)children + (sizeof(void *) * (pk_funcinstr_thrd_instr->n_children)), 0, (sizeof(void *) * pk_funcinstr_thrd_instr->r_children) - sizeof(void *) * (pk_funcinstr_thrd_instr->n_children));
if (pk_funcinstr_thrd_instr->children != NULL) {
memcpy(children, pk_funcinstr_thrd_instr->children, sizeof(void *) * pk_funcinstr_thrd_instr->n_children);
free(pk_funcinstr_thrd_instr->children);
@@ -303,7 +299,9 @@ void __cyg_profile_func_exit(void* this_fn, void* call_site) {
if (pk_funcinstr_thrd_instr == NULL) return; // exit called before enter?
pk_funcinstr_thrd_bkt->guard_exit++;
+#ifdef PK_FUNCINSTR_PRINT
Dl_info info;
+#endif /* PK_FUNCINSTR_PRINT */
if (this_fn != pk_funcinstr_thrd_instr->fn) {
int64_t i = (int64_t)pk_funcinstr_thrd_bkt->used_count - 1;
@@ -345,6 +343,7 @@ void __cyg_profile_func_exit(void* this_fn, void* call_site) {
}
pk_tmr_stop(pk_funcinstr_thrd_instr->tmr);
+#ifdef PK_FUNCINSTR_PRINT
if (dladdr(this_fn, &info) != 0) {
int depth = 0;
// TODO track depth in a better way
@@ -359,16 +358,15 @@ void __cyg_profile_func_exit(void* this_fn, void* call_site) {
demangled = abi::__cxa_demangle(info.dli_sname, NULL, NULL, NULL);
#endif
}
-#ifdef PK_FUNCINSTR_PRINT
fprintf(thrd_mstr.out, "[pkfuncinstr] %p %*s %s took %.6f ms\n"
,this_fn
,depth, ""
,demangled != NULL ? demangled : info.dli_sname != NULL ? info.dli_sname : "???"
,pk_tmr_duration_dbl_mili(pk_funcinstr_thrd_instr->tmr)
);
-#endif /* PK_FUNCINSTR_PRINT */
if (demangled != NULL) free(demangled);
}
+#endif /* PK_FUNCINSTR_PRINT */
pk_funcinstr_thrd_bkt->guard_exit=0;
pk_funcinstr_thrd_instr = pk_funcinstr_thrd_instr->parent;
}