diff options
Diffstat (limited to 'src/pk.h')
| -rw-r--r-- | src/pk.h | 1208 |
1 files changed, 765 insertions, 443 deletions
@@ -1,7 +1,7 @@ #ifndef PK_SINGLE_HEADER_FILE_H #define PK_SINGLE_HEADER_FILE_H /******************************************************************************* -* PK Single-Header-Library V0.5.2 +* PK Single-Header-Library V0.6.0 * * Author: Jonathan Bradley * Copyright: © 2024-2025 Jonathan Bradley @@ -47,6 +47,27 @@ * type-specific integers, implemented via enums. * ******************************************************************************** +* pktmpl.h: only contains c++ templates, no IMPL. +* +* Provides template structs for trampolines, allowing c-style callbacks with +* capturing lambdas. +* +* Examples: +* ```c++ +* int some_counter = 0; +* using IterCbWrapper = pk_tmpln_1<void, int*, void*>; +* IterCbWrapper cb_wrapper{}; +* cb_wrapper.func = [&some_counter](int *lhs) +* { +* (void)lhs; +* some_counter += 1; +* return; +* }; +* pk_bkt_arr_iterate(&bkt_arr, &IterCbWrapper::invoke, &cb_wrapper); +* assert(some_count == 1); +* ``` +* +******************************************************************************** * pkmem-types.h: def PK_IMPL_MEM_TYPES before including pk.h to enable ad-hoc. * * Provides the types needed by pkmem, as well as a generic pk_handle featuring a @@ -55,20 +76,19 @@ ******************************************************************************** * pkmem.h: def PK_IMPL_MEM before including pk.h to enable ad-hoc. * -* A bucketed memory manager. Allows for the creation and management of up to a -* well-defined number of buckets. +* A bucketed memory manager. Allows for the creation of ad-hoc buckets. +* +* Note: Each created pk_membucket MUST call pk_bucket_destroy(bkt). Memory +* buckets are client managed. * -* Thread safety: Bucket creation and destruction is *not* thread-safe. On the -* other hand, the "pk_new" and "pk_delete" methods *are* thread-safe, but +* Thread safety: "pk_new" and "pk_delete" methods *are* thread-safe, but * thread-safety is implemented per-bucket via a single mutex with long-running * lock times. PRs for a more performant thread-safe strategy are welcome, * complexity and benchmark depending. * * The following definitions (shown with defaults) can be overridden: -* PK_DEFAULT_BUCKET_SIZE 256MB (used when bkt is NULL on first call) +* PK_MEM_DEFAULT_BUCKET_SIZE 256MB (client-convenience only) * PK_MINIMUM_ALIGNMENT 1 -* PK_MAXIMUM_ALIGNMENT 64 -* PK_MAX_BUCKET_COUNT 8 * * For debugging purposes, define the following: * PK_MEMORY_DEBUGGER : enables a tracking system for all allocs and frees to @@ -253,29 +273,25 @@ * ``` * ******************************************************************************** -* pktmpl.h: only contains c++ templates, no IMPL. +* pkbktarr.h: define PK_IMPL_FUNCINSTR before including pk.h to enable ad-hoc. * -* Provides template structs for trampolines, allowing c-style callbacks with -* capturing lambdas. +* Provides function instrumentation. +* +* Note: Currently only supports gcc/g++. +* Note: Currently only prints results. * * Examples: -* ```c++ -* int some_counter = 0; -* using IterCbWrapper = pk_tmpln_1<void, int*, void*>; -* IterCbWrapper cb_wrapper{}; -* cb_wrapper.func = [&some_counter](int *lhs) -* { -* (void)lhs; -* some_counter += 1; -* return; -* }; -* pk_bkt_arr_iterate(&bkt_arr, &IterCbWrapper::invoke, &cb_wrapper); -* assert(some_count == 1); +* ```c +* main() { +* pk_funcinstr_init(); +* ... +* pk_funcinstr_teardown(); +* } * ``` * *******************************************************************************/ -#define PK_VERSION "0.5.2" +#define PK_VERSION "0.6.0" #ifdef PK_IMPL_ALL # ifndef PK_IMPL_MEM_TYPES @@ -302,6 +318,9 @@ # ifndef PK_IMPL_BKTARR # define PK_IMPL_BKTARR # endif +# ifndef PK_IMPL_FUNCINSTR +# define PK_IMPL_FUNCINSTR +# endif #endif #ifndef PK_MACROS_H #define PK_MACROS_H @@ -550,6 +569,41 @@ TypeSafeInt2_H_constexpr(TypeName, Type, Max, PK_CONCAT(TypeName, _T), PK_CONCAT(TypeName, _MAX), PK_CONCAT(TypeName, _T_MAX)) #endif /* PK_MACROS_H */ +#ifndef PK_PKTMPLN_H +#define PK_PKTMPLN_H + +#if defined (__cplusplus) +#include <functional> +template<typename Ret, typename A1, typename B1> +struct pk_tmpln_1 { + using FuncType = std::function<Ret(A1)>; + FuncType func; + static Ret invoke(void *ptr, B1 b1) { + auto *self = static_cast<pk_tmpln_1*>(ptr); + return self->func(reinterpret_cast<A1>(b1)); + } +}; +template<typename Ret, typename A1, typename A2, typename B1, typename B2> +struct pk_tmpln_2 { + using FuncType = std::function<Ret(A1, A2)>; + FuncType func; + static Ret invoke(void *ptr, B1 b1, B2 b2) { + auto *self = static_cast<pk_tmpln_2*>(ptr); + return self->func(reinterpret_cast<A1>(b1), reinterpret_cast<A2>(b2)); + } +}; +template<typename Ret, typename A1, typename A2, typename A3, typename B1, typename B2, typename B3> +struct pk_tmpln_3 { + using FuncType = std::function<Ret(A1, A2, A3)>; + FuncType func; + static Ret invoke(void *ptr, B1 b1, B2 b2, B3 b3) { + auto *self = static_cast<pk_tmpln_3*>(ptr); + return self->func(reinterpret_cast<A1>(b1), reinterpret_cast<A2>(b2), reinterpret_cast<A3>(b3)); + } +}; +#endif + +#endif /* PK_PKTMPLN_H */ #ifndef PK_MEM_TYPES_H #define PK_MEM_TYPES_H @@ -606,6 +660,12 @@ pk_handle_validate_constexpr() struct pk_membucket; +enum PK_MEMBUCKET_FLAGS : uint64_t { + PK_MEMBUCKET_FLAG_NONE = (0), + PK_MEMBUCKET_FLAG_TRANSIENT = (1 << 01l), + PK_MEMBUCKET_FLAG_ALL = (0xFFFFFFFFFFFFFFFF), +}; + #endif /* PK_MEM_TYPES_H */ #ifdef PK_IMPL_MEM_TYPES @@ -631,24 +691,17 @@ pk_handle_validate(const struct pk_handle handle, const struct pk_handle bucketH #include <stdint.h> #include <stdlib.h> -#ifndef PK_DEFAULT_BUCKET_SIZE -# define PK_DEFAULT_BUCKET_SIZE (1ULL * 1024ULL * 1024ULL * 256ULL) -#endif -#ifndef PK_MINIMUM_ALIGNMENT -# define PK_MINIMUM_ALIGNMENT 1 -#endif -#ifndef PK_MAXIMUM_ALIGNMENT -# define PK_MAXIMUM_ALIGNMENT 64 +#ifndef PK_MEM_DEFAULT_BUCKET_SIZE +# define PK_MEM_DEFAULT_BUCKET_SIZE (1ULL * 1024ULL * 1024ULL * 256ULL) #endif -struct pk_membucket* pk_bucket_create(const char* description, int64_t sz, bool transient); -void pk_bucket_destroy(struct pk_membucket* bkt); -void pk_bucket_reset(struct pk_membucket* bkt); - -void pk_memory_debug_print(); -void pk_memory_flush(); -void pk_memory_teardown_all(); -bool pk_memory_is_in_bucket(const void* ptr, const struct pk_membucket* bkt); +size_t pk_mem_bucket_calculate_size(size_t sz, size_t reserved_block_count); +struct pk_membucket* pk_mem_bucket_create(const char* description, int64_t sz, enum PK_MEMBUCKET_FLAGS flags); +void pk_mem_bucket_debug_print(struct pk_membucket *bkt); +void pk_mem_bucket_destroy(struct pk_membucket* bkt); +void pk_mem_bucket_reset(struct pk_membucket* bkt); +void pk_mem_bucket_set_client_mem_bucket(struct pk_membucket *bkt); +bool pk_mem_bucket_ptr_is_in_mem_bucket(const void* ptr, const struct pk_membucket* bkt); void* pk_new_base(size_t sz, size_t alignment); void* pk_new_bkt(size_t sz, size_t alignment, struct pk_membucket* bkt); @@ -660,14 +713,15 @@ void pk_delete(const void* ptr, size_t sz, struct pk_membucket* bkt); #if defined(__cplusplus) #include <type_traits> +#include <new> static inline void stupid_header_warnings_cpp() { (void)std::is_const<void>::value; } template <typename T> inline T* -pk_new(pk_membucket* bucket = nullptr) +pk_new(pk_membucket* bucket = NULL) { - void* ptr = nullptr; + void* ptr = NULL; if (bucket) { ptr = pk_new_bkt(sizeof(T), alignof(T), bucket); } else { @@ -681,15 +735,15 @@ pk_new(pk_membucket* bucket = nullptr) template <typename T> inline T* -pk_new(long count, pk_membucket* bucket = nullptr) +pk_new(long count, pk_membucket* bucket = NULL) { - char* ptr = nullptr; + char* ptr = NULL; if (bucket) { ptr = static_cast<char*>(pk_new_bkt(sizeof(T) * count, alignof(T), bucket)); } else { ptr = static_cast<char*>(pk_new_base(sizeof(T) * count, alignof(T))); } - if (ptr == nullptr) return nullptr; + if (ptr == NULL) return NULL; if IS_CONSTRUCTIBLE(T) { for (long i = 0; i < count; ++i) { new (ptr + (i * sizeof(T))) T{}; @@ -700,7 +754,7 @@ pk_new(long count, pk_membucket* bucket = nullptr) template <typename T> inline void -pk_delete(const T* ptr, pk_membucket* bucket = nullptr) +pk_delete(const T* ptr, pk_membucket* bucket = NULL) { if IS_DESTRUCTIBLE(T) { reinterpret_cast<const T*>(ptr)->~T(); @@ -714,7 +768,7 @@ pk_delete(const T* ptr, pk_membucket* bucket = nullptr) template <typename T> inline void -pk_delete(const T* ptr, long count, pk_membucket* bucket = nullptr) +pk_delete(const T* ptr, long count, pk_membucket* bucket = NULL) { if IS_DESTRUCTIBLE(T) { for (long i = 0; i < count; ++i) { @@ -742,282 +796,264 @@ pk_delete(const T* ptr, long count, pk_membucket* bucket = nullptr) static inline void pkmem_stupid_header_warnings() { (void)stdout; } #if defined(PK_MEMORY_DEBUGGER) -/* - * Note that certain aspects of this expect that you only have one non-transient bucket. - * If you need to track multiple non-transient buckets, these sections will need a refactor. - */ #endif -#ifndef PK_MAX_BUCKET_COUNT -# define PK_MAX_BUCKET_COUNT 8 +#ifndef PK_MINIMUM_ALIGNMENT +# define PK_MINIMUM_ALIGNMENT 1 +#endif +#ifndef PK_MEMORY_DEBUGGER_MAX_BUCKET_COUNT + #define PK_MEMORY_DEBUGGER_MAX_BUCKET_COUNT 16 #endif +#define EXPECTED_PK_MEMBLOCK_SIZE 128 + +#define pk_memblock_blocks_idx(bkt, idx) ((bkt->block_capacity-1)-(idx)) +#define pk_bkt_data(bkt) ((char*)bkt + EXPECTED_PK_MEMBLOCK_SIZE) +#define pk_bkt_head(bkt) ((&bkt->data[0]) + bkt->head) +#define pk_bkt_data_sz(bkt) (size_t)((char*)&bkt->blocks[0] - &bkt->data[0]) + struct pk_memblock { - char* data; + union { + char* data; + void* ptr; + }; size_t size; }; struct pk_membucket { - // the total size of the bucket, `blocks+ptr` + // 00 + mtx_t mtx; + // 40 + // the total size of the bucket, struct+data size_t size; - // the current head of the bucket: byte offset from `ptr`. + // 48 + // the current head of the bucket: byte offset from `data`. // All currently alloc'd data is before this offset size_t head; - // amount of lost bytes in this membucket, hopefully zero - size_t lostBytes; + // 56 + uint32_t block_capacity; + // 60 + uint32_t block_head_l; + // 64 + // this should ALWAYS point to the last block containing unalloced space in bkt + uint32_t block_head_r; + // 68 // the number of active allocations from this bucket - size_t allocs; - // the index of the last empty block. - // Should always point to `pk_memblock{ .data = ptr+head, .size=size-head }` - size_t lastEmptyBlockIndex; - // number of pk_memblocks in the `*blocks` array - size_t maxBlockCount; - // ptr to an array of pk_memblock to track ALL free space between ptr and ptr+sz - struct pk_memblock* blocks; - // starting point for alloc'd data - union { - char* ptr; - void* raw; - }; - const char* description; - mtx_t mtx; - bool transient; -}; - -static struct pk_membucket pk_buckets[PK_MAX_BUCKET_COUNT]; -static size_t pk_bucket_head = 0; - + // -should correlate to blocks that have a sz > 0 + uint32_t alloc_count; + // 72 + struct pk_memblock *blocks; + // 80 + enum PK_MEMBUCKET_FLAGS flags; + // 88 + const char *description; + // 96 #ifdef PK_MEMORY_DEBUGGER -#include <stdatomic.h> -struct pk_dbg_memblock { - struct pk_memblock blk; - struct pk_membucket *bkt; -}; -static struct pk_dbg_memblock debug_all_allocs[1024 * 1024]; -static atomic_size_t debug_alloc_head = 0; -static atomic_bool has_init_debug = false; -static mtx_t debug_alloc_mtx; + struct pk_memblock *debug_blocks; + // 104 + uint32_t debug_head_l; + // 108 + uint32_t debug_head_r; + // 112 + uint32_t debug_block_capacity; + // 116 + char padding[(8*1)+4]; +#else + char padding[(8*4)]; #endif + // 128 + // starting point for alloc'd data + // data[] is illegal in c++ (though it works in gcc/clang, but so does this) + char data[1]; +}; -bool -pk_memory_is_in_bucket(const void* ptr, const struct pk_membucket* bkt) -{ - if (ptr >= bkt->raw && (const char*)ptr < bkt->ptr + bkt->size) return true; - return false; -} +static struct pk_membucket *client_bucket = NULL; -void -pk_memory_debug_print() +size_t +pk_mem_bucket_calculate_size(size_t sz, size_t reserved_block_count) { - size_t i, d; -#ifdef PK_MEMORY_DEBUGGER - size_t count = 0; - size_t dah = debug_alloc_head; -#endif - PK_LOGV_INF("Memory Manager printout:\nBucket count: %li\n", pk_bucket_head); - for (i = 0; i < pk_bucket_head; ++i) { - PK_LOGV_INF("- bucket #%li\n", i); - PK_LOGV_INF("\tdescription: %s\n", pk_buckets[i].description); - PK_LOGV_INF("\tsize: %li\n", pk_buckets[i].size); - PK_LOGV_INF("\thead: %li\n", pk_buckets[i].head); - PK_LOGV_INF("\tlostBytes: %li\n", pk_buckets[i].lostBytes); - PK_LOGV_INF("\tallocs: %li\n", pk_buckets[i].allocs); - PK_LOGV_INF("\tlastEmptyBlockIndex: %li\n", pk_buckets[i].lastEmptyBlockIndex); - PK_LOGV_INF("\tmaxBlockCount: %li\n", pk_buckets[i].maxBlockCount); - PK_LOGV_INF("\tblocks: %p\n", (void *)pk_buckets[i].blocks); - PK_LOGV_INF("\tptr: %p\n", (void *)pk_buckets[i].ptr); - PK_LOGV_INF("\ttransient: %i\n", pk_buckets[i].transient); -#ifdef PK_MEMORY_DEBUGGER - for (d = 0; d < dah; ++d) { - if (debug_all_allocs[d].bkt == &pk_buckets[d] && debug_all_allocs[d].blk.size > 0) { - count += 1; - } - } - PK_LOGV_INF("\tdebug alloc count: %lu\n", count); - PK_LOGV_INF("\tdebug alloc last: %lu\n", dah); -#endif - } + size_t base_size = EXPECTED_PK_MEMBLOCK_SIZE + sz + (sizeof(struct pk_memblock) * reserved_block_count); + // This trick ensures that our array of pk_memblocks at the end is mem-aligned. + // We do, however, still have to do the math when setting the ptr. + // Why? the user may have strict memory requirements and didn't call this function. + return base_size + (64 - (base_size % 64)); } -void -pk_memory_flush() +bool +pk_mem_bucket_ptr_is_in_mem_bucket(const void* ptr, const struct pk_membucket* bkt) { - for (long i = pk_bucket_head - 1; i > -1; --i) { - if (pk_buckets[i].head != 0) break; - if (pk_buckets[i].transient == true) break; - pk_bucket_head--; - if (pk_buckets[i].raw == CAFE_BABE(void)) continue; - pk_bucket_destroy(&pk_buckets[i]); - } + return (ptr >= (void*)bkt && ptr < (void*)pk_bkt_head(bkt)); } void -pk_memory_teardown_all() -{ - for (int64_t i = pk_bucket_head; i > 0; --i) { - if (pk_buckets[i - 1].ptr == nullptr) continue; - if (pk_buckets[i - 1].ptr == CAFE_BABE(char)) continue; - pk_bucket_destroy(&pk_buckets[i - 1]); - } - pk_bucket_head = 0; +pk_mem_bucket_debug_print(struct pk_membucket *bkt) +{ + PK_LOG_INF("pk_membucket details:\n"); + PK_LOGV_INF("\tbkt: %p\n", (void *)bkt); + PK_LOGV_INF("\tdescription: %s\n", bkt->description); + PK_LOGV_INF("\tsize: %lu\n", bkt->size); + PK_LOGV_INF("\thead: %lu\n", bkt->head); + PK_LOGV_INF("\tallocs: %u\n", bkt->alloc_count); + PK_LOGV_INF("\tblock head_l: %u\n", bkt->block_head_l); + PK_LOGV_INF("\tblock head_r: %u\n", bkt->block_head_r); + PK_LOGV_INF("\tflags: %lu\n", bkt->flags); #ifdef PK_MEMORY_DEBUGGER - debug_alloc_head = 0; - mtx_destroy(&debug_alloc_mtx); - has_init_debug = false; + PK_LOGV_INF("\tdebug alloc head_l: %u\n", bkt->debug_head_l); + PK_LOGV_INF("\tdebug alloc head_r: %u\n", bkt->debug_head_r); + PK_LOGV_INF("\tdebug cappacity: %u\n", bkt->debug_block_capacity); #endif } -static int64_t -pk_bucket_create_inner(int64_t sz, bool transient, const char* description) -{ - if (pk_bucket_head >= PK_MAX_BUCKET_COUNT) return -1; -#ifdef PK_MEMORY_DEBUGGER - if (has_init_debug == false) { - has_init_debug = true; - mtx_init(&debug_alloc_mtx, mtx_plain); - memset(debug_all_allocs, 0, sizeof(struct pk_dbg_memblock) * 1024 * 1024); - } -#endif - int64_t blockCount = sz * 0.01; - struct pk_membucket* bkt = &pk_buckets[pk_bucket_head]; +struct pk_membucket* +pk_mem_bucket_create(const char* description, int64_t sz, enum PK_MEMBUCKET_FLAGS flags) +{ + // 512 example: + // [000-127] pk_membucket + // [128-191] 64 bytes of data LOL + // [192-511] 20 pk_memblocks (20 is worst-case, start 16, 4 per 64 bytes) + assert(sz >= 512 && "[pkmem.h] bucket too small to track allocation data"); + struct pk_membucket* bkt = (struct pk_membucket*)aligned_alloc(64, sz); + if (bkt == NULL) return NULL; + mtx_init(&bkt->mtx, mtx_plain); bkt->size = sz; bkt->head = 0; - bkt->lostBytes = 0; - bkt->allocs = 0; - bkt->lastEmptyBlockIndex = 0; - bkt->maxBlockCount = blockCount < 10 ? 10 : blockCount; - bkt->blocks = (struct pk_memblock*)malloc(sz); - mtx_init(&bkt->mtx, mtx_plain); - assert(bkt->blocks != nullptr && "failed to allocate memory"); -#if 1 - memset(bkt->blocks, 0, sz); -#endif - bkt->ptr = ((char*)(bkt->blocks)) + (sizeof(struct pk_memblock) * bkt->maxBlockCount); - size_t misalignment = (size_t)(bkt->ptr) % PK_MAXIMUM_ALIGNMENT; - if (misalignment != 0) { - size_t moreBlocks = misalignment / sizeof(struct pk_memblock); - bkt->maxBlockCount += moreBlocks; - bkt->ptr += (PK_MAXIMUM_ALIGNMENT - misalignment); - } + bkt->block_capacity = 16; + bkt->block_head_l = 0; + bkt->block_head_r = 0; + bkt->alloc_count = 0; + bkt->flags = flags; bkt->description = description; - bkt->transient = transient; - struct pk_memblock* memBlock = (struct pk_memblock*)(bkt->blocks); - memBlock->data = bkt->ptr; - memBlock->size = sz - (sizeof(struct pk_memblock) * bkt->maxBlockCount); - return pk_bucket_head++; -} + char* blocks_addr = (char*)bkt + sz - (sizeof(struct pk_memblock) * bkt->block_capacity); + blocks_addr -= (size_t)blocks_addr % 64; + bkt->blocks = (struct pk_memblock*)blocks_addr; + bkt->block_capacity = (size_t)(((char*)bkt + sz) - blocks_addr) / sizeof(struct pk_memblock); -struct pk_membucket* -pk_bucket_create(const char* description, int64_t sz, bool transient) -{ - int64_t bkt_index = pk_bucket_create_inner(sz, transient, description); - // TODO some of of error handling - if (bkt_index < 0) { return nullptr; } - return &pk_buckets[bkt_index]; + bkt->blocks[pk_memblock_blocks_idx(bkt,0)].size = pk_bkt_data_sz(bkt); + bkt->blocks[pk_memblock_blocks_idx(bkt,0)].ptr = pk_bkt_data(bkt); + +#ifdef PK_MEMORY_DEBUGGER + bkt->debug_head_l = 0; + bkt->debug_head_r = 0; + bkt->debug_block_capacity = 128; + bkt->debug_blocks = (struct pk_memblock*)aligned_alloc(alignof(struct pk_memblock), sizeof(struct pk_memblock) * 128); + bkt->debug_blocks[0].ptr = NULL; + bkt->debug_blocks[0].size = 0; +#endif + + return bkt; } void -pk_bucket_destroy(struct pk_membucket* bkt) +pk_mem_bucket_destroy(struct pk_membucket* bkt) { - size_t i; -#ifdef PK_MEMORY_DEBUGGER - size_t dah; -#endif - for (i = 0; i < pk_bucket_head; ++i) { - if (&pk_buckets[i] == bkt) { - if (pk_bucket_head == i + 1) - pk_bucket_head--; - break; - } - } - free(bkt->blocks); - bkt->size = 0; - bkt->head = 0; - bkt->lostBytes = 0; - bkt->allocs = 0; - bkt->lastEmptyBlockIndex = 0; - bkt->maxBlockCount = 0; - bkt->blocks = CAFE_BABE(struct pk_memblock); - bkt->ptr = CAFE_BABE(char); - bkt->transient = false; - mtx_destroy(&bkt->mtx); + assert(bkt != NULL); #ifdef PK_MEMORY_DEBUGGER - dah = debug_alloc_head; - for (i = dah; i > 0; --i) { - if (debug_all_allocs[i].bkt == bkt) { - debug_all_allocs[i].blk.data = NULL; - debug_all_allocs[i].blk.size = 0u; - } - } + if (bkt->debug_blocks != NULL) free(bkt->debug_blocks); #endif + free(bkt); } void -pk_bucket_reset(struct pk_membucket* bkt) +pk_mem_bucket_reset(struct pk_membucket* bkt) { -#ifdef PK_MEMORY_DEBUGGER - size_t i; -#endif - if (bkt->transient != true) { + if (PK_HAS_FLAG(bkt->flags, PK_MEMBUCKET_FLAG_TRANSIENT) == false) { PK_LOG_ERR("WARNING: pk_bucket_reset called on non-transient pk_membucket\n"); } bkt->head = 0; - bkt->lostBytes = 0; - bkt->allocs = 0; - bkt->lastEmptyBlockIndex = 0; - bkt->blocks->data = bkt->ptr; - bkt->blocks->size = bkt->size - (sizeof(struct pk_memblock) * bkt->maxBlockCount); + bkt->block_capacity = 16; + char* blocks_addr = (char*)bkt + bkt->size - (sizeof(struct pk_memblock) * bkt->block_capacity); + blocks_addr -= (size_t)blocks_addr % 64; + bkt->blocks = (struct pk_memblock*)blocks_addr; + bkt->block_capacity = (size_t)(((char*)bkt + bkt->size) - blocks_addr) / sizeof(struct pk_memblock); + bkt->block_head_l = 0; + bkt->block_head_r = 0; + bkt->alloc_count = 0; + + bkt->blocks[pk_memblock_blocks_idx(bkt,0)].size = pk_bkt_data_sz(bkt); + bkt->blocks[pk_memblock_blocks_idx(bkt,0)].ptr = pk_bkt_data(bkt); + #ifdef PK_MEMORY_DEBUGGER - for (i = debug_alloc_head; i > 0; --i) { - if (debug_all_allocs[i].bkt == bkt) { - debug_all_allocs[i].blk.data = NULL; - debug_all_allocs[i].blk.size = 0u; - } - } + bkt->debug_head_l = 0; + bkt->debug_head_r = 0; + bkt->debug_blocks[0].ptr = NULL; + bkt->debug_blocks[0].size = 0; #endif } +void pk_mem_bucket_set_client_mem_bucket(struct pk_membucket *bkt) { + client_bucket = bkt; +} + void pk_bucket_insert_block(struct pk_membucket* bkt, const struct pk_memblock* block) { - int64_t index = bkt->lastEmptyBlockIndex; - while (index >= 0) { - struct pk_memblock* b = &bkt->blocks[index]; - struct pk_memblock* nb = &bkt->blocks[index + 1]; - if (b->data < block->data) { + // 2025-06-03 JCB + // Note that this function should only be called if we're INSERTING. + // This means that the block will never go at the END of the list - that would be an append. + // It can, however, be placed at the beginning, in which case the entire array shifts. + + struct pk_memblock* new_block = NULL; + struct pk_memblock* old_block = NULL; + size_t i, k; + + // 1. resize if needed + if (bkt->block_head_r+1 == bkt->block_capacity) { + if (bkt->blocks[pk_memblock_blocks_idx(bkt, bkt->block_head_r)].size < sizeof(struct pk_memblock)) { + PK_LOG_ERR("[pkmem.h] bkt out of memory when expanding memory blocks."); + exit(1); + } + // this is all that needs done, arr can just grow like this + bkt->blocks[pk_memblock_blocks_idx(bkt, bkt->block_head_r)].size -= sizeof(struct pk_memblock); + bkt->block_capacity += 1; + bkt->blocks -= 1; + } + + // 2. move all blocks forward until we pass the pointer + // reminder that these blocks are in REVERSE order + for (i = bkt->block_head_r+1; i > 0; --i) { + k = pk_memblock_blocks_idx(bkt, i); + new_block = &bkt->blocks[k]; + old_block = new_block+1; + *new_block = *old_block; + if (old_block->data < block->data) { break; } - nb->data = b->data; - nb->size = b->size; - index -= 1; } - struct pk_memblock *b = &bkt->blocks[index + 1]; - b->data = block->data; - b->size = block->size; - bkt->lastEmptyBlockIndex += 1; + assert(old_block != NULL); + if (i == 0 && old_block != NULL) { + *old_block = *block; + } else { + *new_block = *block; + } + bkt->block_head_r += 1; } void -pk_bucket_collapse_empty_blocks(struct pk_membucket* bkt) -{ - size_t i, ii; - for (ii = bkt->lastEmptyBlockIndex+1; ii > 0; --ii) { - i = ii-1; - struct pk_memblock* block = &bkt->blocks[i]; - if (block->size == 0 && i == bkt->lastEmptyBlockIndex) { - block->data = nullptr; - bkt->lastEmptyBlockIndex -= 1; - continue; - } - if (block->size > 0) { - continue; - } - for (size_t k = i; k < bkt->lastEmptyBlockIndex; ++k) { - bkt->blocks[k].data = bkt->blocks[k + 1].data; - bkt->blocks[k].size = bkt->blocks[k + 1].size; - } - bkt->lastEmptyBlockIndex -= 1; +pk_bucket_collapse_blocks(struct pk_membucket* bkt) +{ + // 1. loop through from (rev_idx)0 to head_r, shifting any blocks that have size 0 + struct pk_memblock* new_block; + struct pk_memblock* old_block; + size_t i, ii, bhr; + // there's an off by one annoynce here + // start with ii = 0 + // if we start with ii = 1, we might subtract from block_head_r when nothing was shifted + for (i = 0, ii = 0, bhr = bkt->block_head_r; (i + ii) <= bhr; ++i) { + new_block = &bkt->blocks[pk_memblock_blocks_idx(bkt, i)]; + if (new_block->size > 0) continue; + do { + old_block = new_block - ii; + if (old_block->size == 0) { + ii+=1; + } else { + break; + } + } while (i + ii <= bhr); + *new_block = *old_block; + old_block->size = 0; + old_block->ptr = NULL; } + bkt->block_head_r -= ii; } void* @@ -1026,130 +1062,122 @@ pk_new_bkt(size_t sz, size_t alignment, struct pk_membucket* bkt) #ifdef PK_MEMORY_FORCE_MALLOC return malloc(sz); #endif - if (sz == 0) return nullptr; - if (bkt == nullptr) return nullptr; + if (sz == 0) return NULL; + if (bkt == NULL) return NULL; // TODO some type of error handling - if ((bkt->size - bkt->head) < (sz + alignment - 1)) return nullptr; - size_t i; + if ((bkt->size - bkt->head) < (sz + alignment - 1)) return NULL; + size_t i, k; size_t calculatedAlignment = alignment < PK_MINIMUM_ALIGNMENT ? PK_MINIMUM_ALIGNMENT : alignment; size_t misalignment = 0; - struct pk_memblock* prevBlock = nullptr; - struct pk_memblock* block = nullptr; - struct pk_memblock* nextBlock = nullptr; - void* data = nullptr; + struct pk_memblock tmp_blk; + struct pk_memblock* block = NULL; + void* data = NULL; mtx_lock(&bkt->mtx); - for (i = 0; i <= bkt->lastEmptyBlockIndex; ++i) { - struct pk_memblock* blk = &bkt->blocks[i]; - misalignment = (size_t)(blk->data) % calculatedAlignment; + + // find block + for (i = 0; i <= bkt->block_head_r; ++i) { + k = pk_memblock_blocks_idx(bkt, i); + tmp_blk = bkt->blocks[k]; + misalignment = (size_t)(tmp_blk.data) % calculatedAlignment; misalignment = (calculatedAlignment - misalignment) % calculatedAlignment; - if (blk->size >= sz + misalignment) { - block = blk; - if (i < bkt->lastEmptyBlockIndex && bkt->blocks[i + 1].data == block->data + block->size) { - nextBlock = &bkt->blocks[i + 1]; - } - if (i > 0 && i != bkt->lastEmptyBlockIndex && (bkt->blocks[i-1].data + bkt->blocks[i-1].size) == block->data) { - prevBlock = &bkt->blocks[i - 1]; - } - break; + if (tmp_blk.size < sz + misalignment) { + continue; } + block = &bkt->blocks[k]; + break; } - if (block == nullptr) { + if (block == NULL) { mtx_unlock(&bkt->mtx); - assert(block != nullptr && "memory corruption: not enough space in chosen bkt"); + assert(block != NULL && "memory corruption: not enough space in chosen bkt"); } data = block->data + misalignment; #ifdef PK_MEMORY_DEBUGGER - bool handled = bkt->transient; - struct pk_dbg_memblock* mb = nullptr; - if (handled == false) { - for (i = 0; i < debug_alloc_head; ++i) { - mb = &debug_all_allocs[i]; - if (mb->bkt != NULL) continue; - assert((mb->blk.size == 0 || (void*)(mb->blk.data) != data) && "mem address alloc'd twice!"); - if (mb->blk.size == 0) { - mb->blk.data = (char*)(data); - mb->blk.size = sz; - mb->bkt = bkt; - handled = true; - break; + size_t ii; + if (PK_HAS_FLAG(bkt->flags, PK_MEMBUCKET_FLAG_TRANSIENT) == false) { + for (i = 0; i < bkt->debug_head_r; ++i) { + assert((bkt->debug_blocks[i].size == 0 || (void*)(bkt->debug_blocks[i].data) != data) && "mem address alloc'd twice!"); + } + i = bkt->debug_head_l; + if (bkt->debug_head_l == bkt->debug_head_r) { + bkt->debug_head_l++; + bkt->debug_head_r++; + + if (bkt->debug_head_r == bkt->debug_block_capacity) { + struct pk_memblock *debug_blocks; + debug_blocks = (struct pk_memblock*)aligned_alloc(alignof(struct pk_memblock), sizeof(struct pk_memblock) * (bkt->debug_block_capacity + 128)); + assert(debug_blocks != NULL); + memcpy(debug_blocks, bkt->debug_blocks, sizeof(struct pk_memblock) * bkt->debug_block_capacity); + free(bkt->debug_blocks); + bkt->debug_blocks = debug_blocks; + bkt->debug_block_capacity += 128; } + + bkt->debug_blocks[bkt->debug_head_r].ptr = NULL; + bkt->debug_blocks[bkt->debug_head_r].size = 0; + + } else { + // 2025-06-05 JCB + // This intentionally looks at debug_head_r, which could potentially + // be uninitialized. I added some logic elsewhere to ensure that + // whenever debug_head_r is incremented, we set the related block + // to NULL/0 so that this will catch size==0. + // I was experiencing an issue where in testing it was initialized to + // NULL/0, but then in a client application it was garbage data. + for (ii = bkt->debug_head_l+1; ii <= bkt->debug_head_r; ++ii) { + if (bkt->debug_blocks[ii].size == 0) { + bkt->debug_head_l = ii; + break; + } + } + assert(ii != bkt->debug_head_r+1); } - } - if (handled == false) { - mtx_lock(&debug_alloc_mtx); - i = debug_alloc_head++; - mtx_unlock(&debug_alloc_mtx); - debug_all_allocs[i].blk.data = (char*)data; - debug_all_allocs[i].blk.size = sz; - debug_all_allocs[i].bkt = bkt; + assert(bkt->debug_head_l <= bkt->debug_head_r); + bkt->debug_blocks[i].data = (char*)data; + bkt->debug_blocks[i].size = sz; } #endif - int64_t afterSize = block->size - (misalignment + sz); - if (block->data == bkt->ptr + bkt->head) { + if (block->data == pk_bkt_head(bkt)) { bkt->head += (sz + misalignment); } - if (afterSize > 0 && nextBlock == nullptr) { - struct pk_memblock newBlock; - memset(&newBlock, 0, sizeof(struct pk_memblock)); - newBlock.data = block->data + misalignment + sz; - newBlock.size = afterSize; - pk_bucket_insert_block(bkt, &newBlock); - } - if (prevBlock == nullptr && nextBlock == nullptr) { - block->size = misalignment; - } else if (nextBlock != nullptr) { - block->size = misalignment; - nextBlock->data -= afterSize; - nextBlock->size += afterSize; - } else if (prevBlock != nullptr) { - prevBlock->size += misalignment; - block->data += misalignment + sz; - block->size = 0; // if you make it here, afterSize has already been handled - } else { - assert(true == false && "this shouldn't happen"); + + tmp_blk.data = block->data; + tmp_blk.size = misalignment; + block->data += misalignment + sz; + block->size -= misalignment + sz; + + if (tmp_blk.size > 0) { + pk_bucket_insert_block(bkt, &tmp_blk); } - bkt->allocs++; - assert(data >= bkt->raw && "allocated data is before bucket data"); - assert((char*)data <= bkt->ptr + bkt->size && "allocated data is after bucket data"); - pk_bucket_collapse_empty_blocks(bkt); + pk_bucket_collapse_blocks(bkt); + + bkt->alloc_count++; + assert(data >= (void*)pk_bkt_data(bkt) && "allocated data is before bucket data"); + assert((char*)data <= pk_bkt_data(bkt) + bkt->size && "allocated data is after bucket data"); #ifdef PK_MEMORY_DEBUGGER - if (!bkt->transient) { + if (PK_HAS_FLAG(bkt->flags, PK_MEMBUCKET_FLAG_TRANSIENT) == false) { + size_t k; int64_t debug_tracked_alloc_size = 0; - int64_t debug_bucket_alloc_size = bkt->size - (sizeof(struct pk_memblock) * bkt->maxBlockCount); - for (i = 0; i < debug_alloc_head; ++i) { - if (debug_all_allocs[i].bkt != bkt) continue; - debug_tracked_alloc_size += debug_all_allocs[i].blk.size; + int64_t debug_bucket_alloc_size = pk_bkt_data_sz(bkt); + for (i = 0; i < bkt->debug_head_r; ++i) { + debug_tracked_alloc_size += bkt->debug_blocks[i].size; } - for (i = 0; i <= bkt->lastEmptyBlockIndex; ++i) { - debug_bucket_alloc_size -= bkt->blocks[i].size; + for (i = 0; i <= bkt->block_head_r; ++i) { + k = pk_memblock_blocks_idx(bkt, i); + debug_bucket_alloc_size -= bkt->blocks[k].size; } assert(debug_tracked_alloc_size == debug_bucket_alloc_size && "allocation size mismatch!"); } #endif mtx_unlock(&bkt->mtx); + memset(data, 0, sz); return data; } void* pk_new_base(size_t sz, size_t alignment) { - struct pk_membucket* bkt = nullptr; - for (size_t i = 0; i < pk_bucket_head; ++i) { - if (pk_buckets[i].transient == true) { - continue; - } - if (pk_buckets[i].size - pk_buckets[i].head < sz + (alignment - 1)) { - continue; - } - bkt = &pk_buckets[i]; - break; - } - if (bkt == nullptr) { - int64_t bkt_index = pk_bucket_create_inner(PK_DEFAULT_BUCKET_SIZE, false, "pk_bucket internally created"); - // TODO some of of error handling - if (bkt_index >= 0) bkt = &pk_buckets[bkt_index]; - } - return pk_new_bkt(sz, alignment, bkt); + if (client_bucket == NULL) return NULL; + return pk_new_bkt(sz, alignment, client_bucket); } void* @@ -1163,97 +1191,121 @@ void pk_delete_bkt(const void* ptr, size_t sz, struct pk_membucket* bkt) { #ifdef PK_MEMORY_FORCE_MALLOC +#if defined(__cplusplus) return std::free(const_cast<void*>(ptr)); +#else + return free((void*)ptr); #endif - size_t i; +#endif + size_t i, k; mtx_lock(&bkt->mtx); - assert(bkt->allocs > 0); - assert(ptr >= bkt->raw && (char*)ptr < bkt->ptr + bkt->size && "pointer not in memory bucket range"); + assert(bkt->alloc_count > 0); + assert(pk_mem_bucket_ptr_is_in_mem_bucket(ptr, bkt) && "pointer not in memory bucket range"); assert(sz > 0 && "attempted to free pointer of size 0"); #ifdef PK_MEMORY_DEBUGGER - size_t ii; - bool found = bkt->transient; - mtx_lock(&debug_alloc_mtx); + bool found = PK_HAS_FLAG(bkt->flags, PK_MEMBUCKET_FLAG_TRANSIENT); + struct pk_memblock *debug_memblocks = bkt->debug_blocks; + struct pk_memblock *mb; if (found == false) { - for (ii = debug_alloc_head; ii > 0; --ii) { - i = ii-1; - struct pk_dbg_memblock* mb = &debug_all_allocs[i]; - if (mb->bkt != bkt) continue; - if (mb->blk.size == 0) continue; - if ((void*)(mb->blk.data) == ptr) { - assert(mb->blk.size == sz && "[pkmem.h] incorrect free size"); - mb->blk.size = 0; - mb->bkt = NULL; + for (i = bkt->debug_head_r+1; i > 0; --i) { + mb = &debug_memblocks[i-1]; + if (mb->size == 0) continue; + if ((void*)(mb->ptr) == ptr) { + assert(mb->size == sz && "[pkmem.h] incorrect free size"); + mb->ptr = NULL; + mb->size = 0; found = true; - if (i == (debug_alloc_head - 1)) { - debug_alloc_head--; + if (i <= bkt->debug_head_l) { + bkt->debug_head_l = i-1; } + if (i == bkt->debug_head_r+1) { + if (bkt->debug_head_l == bkt->debug_head_r) { + bkt->debug_head_l--; + } + bkt->debug_head_r--; + } + assert(bkt->debug_head_l <= bkt->debug_head_r); break; } } } - mtx_unlock(&debug_alloc_mtx); assert(found && "[pkmem.h] double free or invalid ptr"); #endif - bkt->allocs--; - if (bkt->allocs == 0) { + bkt->alloc_count--; + if (bkt->alloc_count == 0) { bkt->head = 0; - bkt->lastEmptyBlockIndex = 0; - bkt->blocks[0].data = bkt->ptr; - bkt->blocks[0].size = bkt->size - (sizeof(struct pk_memblock) * bkt->maxBlockCount); + bkt->block_head_l = 0; + bkt->block_head_r = 0; + bkt->blocks[0].data = pk_bkt_data(bkt); + bkt->blocks[0].size = pk_bkt_data_sz(bkt); +#ifdef PK_MEMORY_DEBUGGER + bkt->debug_head_l = 0; + bkt->debug_head_r = 0; + bkt->debug_blocks[0].data = NULL; + bkt->debug_blocks[0].size = 0; +#endif mtx_unlock(&bkt->mtx); return; } char* afterPtr = ((char*)(ptr))+sz; - struct pk_memblock* beforeBlk = nullptr; - struct pk_memblock* afterBlk = nullptr; - for (i = bkt->lastEmptyBlockIndex; i > 0; --i) { - if (bkt->blocks[i-1].data + bkt->blocks[i-1].size == ptr) { - beforeBlk = &bkt->blocks[i-1]; + struct pk_memblock* tmp_blk = NULL; + struct pk_memblock* beforeBlk = NULL; + struct pk_memblock* afterBlk = NULL; + for (i = bkt->block_head_r+1; i > 0 ; --i) { + k = pk_memblock_blocks_idx(bkt, i-2); + tmp_blk = &bkt->blocks[k]; + if (tmp_blk->data + tmp_blk->size == ptr) { + beforeBlk = tmp_blk; } - if (bkt->blocks[i].data == afterPtr) { - afterBlk = &bkt->blocks[i]; + tmp_blk -= 1; + if (i <= bkt->block_head_r+1 && tmp_blk->data == afterPtr) { + afterBlk = tmp_blk; break; } - if (bkt->blocks[i-1].data < (char*)ptr) { + tmp_blk += 1; + if (tmp_blk->data < (char*)ptr) { break; } } - if (ptr == bkt->ptr && afterBlk == nullptr && bkt->blocks[0].data == afterPtr) { - afterBlk = &bkt->blocks[0]; + if (ptr == &bkt->data[0] && afterBlk == NULL && bkt->blocks[pk_memblock_blocks_idx(bkt, 0)].data == afterPtr) { + afterBlk = &bkt->blocks[pk_memblock_blocks_idx(bkt, 0)]; } - if (afterBlk != nullptr && afterBlk->data == bkt->ptr + bkt->head) { + if (afterBlk != NULL && afterBlk->data == pk_bkt_head(bkt)) { bkt->head -= sz; - if (beforeBlk != nullptr) { + if (beforeBlk != NULL) { bkt->head -= beforeBlk->size; } } - if (beforeBlk == nullptr && afterBlk == nullptr) { + if (beforeBlk == NULL && afterBlk == NULL) { struct pk_memblock newBlock; memset(&newBlock, 0, sizeof(struct pk_memblock)); newBlock.data = (char*)ptr; newBlock.size = sz; pk_bucket_insert_block(bkt, &newBlock); - } else if (beforeBlk != nullptr && afterBlk != nullptr) { + } else if (beforeBlk != NULL && afterBlk != NULL) { beforeBlk->size += sz + afterBlk->size; + if (beforeBlk->data == pk_bkt_head(bkt)) { + bkt->block_head_r--; + } afterBlk->size = 0; - } else if (beforeBlk != nullptr) { + afterBlk->data = NULL; + } else if (beforeBlk != NULL) { beforeBlk->size += sz; - } else if (afterBlk != nullptr) { + } else if (afterBlk != NULL) { afterBlk->data -= sz; afterBlk->size += sz; } - pk_bucket_collapse_empty_blocks(bkt); + pk_bucket_collapse_blocks(bkt); #ifdef PK_MEMORY_DEBUGGER - if (!bkt->transient) { + if (PK_HAS_FLAG(bkt->flags, PK_MEMBUCKET_FLAG_TRANSIENT) == false) { int64_t debug_tracked_alloc_size = 0; - int64_t debug_bucket_alloc_size = bkt->size - (sizeof(struct pk_memblock) * bkt->maxBlockCount); - for (i = 0; i < debug_alloc_head; ++i) { - if (debug_all_allocs[i].bkt != bkt) continue; - debug_tracked_alloc_size += debug_all_allocs[i].blk.size; + int64_t debug_bucket_alloc_size = pk_bkt_data_sz(bkt); + for (i = 0; i < bkt->debug_head_r; ++i) { + debug_tracked_alloc_size += bkt->debug_blocks[i].size; } - for (i = 0; i <= bkt->lastEmptyBlockIndex; ++i) { - debug_bucket_alloc_size -= bkt->blocks[i].size; + for (i = 0; i <= bkt->block_head_r; ++i) { + k = pk_memblock_blocks_idx(bkt, i); + debug_bucket_alloc_size -= bkt->blocks[k].size; } assert(debug_tracked_alloc_size == debug_bucket_alloc_size && "allocation size mismatch!"); } @@ -1264,13 +1316,7 @@ pk_delete_bkt(const void* ptr, size_t sz, struct pk_membucket* bkt) void pk_delete_base(const void* ptr, size_t sz) { - struct pk_membucket* bkt = nullptr; - for (size_t i = 0; i < pk_bucket_head; ++i) { - bkt = &pk_buckets[i]; - if (ptr >= bkt->raw && (char*)ptr < bkt->ptr + bkt->size) break; - } - assert(bkt != nullptr && "failed to determine correct memory bucket"); - pk_delete_bkt(ptr, sz, bkt); + pk_delete_bkt(ptr, sz, client_bucket); } void @@ -1795,6 +1841,8 @@ void pk_arr_append_t(pk_arr_t<T> *arr, const T &item) { #ifdef PK_IMPL_ARR +#include <string.h> + #ifndef PK_ARR_GROW_RATIO #define PK_ARR_GROW_RATIO 1.5 #endif @@ -2613,18 +2661,17 @@ struct pk_bkt_arr_handle pk_bkt_arr_handle_decrement(struct pk_bkt_arr *arr, str #if defined (__cplusplus) #include <assert.h> -#include <future> template<typename T> struct pk_bkt_arr_t : public pk_bkt_arr { pk_bkt_arr_t(); pk_bkt_arr_t(struct pk_bkt_arr_handle limits, struct pk_membucket *bkt_buckets = nullptr, struct pk_membucket *bkt_data = nullptr); ~pk_bkt_arr_t(); T &operator[](struct pk_bkt_arr_handle); + using FN_Iter = pk_tmpln_1<void, T*, void*>; + using FN_Find = pk_tmpln_2<bool, const T*, const T*, const void*, const void*>; }; template<typename T> -pk_bkt_arr_t<T>::pk_bkt_arr_t() { - pk_bkt_arr_init(this, sizeof(T), alignof(T), {PK_BKT_ARR_HANDLE_B_MAX, PK_BKT_ARR_HANDLE_I_MAX}, nullptr, nullptr); -} +pk_bkt_arr_t<T>::pk_bkt_arr_t() { } template<typename T> pk_bkt_arr_t<T>::pk_bkt_arr_t(struct pk_bkt_arr_handle limits, struct pk_membucket *bkt_buckets, struct pk_membucket *bkt_data) { pk_bkt_arr_init(this, sizeof(T), alignof(T), limits, bkt_buckets, bkt_data); @@ -2856,39 +2903,314 @@ struct pk_bkt_arr_handle pk_bkt_arr_handle_decrement(struct pk_bkt_arr *arr, str } #endif /* PK_IMPL_BKTARR */ -#ifndef PK_PKTMPLN_H -#define PK_PKTMPLN_H +#ifndef PK_PKFUNCINSTR_H +#define PK_PKFUNCINSTR_H -#if defined (__cplusplus) -#include <functional> -template<typename Ret, typename A1, typename B1> -struct pk_tmpln_1 { - using FuncType = std::function<Ret(A1)>; - FuncType func; - static Ret invoke(void *ptr, B1 b1) { - auto *self = static_cast<pk_tmpln_1*>(ptr); - return self->func(reinterpret_cast<A1>(b1)); - } + +#define PK_FUNCINSTR_CHILDREN_INCREMENT_COUNT 8 + +struct pk_funcinstr; +struct pk_funcinstr { + void *fn; + struct pk_tmr tmr; + struct pk_funcinstr *parent; + struct pk_funcinstr *first_child; + struct pk_funcinstr **children; + size_t n_children; + size_t r_children; }; -template<typename Ret, typename A1, typename A2, typename B1, typename B2> -struct pk_tmpln_2 { - using FuncType = std::function<Ret(A1, A2)>; - FuncType func; - static Ret invoke(void *ptr, B1 b1, B2 b2) { - auto *self = static_cast<pk_tmpln_2*>(ptr); - return self->func(reinterpret_cast<A1>(b1), reinterpret_cast<A2>(b2)); - } + +void pk_funcinstr_init(); +void pk_funcinstr_teardown(); + +#if defined(__cplusplus) +extern "C" { +#endif + +#if defined(__clang__) +// clang +#elif defined(__GNUC__) || defined(__GNUG__) + +void __cyg_profile_func_enter(void* this_fn, void* call_site); +void __cyg_profile_func_exit(void* this_fn, void* call_site); + +#else +// other +#endif + +#if defined(__cplusplus) +} // extern "C" +#endif + +#endif /* PK_PKFUNCINSTR_H */ +#if defined(PK_IMPL_FUNCINSTR) + +#include <assert.h> +#include <stdio.h> +#include <threads.h> +#include <string.h> + +// TODO 2025-06-02 JCB +// There's some speed improvements that can be made here by growing faster. +// OR use some type of bucket +// - might be a good chance to isolate some of the pkmem logic + +#define PK_FUNCINSTR_BKT_START_COUNT 4 +#define PK_FUNCINSTR_BKT_GROW_AMOUNT 4 +#define PK_FUNCINSTR_BKT_DATA_COUNT 255 +struct pk_funcinstr_bkt { + uint8_t used_count; + uint8_t guard_enter; + uint8_t guard_exit; + struct timespec reset_time; + struct pk_funcinstr data[PK_FUNCINSTR_BKT_DATA_COUNT+1]; }; -template<typename Ret, typename A1, typename A2, typename A3, typename B1, typename B2, typename B3> -struct pk_tmpln_3 { - using FuncType = std::function<Ret(A1, A2, A3)>; - FuncType func; - static Ret invoke(void *ptr, B1 b1, B2 b2, B3 b3) { - auto *self = static_cast<pk_tmpln_3*>(ptr); - return self->func(reinterpret_cast<A1>(b1), reinterpret_cast<A2>(b2), reinterpret_cast<A3>(b3)); - } +struct pk_funcinstr_mstr { + mtx_t mtx; + struct timespec reset_time; + struct pk_funcinstr_bkt **buckets; + size_t r_buckets; + size_t n_buckets; }; +// if NULL, get a new bucket (or alloc if full). if !NULL, existing thread +static thread_local struct pk_funcinstr_bkt *pk_funcinstr_thrd_bkt = NULL; +// last function call (should be NULL or parent of current) +static thread_local struct pk_funcinstr *pk_funcinstr_thrd_instr = NULL; +static struct pk_funcinstr_mstr thrd_mstr; + +__attribute__((no_instrument_function)) +void pk_funcinstr_init() { + assert(thrd_mstr.reset_time.tv_sec == 0); + assert(thrd_mstr.reset_time.tv_nsec == 0); + assert(thrd_mstr.buckets == NULL); + assert(thrd_mstr.r_buckets == 0); + assert(thrd_mstr.n_buckets == 0); + mtx_init(&thrd_mstr.mtx, mtx_plain); + thrd_mstr.r_buckets = PK_FUNCINSTR_BKT_START_COUNT; + thrd_mstr.buckets = (struct pk_funcinstr_bkt**)aligned_alloc(alignof(struct pk_funcinstr_bkt *), (sizeof(struct pk_funcinstr_bkt *) * PK_FUNCINSTR_BKT_START_COUNT)); + memset(thrd_mstr.buckets, 0, (sizeof(struct pk_funcinstr_bkt *) * PK_FUNCINSTR_BKT_START_COUNT)); + clock_gettime(PK_TMR_CLOCK, &thrd_mstr.reset_time); +} + +__attribute__((no_instrument_function)) +void pk_funcinstr_teardown() { + size_t i, k; + mtx_lock(&thrd_mstr.mtx); + for (i = 0; i < thrd_mstr.n_buckets; ++i) { + struct pk_funcinstr_bkt *bkt = thrd_mstr.buckets[i]; + for (k = 0; k < bkt->used_count; ++k) { + free(bkt->data[k].children); + } + } + free(thrd_mstr.buckets); + thrd_mstr.reset_time.tv_sec = 0; + thrd_mstr.reset_time.tv_nsec = 0; + thrd_mstr.buckets = NULL; + thrd_mstr.r_buckets = 0; + thrd_mstr.n_buckets = 0; + mtx_unlock(&thrd_mstr.mtx); + mtx_destroy(&thrd_mstr.mtx); +} + +#if defined(__clang__) +// TODO clang XRay +// Come up with pk macros since XRay requires attributes to instrument? +#elif defined(__GNUC__) || defined(__GNUG__) + +#ifndef __USE_GNU + #define __USE_GNU #endif +#if defined(__cplusplus) +#include <cxxabi.h> +#endif +#include <dlfcn.h> +#include <link.h> +#include <string.h> -#endif /* PK_PKTMPLN_H */ +__attribute__((no_instrument_function)) +bool pk_funcinstr_detect_not_initialized() { + if (thrd_mstr.buckets == NULL) return true; + if (thrd_mstr.r_buckets == 0) return true; + return false; +} + +__attribute__((no_instrument_function)) +void pk_funcinstr_detect_and_handle_reset() { + bool should_hard_reset = false; + bool should_reset = pk_funcinstr_thrd_bkt == NULL; + if (pk_funcinstr_thrd_bkt != NULL) { + should_reset = pk_funcinstr_thrd_bkt->used_count == PK_FUNCINSTR_BKT_DATA_COUNT; + should_hard_reset = thrd_mstr.reset_time.tv_sec > pk_funcinstr_thrd_bkt->reset_time.tv_sec; + should_hard_reset = should_hard_reset || (thrd_mstr.reset_time.tv_sec == pk_funcinstr_thrd_bkt->reset_time.tv_sec && thrd_mstr.reset_time.tv_nsec > pk_funcinstr_thrd_bkt->reset_time.tv_nsec); + } + if (should_hard_reset) { + pk_funcinstr_thrd_bkt = NULL; + pk_funcinstr_thrd_instr = NULL; + should_reset = true; + } + if (should_reset) { + if (thrd_mstr.n_buckets == thrd_mstr.r_buckets) { + mtx_lock(&thrd_mstr.mtx); + thrd_mstr.r_buckets += PK_FUNCINSTR_BKT_GROW_AMOUNT; + struct pk_funcinstr_bkt **buckets = (struct pk_funcinstr_bkt**)aligned_alloc(alignof(void *), sizeof(void *) * thrd_mstr.r_buckets); + memcpy(buckets, thrd_mstr.buckets, sizeof(void *) * (thrd_mstr.n_buckets)); + memset((char*)buckets + (sizeof(void *) * (thrd_mstr.n_buckets)), 0, (sizeof(void *) * thrd_mstr.r_buckets) - sizeof(void *) * (thrd_mstr.n_buckets)); + free(thrd_mstr.buckets); + thrd_mstr.buckets = buckets; + mtx_unlock(&thrd_mstr.mtx); + } + struct pk_funcinstr_bkt *bkt = (struct pk_funcinstr_bkt *)aligned_alloc(alignof(struct pk_funcinstr_bkt), sizeof(struct pk_funcinstr_bkt)); + bkt->used_count = 0; + bkt->guard_enter = 0; + bkt->guard_exit = 0; + bkt->reset_time.tv_sec = 0; + bkt->reset_time.tv_nsec = 0; + if (pk_funcinstr_thrd_bkt != NULL) { + pk_funcinstr_thrd_bkt->guard_enter = 0; + pk_funcinstr_thrd_bkt->guard_exit = 0; + } + pk_funcinstr_thrd_bkt = bkt; + mtx_lock(&thrd_mstr.mtx); + thrd_mstr.buckets[thrd_mstr.n_buckets++] = bkt; + mtx_unlock(&thrd_mstr.mtx); + clock_gettime(PK_TMR_CLOCK, &pk_funcinstr_thrd_bkt->reset_time); + } +} + +__attribute__((no_instrument_function)) +bool pk_funcinstr_should_early_exit() { + if (pk_funcinstr_thrd_bkt->guard_enter != 0) return true; + if (pk_funcinstr_thrd_bkt->guard_exit != 0) return true; + return false; +} + +__attribute__((no_instrument_function)) +struct pk_funcinstr *pk_funcinstr_create_funcinstr(void *this_fn) { + struct pk_funcinstr *funcinstr = &pk_funcinstr_thrd_bkt->data[pk_funcinstr_thrd_bkt->used_count]; + pk_funcinstr_thrd_bkt->used_count++; + pk_tmr_start(funcinstr->tmr); + funcinstr->fn = this_fn; + funcinstr->parent = pk_funcinstr_thrd_instr; + funcinstr->children = NULL; + funcinstr->first_child = NULL; + funcinstr->n_children = 0; + funcinstr->r_children = 0; + + if (pk_funcinstr_thrd_instr != NULL) { + if (pk_funcinstr_thrd_instr->first_child == NULL) { + // avoid an malloc if n_children will only == 1 + pk_funcinstr_thrd_instr->first_child = funcinstr; + } else { + if (pk_funcinstr_thrd_instr->n_children == pk_funcinstr_thrd_instr->r_children) { + pk_funcinstr_thrd_instr->r_children += PK_FUNCINSTR_CHILDREN_INCREMENT_COUNT; + struct pk_funcinstr **children = (struct pk_funcinstr **)aligned_alloc(alignof(void *), sizeof(void *) * pk_funcinstr_thrd_instr->r_children); + memset((char*)children + (sizeof(void *) * (pk_funcinstr_thrd_instr->n_children)), 0, (sizeof(void *) * pk_funcinstr_thrd_instr->r_children) - sizeof(void *) * (pk_funcinstr_thrd_instr->n_children)); + if (pk_funcinstr_thrd_instr->children != NULL) { + memcpy(children, pk_funcinstr_thrd_instr->children, sizeof(void *) * pk_funcinstr_thrd_instr->n_children); + free(pk_funcinstr_thrd_instr->children); + } + pk_funcinstr_thrd_instr->children = children; + if (pk_funcinstr_thrd_instr->n_children == 0) { + pk_funcinstr_thrd_instr->children[0] = pk_funcinstr_thrd_instr->first_child; + pk_funcinstr_thrd_instr->n_children++; + } + } + pk_funcinstr_thrd_instr->children[pk_funcinstr_thrd_instr->n_children] = funcinstr; + pk_funcinstr_thrd_instr->n_children++; + } + } + return funcinstr; +} + +__attribute__((no_instrument_function)) +void __cyg_profile_func_enter(void* this_fn, void* call_site) { + (void)call_site; + if (pk_funcinstr_detect_not_initialized()) return; + pk_funcinstr_detect_and_handle_reset(); + if (pk_funcinstr_should_early_exit()) return; + pk_funcinstr_thrd_bkt->guard_enter++; + + pk_funcinstr_thrd_instr = pk_funcinstr_create_funcinstr(this_fn); + + pk_funcinstr_thrd_bkt->guard_enter = 0; +} + +__attribute__((no_instrument_function)) +void __cyg_profile_func_exit(void* this_fn, void* call_site) { + (void)call_site; + if (pk_funcinstr_detect_not_initialized()) return; + pk_funcinstr_detect_and_handle_reset(); + if (pk_funcinstr_should_early_exit()) return; + if (pk_funcinstr_thrd_instr == NULL) return; // exit called before enter? + pk_funcinstr_thrd_bkt->guard_exit++; + + Dl_info info; + + if (this_fn != pk_funcinstr_thrd_instr->fn) { + int64_t i = (int64_t)pk_funcinstr_thrd_bkt->used_count - 1; + for (; i > -1; --i) { + if (pk_funcinstr_thrd_bkt->data[i].fn == this_fn) { + if (pk_funcinstr_thrd_bkt->data[i].tmr.e.tv_sec == 0) { + pk_funcinstr_thrd_instr = &pk_funcinstr_thrd_bkt->data[i]; + break; + } + } + } + } + if (this_fn != pk_funcinstr_thrd_instr->fn) { + if (pk_funcinstr_thrd_instr->parent == NULL) { + struct pk_tmr tmr = pk_funcinstr_thrd_instr->tmr; + pk_funcinstr_thrd_instr = pk_funcinstr_create_funcinstr(this_fn); + pk_funcinstr_thrd_instr->tmr = tmr; + fprintf(stdout, "[pkfuncinstr] func mismatch; Parent func? Duration not accurate."); + } else { + fprintf(stderr, "[pkfuncinstr] func mismatch. Last: '"); + if (dladdr(pk_funcinstr_thrd_instr->fn, &info) != 0) { + fprintf(stderr, "%s", info.dli_sname); + } else { + fprintf(stderr, "(unknown)"); + } + fprintf(stderr, "'. Current: '"); + if (dladdr(this_fn, &info) != 0) { + fprintf(stderr, "%s'.\n", info.dli_sname); + } else { + fprintf(stderr, "(unknown)'.\n"); + } + pk_funcinstr_thrd_bkt->guard_exit=0; + return; + } + } + + pk_tmr_stop(pk_funcinstr_thrd_instr->tmr); + if (dladdr(this_fn, &info) != 0) { + int depth = 0; + // TODO track depth in a better way + struct pk_funcinstr *p = pk_funcinstr_thrd_instr->parent; + while (p != NULL) { + depth += 1; + p = p->parent; + } + char *demangled = NULL; + if (info.dli_sname != NULL) { +#if defined(__cplusplus) + demangled = abi::__cxa_demangle(info.dli_sname, NULL, NULL, NULL); +#endif + } + fprintf(stdout, "[pkfuncinstr] %p %*s %s took %.6f ms\n" + ,this_fn + ,depth, "" + ,demangled != NULL ? demangled : info.dli_sname != NULL ? info.dli_sname : "???" + ,pk_tmr_duration_dbl_mili(pk_funcinstr_thrd_instr->tmr) + ); + if (demangled != NULL) free(demangled); + } + pk_funcinstr_thrd_bkt->guard_exit=0; + pk_funcinstr_thrd_instr = pk_funcinstr_thrd_instr->parent; +} + +#else +// other +#endif + +#endif /* PK_IMPL_FUNCINSTR */ #endif /* PK_SINGLE_HEADER_FILE_H */ |
