diff options
| author | Jonathan Bradley <jcb@pikum.xyz> | 2025-06-05 10:50:24 -0400 |
|---|---|---|
| committer | Jonathan Bradley <jcb@pikum.xyz> | 2025-06-05 10:50:24 -0400 |
| commit | 55c518b2922a2d8a41212a952a68c17c7c9a3f8b (patch) | |
| tree | 31cc135bc18bf11893d5ca3bca47c9ee27bf141f /src | |
| parent | b0b1159780311c7c8ca9a9d41d11794e91e8895c (diff) | |
pk.h: update to 0.6.0 + handle breaking changes
Diffstat (limited to 'src')
| -rw-r--r-- | src/asset-manager.cpp | 2 | ||||
| -rw-r--r-- | src/camera.cpp | 2 | ||||
| -rw-r--r-- | src/ecs.cpp | 16 | ||||
| -rw-r--r-- | src/entities.cpp | 12 | ||||
| -rw-r--r-- | src/font.cpp | 2 | ||||
| -rw-r--r-- | src/game-settings.hpp | 5 | ||||
| -rw-r--r-- | src/game.cpp | 21 | ||||
| -rw-r--r-- | src/level.cpp | 6 | ||||
| -rw-r--r-- | src/physics.cpp | 10 | ||||
| -rw-r--r-- | src/pk.h | 1208 | ||||
| -rw-r--r-- | src/plugins.cpp | 2 | ||||
| -rw-r--r-- | src/scene.cpp | 4 | ||||
| -rw-r--r-- | src/static-ui.cpp | 6 | ||||
| -rw-r--r-- | src/thread-pool.cpp | 4 | ||||
| -rw-r--r-- | src/window.cpp | 32 |
15 files changed, 833 insertions, 499 deletions
diff --git a/src/asset-manager.cpp b/src/asset-manager.cpp index daaf6da..ef62600 100644 --- a/src/asset-manager.cpp +++ b/src/asset-manager.cpp @@ -26,7 +26,7 @@ AssetKey EngineDefinedAssets[EngineDefinedAssetCount] = { }; void AM_Init() { - new (&asset_mstr.bc) pk_bkt_arr_t<Asset>{}; + new (&asset_mstr.bc) pk_bkt_arr_t<Asset>{ pk_bkt_arr_handle_MAX_constexpr }; asset_mstr.thread_pool = PkeThreads_Init(2, 255); AM_Register(EngineDefinedAssets[0], PKE_ASSET_TYPE_SHADER, "assets/shaders/present.vert.spv"); AM_Register(EngineDefinedAssets[1], PKE_ASSET_TYPE_SHADER, "assets/shaders/present.frag.spv"); diff --git a/src/camera.cpp b/src/camera.cpp index e0a31fa..ef00b6e 100644 --- a/src/camera.cpp +++ b/src/camera.cpp @@ -179,7 +179,7 @@ pk_bkt_arr &PkeCamera_GetPkBktArr() { } void PkeCamera_Init() { - new (&bktc_cameras) pk_bkt_arr_t<PkeCamera>{}; + new (&bktc_cameras) pk_bkt_arr_t<PkeCamera>{ pk_bkt_arr_handle_MAX_constexpr }; NullCamera.type = PKE_CAMERA_TYPE_ORTHOGONAL; NullCamera.view = PKE_CAMERA_VIEW_TARGET; NullCamera.stale = PKE_CAMERA_STALE_ALL; diff --git a/src/ecs.cpp b/src/ecs.cpp index b0b5f6c..5d409be 100644 --- a/src/ecs.cpp +++ b/src/ecs.cpp @@ -58,10 +58,10 @@ void ECS_GetEntity_Inner(EntityHandle entHandle, Entity_Base*& ent) { } void ECS_Init() { - new (&ecs.bc.generics) pk_bkt_arr_t<Entity_Base>{}; - new (&ecs.bc.entityPtrs) pk_bkt_arr_t<Entity_Base*>{}; - new (&ecs.bc.grBinds) pk_bkt_arr_t<CompGrBinds>{}; - new (&ecs.bc.instances) pk_bkt_arr_t<CompInstance>{}; + new (&ecs.bc.generics) pk_bkt_arr_t<Entity_Base>{ pk_bkt_arr_handle_MAX_constexpr }; + new (&ecs.bc.entityPtrs) pk_bkt_arr_t<Entity_Base*>{ pk_bkt_arr_handle_MAX_constexpr }; + new (&ecs.bc.grBinds) pk_bkt_arr_t<CompGrBinds>{ pk_bkt_arr_handle_MAX_constexpr }; + new (&ecs.bc.instances) pk_bkt_arr_t<CompInstance>{ pk_bkt_arr_handle_MAX_constexpr }; pk_arr_reserve(&entitiesMarkedForRemoval, 16); pk_arr_reserve(&EntitiesToBeRemoved, 16); pk_arr_reserve(&entitiesYetToBeRemoved, 16); @@ -185,7 +185,7 @@ void ECS_Tick(double delta) { GrBindsIterFn grbinds_iter_cb{}; pk_arr_t<updateGrBindsAfter> updateGrBinds; - updateGrBinds.bkt = pkeSettings.mem.bkt; + updateGrBinds.bkt = pkeSettings.mem_bkt.game_transient; inst_iter_cb.func = [entityRemovalCount, &updateGrBinds](CompInstance *arr_obj_data) { CompInstance &inst = *arr_obj_data; @@ -277,7 +277,7 @@ void ECS_Tick_Late(double delta) { (void)delta; PKVK_TmpBufferDetails tmpBufferDetails{}; pk_arr_t<InstanceBufferCopy> bufferUpdates; - bufferUpdates.bkt = pkeSettings.mem.bkt; + bufferUpdates.bkt = pkeSettings.mem_bkt.game_transient; using InstIterFn = pk_tmpln_1<void, CompInstance*, void*>; InstIterFn inst_iter_cb{}; @@ -302,7 +302,7 @@ void ECS_Tick_Late(double delta) { if (bfrUpdate == nullptr) { InstanceBufferCopy tmp{}; tmp.grBinds = &grBinds; - tmp.chunks.bkt = pkeSettings.mem.bkt; + tmp.chunks.bkt = pkeSettings.mem_bkt.game_transient; pk_arr_append_t(&bufferUpdates, tmp); bfrUpdate = &bufferUpdates[bufferUpdates.next-1]; pk_arr_reserve(&bfrUpdate->chunks, 4); @@ -320,7 +320,7 @@ void ECS_Tick_Late(double delta) { InstanceBufferCopyChunk tmp{}; tmp.startingIndex = inst.index; tmp.endingIndex = inst.index; - tmp.mats.bkt = pkeSettings.mem.bkt; + tmp.mats.bkt = pkeSettings.mem_bkt.game_transient; tmp.dstBufferCopy = {}; pk_arr_append_t(&bfrUpdate->chunks, tmp); chunk = &bfrUpdate->chunks[bfrUpdate->chunks.next-1]; diff --git a/src/entities.cpp b/src/entities.cpp index eba48d5..dc54cdf 100644 --- a/src/entities.cpp +++ b/src/entities.cpp @@ -30,7 +30,7 @@ struct EntToTeardown { pk_arr_t<EntToTeardown> EntityTypesToTeardown{}; void EntityType_Init() { - new (&et_mstr.bc) pk_bkt_arr_t<EntityType>; + new (&et_mstr.bc) pk_bkt_arr_t<EntityType>{ pk_bkt_arr_handle_MAX_constexpr }; pk_arr_reserve(&EntityTypesToTeardown, 16); } @@ -211,7 +211,7 @@ void EntityType_Inner_UpdateDescriptorSets(EntityType *et) { vkDescriptorPoolCreateInfo.poolSizeCount = (uint32_t)2; vkDescriptorPoolCreateInfo.pPoolSizes = descriptorPoolSizes; - VkDescriptorSetLayout *descriptorSets = pk_new<VkDescriptorSetLayout>(swapchainLength, pkeSettings.mem.bkt); + VkDescriptorSetLayout *descriptorSets = pk_new<VkDescriptorSetLayout>(swapchainLength, pkeSettings.mem_bkt.game_transient); for (long i = 0; i < swapchainLength; ++i) { descriptorSets[i] = pkePipelines.descr_layouts.named.ubo_txtr; } @@ -222,7 +222,7 @@ void EntityType_Inner_UpdateDescriptorSets(EntityType *et) { vkDescriptorSetAllocateInfo.descriptorSetCount = swapchainLength; vkDescriptorSetAllocateInfo.pSetLayouts = descriptorSets; - VkWriteDescriptorSet *writeDescriptorSets = pk_new<VkWriteDescriptorSet>(2 * swapchainLength, pkeSettings.mem.bkt); + VkWriteDescriptorSet *writeDescriptorSets = pk_new<VkWriteDescriptorSet>(2 * swapchainLength, pkeSettings.mem_bkt.game_transient); for (long i = 0; i < 2 * swapchainLength; ++i) { writeDescriptorSets[i].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; writeDescriptorSets[i].pNext = nullptr; @@ -241,7 +241,7 @@ void EntityType_Inner_UpdateDescriptorSets(EntityType *et) { textureDescriptorInfo.sampler = global_sampler; textureDescriptorInfo.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - VkDescriptorBufferInfo *vkDescriptorBufferInfo = pk_new<VkDescriptorBufferInfo>(swapchainLength, pkeSettings.mem.bkt); + VkDescriptorBufferInfo *vkDescriptorBufferInfo = pk_new<VkDescriptorBufferInfo>(swapchainLength, pkeSettings.mem_bkt.game_transient); for (long i = 0; i < et->detailsCount; ++i) { EntityTypeDetails *etd = &et->details[i]; @@ -1095,7 +1095,7 @@ void EntityType_Load(EntityType &et) { AssetHandle modelBinHandle = AM_Register(gltfPath.c_str(), PKE_ASSET_TYPE_UNSET); const Asset *modelBinAsset = AM_Get(modelBinHandle); - struct pk_membucket *entLoaderBkt = pk_bucket_create("entities", PK_DEFAULT_BUCKET_SIZE, false); + struct pk_membucket *entLoaderBkt = pk_mem_bucket_create("entities", PK_MEM_DEFAULT_BUCKET_SIZE, PK_MEMBUCKET_FLAG_NONE); EntityType_LoadHelperStruct helper { .bkt = entLoaderBkt, .et = et, @@ -1146,7 +1146,7 @@ void EntityType_Load(EntityType &et) { helper.instMemoryRequirements.bkt = nullptr; helper.physVertMemoryRequirements.bkt = nullptr; helper.textureMemoryRequirements.bkt = nullptr; - pk_bucket_destroy(helper.bkt); + pk_mem_bucket_destroy(entLoaderBkt); } void EntityType_Unload(EntityType &et, CompGrBinds *grBindsArr[1]) { diff --git a/src/font.cpp b/src/font.cpp index cce683b..927f806 100644 --- a/src/font.cpp +++ b/src/font.cpp @@ -808,7 +808,7 @@ FontRenderHandle FontType_AddStringRender(FontTypeIndex idx_ft, const pk_cstr && // insert new characters into tmp buffer { pk_arr_t<uint32_t> glyph_indices{}; - glyph_indices.bkt = pkeSettings.mem.bkt; + glyph_indices.bkt = pkeSettings.mem_bkt.game_transient; pk_arr_reserve(&glyph_indices, str.length); count = 0; for (i = 0; i < str.length;) { diff --git a/src/game-settings.hpp b/src/game-settings.hpp index e6740e0..651d827 100644 --- a/src/game-settings.hpp +++ b/src/game-settings.hpp @@ -31,8 +31,9 @@ struct GameSettings { bool isWaitingForVsync = true; } graphicsSettings; struct memory { - struct pk_membucket *bkt = nullptr; - } mem; + struct pk_membucket *game = nullptr; + struct pk_membucket *game_transient = nullptr; + } mem_bkt; struct engineArgs { const char *levelName = nullptr; const char *pluginPath = nullptr; diff --git a/src/game.cpp b/src/game.cpp index 349acb1..67a9600 100644 --- a/src/game.cpp +++ b/src/game.cpp @@ -67,7 +67,7 @@ void Game_RecordImGui() { } void Game_Tick(double delta) { - pk_bucket_reset(pkeSettings.mem.bkt); + pk_mem_bucket_reset(pkeSettings.mem_bkt.game_transient); if (pkeSettings.rt.nextLevel != LevelHandle_MAX) { // TODO async this @@ -117,6 +117,7 @@ void Game_Main(PKEWindowProperties windowProps, const char *executablePath) { pkeSettings.executablePath = executablePath; fprintf(stdout, "Game_Main Entering\n"); try { + Game_Init(); pk_ev_init(); PkeThreads_Init(); AM_Init(); @@ -125,7 +126,6 @@ void Game_Main(PKEWindowProperties windowProps, const char *executablePath) { PkeCamera_Init(); pke_level_init(); pke_scene_master_init(); - Game_Init(); CreateWindow(windowProps); EntityType_Init(); PkeInput_Init(); @@ -292,7 +292,7 @@ void Game_Main(PKEWindowProperties windowProps, const char *executablePath) { GAME_SHUTDOWN: fprintf(stdout, "Game_Main SHUTDOWN INITIATED\n"); #ifndef NDEBUG - pk_memory_debug_print(); + // TODO debug print buckets before shutdown #endif for (long i = 0; i < LoadedPkePlugins.next; ++i) { if (LoadedPkePlugins[i].OnTeardown) { @@ -300,7 +300,6 @@ GAME_SHUTDOWN: } } PkePlugin_Teardown(); - Game_Teardown(); EntityType_Teardown(); FontType_Teardown(); pke_ui_teardown(); @@ -315,16 +314,24 @@ GAME_SHUTDOWN: AM_Teardown(); PkeThreads_Teardown(); pk_ev_teardown(); - pk_memory_debug_print(); + Game_Teardown(); + // TODO debug print buckets after shutdown fprintf(stdout, "Game_Main Exiting\n"); } void Game_Init() { - pkeSettings.mem.bkt = pk_bucket_create("game-transient", 1UL << 26, true); + pkeSettings.mem_bkt.game = pk_mem_bucket_create("game", 1UL << 26, PK_MEMBUCKET_FLAG_NONE); + pkeSettings.mem_bkt.game_transient = pk_mem_bucket_create("game-transient", 1UL << 26, PK_MEMBUCKET_FLAG_TRANSIENT); + pk_mem_bucket_set_client_mem_bucket(pkeSettings.mem_bkt.game); for (uint64_t i = 0; i < consoleBufferCount; ++i) { memset(consoleBuffer[i], '\0', consoleLineLength); } } -void Game_Teardown() { } +void Game_Teardown() { + pk_mem_bucket_destroy(pkeSettings.mem_bkt.game_transient); + pk_mem_bucket_destroy(pkeSettings.mem_bkt.game); + pkeSettings.mem_bkt.game_transient = NULL; + pkeSettings.mem_bkt.game = NULL; +} diff --git a/src/level.cpp b/src/level.cpp index 14e189f..44b10af 100644 --- a/src/level.cpp +++ b/src/level.cpp @@ -9,7 +9,7 @@ struct level_mstr { } level_mstr; void pke_level_init() { - new (&level_mstr.bc) pk_bkt_arr_t<pke_level>{}; + new (&level_mstr.bc) pk_bkt_arr_t<pke_level>{ pk_bkt_arr_handle_MAX_constexpr }; } void pke_level_teardown() { @@ -48,7 +48,7 @@ pke_level *pke_level_create(const char *levelName) { ECS_CreateEntity(lvl); if (lvl->bkt == nullptr) { - lvl->bkt = pk_bucket_create(levelName, PK_DEFAULT_BUCKET_SIZE, false); + lvl->bkt = pk_mem_bucket_create(levelName, PK_MEM_DEFAULT_BUCKET_SIZE, PK_MEMBUCKET_FLAG_NONE); } return lvl; @@ -84,6 +84,6 @@ void pke_level_remove(LevelHandle handle) { pke_level *lvl = &level_mstr.bc[handle]; assert(lvl == nullptr && "Failed to find level to remove by requested LevelHandle"); ECS_MarkForRemoval(lvl); - pk_bucket_reset(lvl->bkt); + pk_mem_bucket_destroy(lvl->bkt); pk_bkt_arr_free_handle(&level_mstr.bc, handle); } diff --git a/src/physics.cpp b/src/physics.cpp index 3a20f8b..34cfbed 100644 --- a/src/physics.cpp +++ b/src/physics.cpp @@ -52,7 +52,11 @@ void pke_btAlignedFreeFunc(void *memBlock) { pk_arr_remove_at(&bulletAllocs, index); } void *pke_btAllocFunc(size_t size) { - void *ptr = pk_new_bkt(size, PK_MINIMUM_ALIGNMENT, MemBkt_Bullet); + // 2025-06-05 JCB + // I have no idea what minimum alignment should be. + // However, there *is* an aligned func defined above. + // Maybe 1 is fine? + void *ptr = pk_new_bkt(size, 1, MemBkt_Bullet); pk_arr_append_t(&bulletAllocs, {ptr, size}); return ptr; } @@ -95,7 +99,7 @@ struct CollisionHandlerStruct : public btOverlapFilterCallback { } collisionHandlerStruct; void Physics_Init() { - MemBkt_Bullet = pk_bucket_create("physics", PK_DEFAULT_BUCKET_SIZE, false); + MemBkt_Bullet = pk_mem_bucket_create("physics", PK_MEM_DEFAULT_BUCKET_SIZE, PK_MEMBUCKET_FLAG_NONE); bulletAllocs.bkt = MemBkt_Bullet; pk_arr_reserve(&bulletAllocs, 1024); btAlignedAllocSetCustom(pke_btAllocFunc, pke_btFreeFunc); @@ -137,5 +141,5 @@ void Physics_Teardown() { // TODO should we manually delete each bullet alloc? // or just drop? pk_arr_reset(&bulletAllocs); - pk_bucket_destroy(MemBkt_Bullet); + pk_mem_bucket_destroy(MemBkt_Bullet); } @@ -1,7 +1,7 @@ #ifndef PK_SINGLE_HEADER_FILE_H #define PK_SINGLE_HEADER_FILE_H /******************************************************************************* -* PK Single-Header-Library V0.5.2 +* PK Single-Header-Library V0.6.0 * * Author: Jonathan Bradley * Copyright: © 2024-2025 Jonathan Bradley @@ -47,6 +47,27 @@ * type-specific integers, implemented via enums. * ******************************************************************************** +* pktmpl.h: only contains c++ templates, no IMPL. +* +* Provides template structs for trampolines, allowing c-style callbacks with +* capturing lambdas. +* +* Examples: +* ```c++ +* int some_counter = 0; +* using IterCbWrapper = pk_tmpln_1<void, int*, void*>; +* IterCbWrapper cb_wrapper{}; +* cb_wrapper.func = [&some_counter](int *lhs) +* { +* (void)lhs; +* some_counter += 1; +* return; +* }; +* pk_bkt_arr_iterate(&bkt_arr, &IterCbWrapper::invoke, &cb_wrapper); +* assert(some_count == 1); +* ``` +* +******************************************************************************** * pkmem-types.h: def PK_IMPL_MEM_TYPES before including pk.h to enable ad-hoc. * * Provides the types needed by pkmem, as well as a generic pk_handle featuring a @@ -55,20 +76,19 @@ ******************************************************************************** * pkmem.h: def PK_IMPL_MEM before including pk.h to enable ad-hoc. * -* A bucketed memory manager. Allows for the creation and management of up to a -* well-defined number of buckets. +* A bucketed memory manager. Allows for the creation of ad-hoc buckets. +* +* Note: Each created pk_membucket MUST call pk_bucket_destroy(bkt). Memory +* buckets are client managed. * -* Thread safety: Bucket creation and destruction is *not* thread-safe. On the -* other hand, the "pk_new" and "pk_delete" methods *are* thread-safe, but +* Thread safety: "pk_new" and "pk_delete" methods *are* thread-safe, but * thread-safety is implemented per-bucket via a single mutex with long-running * lock times. PRs for a more performant thread-safe strategy are welcome, * complexity and benchmark depending. * * The following definitions (shown with defaults) can be overridden: -* PK_DEFAULT_BUCKET_SIZE 256MB (used when bkt is NULL on first call) +* PK_MEM_DEFAULT_BUCKET_SIZE 256MB (client-convenience only) * PK_MINIMUM_ALIGNMENT 1 -* PK_MAXIMUM_ALIGNMENT 64 -* PK_MAX_BUCKET_COUNT 8 * * For debugging purposes, define the following: * PK_MEMORY_DEBUGGER : enables a tracking system for all allocs and frees to @@ -253,29 +273,25 @@ * ``` * ******************************************************************************** -* pktmpl.h: only contains c++ templates, no IMPL. +* pkbktarr.h: define PK_IMPL_FUNCINSTR before including pk.h to enable ad-hoc. * -* Provides template structs for trampolines, allowing c-style callbacks with -* capturing lambdas. +* Provides function instrumentation. +* +* Note: Currently only supports gcc/g++. +* Note: Currently only prints results. * * Examples: -* ```c++ -* int some_counter = 0; -* using IterCbWrapper = pk_tmpln_1<void, int*, void*>; -* IterCbWrapper cb_wrapper{}; -* cb_wrapper.func = [&some_counter](int *lhs) -* { -* (void)lhs; -* some_counter += 1; -* return; -* }; -* pk_bkt_arr_iterate(&bkt_arr, &IterCbWrapper::invoke, &cb_wrapper); -* assert(some_count == 1); +* ```c +* main() { +* pk_funcinstr_init(); +* ... +* pk_funcinstr_teardown(); +* } * ``` * *******************************************************************************/ -#define PK_VERSION "0.5.2" +#define PK_VERSION "0.6.0" #ifdef PK_IMPL_ALL # ifndef PK_IMPL_MEM_TYPES @@ -302,6 +318,9 @@ # ifndef PK_IMPL_BKTARR # define PK_IMPL_BKTARR # endif +# ifndef PK_IMPL_FUNCINSTR +# define PK_IMPL_FUNCINSTR +# endif #endif #ifndef PK_MACROS_H #define PK_MACROS_H @@ -550,6 +569,41 @@ TypeSafeInt2_H_constexpr(TypeName, Type, Max, PK_CONCAT(TypeName, _T), PK_CONCAT(TypeName, _MAX), PK_CONCAT(TypeName, _T_MAX)) #endif /* PK_MACROS_H */ +#ifndef PK_PKTMPLN_H +#define PK_PKTMPLN_H + +#if defined (__cplusplus) +#include <functional> +template<typename Ret, typename A1, typename B1> +struct pk_tmpln_1 { + using FuncType = std::function<Ret(A1)>; + FuncType func; + static Ret invoke(void *ptr, B1 b1) { + auto *self = static_cast<pk_tmpln_1*>(ptr); + return self->func(reinterpret_cast<A1>(b1)); + } +}; +template<typename Ret, typename A1, typename A2, typename B1, typename B2> +struct pk_tmpln_2 { + using FuncType = std::function<Ret(A1, A2)>; + FuncType func; + static Ret invoke(void *ptr, B1 b1, B2 b2) { + auto *self = static_cast<pk_tmpln_2*>(ptr); + return self->func(reinterpret_cast<A1>(b1), reinterpret_cast<A2>(b2)); + } +}; +template<typename Ret, typename A1, typename A2, typename A3, typename B1, typename B2, typename B3> +struct pk_tmpln_3 { + using FuncType = std::function<Ret(A1, A2, A3)>; + FuncType func; + static Ret invoke(void *ptr, B1 b1, B2 b2, B3 b3) { + auto *self = static_cast<pk_tmpln_3*>(ptr); + return self->func(reinterpret_cast<A1>(b1), reinterpret_cast<A2>(b2), reinterpret_cast<A3>(b3)); + } +}; +#endif + +#endif /* PK_PKTMPLN_H */ #ifndef PK_MEM_TYPES_H #define PK_MEM_TYPES_H @@ -606,6 +660,12 @@ pk_handle_validate_constexpr() struct pk_membucket; +enum PK_MEMBUCKET_FLAGS : uint64_t { + PK_MEMBUCKET_FLAG_NONE = (0), + PK_MEMBUCKET_FLAG_TRANSIENT = (1 << 01l), + PK_MEMBUCKET_FLAG_ALL = (0xFFFFFFFFFFFFFFFF), +}; + #endif /* PK_MEM_TYPES_H */ #ifdef PK_IMPL_MEM_TYPES @@ -631,24 +691,17 @@ pk_handle_validate(const struct pk_handle handle, const struct pk_handle bucketH #include <stdint.h> #include <stdlib.h> -#ifndef PK_DEFAULT_BUCKET_SIZE -# define PK_DEFAULT_BUCKET_SIZE (1ULL * 1024ULL * 1024ULL * 256ULL) -#endif -#ifndef PK_MINIMUM_ALIGNMENT -# define PK_MINIMUM_ALIGNMENT 1 -#endif -#ifndef PK_MAXIMUM_ALIGNMENT -# define PK_MAXIMUM_ALIGNMENT 64 +#ifndef PK_MEM_DEFAULT_BUCKET_SIZE +# define PK_MEM_DEFAULT_BUCKET_SIZE (1ULL * 1024ULL * 1024ULL * 256ULL) #endif -struct pk_membucket* pk_bucket_create(const char* description, int64_t sz, bool transient); -void pk_bucket_destroy(struct pk_membucket* bkt); -void pk_bucket_reset(struct pk_membucket* bkt); - -void pk_memory_debug_print(); -void pk_memory_flush(); -void pk_memory_teardown_all(); -bool pk_memory_is_in_bucket(const void* ptr, const struct pk_membucket* bkt); +size_t pk_mem_bucket_calculate_size(size_t sz, size_t reserved_block_count); +struct pk_membucket* pk_mem_bucket_create(const char* description, int64_t sz, enum PK_MEMBUCKET_FLAGS flags); +void pk_mem_bucket_debug_print(struct pk_membucket *bkt); +void pk_mem_bucket_destroy(struct pk_membucket* bkt); +void pk_mem_bucket_reset(struct pk_membucket* bkt); +void pk_mem_bucket_set_client_mem_bucket(struct pk_membucket *bkt); +bool pk_mem_bucket_ptr_is_in_mem_bucket(const void* ptr, const struct pk_membucket* bkt); void* pk_new_base(size_t sz, size_t alignment); void* pk_new_bkt(size_t sz, size_t alignment, struct pk_membucket* bkt); @@ -660,14 +713,15 @@ void pk_delete(const void* ptr, size_t sz, struct pk_membucket* bkt); #if defined(__cplusplus) #include <type_traits> +#include <new> static inline void stupid_header_warnings_cpp() { (void)std::is_const<void>::value; } template <typename T> inline T* -pk_new(pk_membucket* bucket = nullptr) +pk_new(pk_membucket* bucket = NULL) { - void* ptr = nullptr; + void* ptr = NULL; if (bucket) { ptr = pk_new_bkt(sizeof(T), alignof(T), bucket); } else { @@ -681,15 +735,15 @@ pk_new(pk_membucket* bucket = nullptr) template <typename T> inline T* -pk_new(long count, pk_membucket* bucket = nullptr) +pk_new(long count, pk_membucket* bucket = NULL) { - char* ptr = nullptr; + char* ptr = NULL; if (bucket) { ptr = static_cast<char*>(pk_new_bkt(sizeof(T) * count, alignof(T), bucket)); } else { ptr = static_cast<char*>(pk_new_base(sizeof(T) * count, alignof(T))); } - if (ptr == nullptr) return nullptr; + if (ptr == NULL) return NULL; if IS_CONSTRUCTIBLE(T) { for (long i = 0; i < count; ++i) { new (ptr + (i * sizeof(T))) T{}; @@ -700,7 +754,7 @@ pk_new(long count, pk_membucket* bucket = nullptr) template <typename T> inline void -pk_delete(const T* ptr, pk_membucket* bucket = nullptr) +pk_delete(const T* ptr, pk_membucket* bucket = NULL) { if IS_DESTRUCTIBLE(T) { reinterpret_cast<const T*>(ptr)->~T(); @@ -714,7 +768,7 @@ pk_delete(const T* ptr, pk_membucket* bucket = nullptr) template <typename T> inline void -pk_delete(const T* ptr, long count, pk_membucket* bucket = nullptr) +pk_delete(const T* ptr, long count, pk_membucket* bucket = NULL) { if IS_DESTRUCTIBLE(T) { for (long i = 0; i < count; ++i) { @@ -742,282 +796,264 @@ pk_delete(const T* ptr, long count, pk_membucket* bucket = nullptr) static inline void pkmem_stupid_header_warnings() { (void)stdout; } #if defined(PK_MEMORY_DEBUGGER) -/* - * Note that certain aspects of this expect that you only have one non-transient bucket. - * If you need to track multiple non-transient buckets, these sections will need a refactor. - */ #endif -#ifndef PK_MAX_BUCKET_COUNT -# define PK_MAX_BUCKET_COUNT 8 +#ifndef PK_MINIMUM_ALIGNMENT +# define PK_MINIMUM_ALIGNMENT 1 +#endif +#ifndef PK_MEMORY_DEBUGGER_MAX_BUCKET_COUNT + #define PK_MEMORY_DEBUGGER_MAX_BUCKET_COUNT 16 #endif +#define EXPECTED_PK_MEMBLOCK_SIZE 128 + +#define pk_memblock_blocks_idx(bkt, idx) ((bkt->block_capacity-1)-(idx)) +#define pk_bkt_data(bkt) ((char*)bkt + EXPECTED_PK_MEMBLOCK_SIZE) +#define pk_bkt_head(bkt) ((&bkt->data[0]) + bkt->head) +#define pk_bkt_data_sz(bkt) (size_t)((char*)&bkt->blocks[0] - &bkt->data[0]) + struct pk_memblock { - char* data; + union { + char* data; + void* ptr; + }; size_t size; }; struct pk_membucket { - // the total size of the bucket, `blocks+ptr` + // 00 + mtx_t mtx; + // 40 + // the total size of the bucket, struct+data size_t size; - // the current head of the bucket: byte offset from `ptr`. + // 48 + // the current head of the bucket: byte offset from `data`. // All currently alloc'd data is before this offset size_t head; - // amount of lost bytes in this membucket, hopefully zero - size_t lostBytes; + // 56 + uint32_t block_capacity; + // 60 + uint32_t block_head_l; + // 64 + // this should ALWAYS point to the last block containing unalloced space in bkt + uint32_t block_head_r; + // 68 // the number of active allocations from this bucket - size_t allocs; - // the index of the last empty block. - // Should always point to `pk_memblock{ .data = ptr+head, .size=size-head }` - size_t lastEmptyBlockIndex; - // number of pk_memblocks in the `*blocks` array - size_t maxBlockCount; - // ptr to an array of pk_memblock to track ALL free space between ptr and ptr+sz - struct pk_memblock* blocks; - // starting point for alloc'd data - union { - char* ptr; - void* raw; - }; - const char* description; - mtx_t mtx; - bool transient; -}; - -static struct pk_membucket pk_buckets[PK_MAX_BUCKET_COUNT]; -static size_t pk_bucket_head = 0; - + // -should correlate to blocks that have a sz > 0 + uint32_t alloc_count; + // 72 + struct pk_memblock *blocks; + // 80 + enum PK_MEMBUCKET_FLAGS flags; + // 88 + const char *description; + // 96 #ifdef PK_MEMORY_DEBUGGER -#include <stdatomic.h> -struct pk_dbg_memblock { - struct pk_memblock blk; - struct pk_membucket *bkt; -}; -static struct pk_dbg_memblock debug_all_allocs[1024 * 1024]; -static atomic_size_t debug_alloc_head = 0; -static atomic_bool has_init_debug = false; -static mtx_t debug_alloc_mtx; + struct pk_memblock *debug_blocks; + // 104 + uint32_t debug_head_l; + // 108 + uint32_t debug_head_r; + // 112 + uint32_t debug_block_capacity; + // 116 + char padding[(8*1)+4]; +#else + char padding[(8*4)]; #endif + // 128 + // starting point for alloc'd data + // data[] is illegal in c++ (though it works in gcc/clang, but so does this) + char data[1]; +}; -bool -pk_memory_is_in_bucket(const void* ptr, const struct pk_membucket* bkt) -{ - if (ptr >= bkt->raw && (const char*)ptr < bkt->ptr + bkt->size) return true; - return false; -} +static struct pk_membucket *client_bucket = NULL; -void -pk_memory_debug_print() +size_t +pk_mem_bucket_calculate_size(size_t sz, size_t reserved_block_count) { - size_t i, d; -#ifdef PK_MEMORY_DEBUGGER - size_t count = 0; - size_t dah = debug_alloc_head; -#endif - PK_LOGV_INF("Memory Manager printout:\nBucket count: %li\n", pk_bucket_head); - for (i = 0; i < pk_bucket_head; ++i) { - PK_LOGV_INF("- bucket #%li\n", i); - PK_LOGV_INF("\tdescription: %s\n", pk_buckets[i].description); - PK_LOGV_INF("\tsize: %li\n", pk_buckets[i].size); - PK_LOGV_INF("\thead: %li\n", pk_buckets[i].head); - PK_LOGV_INF("\tlostBytes: %li\n", pk_buckets[i].lostBytes); - PK_LOGV_INF("\tallocs: %li\n", pk_buckets[i].allocs); - PK_LOGV_INF("\tlastEmptyBlockIndex: %li\n", pk_buckets[i].lastEmptyBlockIndex); - PK_LOGV_INF("\tmaxBlockCount: %li\n", pk_buckets[i].maxBlockCount); - PK_LOGV_INF("\tblocks: %p\n", (void *)pk_buckets[i].blocks); - PK_LOGV_INF("\tptr: %p\n", (void *)pk_buckets[i].ptr); - PK_LOGV_INF("\ttransient: %i\n", pk_buckets[i].transient); -#ifdef PK_MEMORY_DEBUGGER - for (d = 0; d < dah; ++d) { - if (debug_all_allocs[d].bkt == &pk_buckets[d] && debug_all_allocs[d].blk.size > 0) { - count += 1; - } - } - PK_LOGV_INF("\tdebug alloc count: %lu\n", count); - PK_LOGV_INF("\tdebug alloc last: %lu\n", dah); -#endif - } + size_t base_size = EXPECTED_PK_MEMBLOCK_SIZE + sz + (sizeof(struct pk_memblock) * reserved_block_count); + // This trick ensures that our array of pk_memblocks at the end is mem-aligned. + // We do, however, still have to do the math when setting the ptr. + // Why? the user may have strict memory requirements and didn't call this function. + return base_size + (64 - (base_size % 64)); } -void -pk_memory_flush() +bool +pk_mem_bucket_ptr_is_in_mem_bucket(const void* ptr, const struct pk_membucket* bkt) { - for (long i = pk_bucket_head - 1; i > -1; --i) { - if (pk_buckets[i].head != 0) break; - if (pk_buckets[i].transient == true) break; - pk_bucket_head--; - if (pk_buckets[i].raw == CAFE_BABE(void)) continue; - pk_bucket_destroy(&pk_buckets[i]); - } + return (ptr >= (void*)bkt && ptr < (void*)pk_bkt_head(bkt)); } void -pk_memory_teardown_all() -{ - for (int64_t i = pk_bucket_head; i > 0; --i) { - if (pk_buckets[i - 1].ptr == nullptr) continue; - if (pk_buckets[i - 1].ptr == CAFE_BABE(char)) continue; - pk_bucket_destroy(&pk_buckets[i - 1]); - } - pk_bucket_head = 0; +pk_mem_bucket_debug_print(struct pk_membucket *bkt) +{ + PK_LOG_INF("pk_membucket details:\n"); + PK_LOGV_INF("\tbkt: %p\n", (void *)bkt); + PK_LOGV_INF("\tdescription: %s\n", bkt->description); + PK_LOGV_INF("\tsize: %lu\n", bkt->size); + PK_LOGV_INF("\thead: %lu\n", bkt->head); + PK_LOGV_INF("\tallocs: %u\n", bkt->alloc_count); + PK_LOGV_INF("\tblock head_l: %u\n", bkt->block_head_l); + PK_LOGV_INF("\tblock head_r: %u\n", bkt->block_head_r); + PK_LOGV_INF("\tflags: %lu\n", bkt->flags); #ifdef PK_MEMORY_DEBUGGER - debug_alloc_head = 0; - mtx_destroy(&debug_alloc_mtx); - has_init_debug = false; + PK_LOGV_INF("\tdebug alloc head_l: %u\n", bkt->debug_head_l); + PK_LOGV_INF("\tdebug alloc head_r: %u\n", bkt->debug_head_r); + PK_LOGV_INF("\tdebug cappacity: %u\n", bkt->debug_block_capacity); #endif } -static int64_t -pk_bucket_create_inner(int64_t sz, bool transient, const char* description) -{ - if (pk_bucket_head >= PK_MAX_BUCKET_COUNT) return -1; -#ifdef PK_MEMORY_DEBUGGER - if (has_init_debug == false) { - has_init_debug = true; - mtx_init(&debug_alloc_mtx, mtx_plain); - memset(debug_all_allocs, 0, sizeof(struct pk_dbg_memblock) * 1024 * 1024); - } -#endif - int64_t blockCount = sz * 0.01; - struct pk_membucket* bkt = &pk_buckets[pk_bucket_head]; +struct pk_membucket* +pk_mem_bucket_create(const char* description, int64_t sz, enum PK_MEMBUCKET_FLAGS flags) +{ + // 512 example: + // [000-127] pk_membucket + // [128-191] 64 bytes of data LOL + // [192-511] 20 pk_memblocks (20 is worst-case, start 16, 4 per 64 bytes) + assert(sz >= 512 && "[pkmem.h] bucket too small to track allocation data"); + struct pk_membucket* bkt = (struct pk_membucket*)aligned_alloc(64, sz); + if (bkt == NULL) return NULL; + mtx_init(&bkt->mtx, mtx_plain); bkt->size = sz; bkt->head = 0; - bkt->lostBytes = 0; - bkt->allocs = 0; - bkt->lastEmptyBlockIndex = 0; - bkt->maxBlockCount = blockCount < 10 ? 10 : blockCount; - bkt->blocks = (struct pk_memblock*)malloc(sz); - mtx_init(&bkt->mtx, mtx_plain); - assert(bkt->blocks != nullptr && "failed to allocate memory"); -#if 1 - memset(bkt->blocks, 0, sz); -#endif - bkt->ptr = ((char*)(bkt->blocks)) + (sizeof(struct pk_memblock) * bkt->maxBlockCount); - size_t misalignment = (size_t)(bkt->ptr) % PK_MAXIMUM_ALIGNMENT; - if (misalignment != 0) { - size_t moreBlocks = misalignment / sizeof(struct pk_memblock); - bkt->maxBlockCount += moreBlocks; - bkt->ptr += (PK_MAXIMUM_ALIGNMENT - misalignment); - } + bkt->block_capacity = 16; + bkt->block_head_l = 0; + bkt->block_head_r = 0; + bkt->alloc_count = 0; + bkt->flags = flags; bkt->description = description; - bkt->transient = transient; - struct pk_memblock* memBlock = (struct pk_memblock*)(bkt->blocks); - memBlock->data = bkt->ptr; - memBlock->size = sz - (sizeof(struct pk_memblock) * bkt->maxBlockCount); - return pk_bucket_head++; -} + char* blocks_addr = (char*)bkt + sz - (sizeof(struct pk_memblock) * bkt->block_capacity); + blocks_addr -= (size_t)blocks_addr % 64; + bkt->blocks = (struct pk_memblock*)blocks_addr; + bkt->block_capacity = (size_t)(((char*)bkt + sz) - blocks_addr) / sizeof(struct pk_memblock); -struct pk_membucket* -pk_bucket_create(const char* description, int64_t sz, bool transient) -{ - int64_t bkt_index = pk_bucket_create_inner(sz, transient, description); - // TODO some of of error handling - if (bkt_index < 0) { return nullptr; } - return &pk_buckets[bkt_index]; + bkt->blocks[pk_memblock_blocks_idx(bkt,0)].size = pk_bkt_data_sz(bkt); + bkt->blocks[pk_memblock_blocks_idx(bkt,0)].ptr = pk_bkt_data(bkt); + +#ifdef PK_MEMORY_DEBUGGER + bkt->debug_head_l = 0; + bkt->debug_head_r = 0; + bkt->debug_block_capacity = 128; + bkt->debug_blocks = (struct pk_memblock*)aligned_alloc(alignof(struct pk_memblock), sizeof(struct pk_memblock) * 128); + bkt->debug_blocks[0].ptr = NULL; + bkt->debug_blocks[0].size = 0; +#endif + + return bkt; } void -pk_bucket_destroy(struct pk_membucket* bkt) +pk_mem_bucket_destroy(struct pk_membucket* bkt) { - size_t i; -#ifdef PK_MEMORY_DEBUGGER - size_t dah; -#endif - for (i = 0; i < pk_bucket_head; ++i) { - if (&pk_buckets[i] == bkt) { - if (pk_bucket_head == i + 1) - pk_bucket_head--; - break; - } - } - free(bkt->blocks); - bkt->size = 0; - bkt->head = 0; - bkt->lostBytes = 0; - bkt->allocs = 0; - bkt->lastEmptyBlockIndex = 0; - bkt->maxBlockCount = 0; - bkt->blocks = CAFE_BABE(struct pk_memblock); - bkt->ptr = CAFE_BABE(char); - bkt->transient = false; - mtx_destroy(&bkt->mtx); + assert(bkt != NULL); #ifdef PK_MEMORY_DEBUGGER - dah = debug_alloc_head; - for (i = dah; i > 0; --i) { - if (debug_all_allocs[i].bkt == bkt) { - debug_all_allocs[i].blk.data = NULL; - debug_all_allocs[i].blk.size = 0u; - } - } + if (bkt->debug_blocks != NULL) free(bkt->debug_blocks); #endif + free(bkt); } void -pk_bucket_reset(struct pk_membucket* bkt) +pk_mem_bucket_reset(struct pk_membucket* bkt) { -#ifdef PK_MEMORY_DEBUGGER - size_t i; -#endif - if (bkt->transient != true) { + if (PK_HAS_FLAG(bkt->flags, PK_MEMBUCKET_FLAG_TRANSIENT) == false) { PK_LOG_ERR("WARNING: pk_bucket_reset called on non-transient pk_membucket\n"); } bkt->head = 0; - bkt->lostBytes = 0; - bkt->allocs = 0; - bkt->lastEmptyBlockIndex = 0; - bkt->blocks->data = bkt->ptr; - bkt->blocks->size = bkt->size - (sizeof(struct pk_memblock) * bkt->maxBlockCount); + bkt->block_capacity = 16; + char* blocks_addr = (char*)bkt + bkt->size - (sizeof(struct pk_memblock) * bkt->block_capacity); + blocks_addr -= (size_t)blocks_addr % 64; + bkt->blocks = (struct pk_memblock*)blocks_addr; + bkt->block_capacity = (size_t)(((char*)bkt + bkt->size) - blocks_addr) / sizeof(struct pk_memblock); + bkt->block_head_l = 0; + bkt->block_head_r = 0; + bkt->alloc_count = 0; + + bkt->blocks[pk_memblock_blocks_idx(bkt,0)].size = pk_bkt_data_sz(bkt); + bkt->blocks[pk_memblock_blocks_idx(bkt,0)].ptr = pk_bkt_data(bkt); + #ifdef PK_MEMORY_DEBUGGER - for (i = debug_alloc_head; i > 0; --i) { - if (debug_all_allocs[i].bkt == bkt) { - debug_all_allocs[i].blk.data = NULL; - debug_all_allocs[i].blk.size = 0u; - } - } + bkt->debug_head_l = 0; + bkt->debug_head_r = 0; + bkt->debug_blocks[0].ptr = NULL; + bkt->debug_blocks[0].size = 0; #endif } +void pk_mem_bucket_set_client_mem_bucket(struct pk_membucket *bkt) { + client_bucket = bkt; +} + void pk_bucket_insert_block(struct pk_membucket* bkt, const struct pk_memblock* block) { - int64_t index = bkt->lastEmptyBlockIndex; - while (index >= 0) { - struct pk_memblock* b = &bkt->blocks[index]; - struct pk_memblock* nb = &bkt->blocks[index + 1]; - if (b->data < block->data) { + // 2025-06-03 JCB + // Note that this function should only be called if we're INSERTING. + // This means that the block will never go at the END of the list - that would be an append. + // It can, however, be placed at the beginning, in which case the entire array shifts. + + struct pk_memblock* new_block = NULL; + struct pk_memblock* old_block = NULL; + size_t i, k; + + // 1. resize if needed + if (bkt->block_head_r+1 == bkt->block_capacity) { + if (bkt->blocks[pk_memblock_blocks_idx(bkt, bkt->block_head_r)].size < sizeof(struct pk_memblock)) { + PK_LOG_ERR("[pkmem.h] bkt out of memory when expanding memory blocks."); + exit(1); + } + // this is all that needs done, arr can just grow like this + bkt->blocks[pk_memblock_blocks_idx(bkt, bkt->block_head_r)].size -= sizeof(struct pk_memblock); + bkt->block_capacity += 1; + bkt->blocks -= 1; + } + + // 2. move all blocks forward until we pass the pointer + // reminder that these blocks are in REVERSE order + for (i = bkt->block_head_r+1; i > 0; --i) { + k = pk_memblock_blocks_idx(bkt, i); + new_block = &bkt->blocks[k]; + old_block = new_block+1; + *new_block = *old_block; + if (old_block->data < block->data) { break; } - nb->data = b->data; - nb->size = b->size; - index -= 1; } - struct pk_memblock *b = &bkt->blocks[index + 1]; - b->data = block->data; - b->size = block->size; - bkt->lastEmptyBlockIndex += 1; + assert(old_block != NULL); + if (i == 0 && old_block != NULL) { + *old_block = *block; + } else { + *new_block = *block; + } + bkt->block_head_r += 1; } void -pk_bucket_collapse_empty_blocks(struct pk_membucket* bkt) -{ - size_t i, ii; - for (ii = bkt->lastEmptyBlockIndex+1; ii > 0; --ii) { - i = ii-1; - struct pk_memblock* block = &bkt->blocks[i]; - if (block->size == 0 && i == bkt->lastEmptyBlockIndex) { - block->data = nullptr; - bkt->lastEmptyBlockIndex -= 1; - continue; - } - if (block->size > 0) { - continue; - } - for (size_t k = i; k < bkt->lastEmptyBlockIndex; ++k) { - bkt->blocks[k].data = bkt->blocks[k + 1].data; - bkt->blocks[k].size = bkt->blocks[k + 1].size; - } - bkt->lastEmptyBlockIndex -= 1; +pk_bucket_collapse_blocks(struct pk_membucket* bkt) +{ + // 1. loop through from (rev_idx)0 to head_r, shifting any blocks that have size 0 + struct pk_memblock* new_block; + struct pk_memblock* old_block; + size_t i, ii, bhr; + // there's an off by one annoynce here + // start with ii = 0 + // if we start with ii = 1, we might subtract from block_head_r when nothing was shifted + for (i = 0, ii = 0, bhr = bkt->block_head_r; (i + ii) <= bhr; ++i) { + new_block = &bkt->blocks[pk_memblock_blocks_idx(bkt, i)]; + if (new_block->size > 0) continue; + do { + old_block = new_block - ii; + if (old_block->size == 0) { + ii+=1; + } else { + break; + } + } while (i + ii <= bhr); + *new_block = *old_block; + old_block->size = 0; + old_block->ptr = NULL; } + bkt->block_head_r -= ii; } void* @@ -1026,130 +1062,122 @@ pk_new_bkt(size_t sz, size_t alignment, struct pk_membucket* bkt) #ifdef PK_MEMORY_FORCE_MALLOC return malloc(sz); #endif - if (sz == 0) return nullptr; - if (bkt == nullptr) return nullptr; + if (sz == 0) return NULL; + if (bkt == NULL) return NULL; // TODO some type of error handling - if ((bkt->size - bkt->head) < (sz + alignment - 1)) return nullptr; - size_t i; + if ((bkt->size - bkt->head) < (sz + alignment - 1)) return NULL; + size_t i, k; size_t calculatedAlignment = alignment < PK_MINIMUM_ALIGNMENT ? PK_MINIMUM_ALIGNMENT : alignment; size_t misalignment = 0; - struct pk_memblock* prevBlock = nullptr; - struct pk_memblock* block = nullptr; - struct pk_memblock* nextBlock = nullptr; - void* data = nullptr; + struct pk_memblock tmp_blk; + struct pk_memblock* block = NULL; + void* data = NULL; mtx_lock(&bkt->mtx); - for (i = 0; i <= bkt->lastEmptyBlockIndex; ++i) { - struct pk_memblock* blk = &bkt->blocks[i]; - misalignment = (size_t)(blk->data) % calculatedAlignment; + + // find block + for (i = 0; i <= bkt->block_head_r; ++i) { + k = pk_memblock_blocks_idx(bkt, i); + tmp_blk = bkt->blocks[k]; + misalignment = (size_t)(tmp_blk.data) % calculatedAlignment; misalignment = (calculatedAlignment - misalignment) % calculatedAlignment; - if (blk->size >= sz + misalignment) { - block = blk; - if (i < bkt->lastEmptyBlockIndex && bkt->blocks[i + 1].data == block->data + block->size) { - nextBlock = &bkt->blocks[i + 1]; - } - if (i > 0 && i != bkt->lastEmptyBlockIndex && (bkt->blocks[i-1].data + bkt->blocks[i-1].size) == block->data) { - prevBlock = &bkt->blocks[i - 1]; - } - break; + if (tmp_blk.size < sz + misalignment) { + continue; } + block = &bkt->blocks[k]; + break; } - if (block == nullptr) { + if (block == NULL) { mtx_unlock(&bkt->mtx); - assert(block != nullptr && "memory corruption: not enough space in chosen bkt"); + assert(block != NULL && "memory corruption: not enough space in chosen bkt"); } data = block->data + misalignment; #ifdef PK_MEMORY_DEBUGGER - bool handled = bkt->transient; - struct pk_dbg_memblock* mb = nullptr; - if (handled == false) { - for (i = 0; i < debug_alloc_head; ++i) { - mb = &debug_all_allocs[i]; - if (mb->bkt != NULL) continue; - assert((mb->blk.size == 0 || (void*)(mb->blk.data) != data) && "mem address alloc'd twice!"); - if (mb->blk.size == 0) { - mb->blk.data = (char*)(data); - mb->blk.size = sz; - mb->bkt = bkt; - handled = true; - break; + size_t ii; + if (PK_HAS_FLAG(bkt->flags, PK_MEMBUCKET_FLAG_TRANSIENT) == false) { + for (i = 0; i < bkt->debug_head_r; ++i) { + assert((bkt->debug_blocks[i].size == 0 || (void*)(bkt->debug_blocks[i].data) != data) && "mem address alloc'd twice!"); + } + i = bkt->debug_head_l; + if (bkt->debug_head_l == bkt->debug_head_r) { + bkt->debug_head_l++; + bkt->debug_head_r++; + + if (bkt->debug_head_r == bkt->debug_block_capacity) { + struct pk_memblock *debug_blocks; + debug_blocks = (struct pk_memblock*)aligned_alloc(alignof(struct pk_memblock), sizeof(struct pk_memblock) * (bkt->debug_block_capacity + 128)); + assert(debug_blocks != NULL); + memcpy(debug_blocks, bkt->debug_blocks, sizeof(struct pk_memblock) * bkt->debug_block_capacity); + free(bkt->debug_blocks); + bkt->debug_blocks = debug_blocks; + bkt->debug_block_capacity += 128; } + + bkt->debug_blocks[bkt->debug_head_r].ptr = NULL; + bkt->debug_blocks[bkt->debug_head_r].size = 0; + + } else { + // 2025-06-05 JCB + // This intentionally looks at debug_head_r, which could potentially + // be uninitialized. I added some logic elsewhere to ensure that + // whenever debug_head_r is incremented, we set the related block + // to NULL/0 so that this will catch size==0. + // I was experiencing an issue where in testing it was initialized to + // NULL/0, but then in a client application it was garbage data. + for (ii = bkt->debug_head_l+1; ii <= bkt->debug_head_r; ++ii) { + if (bkt->debug_blocks[ii].size == 0) { + bkt->debug_head_l = ii; + break; + } + } + assert(ii != bkt->debug_head_r+1); } - } - if (handled == false) { - mtx_lock(&debug_alloc_mtx); - i = debug_alloc_head++; - mtx_unlock(&debug_alloc_mtx); - debug_all_allocs[i].blk.data = (char*)data; - debug_all_allocs[i].blk.size = sz; - debug_all_allocs[i].bkt = bkt; + assert(bkt->debug_head_l <= bkt->debug_head_r); + bkt->debug_blocks[i].data = (char*)data; + bkt->debug_blocks[i].size = sz; } #endif - int64_t afterSize = block->size - (misalignment + sz); - if (block->data == bkt->ptr + bkt->head) { + if (block->data == pk_bkt_head(bkt)) { bkt->head += (sz + misalignment); } - if (afterSize > 0 && nextBlock == nullptr) { - struct pk_memblock newBlock; - memset(&newBlock, 0, sizeof(struct pk_memblock)); - newBlock.data = block->data + misalignment + sz; - newBlock.size = afterSize; - pk_bucket_insert_block(bkt, &newBlock); - } - if (prevBlock == nullptr && nextBlock == nullptr) { - block->size = misalignment; - } else if (nextBlock != nullptr) { - block->size = misalignment; - nextBlock->data -= afterSize; - nextBlock->size += afterSize; - } else if (prevBlock != nullptr) { - prevBlock->size += misalignment; - block->data += misalignment + sz; - block->size = 0; // if you make it here, afterSize has already been handled - } else { - assert(true == false && "this shouldn't happen"); + + tmp_blk.data = block->data; + tmp_blk.size = misalignment; + block->data += misalignment + sz; + block->size -= misalignment + sz; + + if (tmp_blk.size > 0) { + pk_bucket_insert_block(bkt, &tmp_blk); } - bkt->allocs++; - assert(data >= bkt->raw && "allocated data is before bucket data"); - assert((char*)data <= bkt->ptr + bkt->size && "allocated data is after bucket data"); - pk_bucket_collapse_empty_blocks(bkt); + pk_bucket_collapse_blocks(bkt); + + bkt->alloc_count++; + assert(data >= (void*)pk_bkt_data(bkt) && "allocated data is before bucket data"); + assert((char*)data <= pk_bkt_data(bkt) + bkt->size && "allocated data is after bucket data"); #ifdef PK_MEMORY_DEBUGGER - if (!bkt->transient) { + if (PK_HAS_FLAG(bkt->flags, PK_MEMBUCKET_FLAG_TRANSIENT) == false) { + size_t k; int64_t debug_tracked_alloc_size = 0; - int64_t debug_bucket_alloc_size = bkt->size - (sizeof(struct pk_memblock) * bkt->maxBlockCount); - for (i = 0; i < debug_alloc_head; ++i) { - if (debug_all_allocs[i].bkt != bkt) continue; - debug_tracked_alloc_size += debug_all_allocs[i].blk.size; + int64_t debug_bucket_alloc_size = pk_bkt_data_sz(bkt); + for (i = 0; i < bkt->debug_head_r; ++i) { + debug_tracked_alloc_size += bkt->debug_blocks[i].size; } - for (i = 0; i <= bkt->lastEmptyBlockIndex; ++i) { - debug_bucket_alloc_size -= bkt->blocks[i].size; + for (i = 0; i <= bkt->block_head_r; ++i) { + k = pk_memblock_blocks_idx(bkt, i); + debug_bucket_alloc_size -= bkt->blocks[k].size; } assert(debug_tracked_alloc_size == debug_bucket_alloc_size && "allocation size mismatch!"); } #endif mtx_unlock(&bkt->mtx); + memset(data, 0, sz); return data; } void* pk_new_base(size_t sz, size_t alignment) { - struct pk_membucket* bkt = nullptr; - for (size_t i = 0; i < pk_bucket_head; ++i) { - if (pk_buckets[i].transient == true) { - continue; - } - if (pk_buckets[i].size - pk_buckets[i].head < sz + (alignment - 1)) { - continue; - } - bkt = &pk_buckets[i]; - break; - } - if (bkt == nullptr) { - int64_t bkt_index = pk_bucket_create_inner(PK_DEFAULT_BUCKET_SIZE, false, "pk_bucket internally created"); - // TODO some of of error handling - if (bkt_index >= 0) bkt = &pk_buckets[bkt_index]; - } - return pk_new_bkt(sz, alignment, bkt); + if (client_bucket == NULL) return NULL; + return pk_new_bkt(sz, alignment, client_bucket); } void* @@ -1163,97 +1191,121 @@ void pk_delete_bkt(const void* ptr, size_t sz, struct pk_membucket* bkt) { #ifdef PK_MEMORY_FORCE_MALLOC +#if defined(__cplusplus) return std::free(const_cast<void*>(ptr)); +#else + return free((void*)ptr); #endif - size_t i; +#endif + size_t i, k; mtx_lock(&bkt->mtx); - assert(bkt->allocs > 0); - assert(ptr >= bkt->raw && (char*)ptr < bkt->ptr + bkt->size && "pointer not in memory bucket range"); + assert(bkt->alloc_count > 0); + assert(pk_mem_bucket_ptr_is_in_mem_bucket(ptr, bkt) && "pointer not in memory bucket range"); assert(sz > 0 && "attempted to free pointer of size 0"); #ifdef PK_MEMORY_DEBUGGER - size_t ii; - bool found = bkt->transient; - mtx_lock(&debug_alloc_mtx); + bool found = PK_HAS_FLAG(bkt->flags, PK_MEMBUCKET_FLAG_TRANSIENT); + struct pk_memblock *debug_memblocks = bkt->debug_blocks; + struct pk_memblock *mb; if (found == false) { - for (ii = debug_alloc_head; ii > 0; --ii) { - i = ii-1; - struct pk_dbg_memblock* mb = &debug_all_allocs[i]; - if (mb->bkt != bkt) continue; - if (mb->blk.size == 0) continue; - if ((void*)(mb->blk.data) == ptr) { - assert(mb->blk.size == sz && "[pkmem.h] incorrect free size"); - mb->blk.size = 0; - mb->bkt = NULL; + for (i = bkt->debug_head_r+1; i > 0; --i) { + mb = &debug_memblocks[i-1]; + if (mb->size == 0) continue; + if ((void*)(mb->ptr) == ptr) { + assert(mb->size == sz && "[pkmem.h] incorrect free size"); + mb->ptr = NULL; + mb->size = 0; found = true; - if (i == (debug_alloc_head - 1)) { - debug_alloc_head--; + if (i <= bkt->debug_head_l) { + bkt->debug_head_l = i-1; } + if (i == bkt->debug_head_r+1) { + if (bkt->debug_head_l == bkt->debug_head_r) { + bkt->debug_head_l--; + } + bkt->debug_head_r--; + } + assert(bkt->debug_head_l <= bkt->debug_head_r); break; } } } - mtx_unlock(&debug_alloc_mtx); assert(found && "[pkmem.h] double free or invalid ptr"); #endif - bkt->allocs--; - if (bkt->allocs == 0) { + bkt->alloc_count--; + if (bkt->alloc_count == 0) { bkt->head = 0; - bkt->lastEmptyBlockIndex = 0; - bkt->blocks[0].data = bkt->ptr; - bkt->blocks[0].size = bkt->size - (sizeof(struct pk_memblock) * bkt->maxBlockCount); + bkt->block_head_l = 0; + bkt->block_head_r = 0; + bkt->blocks[0].data = pk_bkt_data(bkt); + bkt->blocks[0].size = pk_bkt_data_sz(bkt); +#ifdef PK_MEMORY_DEBUGGER + bkt->debug_head_l = 0; + bkt->debug_head_r = 0; + bkt->debug_blocks[0].data = NULL; + bkt->debug_blocks[0].size = 0; +#endif mtx_unlock(&bkt->mtx); return; } char* afterPtr = ((char*)(ptr))+sz; - struct pk_memblock* beforeBlk = nullptr; - struct pk_memblock* afterBlk = nullptr; - for (i = bkt->lastEmptyBlockIndex; i > 0; --i) { - if (bkt->blocks[i-1].data + bkt->blocks[i-1].size == ptr) { - beforeBlk = &bkt->blocks[i-1]; + struct pk_memblock* tmp_blk = NULL; + struct pk_memblock* beforeBlk = NULL; + struct pk_memblock* afterBlk = NULL; + for (i = bkt->block_head_r+1; i > 0 ; --i) { + k = pk_memblock_blocks_idx(bkt, i-2); + tmp_blk = &bkt->blocks[k]; + if (tmp_blk->data + tmp_blk->size == ptr) { + beforeBlk = tmp_blk; } - if (bkt->blocks[i].data == afterPtr) { - afterBlk = &bkt->blocks[i]; + tmp_blk -= 1; + if (i <= bkt->block_head_r+1 && tmp_blk->data == afterPtr) { + afterBlk = tmp_blk; break; } - if (bkt->blocks[i-1].data < (char*)ptr) { + tmp_blk += 1; + if (tmp_blk->data < (char*)ptr) { break; } } - if (ptr == bkt->ptr && afterBlk == nullptr && bkt->blocks[0].data == afterPtr) { - afterBlk = &bkt->blocks[0]; + if (ptr == &bkt->data[0] && afterBlk == NULL && bkt->blocks[pk_memblock_blocks_idx(bkt, 0)].data == afterPtr) { + afterBlk = &bkt->blocks[pk_memblock_blocks_idx(bkt, 0)]; } - if (afterBlk != nullptr && afterBlk->data == bkt->ptr + bkt->head) { + if (afterBlk != NULL && afterBlk->data == pk_bkt_head(bkt)) { bkt->head -= sz; - if (beforeBlk != nullptr) { + if (beforeBlk != NULL) { bkt->head -= beforeBlk->size; } } - if (beforeBlk == nullptr && afterBlk == nullptr) { + if (beforeBlk == NULL && afterBlk == NULL) { struct pk_memblock newBlock; memset(&newBlock, 0, sizeof(struct pk_memblock)); newBlock.data = (char*)ptr; newBlock.size = sz; pk_bucket_insert_block(bkt, &newBlock); - } else if (beforeBlk != nullptr && afterBlk != nullptr) { + } else if (beforeBlk != NULL && afterBlk != NULL) { beforeBlk->size += sz + afterBlk->size; + if (beforeBlk->data == pk_bkt_head(bkt)) { + bkt->block_head_r--; + } afterBlk->size = 0; - } else if (beforeBlk != nullptr) { + afterBlk->data = NULL; + } else if (beforeBlk != NULL) { beforeBlk->size += sz; - } else if (afterBlk != nullptr) { + } else if (afterBlk != NULL) { afterBlk->data -= sz; afterBlk->size += sz; } - pk_bucket_collapse_empty_blocks(bkt); + pk_bucket_collapse_blocks(bkt); #ifdef PK_MEMORY_DEBUGGER - if (!bkt->transient) { + if (PK_HAS_FLAG(bkt->flags, PK_MEMBUCKET_FLAG_TRANSIENT) == false) { int64_t debug_tracked_alloc_size = 0; - int64_t debug_bucket_alloc_size = bkt->size - (sizeof(struct pk_memblock) * bkt->maxBlockCount); - for (i = 0; i < debug_alloc_head; ++i) { - if (debug_all_allocs[i].bkt != bkt) continue; - debug_tracked_alloc_size += debug_all_allocs[i].blk.size; + int64_t debug_bucket_alloc_size = pk_bkt_data_sz(bkt); + for (i = 0; i < bkt->debug_head_r; ++i) { + debug_tracked_alloc_size += bkt->debug_blocks[i].size; } - for (i = 0; i <= bkt->lastEmptyBlockIndex; ++i) { - debug_bucket_alloc_size -= bkt->blocks[i].size; + for (i = 0; i <= bkt->block_head_r; ++i) { + k = pk_memblock_blocks_idx(bkt, i); + debug_bucket_alloc_size -= bkt->blocks[k].size; } assert(debug_tracked_alloc_size == debug_bucket_alloc_size && "allocation size mismatch!"); } @@ -1264,13 +1316,7 @@ pk_delete_bkt(const void* ptr, size_t sz, struct pk_membucket* bkt) void pk_delete_base(const void* ptr, size_t sz) { - struct pk_membucket* bkt = nullptr; - for (size_t i = 0; i < pk_bucket_head; ++i) { - bkt = &pk_buckets[i]; - if (ptr >= bkt->raw && (char*)ptr < bkt->ptr + bkt->size) break; - } - assert(bkt != nullptr && "failed to determine correct memory bucket"); - pk_delete_bkt(ptr, sz, bkt); + pk_delete_bkt(ptr, sz, client_bucket); } void @@ -1795,6 +1841,8 @@ void pk_arr_append_t(pk_arr_t<T> *arr, const T &item) { #ifdef PK_IMPL_ARR +#include <string.h> + #ifndef PK_ARR_GROW_RATIO #define PK_ARR_GROW_RATIO 1.5 #endif @@ -2613,18 +2661,17 @@ struct pk_bkt_arr_handle pk_bkt_arr_handle_decrement(struct pk_bkt_arr *arr, str #if defined (__cplusplus) #include <assert.h> -#include <future> template<typename T> struct pk_bkt_arr_t : public pk_bkt_arr { pk_bkt_arr_t(); pk_bkt_arr_t(struct pk_bkt_arr_handle limits, struct pk_membucket *bkt_buckets = nullptr, struct pk_membucket *bkt_data = nullptr); ~pk_bkt_arr_t(); T &operator[](struct pk_bkt_arr_handle); + using FN_Iter = pk_tmpln_1<void, T*, void*>; + using FN_Find = pk_tmpln_2<bool, const T*, const T*, const void*, const void*>; }; template<typename T> -pk_bkt_arr_t<T>::pk_bkt_arr_t() { - pk_bkt_arr_init(this, sizeof(T), alignof(T), {PK_BKT_ARR_HANDLE_B_MAX, PK_BKT_ARR_HANDLE_I_MAX}, nullptr, nullptr); -} +pk_bkt_arr_t<T>::pk_bkt_arr_t() { } template<typename T> pk_bkt_arr_t<T>::pk_bkt_arr_t(struct pk_bkt_arr_handle limits, struct pk_membucket *bkt_buckets, struct pk_membucket *bkt_data) { pk_bkt_arr_init(this, sizeof(T), alignof(T), limits, bkt_buckets, bkt_data); @@ -2856,39 +2903,314 @@ struct pk_bkt_arr_handle pk_bkt_arr_handle_decrement(struct pk_bkt_arr *arr, str } #endif /* PK_IMPL_BKTARR */ -#ifndef PK_PKTMPLN_H -#define PK_PKTMPLN_H +#ifndef PK_PKFUNCINSTR_H +#define PK_PKFUNCINSTR_H -#if defined (__cplusplus) -#include <functional> -template<typename Ret, typename A1, typename B1> -struct pk_tmpln_1 { - using FuncType = std::function<Ret(A1)>; - FuncType func; - static Ret invoke(void *ptr, B1 b1) { - auto *self = static_cast<pk_tmpln_1*>(ptr); - return self->func(reinterpret_cast<A1>(b1)); - } + +#define PK_FUNCINSTR_CHILDREN_INCREMENT_COUNT 8 + +struct pk_funcinstr; +struct pk_funcinstr { + void *fn; + struct pk_tmr tmr; + struct pk_funcinstr *parent; + struct pk_funcinstr *first_child; + struct pk_funcinstr **children; + size_t n_children; + size_t r_children; }; -template<typename Ret, typename A1, typename A2, typename B1, typename B2> -struct pk_tmpln_2 { - using FuncType = std::function<Ret(A1, A2)>; - FuncType func; - static Ret invoke(void *ptr, B1 b1, B2 b2) { - auto *self = static_cast<pk_tmpln_2*>(ptr); - return self->func(reinterpret_cast<A1>(b1), reinterpret_cast<A2>(b2)); - } + +void pk_funcinstr_init(); +void pk_funcinstr_teardown(); + +#if defined(__cplusplus) +extern "C" { +#endif + +#if defined(__clang__) +// clang +#elif defined(__GNUC__) || defined(__GNUG__) + +void __cyg_profile_func_enter(void* this_fn, void* call_site); +void __cyg_profile_func_exit(void* this_fn, void* call_site); + +#else +// other +#endif + +#if defined(__cplusplus) +} // extern "C" +#endif + +#endif /* PK_PKFUNCINSTR_H */ +#if defined(PK_IMPL_FUNCINSTR) + +#include <assert.h> +#include <stdio.h> +#include <threads.h> +#include <string.h> + +// TODO 2025-06-02 JCB +// There's some speed improvements that can be made here by growing faster. +// OR use some type of bucket +// - might be a good chance to isolate some of the pkmem logic + +#define PK_FUNCINSTR_BKT_START_COUNT 4 +#define PK_FUNCINSTR_BKT_GROW_AMOUNT 4 +#define PK_FUNCINSTR_BKT_DATA_COUNT 255 +struct pk_funcinstr_bkt { + uint8_t used_count; + uint8_t guard_enter; + uint8_t guard_exit; + struct timespec reset_time; + struct pk_funcinstr data[PK_FUNCINSTR_BKT_DATA_COUNT+1]; }; -template<typename Ret, typename A1, typename A2, typename A3, typename B1, typename B2, typename B3> -struct pk_tmpln_3 { - using FuncType = std::function<Ret(A1, A2, A3)>; - FuncType func; - static Ret invoke(void *ptr, B1 b1, B2 b2, B3 b3) { - auto *self = static_cast<pk_tmpln_3*>(ptr); - return self->func(reinterpret_cast<A1>(b1), reinterpret_cast<A2>(b2), reinterpret_cast<A3>(b3)); - } +struct pk_funcinstr_mstr { + mtx_t mtx; + struct timespec reset_time; + struct pk_funcinstr_bkt **buckets; + size_t r_buckets; + size_t n_buckets; }; +// if NULL, get a new bucket (or alloc if full). if !NULL, existing thread +static thread_local struct pk_funcinstr_bkt *pk_funcinstr_thrd_bkt = NULL; +// last function call (should be NULL or parent of current) +static thread_local struct pk_funcinstr *pk_funcinstr_thrd_instr = NULL; +static struct pk_funcinstr_mstr thrd_mstr; + +__attribute__((no_instrument_function)) +void pk_funcinstr_init() { + assert(thrd_mstr.reset_time.tv_sec == 0); + assert(thrd_mstr.reset_time.tv_nsec == 0); + assert(thrd_mstr.buckets == NULL); + assert(thrd_mstr.r_buckets == 0); + assert(thrd_mstr.n_buckets == 0); + mtx_init(&thrd_mstr.mtx, mtx_plain); + thrd_mstr.r_buckets = PK_FUNCINSTR_BKT_START_COUNT; + thrd_mstr.buckets = (struct pk_funcinstr_bkt**)aligned_alloc(alignof(struct pk_funcinstr_bkt *), (sizeof(struct pk_funcinstr_bkt *) * PK_FUNCINSTR_BKT_START_COUNT)); + memset(thrd_mstr.buckets, 0, (sizeof(struct pk_funcinstr_bkt *) * PK_FUNCINSTR_BKT_START_COUNT)); + clock_gettime(PK_TMR_CLOCK, &thrd_mstr.reset_time); +} + +__attribute__((no_instrument_function)) +void pk_funcinstr_teardown() { + size_t i, k; + mtx_lock(&thrd_mstr.mtx); + for (i = 0; i < thrd_mstr.n_buckets; ++i) { + struct pk_funcinstr_bkt *bkt = thrd_mstr.buckets[i]; + for (k = 0; k < bkt->used_count; ++k) { + free(bkt->data[k].children); + } + } + free(thrd_mstr.buckets); + thrd_mstr.reset_time.tv_sec = 0; + thrd_mstr.reset_time.tv_nsec = 0; + thrd_mstr.buckets = NULL; + thrd_mstr.r_buckets = 0; + thrd_mstr.n_buckets = 0; + mtx_unlock(&thrd_mstr.mtx); + mtx_destroy(&thrd_mstr.mtx); +} + +#if defined(__clang__) +// TODO clang XRay +// Come up with pk macros since XRay requires attributes to instrument? +#elif defined(__GNUC__) || defined(__GNUG__) + +#ifndef __USE_GNU + #define __USE_GNU #endif +#if defined(__cplusplus) +#include <cxxabi.h> +#endif +#include <dlfcn.h> +#include <link.h> +#include <string.h> -#endif /* PK_PKTMPLN_H */ +__attribute__((no_instrument_function)) +bool pk_funcinstr_detect_not_initialized() { + if (thrd_mstr.buckets == NULL) return true; + if (thrd_mstr.r_buckets == 0) return true; + return false; +} + +__attribute__((no_instrument_function)) +void pk_funcinstr_detect_and_handle_reset() { + bool should_hard_reset = false; + bool should_reset = pk_funcinstr_thrd_bkt == NULL; + if (pk_funcinstr_thrd_bkt != NULL) { + should_reset = pk_funcinstr_thrd_bkt->used_count == PK_FUNCINSTR_BKT_DATA_COUNT; + should_hard_reset = thrd_mstr.reset_time.tv_sec > pk_funcinstr_thrd_bkt->reset_time.tv_sec; + should_hard_reset = should_hard_reset || (thrd_mstr.reset_time.tv_sec == pk_funcinstr_thrd_bkt->reset_time.tv_sec && thrd_mstr.reset_time.tv_nsec > pk_funcinstr_thrd_bkt->reset_time.tv_nsec); + } + if (should_hard_reset) { + pk_funcinstr_thrd_bkt = NULL; + pk_funcinstr_thrd_instr = NULL; + should_reset = true; + } + if (should_reset) { + if (thrd_mstr.n_buckets == thrd_mstr.r_buckets) { + mtx_lock(&thrd_mstr.mtx); + thrd_mstr.r_buckets += PK_FUNCINSTR_BKT_GROW_AMOUNT; + struct pk_funcinstr_bkt **buckets = (struct pk_funcinstr_bkt**)aligned_alloc(alignof(void *), sizeof(void *) * thrd_mstr.r_buckets); + memcpy(buckets, thrd_mstr.buckets, sizeof(void *) * (thrd_mstr.n_buckets)); + memset((char*)buckets + (sizeof(void *) * (thrd_mstr.n_buckets)), 0, (sizeof(void *) * thrd_mstr.r_buckets) - sizeof(void *) * (thrd_mstr.n_buckets)); + free(thrd_mstr.buckets); + thrd_mstr.buckets = buckets; + mtx_unlock(&thrd_mstr.mtx); + } + struct pk_funcinstr_bkt *bkt = (struct pk_funcinstr_bkt *)aligned_alloc(alignof(struct pk_funcinstr_bkt), sizeof(struct pk_funcinstr_bkt)); + bkt->used_count = 0; + bkt->guard_enter = 0; + bkt->guard_exit = 0; + bkt->reset_time.tv_sec = 0; + bkt->reset_time.tv_nsec = 0; + if (pk_funcinstr_thrd_bkt != NULL) { + pk_funcinstr_thrd_bkt->guard_enter = 0; + pk_funcinstr_thrd_bkt->guard_exit = 0; + } + pk_funcinstr_thrd_bkt = bkt; + mtx_lock(&thrd_mstr.mtx); + thrd_mstr.buckets[thrd_mstr.n_buckets++] = bkt; + mtx_unlock(&thrd_mstr.mtx); + clock_gettime(PK_TMR_CLOCK, &pk_funcinstr_thrd_bkt->reset_time); + } +} + +__attribute__((no_instrument_function)) +bool pk_funcinstr_should_early_exit() { + if (pk_funcinstr_thrd_bkt->guard_enter != 0) return true; + if (pk_funcinstr_thrd_bkt->guard_exit != 0) return true; + return false; +} + +__attribute__((no_instrument_function)) +struct pk_funcinstr *pk_funcinstr_create_funcinstr(void *this_fn) { + struct pk_funcinstr *funcinstr = &pk_funcinstr_thrd_bkt->data[pk_funcinstr_thrd_bkt->used_count]; + pk_funcinstr_thrd_bkt->used_count++; + pk_tmr_start(funcinstr->tmr); + funcinstr->fn = this_fn; + funcinstr->parent = pk_funcinstr_thrd_instr; + funcinstr->children = NULL; + funcinstr->first_child = NULL; + funcinstr->n_children = 0; + funcinstr->r_children = 0; + + if (pk_funcinstr_thrd_instr != NULL) { + if (pk_funcinstr_thrd_instr->first_child == NULL) { + // avoid an malloc if n_children will only == 1 + pk_funcinstr_thrd_instr->first_child = funcinstr; + } else { + if (pk_funcinstr_thrd_instr->n_children == pk_funcinstr_thrd_instr->r_children) { + pk_funcinstr_thrd_instr->r_children += PK_FUNCINSTR_CHILDREN_INCREMENT_COUNT; + struct pk_funcinstr **children = (struct pk_funcinstr **)aligned_alloc(alignof(void *), sizeof(void *) * pk_funcinstr_thrd_instr->r_children); + memset((char*)children + (sizeof(void *) * (pk_funcinstr_thrd_instr->n_children)), 0, (sizeof(void *) * pk_funcinstr_thrd_instr->r_children) - sizeof(void *) * (pk_funcinstr_thrd_instr->n_children)); + if (pk_funcinstr_thrd_instr->children != NULL) { + memcpy(children, pk_funcinstr_thrd_instr->children, sizeof(void *) * pk_funcinstr_thrd_instr->n_children); + free(pk_funcinstr_thrd_instr->children); + } + pk_funcinstr_thrd_instr->children = children; + if (pk_funcinstr_thrd_instr->n_children == 0) { + pk_funcinstr_thrd_instr->children[0] = pk_funcinstr_thrd_instr->first_child; + pk_funcinstr_thrd_instr->n_children++; + } + } + pk_funcinstr_thrd_instr->children[pk_funcinstr_thrd_instr->n_children] = funcinstr; + pk_funcinstr_thrd_instr->n_children++; + } + } + return funcinstr; +} + +__attribute__((no_instrument_function)) +void __cyg_profile_func_enter(void* this_fn, void* call_site) { + (void)call_site; + if (pk_funcinstr_detect_not_initialized()) return; + pk_funcinstr_detect_and_handle_reset(); + if (pk_funcinstr_should_early_exit()) return; + pk_funcinstr_thrd_bkt->guard_enter++; + + pk_funcinstr_thrd_instr = pk_funcinstr_create_funcinstr(this_fn); + + pk_funcinstr_thrd_bkt->guard_enter = 0; +} + +__attribute__((no_instrument_function)) +void __cyg_profile_func_exit(void* this_fn, void* call_site) { + (void)call_site; + if (pk_funcinstr_detect_not_initialized()) return; + pk_funcinstr_detect_and_handle_reset(); + if (pk_funcinstr_should_early_exit()) return; + if (pk_funcinstr_thrd_instr == NULL) return; // exit called before enter? + pk_funcinstr_thrd_bkt->guard_exit++; + + Dl_info info; + + if (this_fn != pk_funcinstr_thrd_instr->fn) { + int64_t i = (int64_t)pk_funcinstr_thrd_bkt->used_count - 1; + for (; i > -1; --i) { + if (pk_funcinstr_thrd_bkt->data[i].fn == this_fn) { + if (pk_funcinstr_thrd_bkt->data[i].tmr.e.tv_sec == 0) { + pk_funcinstr_thrd_instr = &pk_funcinstr_thrd_bkt->data[i]; + break; + } + } + } + } + if (this_fn != pk_funcinstr_thrd_instr->fn) { + if (pk_funcinstr_thrd_instr->parent == NULL) { + struct pk_tmr tmr = pk_funcinstr_thrd_instr->tmr; + pk_funcinstr_thrd_instr = pk_funcinstr_create_funcinstr(this_fn); + pk_funcinstr_thrd_instr->tmr = tmr; + fprintf(stdout, "[pkfuncinstr] func mismatch; Parent func? Duration not accurate."); + } else { + fprintf(stderr, "[pkfuncinstr] func mismatch. Last: '"); + if (dladdr(pk_funcinstr_thrd_instr->fn, &info) != 0) { + fprintf(stderr, "%s", info.dli_sname); + } else { + fprintf(stderr, "(unknown)"); + } + fprintf(stderr, "'. Current: '"); + if (dladdr(this_fn, &info) != 0) { + fprintf(stderr, "%s'.\n", info.dli_sname); + } else { + fprintf(stderr, "(unknown)'.\n"); + } + pk_funcinstr_thrd_bkt->guard_exit=0; + return; + } + } + + pk_tmr_stop(pk_funcinstr_thrd_instr->tmr); + if (dladdr(this_fn, &info) != 0) { + int depth = 0; + // TODO track depth in a better way + struct pk_funcinstr *p = pk_funcinstr_thrd_instr->parent; + while (p != NULL) { + depth += 1; + p = p->parent; + } + char *demangled = NULL; + if (info.dli_sname != NULL) { +#if defined(__cplusplus) + demangled = abi::__cxa_demangle(info.dli_sname, NULL, NULL, NULL); +#endif + } + fprintf(stdout, "[pkfuncinstr] %p %*s %s took %.6f ms\n" + ,this_fn + ,depth, "" + ,demangled != NULL ? demangled : info.dli_sname != NULL ? info.dli_sname : "???" + ,pk_tmr_duration_dbl_mili(pk_funcinstr_thrd_instr->tmr) + ); + if (demangled != NULL) free(demangled); + } + pk_funcinstr_thrd_bkt->guard_exit=0; + pk_funcinstr_thrd_instr = pk_funcinstr_thrd_instr->parent; +} + +#else +// other +#endif + +#endif /* PK_IMPL_FUNCINSTR */ #endif /* PK_SINGLE_HEADER_FILE_H */ diff --git a/src/plugins.cpp b/src/plugins.cpp index 5ec86af..010c20e 100644 --- a/src/plugins.cpp +++ b/src/plugins.cpp @@ -18,7 +18,7 @@ pk_arr_t<PkeCallback> pkePluginCallbacks; pk_arr_t<CallbackSignature> sortedSignatures; void PkePlugin_Load(const char *path) { - if (path == nullptr || path == CAFE_BABE(void)) { + if (path == nullptr || path == CAFE_BABE(const char)) { return; } void *extension = dlopen(path, RTLD_NOW); diff --git a/src/scene.cpp b/src/scene.cpp index 30ef53a..ad6b502 100644 --- a/src/scene.cpp +++ b/src/scene.cpp @@ -15,7 +15,7 @@ struct pke_scene_master { } scene_mstr; void pke_scene_master_init() { - new (&scene_mstr.bc) pk_bkt_arr_t<pke_scene>{}; + new (&scene_mstr.bc) pk_bkt_arr_t<pke_scene>{ pk_bkt_arr_handle_MAX_constexpr }; } void pke_scene_master_teardown() { @@ -60,7 +60,7 @@ struct pke_scene *pke_scene_get_by_path(const char *file_path) { fprintf(stderr, "[pke_scene_get_by_name] Scene not found in memory nor filesystem: '%s'\n", file_path); return nullptr; } - srlztn_deserialize_helper *h = pke_deserialize_init(pkeSettings.mem.bkt); + srlztn_deserialize_helper *h = pke_deserialize_init(pkeSettings.mem_bkt.game_transient); h->scene = pke_scene_create(file_path); pke_deserialize_scene_from_stream(f, h); pke_deserialize_teardown(h); diff --git a/src/static-ui.cpp b/src/static-ui.cpp index 1eca4e4..34887d9 100644 --- a/src/static-ui.cpp +++ b/src/static-ui.cpp @@ -35,7 +35,7 @@ struct pke_ui_master { } pke_ui_master; void pke_ui_init() { - pke_ui_master.bkt = pk_bucket_create("pke ui", PK_DEFAULT_BUCKET_SIZE, false); + pke_ui_master.bkt = pk_mem_bucket_create("pke ui", PK_MEM_DEFAULT_BUCKET_SIZE, PK_MEMBUCKET_FLAG_NONE); pke_ui_master.root_boxes = pk_new<pke_ui_box*>(1, pke_ui_master.bkt); pke_ui_master.h_root_boxes = 0; pke_ui_master.r_root_boxes = 1; @@ -399,8 +399,8 @@ void pke_ui_teardown() { for (pke_ui_box_count_T i = 0; i < pke_ui_master.h_root_boxes; ++i) { pke_ui_teardown_box_recursive(pke_ui_master.root_boxes[i]); } - pk_delete<pke_ui_box *>(pke_ui_master.root_boxes, pke_ui_master.r_root_boxes); - pk_bucket_destroy(pke_ui_master.bkt); + pk_delete<pke_ui_box *>(pke_ui_master.root_boxes, pke_ui_master.r_root_boxes, pke_ui_master.bkt); + pk_mem_bucket_destroy(pke_ui_master.bkt); pke_ui_master.bkt = nullptr; pke_ui_master.root_boxes = nullptr; pke_ui_master.h_root_boxes = 0; diff --git a/src/thread-pool.cpp b/src/thread-pool.cpp index 5bd61c5..f110e42 100644 --- a/src/thread-pool.cpp +++ b/src/thread-pool.cpp @@ -123,7 +123,7 @@ void inline PkeThreads_Shutdown_Inner(ThreadPool &tp) { } void PkeThreads_Init() { - new (&thrdpl_mstr.bc) pk_bkt_arr_t<ThreadPool>; + new (&thrdpl_mstr.bc) pk_bkt_arr_t<ThreadPool>{ pk_bkt_arr_handle_MAX_constexpr }; } ThreadPoolHandle PkeThreads_Init(uint8_t threadCount, uint8_t maxQueueCount, struct pk_membucket *bkt) { @@ -165,7 +165,7 @@ bool PkeThreads_Enqueue(ThreadPoolHandle handle, std::packaged_task<void()> *job * Technically speaking, if we call the right pk_delete * we don't even need to worry about passing the struct pk_membucket */ - assert(pk_memory_is_in_bucket(job, tp->bkt) == true && "cannot enqueue packaged task from a non-matching struct pk_membucket"); + assert(pk_mem_bucket_ptr_is_in_mem_bucket(job, tp->bkt) == true && "cannot enqueue packaged task from a non-matching struct pk_membucket"); } return PkeThreads_Enqueue_Inner(*tp, job); diff --git a/src/window.cpp b/src/window.cpp index 6753cfa..dd871f6 100644 --- a/src/window.cpp +++ b/src/window.cpp @@ -289,7 +289,7 @@ void pkvk_buffer_create(pkvk_buffer_create_data *data, pkvk_buffer_create_data_o memset(&arr_vert_mem_reqs, 0, sizeof(pk_arr)); arr_vert_mem_reqs.alignment = alignof(VkMemoryRequirements); arr_vert_mem_reqs.stride = sizeof(VkMemoryRequirements); - arr_vert_mem_reqs.bkt = pkeSettings.mem.bkt; + arr_vert_mem_reqs.bkt = pkeSettings.mem_bkt.game_transient; /* * buffer setup @@ -636,7 +636,7 @@ unsigned int FindQueueFamilyIndex(VkPhysicalDevice device, char hasPresentSuppor unsigned int queueFamilyPropertyCount = 0U; vkGetPhysicalDeviceQueueFamilyProperties(device, &queueFamilyPropertyCount, nullptr); - auto *queueFamilyProperties = pk_new<VkQueueFamilyProperties>(queueFamilyPropertyCount, pkeSettings.mem.bkt); + auto *queueFamilyProperties = pk_new<VkQueueFamilyProperties>(queueFamilyPropertyCount, pkeSettings.mem_bkt.game_transient); vkGetPhysicalDeviceQueueFamilyProperties(device, &queueFamilyPropertyCount, queueFamilyProperties); for (unsigned int i = 0; i < queueFamilyPropertyCount; i++) { @@ -761,7 +761,7 @@ void InitVulkan() { unsigned int layerCount; vkResult = vkEnumerateInstanceLayerProperties(&layerCount, nullptr); assert(vkResult == VK_SUCCESS); - VkLayerProperties *availableLayerProperties = pk_new<VkLayerProperties>(layerCount, pkeSettings.mem.bkt); + VkLayerProperties *availableLayerProperties = pk_new<VkLayerProperties>(layerCount, pkeSettings.mem_bkt.game_transient); vkResult = vkEnumerateInstanceLayerProperties(&layerCount, availableLayerProperties); assert(vkResult == VK_SUCCESS); printf("Available Layers:\n"); @@ -837,7 +837,7 @@ void InitVulkan() { unsigned int extensionCount = 0; vkResult = vkEnumerateInstanceExtensionProperties(nullptr, &extensionCount, nullptr); assert(vkResult == VK_SUCCESS); - auto *extensions = pk_new<VkExtensionProperties>(extensionCount, pkeSettings.mem.bkt); + auto *extensions = pk_new<VkExtensionProperties>(extensionCount, pkeSettings.mem_bkt.game_transient); vkResult = vkEnumerateInstanceExtensionProperties(nullptr, &extensionCount, extensions); assert(vkResult == VK_SUCCESS); printf("Available Extensions:\n"); @@ -883,7 +883,7 @@ void InitVulkan() { vkResult = vkEnumeratePhysicalDevices(vkInstance, &physicalDeviceCount, nullptr); assert(vkResult == VK_SUCCESS); assert(physicalDeviceCount > 0); - auto *physicalDevices = pk_new<VkPhysicalDevice>(physicalDeviceCount, pkeSettings.mem.bkt); + auto *physicalDevices = pk_new<VkPhysicalDevice>(physicalDeviceCount, pkeSettings.mem_bkt.game_transient); vkResult = vkEnumeratePhysicalDevices(vkInstance, &physicalDeviceCount, physicalDevices); assert(vkResult == VK_SUCCESS); graphicsFamilyIndex = 0; @@ -903,7 +903,7 @@ void InitVulkan() { unsigned int extensionCount = 0; vkResult = vkEnumerateDeviceExtensionProperties(device, nullptr, &extensionCount, nullptr); assert(vkResult == VK_SUCCESS); - auto *extensionProperties = pk_new<VkExtensionProperties>(extensionCount, pkeSettings.mem.bkt); + auto *extensionProperties = pk_new<VkExtensionProperties>(extensionCount, pkeSettings.mem_bkt.game_transient); vkResult = vkEnumerateDeviceExtensionProperties(device, nullptr, &extensionCount, extensionProperties); assert(vkResult == VK_SUCCESS); for (long k = 0; k < extensionCount; ++k) { @@ -1160,7 +1160,7 @@ void CreateSwapchain() { unsigned int surfaceFormatCounts; vkResult = vkGetPhysicalDeviceSurfaceFormatsKHR(vkPhysicalDevice, vkSurfaceKHR, &surfaceFormatCounts, nullptr); assert(vkResult == VK_SUCCESS); - VkSurfaceFormatKHR *surfaceFormats = pk_new<VkSurfaceFormatKHR>(surfaceFormatCounts, pkeSettings.mem.bkt); + VkSurfaceFormatKHR *surfaceFormats = pk_new<VkSurfaceFormatKHR>(surfaceFormatCounts, pkeSettings.mem_bkt.game_transient); vkResult = vkGetPhysicalDeviceSurfaceFormatsKHR(vkPhysicalDevice, vkSurfaceKHR, &surfaceFormatCounts, surfaceFormats); assert(vkResult == VK_SUCCESS); selectedSurfaceIndex = 0; @@ -1186,7 +1186,7 @@ void CreateSwapchain() { unsigned int presentModeCount = 0; vkResult = vkGetPhysicalDeviceSurfacePresentModesKHR(vkPhysicalDevice, vkSurfaceKHR, &presentModeCount, nullptr); assert(vkResult == VK_SUCCESS); - VkPresentModeKHR *presentModes = pk_new<VkPresentModeKHR>(presentModeCount, pkeSettings.mem.bkt); + VkPresentModeKHR *presentModes = pk_new<VkPresentModeKHR>(presentModeCount, pkeSettings.mem_bkt.game_transient); vkResult = vkGetPhysicalDeviceSurfacePresentModesKHR(vkPhysicalDevice, vkSurfaceKHR, &presentModeCount, presentModes); assert(vkResult == VK_SUCCESS); vkPresentModeKHR = PickPresentMode(presentModes, presentModeCount); @@ -1335,8 +1335,8 @@ void CreateSwapchain() { } void UpdatePresentDescriptorSets() { - VkWriteDescriptorSet *writeDescriptorSets = pk_new<VkWriteDescriptorSet>(swapchainLength * 2, pkeSettings.mem.bkt); - VkDescriptorImageInfo *descriptorImageInfo = pk_new<VkDescriptorImageInfo>(swapchainLength * 2, pkeSettings.mem.bkt); + VkWriteDescriptorSet *writeDescriptorSets = pk_new<VkWriteDescriptorSet>(swapchainLength * 2, pkeSettings.mem_bkt.game_transient); + VkDescriptorImageInfo *descriptorImageInfo = pk_new<VkDescriptorImageInfo>(swapchainLength * 2, pkeSettings.mem_bkt.game_transient); for (long i = 0; i < swapchainLength * 2; ++i) { writeDescriptorSets[i].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; writeDescriptorSets[i].pNext = nullptr; @@ -1725,7 +1725,7 @@ void CreatePresentPipeline() { vkResult = vkCreateDescriptorPool(vkDevice, &descriptorPoolCreateInfo, vkAllocator, &pkvk_present.descriptor_pool); assert(vkResult == VK_SUCCESS); - VkDescriptorSetLayout *setLayouts = pk_new<VkDescriptorSetLayout>(swapchainLength * 2, pkeSettings.mem.bkt); + VkDescriptorSetLayout *setLayouts = pk_new<VkDescriptorSetLayout>(swapchainLength * 2, pkeSettings.mem_bkt.game_transient); for (long i = 0; i < swapchainLength * 2; ++i) { setLayouts[i] = pkvk_present.descriptor_set_layout; } @@ -2973,7 +2973,7 @@ void CreateDebugDescriptors() { auto vkResult = vkCreateDescriptorPool(vkDevice, &vkDescriptorPoolCreateInfo, vkAllocator, &pkeDebugHitbox.vkDescriptorPool); assert(vkResult == VK_SUCCESS); - VkDescriptorSetLayout *descriptorSets = pk_new<VkDescriptorSetLayout>(swapchainLength, pkeSettings.mem.bkt); + VkDescriptorSetLayout *descriptorSets = pk_new<VkDescriptorSetLayout>(swapchainLength, pkeSettings.mem_bkt.game_transient); for (long i = 0; i < swapchainLength; ++i) { descriptorSets[i] = pkePipelines.descr_layouts.named.ubo_txtr; } @@ -2994,7 +2994,7 @@ void CreateDebugDescriptors() { } void UpdateDebugDescriptors() { - VkWriteDescriptorSet *writeDescriptorSets = pk_new<VkWriteDescriptorSet>(2 * swapchainLength, pkeSettings.mem.bkt); + VkWriteDescriptorSet *writeDescriptorSets = pk_new<VkWriteDescriptorSet>(2 * swapchainLength, pkeSettings.mem_bkt.game_transient); for (long i = 0; i < 2 * swapchainLength; ++i) { writeDescriptorSets[i].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; writeDescriptorSets[i].pNext = nullptr; @@ -3014,7 +3014,7 @@ void UpdateDebugDescriptors() { textureDescriptorInfo.imageView = pkeDebugHitbox.vkImageView; textureDescriptorInfo.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - VkDescriptorBufferInfo *vkDescriptorBufferInfo = pk_new<VkDescriptorBufferInfo>(swapchainLength, pkeSettings.mem.bkt); + VkDescriptorBufferInfo *vkDescriptorBufferInfo = pk_new<VkDescriptorBufferInfo>(swapchainLength, pkeSettings.mem_bkt.game_transient); for (long i = 0; i < swapchainLength; ++i) { vkDescriptorBufferInfo[i].buffer = UniformBuffers[i]; @@ -3547,7 +3547,7 @@ void CreateWindow(PKEWindowProperties wp) { if (vkInstance != nullptr) return; pke_ev_mgr_id_window = pk_ev_create_mgr(); pke_ev_id_framebuffer_length_changed = pk_ev_register_ev(pke_ev_mgr_id_window, NULL); - MemBkt_Vulkan = pk_bucket_create("vulkan", PK_DEFAULT_BUCKET_SIZE, false); + MemBkt_Vulkan = pk_mem_bucket_create("vulkan", PK_MEM_DEFAULT_BUCKET_SIZE, PK_MEMBUCKET_FLAG_NONE); vulkanAllocs.bkt = MemBkt_Vulkan; pk_arr_reserve(&vulkanAllocs, 2048); glfwInit(); @@ -3652,7 +3652,7 @@ void DestroyWindow() { pk_arr_reset(&vulkanAllocs); // TODO there's un-freed vulkan stuff in the bucket // comment this out to see it in the debug printout - pk_bucket_destroy(MemBkt_Vulkan); + pk_mem_bucket_destroy(MemBkt_Vulkan); } VkShaderModule UploadShader(AssetHandle handle) { |
