Diffstat (limited to 'pkmem.h')
-rw-r--r--   pkmem.h | 732
1 file changed, 340 insertions, 392 deletions
@@ -16,12 +16,13 @@ # define PK_MAXIMUM_ALIGNMENT 64 #endif -struct pk_membucket* pk_bucket_create(const char* description, int64_t sz, bool transient); +size_t pk_mem_calculate_bkt_size(size_t sz, size_t reserved_block_count); +struct pk_membucket* pk_bucket_create(const char* description, int64_t sz, enum PK_MEMBUCKET_FLAGS flags); void pk_bucket_destroy(struct pk_membucket* bkt); void pk_bucket_reset(struct pk_membucket* bkt); +void pk_bucket_set_client_bucket(struct pk_membucket *bkt); -void pk_memory_debug_print(); -void pk_memory_flush(); +void pk_memory_debug_print(struct pk_membucket *bkt); void pk_memory_teardown_all(); bool pk_memory_is_in_bucket(const void* ptr, const struct pk_membucket* bkt); @@ -41,9 +42,9 @@ static inline void stupid_header_warnings_cpp() { (void)std::is_const<void>::val template <typename T> inline T* -pk_new(pk_membucket* bucket = nullptr) +pk_new(pk_membucket* bucket = NULL) { - void* ptr = nullptr; + void* ptr = NULL; if (bucket) { ptr = pk_new_bkt(sizeof(T), alignof(T), bucket); } else { @@ -57,15 +58,15 @@ pk_new(pk_membucket* bucket = nullptr) template <typename T> inline T* -pk_new(long count, pk_membucket* bucket = nullptr) +pk_new(long count, pk_membucket* bucket = NULL) { - char* ptr = nullptr; + char* ptr = NULL; if (bucket) { ptr = static_cast<char*>(pk_new_bkt(sizeof(T) * count, alignof(T), bucket)); } else { ptr = static_cast<char*>(pk_new_base(sizeof(T) * count, alignof(T))); } - if (ptr == nullptr) return nullptr; + if (ptr == NULL) return NULL; if IS_CONSTRUCTIBLE(T) { for (long i = 0; i < count; ++i) { new (ptr + (i * sizeof(T))) T{}; @@ -76,7 +77,7 @@ pk_new(long count, pk_membucket* bucket = nullptr) template <typename T> inline void -pk_delete(const T* ptr, pk_membucket* bucket = nullptr) +pk_delete(const T* ptr, pk_membucket* bucket = NULL) { if IS_DESTRUCTIBLE(T) { reinterpret_cast<const T*>(ptr)->~T(); @@ -90,7 +91,7 @@ pk_delete(const T* ptr, pk_membucket* bucket = nullptr) template <typename T> inline void -pk_delete(const T* ptr, long count, pk_membucket* bucket = nullptr) +pk_delete(const T* ptr, long count, pk_membucket* bucket = NULL) { if IS_DESTRUCTIBLE(T) { for (long i = 0; i < count; ++i) { @@ -118,311 +119,290 @@ pk_delete(const T* ptr, long count, pk_membucket* bucket = nullptr) static inline void pkmem_stupid_header_warnings() { (void)stdout; } #if defined(PK_MEMORY_DEBUGGER) -/* - * Note that certain aspects of this expect that you only have one non-transient bucket. - * If you need to track multiple non-transient buckets, these sections will need a refactor. - */ #endif -#ifndef PK_MAX_BUCKET_COUNT -# define PK_MAX_BUCKET_COUNT 8 -#endif +#define EXPECTED_PK_MEMBLOCK_SIZE 128 + +#define pk_memblock_blocks_idx(bkt, idx) ((bkt->block_capacity-1)-(idx)) +#define pk_bkt_data(bkt) ((char*)bkt + EXPECTED_PK_MEMBLOCK_SIZE) +#define pk_bkt_head(bkt) ((&bkt->data[0]) + bkt->head) +#define pk_bkt_data_sz(bkt) (size_t)((char*)&bkt->blocks[0] - &bkt->data[0]) struct pk_memblock { - char* data; + union { + char* data; + void* ptr; + }; size_t size; }; struct pk_membucket { - // the total size of the bucket, `blocks+ptr` + // 00 + mtx_t mtx; + // 40 + // the total size of the bucket, struct+data size_t size; - // the current head of the bucket: byte offset from `ptr`. + // 48 + // the current head of the bucket: byte offset from `data`. 
// All currently alloc'd data is before this offset size_t head; - // amount of lost bytes in this membucket, hopefully zero - size_t lostBytes; + // 56 + uint32_t block_capacity; + // 60 + uint32_t block_head_l; + // 64 + // this should ALWAYS point to the last block containing unalloced space in bkt + uint32_t block_head_r; + // 68 // the number of active allocations from this bucket - size_t allocs; - // the index of the last empty block. - // Should always point to `pk_memblock{ .data = ptr+head, .size=size-head }` - size_t lastEmptyBlockIndex; - // number of pk_memblocks in the `*blocks` array - size_t maxBlockCount; - // ptr to an array of pk_memblock to track ALL free space between ptr and ptr+sz - struct pk_memblock* blocks; + // -should correlate to blocks that have a sz > 0 + uint32_t alloc_count; + // 72 + struct pk_memblock *blocks; + // 80 + enum PK_MEMBUCKET_FLAGS flags; + // 88 + const char *description; + // 96 +#ifdef PK_MEMORY_DEBUGGER + uint32_t debug_bkt_index; + // 100 + uint32_t debug_head_l; + // 104 + uint32_t debug_head_r; + // 108 + char padding[4+(8*1)]; + // 120 + size_t debug_block_capacity; +#else + char padding[8*4]; +#endif + // 128 // starting point for alloc'd data - union { - char* ptr; - void* raw; - }; - const char* description; - mtx_t mtx; - bool transient; + // data[] is illegal in c++ (though it works in gcc/clang, but so does this) + char data[1]; }; -static struct pk_membucket pk_buckets[PK_MAX_BUCKET_COUNT]; -static size_t pk_bucket_head = 0; +static struct pk_membucket *client_bucket = NULL; #ifdef PK_MEMORY_DEBUGGER #include <stdatomic.h> -struct pk_dbg_memblock { - struct pk_memblock blk; - struct pk_membucket *bkt; -}; -static struct pk_dbg_memblock debug_all_allocs[1024 * 1024]; -static atomic_size_t debug_alloc_head_l = 0; -static atomic_size_t debug_alloc_head_r = 0; -static atomic_bool has_init_debug = false; -static mtx_t debug_alloc_mtx; +static struct pk_memblock *debug_allocs[16]; +static size_t debug_alloc_head = 0; +static mtx_t debug_mtx; #endif +size_t +pk_mem_calculate_bkt_size(size_t sz, size_t reserved_block_count) +{ + size_t base_size = EXPECTED_PK_MEMBLOCK_SIZE + sz + (sizeof(struct pk_memblock) * reserved_block_count); + // This trick ensures that our array of pk_memblocks at the end is mem-aligned. + // We do, however, still have to do the math when setting the ptr. + // Why? the user may have strict memory requirements and didn't call this function. 
+ return base_size + (64 - (base_size % 64)); +} + bool pk_memory_is_in_bucket(const void* ptr, const struct pk_membucket* bkt) { - if (ptr >= bkt->raw && (const char*)ptr < bkt->ptr + bkt->size) return true; - return false; + return (ptr >= (void*)bkt && ptr < (void*)pk_bkt_head(bkt)); } void -pk_memory_debug_print() +pk_memory_debug_print(struct pk_membucket *bkt) { - size_t i; -#ifdef PK_MEMORY_DEBUGGER - size_t d; - size_t count = 0; - size_t dahl = debug_alloc_head_l; - size_t dahr = debug_alloc_head_r; -#endif - PK_LOGV_INF("Memory Manager printout:\nBucket count: %li\n", pk_bucket_head); - for (i = 0; i < pk_bucket_head; ++i) { - PK_LOGV_INF("- bucket #%li\n", i); - PK_LOGV_INF("\tdescription: %s\n", pk_buckets[i].description); - PK_LOGV_INF("\tsize: %li\n", pk_buckets[i].size); - PK_LOGV_INF("\thead: %li\n", pk_buckets[i].head); - PK_LOGV_INF("\tlostBytes: %li\n", pk_buckets[i].lostBytes); - PK_LOGV_INF("\tallocs: %li\n", pk_buckets[i].allocs); - PK_LOGV_INF("\tlastEmptyBlockIndex: %li\n", pk_buckets[i].lastEmptyBlockIndex); - PK_LOGV_INF("\tmaxBlockCount: %li\n", pk_buckets[i].maxBlockCount); - PK_LOGV_INF("\tblocks: %p\n", (void *)pk_buckets[i].blocks); - PK_LOGV_INF("\tptr: %p\n", (void *)pk_buckets[i].ptr); - PK_LOGV_INF("\ttransient: %i\n", pk_buckets[i].transient); + PK_LOG_INF("pk_membucket details:\n"); + PK_LOGV_INF("\tbkt: %p\n", (void *)bkt); + PK_LOGV_INF("\tdescription: %s\n", bkt->description); + PK_LOGV_INF("\tsize: %lu\n", bkt->size); + PK_LOGV_INF("\thead: %lu\n", bkt->head); + PK_LOGV_INF("\tallocs: %u\n", bkt->alloc_count); + PK_LOGV_INF("\tblock head_l: %u\n", bkt->block_head_l); + PK_LOGV_INF("\tblock head_r: %u\n", bkt->block_head_r); + PK_LOGV_INF("\tflags: %lu\n", bkt->flags); #ifdef PK_MEMORY_DEBUGGER - for (d = 0; d < dahr; ++d) { - if (debug_all_allocs[d].bkt == &pk_buckets[d] && debug_all_allocs[d].blk.size > 0) { - count += 1; - } - } - PK_LOGV_INF("\tdebug alloc count: %lu\n", count); - PK_LOGV_INF("\tdebug alloc head_l: %lu\n", dahl); - PK_LOGV_INF("\tdebug alloc head_r: %lu\n", dahr); + PK_LOGV_INF("\tdebug index: %u\n", bkt->debug_bkt_index); + PK_LOGV_INF("\tdebug alloc head_l: %u\n", bkt->debug_head_l); + PK_LOGV_INF("\tdebug alloc head_r: %u\n", bkt->debug_head_r); + PK_LOGV_INF("\tdebug cappacity: %lu\n", bkt->debug_block_capacity); #endif - } -} - -void -pk_memory_flush() -{ - for (long i = pk_bucket_head - 1; i > -1; --i) { - if (pk_buckets[i].head != 0) break; - if (pk_buckets[i].transient == true) break; - pk_bucket_head--; - if (pk_buckets[i].raw == CAFE_BABE(void)) continue; - pk_bucket_destroy(&pk_buckets[i]); - } } void pk_memory_teardown_all() { - for (int64_t i = pk_bucket_head; i > 0; --i) { - if (pk_buckets[i - 1].ptr == nullptr) continue; - if (pk_buckets[i - 1].ptr == CAFE_BABE(char)) continue; - pk_bucket_destroy(&pk_buckets[i - 1]); - } - pk_bucket_head = 0; + client_bucket = NULL; #ifdef PK_MEMORY_DEBUGGER - debug_alloc_head_l = 0; - debug_alloc_head_r = 0; - mtx_destroy(&debug_alloc_mtx); - has_init_debug = false; + mtx_lock(&debug_mtx); + for (size_t i = 0; i < debug_alloc_head; ++i) { + if (debug_allocs[i] != NULL) { + free(debug_allocs[i]); + } + debug_allocs[i] = NULL; + } + debug_alloc_head = 0; + mtx_unlock(&debug_mtx); #endif } -static int64_t -pk_bucket_create_inner(int64_t sz, bool transient, const char* description) +struct pk_membucket* +pk_bucket_create(const char* description, int64_t sz, enum PK_MEMBUCKET_FLAGS flags) { - if (pk_bucket_head >= PK_MAX_BUCKET_COUNT) return -1; -#ifdef PK_MEMORY_DEBUGGER - if 
(has_init_debug == false) { - has_init_debug = true; - mtx_init(&debug_alloc_mtx, mtx_plain); - } -#endif - int64_t blockCount = sz * 0.01; - struct pk_membucket* bkt = &pk_buckets[pk_bucket_head]; + // 512 example: + // [000-127] pk_membucket + // [128-191] 64 bytes of data LOL + // [192-511] 20 pk_memblocks (20 is worst-case, start 16, 4 per 64 bytes) + assert(sz >= 512 && "[pkmem.h] bucket too small to track allocation data"); + struct pk_membucket* bkt = (struct pk_membucket*)aligned_alloc(64, sz); + if (bkt == NULL) return NULL; + mtx_init(&bkt->mtx, mtx_plain); bkt->size = sz; bkt->head = 0; - bkt->lostBytes = 0; - bkt->allocs = 0; - bkt->lastEmptyBlockIndex = 0; - bkt->maxBlockCount = blockCount < 10 ? 10 : blockCount; - bkt->blocks = (struct pk_memblock*)malloc(sz); - mtx_init(&bkt->mtx, mtx_plain); - assert(bkt->blocks != nullptr && "failed to allocate memory"); - bkt->ptr = ((char*)(bkt->blocks)) + (sizeof(struct pk_memblock) * bkt->maxBlockCount); - size_t misalignment = (size_t)(bkt->ptr) % PK_MAXIMUM_ALIGNMENT; - if (misalignment != 0) { - size_t moreBlocks = misalignment / sizeof(struct pk_memblock); - bkt->maxBlockCount += moreBlocks; - bkt->ptr += (PK_MAXIMUM_ALIGNMENT - misalignment); - } + bkt->block_capacity = 16; + bkt->block_head_l = 0; + bkt->block_head_r = 0; + bkt->alloc_count = 0; + bkt->flags = flags; bkt->description = description; - bkt->transient = transient; - struct pk_memblock* memBlock = (struct pk_memblock*)(bkt->blocks); - memBlock->data = bkt->ptr; - memBlock->size = sz - (sizeof(struct pk_memblock) * bkt->maxBlockCount); - return pk_bucket_head++; -} + char* blocks_addr = (char*)bkt + sz - (sizeof(struct pk_memblock) * bkt->block_capacity); + blocks_addr -= (size_t)blocks_addr % 64; + bkt->blocks = (struct pk_memblock*)blocks_addr; + bkt->block_capacity = (size_t)(((char*)bkt + sz) - blocks_addr) / sizeof(struct pk_memblock); -struct pk_membucket* -pk_bucket_create(const char* description, int64_t sz, bool transient) -{ - int64_t bkt_index = pk_bucket_create_inner(sz, transient, description); - // TODO some of of error handling - if (bkt_index < 0) { return nullptr; } - return &pk_buckets[bkt_index]; + bkt->blocks[pk_memblock_blocks_idx(bkt,0)].size = pk_bkt_data_sz(bkt); + bkt->blocks[pk_memblock_blocks_idx(bkt,0)].ptr = pk_bkt_data(bkt); + +#ifdef PK_MEMORY_DEBUGGER + mtx_lock(&debug_mtx); + bkt->debug_bkt_index = debug_alloc_head++; + mtx_unlock(&debug_mtx); + bkt->debug_head_l = 0; + bkt->debug_head_r = 0; + bkt->debug_block_capacity = 128; + debug_allocs[bkt->debug_bkt_index] = (struct pk_memblock*)aligned_alloc(alignof(struct pk_memblock), sizeof(struct pk_memblock) * 128); +#endif + + return bkt; } void pk_bucket_destroy(struct pk_membucket* bkt) { - size_t i; - for (i = 0; i < pk_bucket_head; ++i) { - if (&pk_buckets[i] == bkt) { - if (pk_bucket_head == i + 1) - pk_bucket_head--; - break; - } - } - if (bkt->blocks != NULL && bkt->blocks != CAFE_BABE(struct pk_memblock)) free(bkt->blocks); - bkt->size = 0; - bkt->head = 0; - bkt->lostBytes = 0; - bkt->allocs = 0; - bkt->lastEmptyBlockIndex = 0; - bkt->maxBlockCount = 0; - bkt->blocks = CAFE_BABE(struct pk_memblock); - bkt->ptr = CAFE_BABE(char); - bkt->transient = false; - mtx_destroy(&bkt->mtx); + assert(bkt != NULL); #ifdef PK_MEMORY_DEBUGGER - mtx_lock(&debug_alloc_mtx); - size_t dahl = debug_alloc_head_l; - size_t dahr = debug_alloc_head_r; - size_t found_dahl = dahl; - for (i = dahr; i > 0; --i) { - if (debug_all_allocs[i-1].bkt == bkt) { - if (i < found_dahl) { - if (found_dahl == dahr) { - 
dahr = i; - } - found_dahl = i; - } - debug_all_allocs[i-1].blk.data = NULL; - debug_all_allocs[i-1].blk.size = 0u; + mtx_lock(&debug_mtx); + assert(bkt->debug_bkt_index <= debug_alloc_head); + if (debug_allocs[bkt->debug_bkt_index] != NULL) free(debug_allocs[bkt->debug_bkt_index]); + debug_allocs[bkt->debug_bkt_index] = NULL; + while (debug_alloc_head > 0) { + if (debug_allocs[debug_alloc_head-1] != NULL) { + break; } + debug_alloc_head--; } - debug_alloc_head_l = found_dahl; - debug_alloc_head_r = dahr; -#ifdef PK_MEMORY_DEBUGGER_L2 - assert(debug_alloc_head_l <= debug_alloc_head_r); -#endif - mtx_unlock(&debug_alloc_mtx); + mtx_unlock(&debug_mtx); #endif + free(bkt); } void pk_bucket_reset(struct pk_membucket* bkt) { -#ifdef PK_MEMORY_DEBUGGER - size_t i; -#endif - if (bkt->transient != true) { + if (PK_HAS_FLAG(bkt->flags, PK_MEMBUCKET_FLAG_TRANSIENT) == true) { PK_LOG_ERR("WARNING: pk_bucket_reset called on non-transient pk_membucket\n"); } bkt->head = 0; - bkt->lostBytes = 0; - bkt->allocs = 0; - bkt->lastEmptyBlockIndex = 0; - bkt->blocks->data = bkt->ptr; - bkt->blocks->size = bkt->size - (sizeof(struct pk_memblock) * bkt->maxBlockCount); + bkt->block_capacity = 16; + char* blocks_addr = (char*)bkt + bkt->size - (sizeof(struct pk_memblock) * bkt->block_capacity); + blocks_addr -= (size_t)blocks_addr % 64; + bkt->blocks = (struct pk_memblock*)blocks_addr; + bkt->block_capacity = (size_t)(((char*)bkt + bkt->size) - blocks_addr) / sizeof(struct pk_memblock); + bkt->block_head_l = 0; + bkt->block_head_r = 0; + bkt->alloc_count = 0; + + bkt->blocks[pk_memblock_blocks_idx(bkt,0)].size = pk_bkt_data_sz(bkt); + bkt->blocks[pk_memblock_blocks_idx(bkt,0)].ptr = pk_bkt_data(bkt); + #ifdef PK_MEMORY_DEBUGGER - mtx_lock(&debug_alloc_mtx); - size_t dahl = debug_alloc_head_l; - size_t dahr = debug_alloc_head_r; - size_t found_dahl = dahl; - for (i = dahr; i > 0; --i) { - if (debug_all_allocs[i-1].bkt == bkt) { - if (i < found_dahl) { - if (found_dahl == dahr) { - dahr = i; - } - found_dahl = i; - } - debug_all_allocs[i-1].blk.data = NULL; - debug_all_allocs[i-1].blk.size = 0u; - } - } - debug_alloc_head_l = found_dahl; - debug_alloc_head_r = dahr; -#ifdef PK_MEMORY_DEBUGGER_L2 - assert(debug_alloc_head_l <= debug_alloc_head_r); -#endif - mtx_unlock(&debug_alloc_mtx); + bkt->debug_head_l = 0; + bkt->debug_head_r = 0; #endif } +void pk_bucket_set_client_bucket(struct pk_membucket *bkt) { + client_bucket = bkt; +} + void pk_bucket_insert_block(struct pk_membucket* bkt, const struct pk_memblock* block) { - int64_t index = bkt->lastEmptyBlockIndex; - while (index >= 0) { - struct pk_memblock* b = &bkt->blocks[index]; - struct pk_memblock* nb = &bkt->blocks[index + 1]; - if (b->data < block->data) { + // 2025-06-03 JCB + // Note that this function should only be called if we're INSERTING. + // This means that the block will never go at the END of the list - that would be an append. + // It can, however, be placed at the beginning, in which case the entire array shifts. + + struct pk_memblock* new_block; + struct pk_memblock* old_block; + size_t i, k; + + // 1. 
resize if needed + if (bkt->block_head_r+1 == bkt->block_capacity) { + if (bkt->blocks[pk_memblock_blocks_idx(bkt, bkt->block_head_r)].size < sizeof(struct pk_memblock)) { + PK_LOG_ERR("[pkmem.h] bkt out of memory when expanding memory blocks."); + exit(1); + } + // this is all that needs done, arr can just grow like this + bkt->blocks[pk_memblock_blocks_idx(bkt, bkt->block_head_r)].size -= sizeof(struct pk_memblock); + bkt->block_capacity += 1; + bkt->blocks -= 1; + } + + // 2. move all blocks forward until we pass the pointer + // reminder that these blocks are in REVERSE order + for (i = bkt->block_head_r+1; i > 0; --i) { + k = pk_memblock_blocks_idx(bkt, i); + new_block = &bkt->blocks[k]; + old_block = new_block+1; + *new_block = *old_block; + if (old_block->data < block->data) { break; } - nb->data = b->data; - nb->size = b->size; - index -= 1; } - struct pk_memblock *b = &bkt->blocks[index + 1]; - b->data = block->data; - b->size = block->size; - bkt->lastEmptyBlockIndex += 1; + if (i == 0) { + *old_block = *block; + } else { + *new_block = *block; + } + bkt->block_head_r += 1; } void -pk_bucket_collapse_empty_blocks(struct pk_membucket* bkt) +pk_bucket_collapse_blocks(struct pk_membucket* bkt) { - size_t i, ii; - for (ii = bkt->lastEmptyBlockIndex+1; ii > 0; --ii) { - i = ii-1; - struct pk_memblock* block = &bkt->blocks[i]; - if (block->size == 0 && i == bkt->lastEmptyBlockIndex) { - block->data = nullptr; - bkt->lastEmptyBlockIndex -= 1; - continue; - } - if (block->size > 0) { - continue; - } - for (size_t k = i; k < bkt->lastEmptyBlockIndex; ++k) { - bkt->blocks[k].data = bkt->blocks[k + 1].data; - bkt->blocks[k].size = bkt->blocks[k + 1].size; - } - bkt->lastEmptyBlockIndex -= 1; + // 1. loop through from (rev_idx)0 to head_r, shifting any blocks that have size 0 + struct pk_memblock* new_block; + struct pk_memblock* old_block; + size_t i, ii, bhr; + // there's an off by one annoynce here + // start with ii = 0 + // if we start with ii = 1, we might subtract from block_head_r when nothing was shifted + for (i = 0, ii = 0, bhr = bkt->block_head_r; (i + ii) <= bhr; ++i) { + new_block = &bkt->blocks[pk_memblock_blocks_idx(bkt, i)]; + if (new_block->size > 0) continue; + do { + old_block = new_block - ii; + if (old_block->size == 0) { + ii+=1; + } else { + break; + } + } while (i + ii <= bhr); + *new_block = *old_block; + old_block->size = 0; + old_block->ptr = NULL; } + bkt->block_head_r -= ii; } void* @@ -431,107 +411,86 @@ pk_new_bkt(size_t sz, size_t alignment, struct pk_membucket* bkt) #ifdef PK_MEMORY_FORCE_MALLOC return malloc(sz); #endif - if (sz == 0) return nullptr; - if (bkt == nullptr) return nullptr; + if (sz == 0) return NULL; + if (bkt == NULL) return NULL; // TODO some type of error handling - if ((bkt->size - bkt->head) < (sz + alignment - 1)) return nullptr; - size_t i; + if ((bkt->size - bkt->head) < (sz + alignment - 1)) return NULL; + size_t i, k; size_t calculatedAlignment = alignment < PK_MINIMUM_ALIGNMENT ? 
PK_MINIMUM_ALIGNMENT : alignment; size_t misalignment = 0; - struct pk_memblock* prevBlock = nullptr; - struct pk_memblock* block = nullptr; - struct pk_memblock* nextBlock = nullptr; - void* data = nullptr; + struct pk_memblock tmp_blk; + struct pk_memblock* block = NULL; + void* data = NULL; mtx_lock(&bkt->mtx); - for (i = 0; i <= bkt->lastEmptyBlockIndex; ++i) { - struct pk_memblock* blk = &bkt->blocks[i]; - misalignment = (size_t)(blk->data) % calculatedAlignment; + + // find block + for (i = 0; i <= bkt->block_head_r; ++i) { + k = pk_memblock_blocks_idx(bkt, i); + tmp_blk = bkt->blocks[k]; + misalignment = (size_t)(tmp_blk.data) % calculatedAlignment; misalignment = (calculatedAlignment - misalignment) % calculatedAlignment; - if (blk->size >= sz + misalignment) { - block = blk; - if (i < bkt->lastEmptyBlockIndex && bkt->blocks[i + 1].data == block->data + block->size) { - nextBlock = &bkt->blocks[i + 1]; - } - if (i > 0 && i != bkt->lastEmptyBlockIndex && (bkt->blocks[i-1].data + bkt->blocks[i-1].size) == block->data) { - prevBlock = &bkt->blocks[i - 1]; - } - break; + if (tmp_blk.size < sz + misalignment) { + continue; } + block = &bkt->blocks[k]; + break; } - if (block == nullptr) { + if (block == NULL) { mtx_unlock(&bkt->mtx); - assert(block != nullptr && "memory corruption: not enough space in chosen bkt"); + assert(block != NULL && "memory corruption: not enough space in chosen bkt"); } data = block->data + misalignment; #ifdef PK_MEMORY_DEBUGGER - size_t dahr = debug_alloc_head_r; size_t ii; - if (bkt->transient == false) { - for (i = 0; i < dahr; ++i) { - if (debug_all_allocs[i].bkt != NULL) continue; - assert((debug_all_allocs[i].blk.size == 0 || (void*)(debug_all_allocs[i].blk.data) != data) && "mem address alloc'd twice!"); + if (PK_HAS_FLAG(bkt->flags, PK_MEMBUCKET_FLAG_TRANSIENT) == false) { + for (i = 0; i < bkt->debug_head_r; ++i) { + assert((debug_allocs[bkt->debug_bkt_index][i].size == 0 || (void*)(debug_allocs[bkt->debug_bkt_index][i].data) != data) && "mem address alloc'd twice!"); } - mtx_lock(&debug_alloc_mtx); - i = debug_alloc_head_l; - if (debug_alloc_head_l == debug_alloc_head_r) { - debug_alloc_head_l++; - debug_alloc_head_r++; + i = bkt->debug_head_l; + if (bkt->debug_head_l == bkt->debug_head_r) { + bkt->debug_head_l++; + bkt->debug_head_r++; } else { - for (ii = debug_alloc_head_l+1; ii <= dahr; ++ii) { - if (debug_all_allocs[ii].bkt == NULL) { - debug_alloc_head_l = ii; + for (ii = bkt->debug_head_l+1; ii <= bkt->debug_head_r; ++ii) { + if (debug_allocs[bkt->debug_bkt_index][ii].size == 0) { + bkt->debug_head_l = ii; break; } } } -#ifdef PK_MEMORY_DEBUGGER_L2 - assert(debug_alloc_head_l <= debug_alloc_head_r); -#endif - mtx_unlock(&debug_alloc_mtx); - debug_all_allocs[i].blk.data = (char*)data; - debug_all_allocs[i].blk.size = sz; - debug_all_allocs[i].bkt = bkt; + assert(bkt->debug_head_l <= bkt->debug_head_r); + debug_allocs[bkt->debug_bkt_index][i].data = (char*)data; + debug_allocs[bkt->debug_bkt_index][i].size = sz; } #endif - int64_t afterSize = block->size - (misalignment + sz); - if (block->data == bkt->ptr + bkt->head) { + if (block->data == pk_bkt_head(bkt)) { bkt->head += (sz + misalignment); } - if (afterSize > 0 && nextBlock == nullptr) { - struct pk_memblock newBlock; - memset(&newBlock, 0, sizeof(struct pk_memblock)); - newBlock.data = block->data + misalignment + sz; - newBlock.size = afterSize; - pk_bucket_insert_block(bkt, &newBlock); - } - if (prevBlock == nullptr && nextBlock == nullptr) { - block->size = misalignment; - } else if 
(nextBlock != nullptr) { - block->size = misalignment; - nextBlock->data -= afterSize; - nextBlock->size += afterSize; - } else if (prevBlock != nullptr) { - prevBlock->size += misalignment; - block->data += misalignment + sz; - block->size = 0; // if you make it here, afterSize has already been handled - } else { - assert(true == false && "this shouldn't happen"); + + tmp_blk.data = block->data; + tmp_blk.size = misalignment; + block->data += misalignment + sz; + block->size -= misalignment + sz; + + if (tmp_blk.size > 0) { + pk_bucket_insert_block(bkt, &tmp_blk); } - bkt->allocs++; - assert(data >= bkt->raw && "allocated data is before bucket data"); - assert((char*)data <= bkt->ptr + bkt->size && "allocated data is after bucket data"); - pk_bucket_collapse_empty_blocks(bkt); + pk_bucket_collapse_blocks(bkt); + + bkt->alloc_count++; + assert(data >= (void*)pk_bkt_data(bkt) && "allocated data is before bucket data"); + assert((char*)data <= pk_bkt_data(bkt) + bkt->size && "allocated data is after bucket data"); #ifdef PK_MEMORY_DEBUGGER - if (!bkt->transient) { + if (PK_HAS_FLAG(bkt->flags, PK_MEMBUCKET_FLAG_TRANSIENT) == false) { + size_t k; int64_t debug_tracked_alloc_size = 0; - int64_t debug_bucket_alloc_size = bkt->size - (sizeof(struct pk_memblock) * bkt->maxBlockCount); - size_t dahr = debug_alloc_head_r; - for (i = 0; i < dahr; ++i) { - if (debug_all_allocs[i].bkt != bkt) continue; - debug_tracked_alloc_size += debug_all_allocs[i].blk.size; + int64_t debug_bucket_alloc_size = pk_bkt_data_sz(bkt); + for (i = 0; i < bkt->debug_head_r; ++i) { + debug_tracked_alloc_size += debug_allocs[bkt->debug_bkt_index][i].size; } - for (i = 0; i <= bkt->lastEmptyBlockIndex; ++i) { - debug_bucket_alloc_size -= bkt->blocks[i].size; + for (i = 0; i <= bkt->block_head_r; ++i) { + k = pk_memblock_blocks_idx(bkt, i); + debug_bucket_alloc_size -= bkt->blocks[k].size; } assert(debug_tracked_alloc_size == debug_bucket_alloc_size && "allocation size mismatch!"); } @@ -544,23 +503,8 @@ pk_new_bkt(size_t sz, size_t alignment, struct pk_membucket* bkt) void* pk_new_base(size_t sz, size_t alignment) { - struct pk_membucket* bkt = nullptr; - for (size_t i = 0; i < pk_bucket_head; ++i) { - if (pk_buckets[i].transient == true) { - continue; - } - if (pk_buckets[i].size - pk_buckets[i].head < sz + (alignment - 1)) { - continue; - } - bkt = &pk_buckets[i]; - break; - } - if (bkt == nullptr) { - int64_t bkt_index = pk_bucket_create_inner(PK_DEFAULT_BUCKET_SIZE, false, "pk_bucket internally created"); - // TODO some of of error handling - if (bkt_index >= 0) bkt = &pk_buckets[bkt_index]; - } - return pk_new_bkt(sz, alignment, bkt); + if (client_bucket == NULL) return NULL; + return pk_new_bkt(sz, alignment, client_bucket); } void* @@ -580,102 +524,112 @@ pk_delete_bkt(const void* ptr, size_t sz, struct pk_membucket* bkt) return free((void*)ptr); #endif #endif - size_t i; + size_t i, k; mtx_lock(&bkt->mtx); - assert(bkt->allocs > 0); - assert(ptr >= bkt->raw && (char*)ptr < bkt->ptr + bkt->size && "pointer not in memory bucket range"); + assert(bkt->alloc_count > 0); + assert(pk_memory_is_in_bucket(ptr, bkt) && "pointer not in memory bucket range"); assert(sz > 0 && "attempted to free pointer of size 0"); #ifdef PK_MEMORY_DEBUGGER - bool found = bkt->transient; - struct pk_dbg_memblock* mb; - mtx_lock(&debug_alloc_mtx); + bool found = PK_HAS_FLAG(bkt->flags, PK_MEMBUCKET_FLAG_TRANSIENT); + struct pk_memblock *debug_memblocks = debug_allocs[bkt->debug_bkt_index]; + struct pk_memblock *mb; if (found == false) { - size_t 
dahr = debug_alloc_head_r; - for (i = dahr; i > 0; --i) { - mb = &debug_all_allocs[i-1]; - if (mb->bkt != bkt) continue; - if (mb->blk.size == 0) continue; - if ((void*)(mb->blk.data) == ptr) { - assert(mb->blk.size == sz && "[pkmem.h] incorrect free size"); - mb->blk.size = 0; - mb->bkt = NULL; + for (i = bkt->debug_head_r+1; i > 0; --i) { + mb = &debug_memblocks[i-1]; + if (mb->size == 0) continue; + if ((void*)(mb->ptr) == ptr) { + assert(mb->size == sz && "[pkmem.h] incorrect free size"); + mb->ptr = NULL; + mb->size = 0; found = true; - if (i <= debug_alloc_head_l) { - debug_alloc_head_l = i-1; + if (i <= bkt->debug_head_l) { + bkt->debug_head_l = i-1; } - if (i == debug_alloc_head_r) { - debug_alloc_head_r--; + if (i == bkt->debug_head_r) { + bkt->debug_head_r--; } -#ifdef PK_MEMORY_DEBUGGER_L2 - assert(debug_alloc_head_l <= debug_alloc_head_r); -#endif + assert(bkt->debug_head_l <= bkt->debug_head_r); break; } } } - mtx_unlock(&debug_alloc_mtx); assert(found && "[pkmem.h] double free or invalid ptr"); #endif - bkt->allocs--; - if (bkt->allocs == 0) { + bkt->alloc_count--; + if (bkt->alloc_count == 0) { bkt->head = 0; - bkt->lastEmptyBlockIndex = 0; - bkt->blocks[0].data = bkt->ptr; - bkt->blocks[0].size = bkt->size - (sizeof(struct pk_memblock) * bkt->maxBlockCount); + bkt->block_head_l = 0; + bkt->block_head_r = 0; + bkt->blocks[0].data = pk_bkt_data(bkt); + bkt->blocks[0].size = pk_bkt_data_sz(bkt); +#ifdef PK_MEMORY_DEBUGGER + bkt->debug_head_l = 0; + bkt->debug_head_r = 0; + debug_allocs[bkt->debug_bkt_index][0].data = NULL; + debug_allocs[bkt->debug_bkt_index][0].size = 0; +#endif mtx_unlock(&bkt->mtx); return; } char* afterPtr = ((char*)(ptr))+sz; - struct pk_memblock* beforeBlk = nullptr; - struct pk_memblock* afterBlk = nullptr; - for (i = bkt->lastEmptyBlockIndex; i > 0; --i) { - if (bkt->blocks[i-1].data + bkt->blocks[i-1].size == ptr) { - beforeBlk = &bkt->blocks[i-1]; + struct pk_memblock* tmp_blk = NULL; + struct pk_memblock* beforeBlk = NULL; + struct pk_memblock* afterBlk = NULL; + for (i = bkt->block_head_r+1; i > 0 ; --i) { + k = pk_memblock_blocks_idx(bkt, i-2); + tmp_blk = &bkt->blocks[k]; + if (tmp_blk->data + tmp_blk->size == ptr) { + beforeBlk = tmp_blk; } - if (bkt->blocks[i].data == afterPtr) { - afterBlk = &bkt->blocks[i]; + tmp_blk -= 1; + if (i <= bkt->block_head_r+1 && tmp_blk->data == afterPtr) { + afterBlk = tmp_blk; break; } - if (bkt->blocks[i-1].data < (char*)ptr) { + tmp_blk += 1; + if (tmp_blk->data < (char*)ptr) { break; } } - if (ptr == bkt->ptr && afterBlk == nullptr && bkt->blocks[0].data == afterPtr) { - afterBlk = &bkt->blocks[0]; + if (ptr == &bkt->data[0] && afterBlk == NULL && bkt->blocks[pk_memblock_blocks_idx(bkt, 0)].data == afterPtr) { + afterBlk = &bkt->blocks[pk_memblock_blocks_idx(bkt, 0)]; } - if (afterBlk != nullptr && afterBlk->data == bkt->ptr + bkt->head) { + if (afterBlk != NULL && afterBlk->data == pk_bkt_head(bkt)) { bkt->head -= sz; - if (beforeBlk != nullptr) { + if (beforeBlk != NULL) { bkt->head -= beforeBlk->size; } } - if (beforeBlk == nullptr && afterBlk == nullptr) { + if (beforeBlk == NULL && afterBlk == NULL) { struct pk_memblock newBlock; memset(&newBlock, 0, sizeof(struct pk_memblock)); newBlock.data = (char*)ptr; newBlock.size = sz; pk_bucket_insert_block(bkt, &newBlock); - } else if (beforeBlk != nullptr && afterBlk != nullptr) { + } else if (beforeBlk != NULL && afterBlk != NULL) { beforeBlk->size += sz + afterBlk->size; + if (beforeBlk->data == pk_bkt_head(bkt)) { + bkt->block_head_r--; + } afterBlk->size = 
0; - } else if (beforeBlk != nullptr) { + afterBlk->data = NULL; + } else if (beforeBlk != NULL) { beforeBlk->size += sz; - } else if (afterBlk != nullptr) { + } else if (afterBlk != NULL) { afterBlk->data -= sz; afterBlk->size += sz; } - pk_bucket_collapse_empty_blocks(bkt); + pk_bucket_collapse_blocks(bkt); #ifdef PK_MEMORY_DEBUGGER - if (!bkt->transient) { + if (PK_HAS_FLAG(bkt->flags, PK_MEMBUCKET_FLAG_TRANSIENT) == false) { int64_t debug_tracked_alloc_size = 0; - int64_t debug_bucket_alloc_size = bkt->size - (sizeof(struct pk_memblock) * bkt->maxBlockCount); - size_t dahr = debug_alloc_head_r; - for (i = 0; i < dahr; ++i) { - if (debug_all_allocs[i].bkt != bkt) continue; - debug_tracked_alloc_size += debug_all_allocs[i].blk.size; + int64_t debug_bucket_alloc_size = pk_bkt_data_sz(bkt); + for (i = 0; i < bkt->debug_head_r; ++i) { + debug_tracked_alloc_size += debug_allocs[bkt->debug_bkt_index][i].size; } - for (i = 0; i <= bkt->lastEmptyBlockIndex; ++i) { - debug_bucket_alloc_size -= bkt->blocks[i].size; + for (i = 0; i <= bkt->block_head_r; ++i) { + k = pk_memblock_blocks_idx(bkt, i); + debug_bucket_alloc_size -= bkt->blocks[k].size; } assert(debug_tracked_alloc_size == debug_bucket_alloc_size && "allocation size mismatch!"); } @@ -686,13 +640,7 @@ pk_delete_bkt(const void* ptr, size_t sz, struct pk_membucket* bkt) void pk_delete_base(const void* ptr, size_t sz) { - struct pk_membucket* bkt = nullptr; - for (size_t i = 0; i < pk_bucket_head; ++i) { - bkt = &pk_buckets[i]; - if (ptr >= bkt->raw && (char*)ptr < bkt->ptr + bkt->size) break; - } - assert(bkt != nullptr && "failed to determine correct memory bucket"); - pk_delete_bkt(ptr, sz, bkt); + pk_delete_bkt(ptr, sz, client_bucket); } void |
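Notes on the reworked allocator (not part of the diff):

The sizing arithmetic introduced by pk_mem_calculate_bkt_size is worth spelling out, since a caller can use it to size a bucket before handing the result to pk_bucket_create. Below is a minimal standalone sketch of that calculation. The helper name sketch_bkt_size, the literal 16 for sizeof(struct pk_memblock) (an LP64 assumption), and the example sizes are illustrative only; the real function in pkmem.h is the one to use.

#include <stdio.h>
#include <stddef.h>

/* Illustrative restatement of pk_mem_calculate_bkt_size() from the diff:
 * header (EXPECTED_PK_MEMBLOCK_SIZE) + requested data + reserved free-block
 * slots, padded so the pk_memblock array parked at the end of the bucket
 * stays 64-byte aligned.  Note the expression adds a full 64 bytes even when
 * base_size is already a multiple of 64, exactly as written in the header. */
static size_t
sketch_bkt_size(size_t sz, size_t reserved_block_count)
{
    const size_t header_sz = 128; /* EXPECTED_PK_MEMBLOCK_SIZE */
    const size_t block_sz  = 16;  /* assumed sizeof(struct pk_memblock) on LP64 */
    size_t base_size = header_sz + sz + (block_sz * reserved_block_count);
    return base_size + (64 - (base_size % 64));
}

int main(void)
{
    /* e.g. 4 KiB of usable space with 32 reserved free-block slots:
     * 128 + 4096 + 512 = 4736, already 64-aligned, so the formula still
     * pads up to 4800. */
    printf("%zu\n", sketch_bkt_size(4096, 32));
    return 0;
}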
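The free-block array itself sits at the tail of the bucket and is indexed in reverse through pk_memblock_blocks_idx: logical index 0 maps to the last pk_memblock slot in memory, and growing the array (see pk_bucket_insert_block) simply decrements bkt->blocks so the capacity expands toward the data region. A small sketch of that mapping, written as a plain function over the capacity so it can be run on its own; nothing here beyond the macro's expression is taken from the header.

#include <stdio.h>
#include <stdint.h>

/* Same mapping as pk_memblock_blocks_idx(bkt, idx) in the diff. */
static uint32_t
blocks_idx(uint32_t block_capacity, uint32_t idx)
{
    return (block_capacity - 1) - idx;
}

int main(void)
{
    uint32_t capacity = 16;
    /* Logical index 0 (the block covering all free space at creation time)
     * lives in the highest slot; higher logical indices walk toward the data
     * region, which is why growing the array is just "bkt->blocks -= 1". */
    for (uint32_t i = 0; i < 4; ++i)
        printf("logical %u -> slot %u\n", i, blocks_idx(capacity, i));
    return 0;
}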
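Putting the reworked entry points together, here is a minimal usage sketch. It assumes that passing 0 for the flags argument means "no flags" (the diff only names PK_MEMBUCKET_FLAG_TRANSIENT explicitly) and that pk_new_bkt and pk_delete_bkt have the signatures shown in the definitions above; the sizes and the bucket description are arbitrary.

#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include "pkmem.h"

int main(void)
{
    /* Size the bucket so the header, the data and the reserved free-block
     * slots all fit (see pk_mem_calculate_bkt_size above). */
    size_t sz = pk_mem_calculate_bkt_size(64 * 1024, 64);

    /* 0 is assumed to mean "no flags". */
    struct pk_membucket *bkt =
        pk_bucket_create("example bucket", (int64_t)sz, (enum PK_MEMBUCKET_FLAGS)0);
    assert(bkt != NULL);

    /* pk_new_base()/pk_delete_base() now route through a single registered
     * client bucket instead of searching a static bucket table. */
    pk_bucket_set_client_bucket(bkt);

    void *p = pk_new_bkt(256, 16, bkt);   /* 256 bytes, 16-byte aligned */
    assert(pk_memory_is_in_bucket(p, bkt));
    pk_delete_bkt(p, 256, bkt);

    pk_memory_debug_print(bkt);           /* now takes the bucket explicitly */
    pk_bucket_destroy(bkt);
    return 0;
}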
