1#include "../include/cache.h"
3#include "../include/align.h"
4#include "../include/aligned_alloc.h"
5#include "../include/spinlock.h"
13#if defined(__linux__) && defined(MADV_HUGEPAGE)
17#define CACHE_LINE_SIZE 64
18#define INITIAL_BUCKET_MULTIPLIER 2
19#define CACHE_FILE_MAGIC 0x45484346
20#define CACHE_FILE_VERSION 1
30#if defined(__GNUC__) || defined(__clang__)
31#define likely(x) __builtin_expect(!!(x), 1)
32#define unlikely(x) __builtin_expect(!!(x), 0)
35#define unlikely(x) (x)
/*
 * Cache entry header (fragment — several fields are elided in this view:
 * hash, key_len, value_len, expires_at, ref_count are referenced elsewhere).
 * Cache-line aligned.  The trailing `data` area holds the key bytes, a NUL,
 * a back-pointer to the entry, then the value bytes (see value_ptr_from_entry).
 */
49typedef struct ALIGN(CACHE_LINE_SIZE) {
/* Second-chance bit for CLOCK eviction: set on cache_get hit, cleared on sweep. */
51 _Atomic uint8_t clock_bit;
/* MSVC in C mode lacks flexible array members; fall back to a 1-byte array. */
57#if defined(_MSC_VER) && !defined(__cplusplus)
58 unsigned char data[1];
/*
 * Per-shard state (fragment — slots, bucket_count, size, capacity,
 * clock_hand and the lock are elided in this view).  Aligned to one
 * cache line so shards never share a line (see the static assert below).
 */
95typedef struct ALIGN(CACHE_LINE_SIZE) {
/* Number of HASH_DELETED slots; triggers compact_shard past a threshold. */
100 size_t tombstone_count;
103} aligned_cache_shard_t;
/* Compile-time check: each shard must occupy exactly one cache line so that
 * concurrent access to different shards cannot cause false sharing. */
109_Static_assert(
sizeof(aligned_cache_shard_t) == CACHE_LINE_SIZE,
"Shard size mismatch: False sharing will occur");
/* Top-level cache object fields (fragment — the struct's opening and any
 * other members are elided in this view). */
112 aligned_cache_shard_t shards[CACHE_SHARD_COUNT];
/* TTL (seconds) applied by cache_set when the caller passes ttl == 0. */
113 uint32_t default_ttl;
/*
 * Round n up to the next power of two.
 * Returns n unchanged when it is already a power of two, and 1 for n == 0
 * so callers always receive a valid non-zero bucket count.
 * (The visible source was truncated after the early-return; the standard
 * bit-smearing completion is restored here, plus the n == 0 guard.)
 */
static inline size_t next_power_of_2(size_t n) {
    if (n && !(n & (n - 1)))
        return n;               /* already a power of two */
    if (n == 0)
        return 1;               /* avoid the n-- underflow path */
    n--;
    n |= n >> 1;
    n |= n >> 2;
    n |= n >> 4;
    n |= n >> 8;
    n |= n >> 16;
#if SIZE_MAX > 0xFFFFFFFFu
    n |= n >> 32;               /* 64-bit size_t */
#endif
    return n + 1;
}
135static inline uint32_t hash_key(
const char* key,
size_t len) {
136 uint32_t hash = 2166136261u;
137 for (
size_t i = 0; i < len; i++) {
138 hash ^= (uint32_t)(
unsigned char)key[i];
143 if (unlikely(hash < HASH_MIN_VAL))
return hash + HASH_MIN_VAL;
151static inline size_t get_shard_idx(uint32_t hash) {
153 return hash & (CACHE_SHARD_COUNT - 1);
/* Pack slot metadata: key hash in the high 32 bits, key length in the low 32. */
static inline uint64_t pack_meta(uint32_t hash, uint32_t len) {
    uint64_t packed = (uint64_t)hash << 32;
    packed |= (uint64_t)len;
    return packed;
}
/* Extract the key hash (high 32 bits) from packed slot metadata. */
static inline uint32_t meta_hash(uint64_t meta) {
    uint64_t high = meta >> 32;
    return (uint32_t)high;
}
172static inline const void* value_ptr_from_entry(
const cache_entry_t* entry) {
173 return (
const void*)(entry->data + entry->key_len + 1 +
sizeof(cache_entry_t*));
180static inline cache_entry_t* entry_from_value(
const void* value_ptr) {
181 if (unlikely(!value_ptr))
return NULL;
182 cache_entry_t*
const* back_ptr = (cache_entry_t*
const*)((
const char*)value_ptr -
sizeof(cache_entry_t*));
191static inline void entry_ref_inc(cache_entry_t* entry) {
192 if (entry) atomic_fetch_add_explicit(&entry->ref_count, 1, memory_order_relaxed);
201static inline void entry_ref_dec(cache_entry_t* entry) {
203 int old_val = atomic_fetch_sub_explicit(&entry->ref_count, 1, memory_order_acq_rel);
204 if (unlikely(old_val == 1)) free(entry);
/*
 * Linear-probe lookup in one shard's open-addressed table (fragment —
 * interior lines are elided in this view, including where *found is
 * presumably set on match/miss; TODO confirm against the full source).
 * Returns the index of the matching slot, or of the first reusable slot
 * (preferring the earliest tombstone) when the key is absent.
 */
219static size_t find_slot(aligned_cache_shard_t* shard, uint32_t hash,
const char* key,
size_t klen,
bool* found) {
/* bucket_count is a power of two, so & mask implements probe wrap-around. */
220 size_t mask = shard->bucket_count - 1;
221 size_t idx = hash & mask;
222 size_t first_tombstone = SIZE_MAX;
/* Full 64-bit compare (hash + key length) filters candidates before memcmp. */
225 uint64_t target = pack_meta(hash, (uint32_t)klen);
226 cache_slot_t* slots = shard->slots;
230 for (
size_t probe_count = 0; probe_count < shard->bucket_count; probe_count++) {
231 uint64_t meta = slots[idx].metadata;
232 uint32_t h = meta_hash(meta);
/* An empty slot terminates the probe chain: the key is not present. */
234 if (h == HASH_EMPTY) {
238 return (first_tombstone != SIZE_MAX) ? first_tombstone : idx;
/* Tombstone: remember the first one so an insert can reuse it. */
241 if (h == HASH_DELETED) {
244 if (first_tombstone == SIZE_MAX) first_tombstone = idx;
245 }
else if (meta == target) {
/* Metadata matched; verify actual key bytes (stored at entry->data[0..klen)). */
250 cache_entry_t* entry = slots[idx].entry;
251 if (memcmp(entry->data, key, klen) == 0) {
257 idx = (idx + 1) & mask;
/* Whole table scanned without hitting an empty slot. */
263 return (first_tombstone != SIZE_MAX) ? first_tombstone : idx;
/*
 * Rebuild the shard's table at the same size to purge tombstones
 * (fragment — some interior lines and closing braces are elided).
 * Live entries are re-inserted by fresh linear probing into a zeroed
 * replacement array; entry pointers are moved, not re-allocated.
 * NOTE(review): old_slots is not freed in the visible lines — confirm it
 * is released (with the allocator matching ALIGNED_ALLOC) in the elided
 * lines around the swap, else every compaction leaks a slot array.
 */
279static void compact_shard(aligned_cache_shard_t* shard) {
280 size_t old_bucket_count = shard->bucket_count;
281 cache_slot_t* old_slots = shard->slots;
283 cache_slot_t* new_slots = ALIGNED_ALLOC(CACHE_LINE_SIZE, old_bucket_count *
sizeof(cache_slot_t));
/* Allocation failure: keep the old (tombstone-laden but valid) table. */
284 if (!new_slots)
return;
/* Advisory transparent-huge-page hint; guarded by the #if at file top. */
287 madvise(new_slots, old_bucket_count *
sizeof(cache_slot_t), MADV_HUGEPAGE);
289 memset(new_slots, 0, old_bucket_count *
sizeof(cache_slot_t));
291 size_t mask = old_bucket_count - 1;
292 for (
size_t i = 0; i < old_bucket_count; i++) {
293 uint64_t meta = old_slots[i].metadata;
/* Only hashes >= HASH_MIN_VAL denote live entries (below are sentinels). */
294 if (meta_hash(meta) >= HASH_MIN_VAL) {
296 uint32_t hash = meta_hash(meta);
297 size_t idx = hash & mask;
/* Fresh table has no tombstones, so probing stops at the first empty slot. */
300 while (meta_hash(new_slots[idx].metadata) != HASH_EMPTY) {
301 idx = (idx + 1) & mask;
303 new_slots[idx].metadata = meta;
304 new_slots[idx].entry = old_slots[i].entry;
308 shard->slots = new_slots;
309 shard->tombstone_count = 0;
/* Reset the CLOCK hand: slot positions changed, old hand is meaningless. */
310 shard->clock_hand = 0;
/*
 * CLOCK (second-chance) eviction for one shard (fragment — the `scanned`
 * declaration, returns, and some braces are elided in this view).
 * First sweep: an entry with its clock bit set gets a second chance (bit
 * cleared); an entry with it clear is evicted to a tombstone.  Fallback
 * sweep: force-evict the first live entry regardless of the bit.
 * Caller is expected to hold the shard's write lock.
 */
331static bool clock_evict(aligned_cache_shard_t* shard) {
333 size_t mask = shard->bucket_count - 1;
334 cache_slot_t* slots = shard->slots;
337 while (scanned < shard->bucket_count) {
338 size_t idx = shard->clock_hand;
339 shard->clock_hand = (shard->clock_hand + 1) & mask;
341 uint64_t meta = slots[idx].metadata;
/* Skip empty/tombstone slots (hash values below HASH_MIN_VAL are sentinels). */
343 if (meta_hash(meta) < HASH_MIN_VAL) {
349 cache_entry_t* entry = slots[idx].entry;
350 uint8_t bit = atomic_load_explicit(&entry->clock_bit, memory_order_relaxed);
/* Victim: tombstone the slot and drop the table's reference. */
355 slots[idx].metadata = pack_meta(HASH_DELETED, 0);
356 slots[idx].entry = NULL;
358 shard->tombstone_count++;
359 entry_ref_dec(entry);
/* Second chance: clear the bit and keep scanning. */
364 atomic_store_explicit(&entry->clock_bit, 0, memory_order_relaxed);
/* Fallback: everything had its bit set; force-evict the first live slot. */
372 for (
size_t i = 0; i < shard->bucket_count; i++) {
373 size_t idx = shard->clock_hand;
374 shard->clock_hand = (shard->clock_hand + 1) & mask;
375 if (meta_hash(slots[idx].metadata) >= HASH_MIN_VAL) {
376 cache_entry_t* entry = slots[idx].entry;
377 slots[idx].metadata = pack_meta(HASH_DELETED, 0);
378 slots[idx].entry = NULL;
380 shard->tombstone_count++;
381 entry_ref_dec(entry);
/*
 * cache_create body (fragment — the signature and several lines, including
 * NULL checks and returns, are elided in this view).  Allocates the
 * zeroed top-level struct, then sizes and allocates each shard's slot
 * table: bucket_count = next_power_of_2(per-shard capacity *
 * INITIAL_BUCKET_MULTIPLIER).
 */
392 struct cache_s* c = calloc(1,
sizeof(
struct cache_s));
/* Default total capacity when the caller passes 0. */
395 if (capacity == 0) capacity = 1000;
396 size_t shard_cap = capacity / CACHE_SHARD_COUNT;
397 if (shard_cap < 1) shard_cap = 1;
399 c->default_ttl = default_ttl ? default_ttl : CACHE_DEFAULT_TTL;
400 for (
size_t i = 0; i < CACHE_SHARD_COUNT; i++) {
401 aligned_cache_shard_t* s = &c->shards[i];
402 s->capacity = shard_cap;
/* Over-provision buckets to keep the load factor low, then round up to 2^k. */
404 size_t desired = (size_t)(shard_cap * INITIAL_BUCKET_MULTIPLIER);
405 s->bucket_count = next_power_of_2(desired);
407 s->slots = ALIGNED_ALLOC(CACHE_LINE_SIZE, s->bucket_count *
sizeof(cache_slot_t));
/* Advisory transparent-huge-page hint (Linux only; guarded at file top). */
413 madvise(s->slots, s->bucket_count *
sizeof(cache_slot_t), MADV_HUGEPAGE);
415 memset(s->slots, 0, s->bucket_count *
sizeof(cache_slot_t));
417 fast_rwlock_init(&s->lock);
/* Presumably the allocation-failure cleanup path for shards built so far —
 * TODO confirm against the elided lines. */
423 for (
size_t j = 0; j < CACHE_SHARD_COUNT; j++) {
424 aligned_cache_shard_t* s = &c->shards[j];
/*
 * cache_destroy body (fragment — the signature and trailing lines are
 * elided).  Per shard, under the write lock: drop the table's reference
 * on every live entry, free the slot array; the cache struct itself is
 * presumably freed in elided lines — TODO confirm.
 * NOTE(review): slots came from ALIGNED_ALLOC but are released with plain
 * free() here — confirm the ALIGNED_ALLOC macro is free()-compatible on
 * all targets (it is not with MSVC _aligned_malloc).
 */
435 if (!cache_ptr)
return;
436 struct cache_s* cache = (
struct cache_s*)cache_ptr;
438 for (
int i = 0; i < CACHE_SHARD_COUNT; i++) {
439 fast_rwlock_wrlock(&cache->shards[i].lock);
440 if (cache->shards[i].slots) {
441 for (
size_t j = 0; j < cache->shards[i].bucket_count; j++) {
/* Only hashes >= HASH_MIN_VAL denote occupied slots. */
442 if (meta_hash(cache->shards[i].slots[j].metadata) >= HASH_MIN_VAL) {
443 entry_ref_dec(cache->shards[i].slots[j].entry);
446 free(cache->shards[i].slots);
448 fast_rwlock_unlock_wr(&cache->shards[i].lock);
/*
 * cache_get body (fragment — the signature and several lines, including
 * the miss-path return, are elided in this view).  Read-locks the shard,
 * probes for the key, and on a hit bumps the entry's refcount before
 * unlocking — the caller owns a reference and must cache_release() it.
 * An expired hit is removed via a read→write lock upgrade with re-probe.
 */
468 if (unlikely(!cache_ptr || !key || !klen))
return NULL;
470 struct cache_s* cache = (
struct cache_s*)cache_ptr;
471 uint32_t hash = hash_key(key, klen);
472 aligned_cache_shard_t* shard = &cache->shards[get_shard_idx(hash)];
474 fast_rwlock_rdlock(&shard->lock);
477 size_t idx = find_slot(shard, hash, key, klen, &found);
/* Miss path (fragment): unlock and presumably return NULL in elided lines. */
480 fast_rwlock_unlock_rd(&shard->lock);
484 cache_slot_t* slot = &shard->slots[idx];
485 cache_entry_t* entry = slot->entry;
487 time_t now = time(NULL);
/* Lazy expiry: the read lock cannot be upgraded in place, so drop it,
 * take the write lock, and re-probe — the table may have changed between
 * the two locks. */
490 if (unlikely(now >= entry->expires_at)) {
491 fast_rwlock_unlock_rd(&shard->lock);
497 fast_rwlock_wrlock(&shard->lock);
499 size_t re_idx = find_slot(shard, hash, key, klen, &re_found);
501 cache_entry_t* re_entry = shard->slots[re_idx].entry;
/* Re-check expiry on the re-probed entry before tombstoning it. */
504 if (now >= re_entry->expires_at) {
505 shard->slots[re_idx].metadata = pack_meta(HASH_DELETED, 0);
506 shard->slots[re_idx].entry = NULL;
508 shard->tombstone_count++;
512 entry_ref_dec(re_entry);
515 fast_rwlock_unlock_wr(&shard->lock);
/* Hit: set the CLOCK bit, testing first to avoid a store on every hot hit. */
521 if (atomic_load_explicit(&entry->clock_bit, memory_order_relaxed) == 0) {
522 atomic_store_explicit(&entry->clock_bit, 1, memory_order_relaxed);
/* Take the caller's reference while still under the read lock. */
526 entry_ref_inc(entry);
528 if (out_len) *out_len = entry->value_len;
530 fast_rwlock_unlock_rd(&shard->lock);
531 return value_ptr_from_entry(entry);
/*
 * Insert or replace a key/value pair (fragment — several lines are elided,
 * including size bookkeeping and the final return; comments on elided
 * control flow are best-effort).
 * Entry memory layout: header | key bytes | NUL | back-pointer | value bytes.
 */
550bool cache_set(
cache_t* cache_ptr,
const char* key,
size_t klen,
const void* value,
size_t value_len, uint32_t ttl) {
551 if (unlikely(!cache_ptr || !key || !klen || !value || !value_len))
return false;
553 struct cache_s* cache = (
struct cache_s*)cache_ptr;
554 uint32_t hash = hash_key(key, klen);
555 time_t now = time(NULL);
/* Single allocation covers header + key + NUL + back-pointer + value. */
559 size_t alloc_sz = offsetof(cache_entry_t, data) + klen + 1 +
sizeof(
void*) + value_len;
560 cache_entry_t* new_entry = malloc(alloc_sz);
561 if (!new_entry)
return false;
563 new_entry->hash = hash;
564 new_entry->key_len = (uint32_t)klen;
565 new_entry->value_len = value_len;
/* ttl == 0 means "use the cache-wide default". */
566 new_entry->expires_at = now + (ttl ? ttl : cache->default_ttl);
/* One reference owned by the table; clock bit starts as "recently used". */
567 atomic_init(&new_entry->ref_count, 1);
568 atomic_init(&new_entry->clock_bit, 1);
571 unsigned char* ptr = new_entry->data;
572 memcpy(ptr, key, klen);
/* Back-pointer lets cache_release recover the entry from a value pointer.
 * NOTE(review): ptr + klen + 1 is not pointer-aligned in general — confirm
 * the target platforms tolerate this unaligned store. */
575 *(cache_entry_t**)(ptr + klen + 1) = new_entry;
576 memcpy(ptr + klen + 1 +
sizeof(cache_entry_t*), value, value_len);
578 aligned_cache_shard_t* shard = &cache->shards[get_shard_idx(hash)];
579 fast_rwlock_wrlock(&shard->lock);
/* Compact before probing when tombstones dominate the table. */
586 if (unlikely(shard->tombstone_count > (shard->bucket_count * 3) / 4)) {
587 compact_shard(shard);
591 size_t idx = find_slot(shard, hash, key, klen, &found);
/* Replace path (fragment): swap the entry in place; the old table
 * reference is presumably dropped in elided lines — TODO confirm. */
595 cache_entry_t* old = shard->slots[idx].entry;
596 shard->slots[idx].entry = new_entry;
/* Insert path: make room via CLOCK eviction when the shard is full. */
600 if (shard->size >= shard->capacity) {
604 if (!clock_evict(shard)) {
/* NOTE(review): eviction-failure path — confirm new_entry is freed
 * before this unlock in the elided lines, else it leaks. */
608 fast_rwlock_unlock_wr(&shard->lock);
/* Reusing a tombstone consumes one from the count. */
615 if (meta_hash(shard->slots[idx].metadata) == HASH_DELETED) {
616 shard->tombstone_count--;
618 shard->slots[idx].entry = new_entry;
619 shard->slots[idx].metadata = pack_meta(hash, (uint32_t)klen);
623 fast_rwlock_unlock_wr(&shard->lock);
/*
 * cache_invalidate body (fragment — the signature and the not-found branch
 * are elided).  Key length comes from strlen, so this entry point expects
 * a NUL-terminated key.  Tombstones the slot, drops the table's reference,
 * and compacts when tombstones dominate.
 */
631 if (!cache_ptr || !key)
return;
632 struct cache_s* cache = (
struct cache_s*)cache_ptr;
633 size_t klen = strlen(key);
634 uint32_t hash = hash_key(key, klen);
635 aligned_cache_shard_t* shard = &cache->shards[get_shard_idx(hash)];
637 fast_rwlock_wrlock(&shard->lock);
639 size_t idx = find_slot(shard, hash, key, klen, &found);
642 cache_entry_t* entry = shard->slots[idx].entry;
/* Tombstone the slot before releasing the reference. */
643 shard->slots[idx].metadata = pack_meta(HASH_DELETED, 0);
644 shard->slots[idx].entry = NULL;
646 shard->tombstone_count++;
649 if (unlikely(shard->tombstone_count > (shard->bucket_count * 3) / 4)) {
650 compact_shard(shard);
652 entry_ref_dec(entry);
654 fast_rwlock_unlock_wr(&shard->lock);
/* cache_release body (fragment — signature elided): recover the entry
 * behind the value pointer and drop the caller's reference.
 * NOTE(review): entry_from_value(NULL) yields NULL — confirm entry_ref_dec
 * tolerates a NULL argument. */
663 entry_ref_dec(entry_from_value(ptr));
/*
 * cache_clear body (fragment — signature elided).  Per shard, under the
 * write lock: drop every live entry's table reference, zero the whole
 * slot array, and reset size/tombstone/clock-hand bookkeeping.
 */
670 if (!cache_ptr)
return;
671 struct cache_s* cache = (
struct cache_s*)cache_ptr;
673 for (
int i = 0; i < CACHE_SHARD_COUNT; i++) {
674 fast_rwlock_wrlock(&cache->shards[i].lock);
675 for (
size_t j = 0; j < cache->shards[i].bucket_count; j++) {
/* Only hashes >= HASH_MIN_VAL denote occupied slots. */
676 if (meta_hash(cache->shards[i].slots[j].metadata) >= HASH_MIN_VAL) {
677 entry_ref_dec(cache->shards[i].slots[j].entry);
/* Zeroing the array makes every slot HASH_EMPTY again. */
680 memset(cache->shards[i].slots, 0, cache->shards[i].bucket_count *
sizeof(cache_slot_t));
681 cache->shards[i].size = 0;
682 cache->shards[i].tombstone_count = 0;
683 cache->shards[i].clock_hand = 0;
684 fast_rwlock_unlock_wr(&cache->shards[i].lock);
/* get_total_cache_size body (fragment — the signature and the `total`
 * declaration/return are elided): sum per-shard entry counts, taking each
 * shard's read lock in turn.  Shards are not locked simultaneously, so
 * the result is a point-in-time approximation under concurrent writes. */
694 if (!cache_ptr)
return 0;
695 struct cache_s* cache = (
struct cache_s*)cache_ptr;
697 for (
int i = 0; i < CACHE_SHARD_COUNT; i++) {
698 fast_rwlock_rdlock(&cache->shards[i].lock);
699 total += cache->shards[i].size;
700 fast_rwlock_unlock_rd(&cache->shards[i].lock);
/* get_total_capacity body (fragment — the signature and the `total`
 * declaration/return are elided): sum per-shard capacities under each
 * shard's read lock.  Capacity is fixed at creation, so the lock here
 * mirrors the sibling size accessor's style rather than guarding change. */
711 if (!cache_ptr)
return 0;
712 struct cache_s* cache = (
struct cache_s*)cache_ptr;
714 for (
int i = 0; i < CACHE_SHARD_COUNT; i++) {
715 fast_rwlock_rdlock(&cache->shards[i].lock);
716 total += cache->shards[i].capacity;
717 fast_rwlock_unlock_rd(&cache->shards[i].lock);
/* Checked fwrite wrapper: true iff all `count` items were written. */
static inline bool file_write_chk(const void* ptr, size_t size, size_t count,
                                  FILE* stream) {
    size_t written = fwrite(ptr, size, count, stream);
    return written == count;
}
/* Checked fread wrapper: true iff all `count` items were read. */
static inline bool file_read_chk(void* ptr, size_t size, size_t count,
                                 FILE* stream) {
    size_t read_items = fread(ptr, size, count, stream);
    return read_items == count;
}
/*
 * cache_save body (fragment — the signature, error cleanup, and trailing
 * lines are elided).  On-disk format: magic(u32), version(u32),
 * entry count(u64, patched at the end), then per entry: key_len(u32),
 * value_len(u64), expiry(i64), key bytes, value bytes.  Fields are written
 * in host byte order.  Expired or empty slots are skipped; each shard is
 * serialized under its read lock.
 */
753 if (!cache_ptr || !filename)
return false;
754 struct cache_s* cache = (
struct cache_s*)cache_ptr;
756 FILE* f = fopen(filename,
"wb");
757 if (!f)
return false;
759 uint32_t magic = CACHE_FILE_MAGIC;
760 uint32_t version = CACHE_FILE_VERSION;
/* Placeholder; the real count is seeked back over and rewritten below. */
761 uint64_t total_entries = 0;
763 if (!file_write_chk(&magic,
sizeof(magic), 1, f) || !file_write_chk(&version,
sizeof(version), 1, f) ||
764 !file_write_chk(&total_entries,
sizeof(total_entries), 1, f)) {
769 uint64_t actual_count = 0;
770 time_t now = time(NULL);
772 for (
int i = 0; i < CACHE_SHARD_COUNT; i++) {
773 aligned_cache_shard_t* shard = &cache->shards[i];
775 fast_rwlock_rdlock(&shard->lock);
777 for (
size_t j = 0; j < shard->bucket_count; j++) {
778 cache_slot_t* slot = &shard->slots[j];
/* Skip empty/tombstone slots (hashes below HASH_MIN_VAL are sentinels). */
780 if (meta_hash(slot->metadata) < HASH_MIN_VAL)
continue;
/* Skip entries already expired at snapshot time. */
782 cache_entry_t* entry = slot->entry;
783 if (!entry || entry->expires_at <= now)
continue;
785 uint32_t klen = entry->key_len;
786 uint64_t vlen = (uint64_t)entry->value_len;
787 int64_t expiry = (int64_t)entry->expires_at;
789 const void* val_ptr = value_ptr_from_entry(entry);
790 if (!val_ptr)
continue;
792 if (!file_write_chk(&klen,
sizeof(klen), 1, f) || !file_write_chk(&vlen,
sizeof(vlen), 1, f) ||
793 !file_write_chk(&expiry,
sizeof(expiry), 1, f) || !file_write_chk(entry->data, 1, klen, f) ||
794 !file_write_chk(val_ptr, 1, (
size_t)vlen, f)) {
/* Write failure mid-record: unlock and bail (cleanup in elided lines). */
795 fast_rwlock_unlock_rd(&shard->lock);
803 fast_rwlock_unlock_rd(&shard->lock);
/* Patch the real entry count into the header slot after magic+version.
 * NOTE(review): fseek's return value is not checked here. */
807 fseek(f,
sizeof(magic) +
sizeof(version), SEEK_SET);
808 if (!file_write_chk(&actual_count,
sizeof(actual_count), 1, f)) {
/*
 * cache_load body (fragment — the signature, `magic`/`key_buf`/per-record
 * variable declarations, and error-path lines are elided).  Validates the
 * header, then replays each stored record through cache_set with its
 * remaining TTL; records whose expiry has already passed are skipped.
 */
829 if (!cache_ptr || !filename)
return false;
831 FILE* f = fopen(filename,
"rb");
832 if (!f)
return false;
835 uint32_t version = 0;
836 uint64_t stored_count = 0;
838 if (!file_read_chk(&magic,
sizeof(magic), 1, f) || !file_read_chk(&version,
sizeof(version), 1, f) ||
839 !file_read_chk(&stored_count,
sizeof(stored_count), 1, f)) {
/* Reject files from other producers or other format versions. */
844 if (magic != CACHE_FILE_MAGIC || version != CACHE_FILE_VERSION) {
/* Reused scratch buffers: stack key_buf with heap spill-over, plus a
 * growable value buffer. */
850 char* big_key_buf = NULL;
851 void* val_buf = NULL;
852 size_t val_buf_cap = 0;
853 time_t now = time(NULL);
857 for (uint64_t i = 0; i < stored_count; i++) {
862 if (!file_read_chk(&klen,
sizeof(klen), 1, f) || !file_read_chk(&vlen,
sizeof(vlen), 1, f) ||
863 !file_read_chk(&expiry,
sizeof(expiry), 1, f)) {
869 char* kptr = key_buf;
/* Key too big for the stack buffer: spill to the heap.
 * NOTE(review): assigning realloc's result straight back to big_key_buf
 * loses the old block if realloc fails — should use a temporary, as the
 * value-buffer growth below does. */
870 if (klen >=
sizeof(key_buf)) {
871 big_key_buf = realloc(big_key_buf, klen + 1);
/* Value buffer growth uses the safe temp-pointer realloc idiom. */
880 if (vlen > val_buf_cap) {
881 void* tmp = realloc(val_buf, vlen);
890 if (!file_read_chk(kptr, 1, klen, f) || !file_read_chk(val_buf, 1, (
size_t)vlen, f)) {
/* Only reinsert records that have not yet expired. */
898 if ((time_t)expiry > now) {
900 uint32_t ttl = (uint32_t)((time_t)expiry - now);
902 if (!
cache_set(cache_ptr, kptr, klen, val_buf, (
size_t)vlen, ttl)) {
/*
 * Extraction residue: public API signature index (kept for reference,
 * converted to a comment so it cannot be mistaken for declarations).
 *
 *   bool cache_load(cache_t *cache_ptr, const char *filename)
 *   void cache_clear(cache_t *cache)
 *   void cache_release(const void *ptr)
 *   void cache_destroy(cache_t *cache)
 *   size_t get_total_cache_size(cache_t *cache)
 *   bool cache_set(cache_t *cache, const char *key, size_t key_len, const void *value, size_t value_len, uint32_t ttl_override)
 *   bool cache_save(cache_t *cache_ptr, const char *filename)
 *   const void *cache_get(cache_t *cache, const char *key, size_t key_len, size_t *out_len)
 *   size_t get_total_capacity(cache_t *cache)
 *   cache_t *cache_create(size_t capacity, uint32_t default_ttl)
 *   void cache_invalidate(cache_t *cache, const char *key)
 *   (Reader-writer spinlock structure — see spinlock.h.)
 */