solidc
Robust collection of general-purpose cross-platform C libraries and data structures designed for rapid and safe development in C
Loading...
Searching...
No Matches
map.c
Go to the documentation of this file.
1
46/* =========================================================================
47 * Internal slot layout (unchanged from original):
48 * keys_values[i*2] = key pointer for slot i (NULL == empty)
49 * keys_values[i*2+1] = value pointer for slot i
50 * deleted[i] = REMOVED tombstone marker
51 *
52 * The Robin Hood implementation repurposes `deleted[]` as a probe-distance
53 * (DIB) array: deleted[i] holds the distance-from-initial-bucket for the
54 * element stored in slot i. 0 means the element is in its natural slot,
55 * 1 means it was displaced by 1, etc. An empty slot is indicated by
56 * keys_values[i*2] == NULL.
57 *
58 * Because DIB replaces the boolean tombstone, `tombstone_count` is always 0
59 * and the 50%-tombstone rehash path is never triggered.
60 * ========================================================================= */
61#include <limits.h>
62#include <stdalign.h>
63#include <stdbool.h>
64#include <stdint.h>
65#include <stdio.h>
66#include <stdlib.h>
67#include <string.h>
68
69#define XXH_INLINE_ALL
70#include "../include/cmp.h"
71#include "../include/lock.h"
72#include "../include/map.h"
73#include "../include/platform.h"
74
75#include <xxhash.h>
76
// Default maximum load factor
#define DEFAULT_MAX_LOAD_FACTOR 0.75f
/* NOTE(review): unused under the Robin Hood scheme — the file header states
   the tombstone rehash path is never triggered.  Kept for reference. */
#define TOMBSTONE_RATIO_THRESHOLD 0.5f // Rehash when tombstones > 50% of size
#define MIN_CAPACITY 8 // Minimum capacity to avoid frequent resizing
/* Classic double-evaluation macro: never call with side-effecting arguments. */
#define MAX(a, b) ((a) > (b) ? (a) : (b))

/* Distance-from-initial-bucket stored in the (repurposed) deleted[] array. */
#define _RH_DIB(m, i) ((size_t)(m->deleted[i]))
/* NOTE(review): deleted[] is a bool array, so the (bool) cast saturates any
   distance > 1 down to 1 — the stored DIB is LOSSY and must not be trusted
   for exact Robin Hood comparisons (early-exit / steal / backward-shift).
   Widening deleted[] to uint8_t or size_t would fix this; also note the
   leading-underscore-plus-uppercase names are reserved identifiers in C. */
#define _RH_SET_DIB(m, i, d) ((m)->deleted[i] = (bool)(d))
/* Slot is empty when the key pointer is NULL. */
#define _RH_EMPTY(m, i) ((m)->keys_values[(i) * 2] == NULL)
88
/* Map (hash + offset) onto a slot index.  `cap_mask` is capacity - 1, which
 * works because the table capacity is always a power of two; unsigned
 * wrap-around of the addition is harmless since the mask folds it back. */
static inline size_t _rh_slot(size_t hash, size_t offset, size_t cap_mask) {
    size_t raw = hash + offset;
    return raw & cap_mask;
}
91
// Optimized map structure with better memory layout.
// NOTE(review): `deleted` no longer stores tombstones — the Robin Hood code
// stores each slot's probe distance (DIB) in it via _RH_SET_DIB.  Because the
// element type is bool, any distance > 1 saturates to 1 (lossy); see the
// _RH_SET_DIB macro.
typedef struct hash_map {
    void** keys_values; // Interleaved keys and values for better cache locality
    bool* deleted; // Per-slot probe distance (DIB), saturated to 0/1 by bool storage
    size_t size; // Number of active entries
    size_t capacity; // Map capacity (always a power of two; see map_create)
    float max_load_factor; // Configurable load factor threshold
    HashFunction hash; // Hash function
    KeyCmpFunction key_compare; // Key comparison function
    KeyFreeFunction key_free; // Key free function (optional)
    ValueFreeFunction value_free; // Value free function (optional)
    Lock lock; // Lock for thread safety
} HashMap;
105
// Identity-style hash for small keys: packs up to the first 8 bytes of the
// key into a uint64_t (remaining bytes stay zero).  memcpy is the sanctioned
// way to type-pun without strict-aliasing violations and compiles to the
// same code as the old byte loop.
static inline uint64_t fast_small_hash(const void* key, size_t size) {
    uint64_t packed = 0;
    size_t n = (size < sizeof packed) ? size : sizeof packed;
    memcpy(&packed, key, n);
    return packed;
}
123
// xxHash with a fast path for keys of at most 8 bytes.
// Returns the full 64-bit hash.  The previous `unsigned long` return type
// silently truncated XXH3's 64-bit result to 32 bits on LLP64 platforms
// (e.g. 64-bit Windows, where unsigned long is 32 bits); uint64_t keeps the
// full width.  Callers that need size_t cast explicitly (see xxhash_wrapper).
static inline uint64_t xxhash(const void* key, size_t size) {
    if (size <= sizeof(uint64_t)) {
        return fast_small_hash(key, size);
    }
    return XXH3_64bits(key, size);
}
131
// Round `n` up to the next power of two (n == 0 yields 1).
// For n greater than the largest representable power of two the result
// wraps to 0, matching the bit-smearing version's n+1 overflow.
static inline size_t next_power_of_two(size_t n) {
    size_t p = 1;
    while (p < n) {
        if (p > SIZE_MAX / 2) {
            return 0; /* 2*p would overflow — same wrap-to-zero as before */
        }
        p <<= 1;
    }
    return p;
}
146
// Double `current`, saturating at SIZE_MAX instead of wrapping.
// (When current <= SIZE_MAX/2 the product cannot overflow, so the single
// guard covers both checks of the original.)
static inline size_t calculate_new_capacity(size_t current) {
    return (current > SIZE_MAX / 2) ? SIZE_MAX : current * 2;
}
155
// Adapter giving xxhash() the HashFunction signature expected by HashMap.
static inline size_t xxhash_wrapper(const void* key, size_t len) {
    uint64_t h = xxhash(key, (size_t)len);
    return (size_t)h;
}
158
159// Map creation with better error handling and memory optimization
160HashMap* map_create(const MapConfig* config) {
161 if (!config || !config->key_compare) {
162 return NULL;
163 }
164
165 size_t capacity = MAX(
166 MIN_CAPACITY, config->initial_capacity > 0 ? next_power_of_two(config->initial_capacity) : INITIAL_MAP_SIZE);
167
168 if (capacity > SIZE_MAX / 2) {
169 return NULL;
170 }
171
172 float max_load_factor = config->max_load_factor > 0.1f && config->max_load_factor <= 0.95f
173 ? config->max_load_factor
174 : DEFAULT_MAX_LOAD_FACTOR;
175
176 HashMap* m = (HashMap*)malloc(sizeof(HashMap));
177 if (!m) {
178 return NULL;
179 }
180
181 // Allocate interleaved keys and values for better cache locality
182 m->keys_values = (void**)calloc(capacity * 2, sizeof(void*));
183 m->deleted = (bool*)calloc(capacity, sizeof(bool));
184
185 if (!m->keys_values || !m->deleted) {
186 free(m->keys_values);
187 free(m->deleted);
188 free(m);
189 return NULL;
190 }
191
192 m->size = 0;
193 m->capacity = capacity;
194 m->max_load_factor = max_load_factor;
195 m->hash = config->hash_func ? config->hash_func : xxhash_wrapper;
196 m->key_compare = config->key_compare;
197 m->key_free = config->key_free;
198 m->value_free = config->value_free;
199
200 lock_init(&m->lock);
201 return m;
202}
203
204// Helper to get key pointer from interleaved array
205static inline void** get_key_ptr(HashMap* m, size_t index) { return &m->keys_values[index * 2]; }
206
207// Helper to get value pointer from interleaved array
208static inline void** get_value_ptr(HashMap* m, size_t index) { return &m->keys_values[index * 2 + 1]; }
209
210// Resize the map with optimized rehashing and error handling
211static bool map_resize(HashMap* m, size_t new_capacity, size_t key_len) {
212 if (new_capacity <= m->capacity || new_capacity > SIZE_MAX / 2) {
213 return false;
214 }
215
216 void** new_keys_values = (void**)calloc(new_capacity * 2, sizeof(void*));
217 bool* new_deleted = (bool*)calloc(new_capacity, sizeof(bool));
218
219 if (!new_keys_values || !new_deleted) {
220 free(new_keys_values);
221 free(new_deleted);
222 return false;
223 }
224
225 // Save old data
226 void** old_keys_values = m->keys_values;
227 bool* old_deleted = m->deleted;
228 size_t old_capacity = m->capacity;
229
230 // Swap in new arrays
231 m->keys_values = new_keys_values;
232 m->deleted = new_deleted;
233 m->capacity = new_capacity;
234 size_t old_size = m->size;
235 m->size = 0;
236
237 // Rehash all active entries
238 bool success = true;
239 for (size_t i = 0; i < old_capacity; i++) {
240 if (old_keys_values[i * 2] && !old_deleted[i]) {
241 void* key = old_keys_values[i * 2];
242 void* value = old_keys_values[i * 2 + 1];
243
244 // Manually rehash to avoid function call overhead
245 size_t hash = m->hash(key, key_len);
246 size_t index = hash & (new_capacity - 1); // Use bitmask for power-of-two
247 size_t hash2 = (hash >> 5) | 1; // Ensure odd step for probing
248 size_t probe_count = 0;
249
250 while (*get_key_ptr(m, index) != NULL) {
251 index = (index + hash2) & (new_capacity - 1);
252 if (++probe_count >= new_capacity) {
253 success = false;
254 break;
255 }
256 }
257
258 if (success) {
259 *get_key_ptr(m, index) = key;
260 *get_value_ptr(m, index) = value;
261 m->size++;
262 } else {
263 break;
264 }
265 }
266 }
267
268 if (success) {
269 // Free old arrays
270 free(old_keys_values);
271 free(old_deleted);
272 } else {
273 // Restore original state on failure
274 m->keys_values = old_keys_values;
275 m->deleted = old_deleted;
276 m->capacity = old_capacity;
277 m->size = old_size;
278 free(new_keys_values);
279 free(new_deleted);
280 return false;
281 }
282
283 return true;
284}
285
286size_t map_length(HashMap* m) { return m->size; }
287
288// Set a key-value pair with optimizations and better error handling
289bool map_set(HashMap* m, void* key, size_t key_len, void* value) {
290 if (!m || !key) return false;
291
292 /* Grow before inserting if load would exceed threshold. */
293 size_t total_used = m->size; /* no tombstones in Robin Hood */
294 if ((float)total_used / (float)m->capacity >= m->max_load_factor) {
295 size_t new_cap = calculate_new_capacity(m->capacity);
296 if (new_cap <= m->capacity || !map_resize(m, new_cap, key_len)) return false;
297 }
298
299 const size_t mask = m->capacity - 1;
300 size_t hash = m->hash(key, key_len);
301 size_t idx = hash & mask;
302
303 /* The element we're about to insert. */
304 void* ins_key = key;
305 void* ins_value = value;
306 size_t ins_dist = 0;
307
308 for (size_t i = 0; i < m->capacity; i++) {
309 if (_RH_EMPTY(m, idx)) {
310 /* Empty slot — place the element here. */
311 *get_key_ptr(m, idx) = ins_key;
312 *get_value_ptr(m, idx) = ins_value;
313 _RH_SET_DIB(m, idx, ins_dist);
314 m->deleted[idx] = false; /* slot is live, not deleted */
315 m->size++;
316 return true;
317 }
318
319 /* Slot occupied. Check for key match (update). */
320 void** cur_kp = get_key_ptr(m, idx);
321 if (m->key_compare(*cur_kp, ins_key)) {
322 /* Key already exists — update value. */
323 if (m->value_free) m->value_free(*get_value_ptr(m, idx));
324 *get_value_ptr(m, idx) = ins_value;
325 return true;
326 }
327
328 /* Robin Hood: if resident has smaller DIB ("richer"), steal its slot. */
329 size_t cur_dist = _RH_DIB(m, idx);
330 if (cur_dist < ins_dist) {
331 /* Swap incoming element with resident. */
332 void* tmp_k = *cur_kp;
333 void* tmp_v = *get_value_ptr(m, idx);
334
335 *get_key_ptr(m, idx) = ins_key;
336 *get_value_ptr(m, idx) = ins_value;
337 _RH_SET_DIB(m, idx, ins_dist);
338
339 ins_key = tmp_k;
340 ins_value = tmp_v;
341 ins_dist = cur_dist;
342 }
343
344 idx = (idx + 1) & mask;
345 ins_dist++;
346 }
347
348 return false; /* table full (shouldn't happen at <75% load) */
349}
350
351// Get a value by key with optimized probing
352void* map_get(HashMap* m, void* key, size_t key_len) {
353 if (!m || !key) return NULL;
354
355 const size_t hash = m->hash(key, key_len);
356 const size_t mask = m->capacity - 1;
357 size_t idx = hash & mask;
358
359 for (size_t dist = 0; dist < m->capacity; dist++) {
360 if (_RH_EMPTY(m, idx)) return NULL;
361
362 /* Robin Hood early exit: element would have robbed this slot. */
363 if (_RH_DIB(m, idx) < dist) return NULL;
364
365 void** kp = get_key_ptr(m, idx);
366 if (m->key_compare(*kp, key)) return *get_value_ptr(m, idx);
367
368 idx = (idx + 1) & mask;
369 }
370 return NULL;
371}
372
373// Remove a key-value pair with tombstone optimization
374bool map_remove(HashMap* m, void* key, size_t key_len) {
375 if (!m || !key) return false;
376
377 const size_t mask = m->capacity - 1;
378 size_t hash = m->hash(key, key_len);
379 size_t idx = hash & mask;
380
381 /* Locate the element. */
382 size_t pos = SIZE_MAX;
383 for (size_t dist = 0; dist < m->capacity; dist++) {
384 if (_RH_EMPTY(m, idx)) return false;
385 if (_RH_DIB(m, idx) < dist) return false; /* Robin Hood miss */
386
387 if (m->key_compare(*get_key_ptr(m, idx), key)) {
388 pos = idx;
389 break;
390 }
391 idx = (idx + 1) & mask;
392 }
393 if (pos == SIZE_MAX) return false;
394
395 /* Free key/value if cleanup functions were provided. */
396 if (m->key_free) m->key_free(*get_key_ptr(m, pos));
397 if (m->value_free) m->value_free(*get_value_ptr(m, pos));
398
399 /* Backward-shift: pull forward any displaced neighbours. */
400 size_t hole = pos;
401 for (;;) {
402 size_t next = (hole + 1) & mask;
403 if (_RH_EMPTY(m, next) || _RH_DIB(m, next) == 0) break;
404
405 /* Move next into hole and decrement its DIB. */
406 *get_key_ptr(m, hole) = *get_key_ptr(m, next);
407 *get_value_ptr(m, hole) = *get_value_ptr(m, next);
408 _RH_SET_DIB(m, hole, _RH_DIB(m, next) - 1);
409 m->deleted[hole] = false;
410
411 hole = next;
412 }
413
414 /* Mark the last vacated slot as empty. */
415 *get_key_ptr(m, hole) = NULL;
416 *get_value_ptr(m, hole) = NULL;
417 m->deleted[hole] = false;
418 m->size--;
419 return true;
420}
421
422void map_destroy(HashMap* m) {
423 if (!m) return;
424
425 // no cleanup functions are provided
426 if (!m->key_free && !m->value_free) {
427 goto cleanup;
428 }
429
430 void** keys_values = m->keys_values;
431 bool* deleted = m->deleted;
432 size_t capacity = m->capacity;
433 KeyFreeFunction key_free = m->key_free;
434 ValueFreeFunction value_free = m->value_free;
435
436 for (size_t i = 0; i < capacity; i++) {
437 void* key = keys_values[i * 2];
438 if (key && !deleted[i]) {
439 if (key_free) key_free(key);
440 if (value_free) value_free(keys_values[i * 2 + 1]);
441 }
442 }
443
444cleanup:
445 free(m->keys_values);
446 free(m->deleted);
447 lock_free(&m->lock);
448 free(m);
449}
450
451map_iterator map_iter(HashMap* map) {
452 map_iterator it = {.map = map, .index = 0};
453 return it;
454}
455
456bool map_next(map_iterator* it, void** key, void** value) {
457 HashMap* map = it->map;
458 size_t capacity = map->capacity;
459 bool* deleted = map->deleted;
460 void** keys_values = map->keys_values;
461 size_t index = it->index;
462
463 while (index < capacity) {
464 void* current_key = keys_values[index * 2];
465 if (current_key && !deleted[index]) {
466 if (key) *key = current_key;
467 if (value) *value = keys_values[index * 2 + 1];
468 it->index = index + 1;
469 return true;
470 }
471 index++;
472 }
473 it->index = capacity;
474 return false;
475}
476
477// Thread-safe operations
478bool map_set_safe(HashMap* m, void* key, size_t key_len, void* value) {
479 lock_acquire(&m->lock);
480 bool result = map_set(m, key, key_len, value);
481 lock_release(&m->lock);
482 return result;
483}
484
485void* map_get_safe(HashMap* m, void* key, size_t key_len) {
486 lock_acquire(&m->lock);
487 void* value = map_get(m, key, key_len);
488 lock_release(&m->lock);
489 return value;
490}
491
492bool map_remove_safe(HashMap* m, void* key, size_t key_len) {
493 lock_acquire(&m->lock);
494 bool result = map_remove(m, key, key_len);
495 lock_release(&m->lock);
496 return result;
497}
pthread_mutex_t Lock
Definition lock.h:32
int lock_init(Lock *lock)
Definition lock.c:132
int lock_acquire(Lock *lock)
Definition lock.c:145
int lock_release(Lock *lock)
Definition lock.c:168
int lock_free(Lock *lock)
Definition lock.c:182