solidc
Robust collection of general-purpose cross-platform C libraries and data structures designed for rapid and safe development in C
Loading...
Searching...
No Matches
map.c
Go to the documentation of this file.
1
46/* =========================================================================
47 * Internal slot layout (unchanged from original):
48 * keys_values[i*2] = key pointer for slot i (NULL == empty)
49 * keys_values[i*2+1] = value pointer for slot i
50 * deleted[i] = REMOVED tombstone marker
51 *
52 * The Robin Hood implementation repurposes `deleted[]` as a probe-distance
53 * (DIB) array: deleted[i] holds the distance-from-initial-bucket for the
54 * element stored in slot i. 0 means the element is in its natural slot,
55 * 1 means it was displaced by 1, etc. An empty slot is indicated by
56 * keys_values[i*2] == NULL.
57 *
58 * Because DIB replaces the boolean tombstone, `tombstone_count` is always 0
59 * and the 50%-tombstone rehash path is never triggered.
60 * ========================================================================= */
61#include <limits.h>
62#include <stdalign.h>
63#include <stdbool.h>
64#include <stdint.h>
65#include <stdio.h>
66#include <stdlib.h>
67#include <string.h>
68
69#define XXH_INLINE_ALL
70#include "../include/cmp.h"
71#include "../include/lock.h"
72#include "../include/map.h"
73#include "../include/platform.h"
74
75#include <xxhash.h>
76
77// Default maximum load factor
78#define DEFAULT_MAX_LOAD_FACTOR 0.75f
79#define TOMBSTONE_RATIO_THRESHOLD 0.5f // Rehash when tombstones > 50% of size
80#define MIN_CAPACITY 8 // Minimum capacity to avoid frequent resizing
81#define MAX(a, b) ((a) > (b) ? (a) : (b))
82
83/* Distance-from-initial-bucket stored in the (repurposed) deleted[] array. */
84#define _RH_DIB(m, i) ((m)->deleted[i])
85#define _RH_SET_DIB(m, i, d) ((m)->deleted[i] = (size_t)(d))
86/* Slot is empty when the key pointer is NULL. */
87#define _RH_EMPTY(m, i) ((m)->keys_values[(i) * 2] == NULL)
88
89/* Compute slot index using capacity bitmask (capacity is always power-of-two). */
90static inline size_t _rh_slot(size_t hash, size_t offset, size_t cap_mask) { return (hash + offset) & cap_mask; }
91
92// Optimized map structure with better memory layout
93typedef struct hash_map {
94 void** keys_values; // Interleaved keys and values for better cache locality
95 size_t* deleted; // DIB (distance-from-initial-bucket) array
96 size_t size; // Number of active entries
97 size_t capacity; // Map capacity
98 float max_load_factor; // Configurable load factor threshold
99 HashFunction hash; // Hash function
100 KeyCmpFunction key_compare; // Key comparison function
101 KeyFreeFunction key_free; // Key free function (optional)
102 ValueFreeFunction value_free; // Value free function (optional)
103 Lock lock; // Lock for thread safety
104} HashMap;
105
106// Fast hash for small keys (up to 8 bytes) - optimized version
107static inline uint64_t fast_small_hash(const void* key, size_t size) {
108 // Use union to avoid strict aliasing violations
109 union {
110 uint64_t u64;
111 uint32_t u32[2];
112 uint16_t u16[4];
113 uint8_t u8[8];
114 } value = {0};
115
116 // Copy only the needed bytes
117 const uint8_t* src = (const uint8_t*)key;
118 for (size_t i = 0; i < size && i < sizeof(value); i++) {
119 value.u8[i] = src[i];
120 }
121 return value.u64;
122}
123
124// xxHash implementation with small key optimization
125static inline unsigned long xxhash(const void* key, size_t size) {
126 if (size <= sizeof(uint64_t)) {
127 return fast_small_hash(key, size);
128 }
129 return XXH3_64bits(key, size);
130}
131
132// Helper function to calculate next power of two for better hashing
133static inline size_t next_power_of_two(size_t n) {
134 if (n == 0) return 1;
135 n--;
136 n |= n >> 1;
137 n |= n >> 2;
138 n |= n >> 4;
139 n |= n >> 8;
140 n |= n >> 16;
141#if SIZE_MAX > UINT32_MAX
142 n |= n >> 32;
143#endif
144 return n + 1;
145}
146
147// Safe capacity growth calculation
148static inline size_t calculate_new_capacity(size_t current) {
149 if (current > SIZE_MAX / 2) {
150 return SIZE_MAX;
151 }
152 size_t new_cap = current * 2;
153 return (new_cap < current) ? SIZE_MAX : new_cap;
154}
155
156// Wrapper to match HashFunction signature
157static inline size_t xxhash_wrapper(const void* key, size_t len) { return (size_t)xxhash(key, (size_t)len); }
158
159// Map creation with better error handling and memory optimization
160HashMap* map_create(const MapConfig* config) {
161 if (!config || !config->key_compare) {
162 return NULL;
163 }
164
165 size_t capacity = MAX(
166 MIN_CAPACITY, config->initial_capacity > 0 ? next_power_of_two(config->initial_capacity) : INITIAL_MAP_SIZE);
167
168 if (capacity > SIZE_MAX / 2) {
169 return NULL;
170 }
171
172 float max_load_factor = config->max_load_factor > 0.1f && config->max_load_factor <= 0.95f
173 ? config->max_load_factor
174 : DEFAULT_MAX_LOAD_FACTOR;
175
176 HashMap* m = (HashMap*)malloc(sizeof(HashMap));
177 if (!m) {
178 return NULL;
179 }
180
181 // Allocate interleaved keys and values for better cache locality
182 m->keys_values = (void**)calloc(capacity * 2, sizeof(void*));
183 m->deleted = (size_t*)calloc(capacity, sizeof(size_t));
184
185 if (!m->keys_values || !m->deleted) {
186 free(m->keys_values);
187 free(m->deleted);
188 free(m);
189 return NULL;
190 }
191
192 m->size = 0;
193 m->capacity = capacity;
194 m->max_load_factor = max_load_factor;
195 m->hash = config->hash_func ? config->hash_func : xxhash_wrapper;
196 m->key_compare = config->key_compare;
197 m->key_free = config->key_free;
198 m->value_free = config->value_free;
199
200 lock_init(&m->lock);
201 return m;
202}
203
204// Helper to get key pointer from interleaved array
205static inline void** get_key_ptr(HashMap* m, size_t index) { return &m->keys_values[index * 2]; }
206
207// Helper to get value pointer from interleaved array
208static inline void** get_value_ptr(HashMap* m, size_t index) { return &m->keys_values[index * 2 + 1]; }
209
210// Resize the map with optimized rehashing and error handling
211static bool map_resize(HashMap* m, size_t new_capacity, size_t key_len) {
212 if (new_capacity <= m->capacity || new_capacity > SIZE_MAX / 2) {
213 return false;
214 }
215
216 void** new_keys_values = (void**)calloc(new_capacity * 2, sizeof(void*));
217 size_t* new_deleted = (size_t*)calloc(new_capacity, sizeof(size_t));
218
219 if (!new_keys_values || !new_deleted) {
220 free(new_keys_values);
221 free(new_deleted);
222 return false;
223 }
224
225 // Save old data
226 void** old_keys_values = m->keys_values;
227 size_t* old_deleted = m->deleted;
228 size_t old_capacity = m->capacity;
229
230 // Swap in new arrays
231 m->keys_values = new_keys_values;
232 m->deleted = new_deleted;
233 m->capacity = new_capacity;
234 size_t old_size = m->size;
235 m->size = 0;
236
237 // Rehash all active entries using Robin Hood linear probing (matching map_set)
238 bool success = true;
239 const size_t mask = new_capacity - 1;
240 for (size_t i = 0; i < old_capacity; i++) {
241 if (old_keys_values[i * 2] && old_keys_values[i * 2] != NULL) {
242 void* ins_key = old_keys_values[i * 2];
243 void* ins_value = old_keys_values[i * 2 + 1];
244
245 size_t hash = m->hash(ins_key, key_len);
246 size_t idx = hash & mask;
247 size_t ins_dist = 0;
248
249 for (size_t j = 0; j < new_capacity; j++) {
250 if (_RH_EMPTY(m, idx)) {
251 *get_key_ptr(m, idx) = ins_key;
252 *get_value_ptr(m, idx) = ins_value;
253 _RH_SET_DIB(m, idx, ins_dist);
254 m->size++;
255 break;
256 }
257
258 // Robin Hood: steal from richer elements
259 size_t cur_dist = _RH_DIB(m, idx);
260 if (cur_dist < ins_dist) {
261 void* tmp_k = *get_key_ptr(m, idx);
262 void* tmp_v = *get_value_ptr(m, idx);
263 *get_key_ptr(m, idx) = ins_key;
264 *get_value_ptr(m, idx) = ins_value;
265 _RH_SET_DIB(m, idx, ins_dist);
266 ins_key = tmp_k;
267 ins_value = tmp_v;
268 ins_dist = cur_dist;
269 }
270
271 idx = (idx + 1) & mask;
272 ins_dist++;
273 }
274 }
275 }
276
277 if (success) {
278 // Free old arrays
279 free(old_keys_values);
280 free(old_deleted);
281 } else {
282 // Restore original state on failure
283 m->keys_values = old_keys_values;
284 m->deleted = old_deleted;
285 m->capacity = old_capacity;
286 m->size = old_size;
287 free(new_keys_values);
288 free(new_deleted);
289 return false;
290 }
291
292 return true;
293}
294
295size_t map_length(HashMap* m) { return m->size; }
296
297// Set a key-value pair with optimizations and better error handling
298bool map_set(HashMap* m, void* key, size_t key_len, void* value) {
299 if (!m || !key) return false;
300
301 /* Grow before inserting if load would exceed threshold. */
302 size_t total_used = m->size; /* no tombstones in Robin Hood */
303 if ((float)total_used / (float)m->capacity >= m->max_load_factor) {
304 size_t new_cap = calculate_new_capacity(m->capacity);
305 if (new_cap <= m->capacity || !map_resize(m, new_cap, key_len)) return false;
306 }
307
308 const size_t mask = m->capacity - 1;
309 size_t hash = m->hash(key, key_len);
310 size_t idx = hash & mask;
311
312 /* The element we're about to insert. */
313 void* ins_key = key;
314 void* ins_value = value;
315 size_t ins_dist = 0;
316
317 for (size_t i = 0; i < m->capacity; i++) {
318 if (_RH_EMPTY(m, idx)) {
319 /* Empty slot — place the element here. */
320 *get_key_ptr(m, idx) = ins_key;
321 *get_value_ptr(m, idx) = ins_value;
322 _RH_SET_DIB(m, idx, ins_dist);
323 m->size++;
324 return true;
325 }
326
327 /* Slot occupied. Check for key match (update). */
328 void** cur_kp = get_key_ptr(m, idx);
329 if (m->key_compare(*cur_kp, ins_key)) {
330 /* Key already exists — update value. */
331 if (m->value_free) m->value_free(*get_value_ptr(m, idx));
332 *get_value_ptr(m, idx) = ins_value;
333 return true;
334 }
335
336 /* Robin Hood: if resident has smaller DIB ("richer"), steal its slot. */
337 size_t cur_dist = _RH_DIB(m, idx);
338 if (cur_dist < ins_dist) {
339 /* Swap incoming element with resident. */
340 void* tmp_k = *cur_kp;
341 void* tmp_v = *get_value_ptr(m, idx);
342
343 *get_key_ptr(m, idx) = ins_key;
344 *get_value_ptr(m, idx) = ins_value;
345 _RH_SET_DIB(m, idx, ins_dist);
346
347 ins_key = tmp_k;
348 ins_value = tmp_v;
349 ins_dist = cur_dist;
350 }
351
352 idx = (idx + 1) & mask;
353 ins_dist++;
354 }
355
356 return false; /* table full (shouldn't happen at <75% load) */
357}
358
359// Get a value by key with optimized probing
360void* map_get(HashMap* m, void* key, size_t key_len) {
361 if (!m || !key) return NULL;
362
363 const size_t hash = m->hash(key, key_len);
364 const size_t mask = m->capacity - 1;
365 size_t idx = hash & mask;
366
367 for (size_t dist = 0; dist < m->capacity; dist++) {
368 if (_RH_EMPTY(m, idx)) return NULL;
369
370 /* Robin Hood early exit: element would have robbed this slot. */
371 if (_RH_DIB(m, idx) < dist) return NULL;
372
373 void** kp = get_key_ptr(m, idx);
374 if (m->key_compare(*kp, key)) return *get_value_ptr(m, idx);
375
376 idx = (idx + 1) & mask;
377 }
378 return NULL;
379}
380
381// Remove a key-value pair with tombstone optimization
382bool map_remove(HashMap* m, void* key, size_t key_len) {
383 if (!m || !key) return false;
384
385 const size_t mask = m->capacity - 1;
386 size_t hash = m->hash(key, key_len);
387 size_t idx = hash & mask;
388
389 /* Locate the element. */
390 size_t pos = SIZE_MAX;
391 for (size_t dist = 0; dist < m->capacity; dist++) {
392 if (_RH_EMPTY(m, idx)) return false;
393 if (_RH_DIB(m, idx) < dist) return false; /* Robin Hood miss */
394
395 if (m->key_compare(*get_key_ptr(m, idx), key)) {
396 pos = idx;
397 break;
398 }
399 idx = (idx + 1) & mask;
400 }
401 if (pos == SIZE_MAX) return false;
402
403 /* Free key/value if cleanup functions were provided. */
404 if (m->key_free) m->key_free(*get_key_ptr(m, pos));
405 if (m->value_free) m->value_free(*get_value_ptr(m, pos));
406
407 /* Backward-shift: pull forward any displaced neighbours. */
408 size_t hole = pos;
409 for (;;) {
410 size_t next = (hole + 1) & mask;
411 if (_RH_EMPTY(m, next) || _RH_DIB(m, next) == 0) break;
412
413 /* Move next into hole and decrement its DIB. */
414 *get_key_ptr(m, hole) = *get_key_ptr(m, next);
415 *get_value_ptr(m, hole) = *get_value_ptr(m, next);
416 _RH_SET_DIB(m, hole, _RH_DIB(m, next) - 1);
417
418 hole = next;
419 }
420
421 /* Mark the last vacated slot as empty. */
422 *get_key_ptr(m, hole) = NULL;
423 *get_value_ptr(m, hole) = NULL;
424 _RH_SET_DIB(m, hole, 0);
425 m->size--;
426 return true;
427}
428
429void map_destroy(HashMap* m) {
430 if (!m) return;
431
432 // no cleanup functions are provided
433 if (!m->key_free && !m->value_free) {
434 goto cleanup;
435 }
436
437 void** keys_values = m->keys_values;
438 size_t capacity = m->capacity;
439 KeyFreeFunction key_free = m->key_free;
440 ValueFreeFunction value_free = m->value_free;
441
442 for (size_t i = 0; i < capacity; i++) {
443 void* key = keys_values[i * 2];
444 if (key) {
445 if (key_free) key_free(key);
446 if (value_free) value_free(keys_values[i * 2 + 1]);
447 }
448 }
449
450cleanup:
451 free(m->keys_values);
452 free(m->deleted);
453 lock_free(&m->lock);
454 free(m);
455}
456
457map_iterator map_iter(HashMap* map) {
458 map_iterator it = {.map = map, .index = 0};
459 return it;
460}
461
462bool map_next(map_iterator* it, void** key, void** value) {
463 HashMap* map = it->map;
464 size_t capacity = map->capacity;
465 void** keys_values = map->keys_values;
466 size_t index = it->index;
467
468 while (index < capacity) {
469 void* current_key = keys_values[index * 2];
470 if (current_key) {
471 if (key) *key = current_key;
472 if (value) *value = keys_values[index * 2 + 1];
473 it->index = index + 1;
474 return true;
475 }
476 index++;
477 }
478 it->index = capacity;
479 return false;
480}
481
482// Thread-safe operations
483bool map_set_safe(HashMap* m, void* key, size_t key_len, void* value) {
484 lock_acquire(&m->lock);
485 bool result = map_set(m, key, key_len, value);
486 lock_release(&m->lock);
487 return result;
488}
489
490void* map_get_safe(HashMap* m, void* key, size_t key_len) {
491 lock_acquire(&m->lock);
492 void* value = map_get(m, key, key_len);
493 lock_release(&m->lock);
494 return value;
495}
496
497bool map_remove_safe(HashMap* m, void* key, size_t key_len) {
498 lock_acquire(&m->lock);
499 bool result = map_remove(m, key, key_len);
500 lock_release(&m->lock);
501 return result;
502}
pthread_mutex_t Lock
Definition lock.h:32
int lock_init(Lock *lock)
Definition lock.c:132
int lock_acquire(Lock *lock)
Definition lock.c:145
int lock_release(Lock *lock)
Definition lock.c:168
int lock_free(Lock *lock)
Definition lock.c:182
#define MAX(a, b)
Definition macros.h:385