solidc
Robust collection of general-purpose cross-platform C libraries and data structures designed for rapid and safe development in C
Loading...
Searching...
No Matches
trie.c
1#include "../include/trie.h"
2
3#include <stdbool.h>
4#include <stdint.h>
5#include <stdio.h>
6#include <stdlib.h>
7#include <string.h>
8
9/* =========================================================================
10 * Internal node definition
11 * ========================================================================= */
12
13typedef struct trie_node {
14 uint8_t* chars; /* sorted array of child byte values */
15 struct trie_node** children; /* parallel array of child node pointers */
16 uint8_t nchildren; /* number of live children */
17 uint8_t nalloc; /* allocated capacity of chars/children */
18 bool is_end_of_word;
19 uint8_t _pad;
20 uint32_t frequency;
21} trie_node;
22
23typedef struct _trie {
24 trie_node* root;
25 size_t word_count;
26} trie_t;
27
28/* =========================================================================
29 * Node lifecycle
30 * ========================================================================= */
31
32static trie_node* node_create(void) {
33 trie_node* n = (trie_node*)calloc(1, sizeof(trie_node));
34 return n;
35}
36
37/* Recursively free a node subtree. */
38static void node_destroy(trie_node* n) {
39 if (!n) return;
40 for (uint8_t i = 0; i < n->nchildren; i++) node_destroy(n->children[i]);
41 free(n->chars);
42 free(n->children);
43 free(n);
44}
45
46/* =========================================================================
47 * Child lookup (binary search on sorted chars[])
48 * ========================================================================= */
49
50/*
51 * Returns the index of `c` in n->chars[], or -1 if not present.
52 * Binary search over nchildren (usually ≤ 26 for A-Z text).
53 */
54static inline int child_index(const trie_node* n, uint8_t c) {
55 int lo = 0, hi = (int)n->nchildren - 1;
56 while (lo <= hi) {
57 int mid = (lo + hi) >> 1;
58 if (n->chars[mid] == c)
59 return mid;
60 else if (n->chars[mid] < c)
61 lo = mid + 1;
62 else
63 hi = mid - 1;
64 }
65 return -1;
66}
67
68/*
69 * Find or create a child for byte `c` under parent `n`.
70 * On insert the arrays grow by doubling, using insertion sort to keep
71 * them sorted (cheap because nchildren is tiny in practice).
72 */
73static trie_node* child_find_or_create(trie_node* n, uint8_t c) {
74 int idx = child_index(n, c);
75 if (idx >= 0) return n->children[idx];
76
77 /* Need to insert a new child. Grow arrays if necessary. */
78 if (n->nchildren == n->nalloc) {
79 uint8_t new_alloc = n->nalloc == 0 ? 2 : (uint8_t)(n->nalloc * 2);
80 if (new_alloc < n->nalloc) new_alloc = 255; /* overflow guard */
81
82 uint8_t* nc = (uint8_t*)realloc(n->chars, new_alloc * sizeof(uint8_t));
83 trie_node** np = (trie_node**)realloc(n->children, new_alloc * sizeof(trie_node*));
84
85 if (!nc || !np) {
86 free(nc);
87 free(np);
88 return NULL;
89 }
90
91 n->chars = nc;
92 n->children = np;
93 n->nalloc = new_alloc;
94 }
95
96 /* Insertion sort: find where `c` belongs, shift right. */
97 int8_t pos = (int8_t)n->nchildren;
98 while (pos > 0 && n->chars[pos - 1] > c) {
99 n->chars[pos] = n->chars[pos - 1];
100 n->children[pos] = n->children[pos - 1];
101 pos--;
102 }
103
104 trie_node* child = node_create();
105 if (!child) return NULL;
106
107 n->chars[pos] = c;
108 n->children[pos] = child;
109 n->nchildren++;
110 return child;
111}
112
113/* =========================================================================
114 * Public API
115 * ========================================================================= */
116
118 trie_t* t = (trie_t*)malloc(sizeof(trie_t));
119 if (!t) return NULL;
120 t->root = node_create();
121 if (!t->root) {
122 free(t);
123 return NULL;
124 }
125 t->word_count = 0;
126 return t;
127}
128
130 if (!t) return;
131 node_destroy(t->root);
132 free(t);
133}
134
135bool trie_insert(trie_t* t, const char* word) {
136 if (!t || !word || !*word) return false;
137
138 trie_node* cur = t->root;
139 for (const uint8_t* p = (const uint8_t*)word; *p; p++) {
140 cur = child_find_or_create(cur, *p);
141 if (!cur) return false;
142 }
143
144 if (!cur->is_end_of_word) {
145 cur->is_end_of_word = true;
146 t->word_count++;
147 }
148 cur->frequency++;
149 return true;
150}
151
152bool trie_search(const trie_t* t, const char* word) {
153 if (!t || !word || !*word) return false;
154
155 const trie_node* cur = t->root;
156 for (const uint8_t* p = (const uint8_t*)word; *p; p++) {
157 int idx = child_index(cur, *p);
158 if (idx < 0) return false;
159 cur = cur->children[idx];
160 }
161 return cur->is_end_of_word;
162}
163
164bool trie_starts_with(const trie_t* t, const char* prefix) {
165 if (!t || !prefix || !*prefix) return false;
166
167 const trie_node* cur = t->root;
168 for (const uint8_t* p = (const uint8_t*)prefix; *p; p++) {
169 int idx = child_index(cur, *p);
170 if (idx < 0) return false;
171 cur = cur->children[idx];
172 }
173 return true;
174}
175
176bool trie_delete(trie_t* t, const char* word) {
177 if (!t || !word || !*word) return false;
178
179 const trie_node* cur = t->root;
180 for (const uint8_t* p = (const uint8_t*)word; *p; p++) {
181 int idx = child_index(cur, *p);
182 if (idx < 0) return false;
183 cur = cur->children[idx];
184 }
185
186 if (!cur->is_end_of_word) return false;
187 ((trie_node*)cur)->is_end_of_word = false;
188 ((trie_node*)cur)->frequency = 0;
189 t->word_count--;
190 return true;
191}
192
193uint32_t trie_get_frequency(const trie_t* t, const char* word) {
194 if (!t || !word || !*word) return 0;
195
196 const trie_node* cur = t->root;
197 for (const uint8_t* p = (const uint8_t*)word; *p; p++) {
198 int idx = child_index(cur, *p);
199 if (idx < 0) return 0;
200 cur = cur->children[idx];
201 }
202 return cur->is_end_of_word ? cur->frequency : 0;
203}
204
205size_t trie_get_word_count(const trie_t* t) { return t ? t->word_count : 0; }
206bool trie_is_empty(const trie_t* t) { return !t || t->word_count == 0; }
207
208/* =========================================================================
209 * Autocomplete (DFS with arena allocation — same API as original)
210 * ========================================================================= */
211
212typedef struct {
213 char** suggestions;
214 size_t count;
215 size_t capacity;
216 size_t limit;
217} _collector;
218
219static void _collect(const trie_node* node, char* buf, size_t depth, size_t buf_max, _collector* c, Arena* arena) {
220 if (!node || c->count >= c->limit) return;
221
222 if (node->is_end_of_word) {
223 buf[depth] = '\0';
224 char* dup = arena_strdup(arena, buf);
225 if (dup) c->suggestions[c->count++] = dup;
226 }
227
228 for (uint8_t i = 0; i < node->nchildren && c->count < c->limit; i++) {
229 if (depth + 1 >= buf_max) return;
230 buf[depth] = (char)node->chars[i];
231 _collect(node->children[i], buf, depth + 1, buf_max, c, arena);
232 }
233}
234
235const char** trie_autocomplete(const trie_t* t, const char* prefix, size_t max_suggestions, size_t* out_count,
236 Arena* arena) {
237 if (out_count) *out_count = 0;
238 if (!t || !prefix || !out_count || max_suggestions == 0 || !arena) return NULL;
239
240 /* Navigate to the prefix node. */
241 const trie_node* cur = t->root;
242 for (const uint8_t* p = (const uint8_t*)prefix; *p; p++) {
243 int idx = child_index(cur, *p);
244 if (idx < 0) return NULL;
245 cur = cur->children[idx];
246 }
247
248 /* Allocate suggestion array and word buffer on the arena. */
249 char** suggestions = ARENA_ALLOC_ARRAY(arena, char*, max_suggestions);
250 if (!suggestions) return NULL;
251
252 const size_t buf_max = 1024;
253 char* buf = (char*)arena_alloc(arena, buf_max);
254 if (!buf) return NULL;
255
256 size_t prefix_len = strlen(prefix);
257 if (prefix_len >= buf_max) return NULL;
258 memcpy(buf, prefix, prefix_len);
259
260 _collector c = {.suggestions = suggestions, .count = 0, .capacity = max_suggestions, .limit = max_suggestions};
261
262 _collect(cur, buf, prefix_len, buf_max, &c, arena);
263
264 if (c.count == 0) return NULL;
265
266 *out_count = c.count;
267 return (const char**)suggestions;
268}
struct _trie trie_t
Definition trie.h:15
void trie_destroy(trie_t *trie)
Definition trie.c:129
bool trie_starts_with(const trie_t *trie, const char *prefix)
Definition trie.c:164
bool trie_search(const trie_t *trie, const char *word)
Definition trie.c:152
bool trie_is_empty(const trie_t *trie)
Definition trie.c:206
bool trie_insert(trie_t *trie, const char *word)
Definition trie.c:135
uint32_t trie_get_frequency(const trie_t *trie, const char *word)
Definition trie.c:193
size_t trie_get_word_count(const trie_t *trie)
Definition trie.c:205
trie_t * trie_create(void)
Definition trie.c:117
const char ** trie_autocomplete(const trie_t *trie, const char *prefix, size_t max_suggestions, size_t *out_count, Arena *arena)
Definition trie.c:235
bool trie_delete(trie_t *trie, const char *word)
Definition trie.c:176