solidc
Robust collection of general-purpose cross-platform C libraries and data structures designed for rapid and safe development in C
Loading...
Searching...
No Matches
cstr.c
Go to the documentation of this file.
1
32#include "cstr.h"
33
34#include <assert.h>
35#include <ctype.h>
36#include <stdarg.h>
37#include <stdio.h>
38#include <stdlib.h>
39#include <string.h>
40
41/* -------------------------------------------------------------------------
42 * Internal macros
43 * ---------------------------------------------------------------------- */
44
/* CSTR_MAX_LEN converted to size_t so it can be compared against size_t values. */
#define CSTR_MAX_SIZE ((size_t)CSTR_MAX_LEN)

/*
 * Branch-prediction hints. With GCC/Clang they map to __builtin_expect;
 * on any other compiler they simply evaluate their argument.
 */
#if !defined(__GNUC__) && !defined(__clang__)
#define likely(x) (x)
#define unlikely(x) (x)
#else
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
#endif
55
/*
 * Round x up to the smallest power of two that is >= x.
 *
 * Both 0 and 1 map to 1. For x above 2^31 the mathematical result does not
 * fit in 32 bits; the unsigned addition wraps and the function returns 0.
 */
static inline uint32_t next_pow2_u32(uint32_t x) {
    if (x < 2) return 1;

    /* Smear the highest set bit of (x - 1) into every lower position. */
    uint32_t v = x - 1;
    for (unsigned shift = 1; shift < 32; shift <<= 1) {
        v |= v >> shift;
    }
    return v + 1;
}
67
68/* Minimum heap allocation that fits `need` bytes + NUL. */
69static inline uint32_t cstr_grow_cap(uint32_t current, uint32_t need) {
70 uint32_t cap = current < CSTR_MIN_HEAP ? CSTR_MIN_HEAP : current;
71 while (cap < need) {
72 if (cap <= CSTR_MAX_LEN / 2) {
73 cap *= 2;
74 } else {
75 cap = CSTR_MAX_LEN;
76 break; /* saturate: cannot grow further */
77 }
78 }
79 return cap;
80}
81
82/* -------------------------------------------------------------------------
83 * Internal: promote SSO → heap, or grow existing heap.
84 *
85 * After a successful call, s->data points to heap memory of size cap,
86 * s->capacity has the heap flag set, and existing content is preserved.
87 * ---------------------------------------------------------------------- */
88static bool cstr_ensure_cap(cstr* s, size_t need) {
89 /* need is the total bytes required INCLUDING the NUL terminator. */
90 if (CSTR_UNLIKELY(need > (size_t)CSTR_MAX_LEN)) return false;
91
92 uint32_t need32 = (uint32_t)need;
93
94 if (!cstr_is_heap(s)) {
95 /* SSO path: need > CSTR_SSO_CAP triggers promotion */
96 if (need32 <= CSTR_SSO_CAP) return true;
97
98 uint32_t cap = cstr_grow_cap(CSTR_SSO_CAP, need32);
99 char* mem = (char*)malloc(cap);
100 if (CSTR_UNLIKELY(!mem)) return false;
101
102 memcpy(mem, s->buf, s->length + 1);
103 s->data = mem;
104 s->capacity = CSTR_HEAP_FLAG | cap;
105 return true;
106 }
107
108 /* Heap path */
109 uint32_t cur_cap = cstr_heap_cap(s);
110 if (need32 <= cur_cap) return true;
111
112 uint32_t new_cap = cstr_grow_cap(cur_cap, need32);
113 if (new_cap == 0) return false;
114
115 char* mem = (char*)realloc(s->data, new_cap);
116 if (CSTR_UNLIKELY(!mem)) return false;
117
118 s->data = mem;
119 s->capacity = CSTR_HEAP_FLAG | new_cap;
120 return true;
121}
122
123/* -------------------------------------------------------------------------
124 * Lifecycle
125 * ---------------------------------------------------------------------- */
126
127cstr* cstr_init(size_t initial_capacity) {
128 if (initial_capacity >= CSTR_MAX_SIZE) {
129 return NULL;
130 }
131
132 cstr* s = (cstr*)malloc(sizeof(cstr));
133 if (CSTR_UNLIKELY(!s)) return NULL;
134
136
137 if (initial_capacity >= CSTR_SSO_CAP) {
138 if (CSTR_UNLIKELY(!cstr_ensure_cap(s, initial_capacity + 1))) {
139 free(s);
140 return NULL;
141 }
142 }
143 return s;
144}
145
146cstr* cstr_new(const char* input) {
147 if (CSTR_UNLIKELY(!input)) return NULL;
148 return cstr_new_len(input, strlen(input));
149}
150
151cstr* cstr_new_len(const char* data, size_t length) {
152 if (CSTR_UNLIKELY(!data && length > 0)) return NULL;
153
154 cstr* s = (cstr*)malloc(sizeof(cstr));
155 if (CSTR_UNLIKELY(!s)) return NULL;
157
158 if (length > 0) {
159 if (CSTR_UNLIKELY(!cstr_ensure_cap(s, length + 1))) {
160 free(s);
161 return NULL;
162 }
163 memcpy(s->data, data, length);
164 s->data[length] = '\0';
165 s->length = (uint32_t)length;
166 }
167 return s;
168}
169
170void cstr_drop(cstr* s) {
171 if (!s) return;
172 if (cstr_is_heap(s)) {
173 free(s->data);
174 s->data = NULL;
175 cstr_init_inplace(s); /* reset to safe SSO state */
176 }
177}
178
179void cstr_free(cstr* s) {
180 if (!s) return;
181 if (cstr_is_heap(s)) {
182 free(s->data);
183 s->data = NULL;
184 };
185 free(s);
186}
187
188void cstr_debug(const cstr* s) {
189 if (!s) {
190 fprintf(stderr, "cstr: NULL\n");
191 return;
192 }
193
194 fprintf(stderr,
195 "cstr { data=%p, length=%u, capacity=%u, mode=%s }\n"
196 " content: \"%.*s\"\n",
197 (const void*)s->data, s->length, (unsigned)(cstr_is_heap(s) ? cstr_heap_cap(s) : CSTR_SSO_CAP - 1u),
198 cstr_is_heap(s) ? "heap" : "sso", (int)s->length, s->data);
199}
200
201/* -------------------------------------------------------------------------
202 * Capacity management
203 * ---------------------------------------------------------------------- */
204
205bool cstr_reserve(cstr* s, size_t capacity) { return cstr_ensure_cap(s, capacity + 1); }
206
208 if (!cstr_is_heap(s)) return;
209 uint32_t needed = s->length + 1;
210 if (cstr_heap_cap(s) == needed) return;
211
212 char* mem = (char*)realloc(s->data, needed);
213 if (mem) {
214 s->data = mem;
215 s->capacity = CSTR_HEAP_FLAG | needed;
216 }
217 /* Failure is non-fatal — we just stay oversized. */
218}
219
220/* -------------------------------------------------------------------------
221 * Append / prepend / insert
222 * ---------------------------------------------------------------------- */
223
224bool cstr_append(cstr* s, const char* CSTR_RESTRICT append_str) {
225 size_t n = strlen(append_str);
226 if (n == 0) return true;
227
228 size_t new_len = (size_t)s->length + n;
229 if (CSTR_UNLIKELY(new_len > CSTR_MAX_SIZE)) return false;
230 if (CSTR_UNLIKELY(!cstr_ensure_cap(s, new_len + 1))) return false;
231
232 memcpy(s->data + s->length, append_str, n + 1); /* +1 copies NUL */
233 s->length = (uint32_t)new_len;
234 return true;
235}
236
237bool cstr_append_cstr(cstr* s, const cstr* append) {
238 uint32_t n = append->length;
239 if (n == 0) return true;
240
241 uint32_t new_len = s->length + n;
242 if (CSTR_UNLIKELY(new_len < s->length)) return false; /* overflow */
243 if (CSTR_UNLIKELY(!cstr_ensure_cap(s, (size_t)new_len + 1))) return false;
244
245 memcpy(s->data + s->length, append->data, (size_t)n + 1);
246 s->length = new_len;
247 return true;
248}
249
250bool cstr_ncat(cstr* dest, const cstr* src, size_t n) {
251 uint32_t copy_n = (n < (size_t)src->length) ? (uint32_t)n : src->length;
252 if (copy_n == 0) return true;
253
254 uint32_t new_len = dest->length + copy_n;
255 if (CSTR_UNLIKELY(new_len < dest->length)) return false;
256 if (CSTR_UNLIKELY(!cstr_ensure_cap(dest, (size_t)new_len + 1))) return false;
257
258 memcpy(dest->data + dest->length, src->data, copy_n);
259 dest->data[new_len] = '\0';
260 dest->length = new_len;
261 return true;
262}
263
264bool cstr_append_char(cstr* s, char c) {
265 uint32_t new_len = s->length + 1;
266 if (CSTR_UNLIKELY(!cstr_ensure_cap(s, (size_t)new_len + 1))) return false;
267 s->data[s->length] = c;
268 s->data[new_len] = '\0';
269 s->length = new_len;
270 return true;
271}
272
273bool cstr_prepend(cstr* s, const char* prepend_str) {
274 size_t n = strlen(prepend_str);
275 if (n == 0) return true;
276
277 size_t new_len = (size_t)s->length + n;
278 if (CSTR_UNLIKELY(new_len > CSTR_MAX_SIZE)) return false;
279 if (CSTR_UNLIKELY(!cstr_ensure_cap(s, new_len + 1))) return false;
280
281 memmove(s->data + n, s->data, s->length + 1);
282 memcpy(s->data, prepend_str, n);
283 s->length = (uint32_t)new_len;
284 return true;
285}
286
287bool cstr_prepend_cstr(cstr* s, const cstr* prepend) {
288 uint32_t n = prepend->length;
289 if (n == 0) return true;
290
291 uint32_t new_len = s->length + n;
292 if (CSTR_UNLIKELY(new_len < s->length)) return false;
293 if (CSTR_UNLIKELY(!cstr_ensure_cap(s, (size_t)new_len + 1))) return false;
294
295 memmove(s->data + n, s->data, s->length + 1);
296 memcpy(s->data, prepend->data, n);
297 s->length = new_len;
298 return true;
299}
300
301bool cstr_prepend_fast(cstr* s, const char* prepend_str) {
302 size_t n = strlen(prepend_str);
303 if (n == 0) return true;
304 memmove(s->data + n, s->data, s->length + 1);
305 memcpy(s->data, prepend_str, n);
306 s->length += (uint32_t)n;
307 return true;
308}
309
310bool cstr_insert(cstr* s, size_t index, const char* insert_str) {
311 if (CSTR_UNLIKELY(index > (size_t)s->length)) return false;
312
313 size_t n = strlen(insert_str);
314 if (n == 0) return true;
315
316 size_t new_len = (size_t)s->length + n;
317 if (CSTR_UNLIKELY(new_len > CSTR_MAX_SIZE)) return false;
318 if (CSTR_UNLIKELY(!cstr_ensure_cap(s, new_len + 1))) return false;
319
320 char* pos = s->data + index;
321 memmove(pos + n, pos, s->length - index + 1);
322 memcpy(pos, insert_str, n);
323 s->length = (uint32_t)new_len;
324 return true;
325}
326
327bool cstr_insert_cstr(cstr* s, size_t index, const cstr* insert) {
328 if (CSTR_UNLIKELY(index > (size_t)s->length)) return false;
329
330 uint32_t n = insert->length;
331 if (n == 0) return true;
332
333 uint32_t new_len = s->length + n;
334 if (CSTR_UNLIKELY(new_len < s->length)) return false;
335 if (CSTR_UNLIKELY(!cstr_ensure_cap(s, (size_t)new_len + 1))) return false;
336
337 char* pos = s->data + index;
338 memmove(pos + n, pos, s->length - index + 1);
339 memcpy(pos, insert->data, n);
340 s->length = new_len;
341 return true;
342}
343
344bool cstr_remove(cstr* s, size_t index, size_t count) {
345 uint32_t len = s->length;
346 if (CSTR_UNLIKELY(index > len)) return (index == len && count == 0);
347
348 uint32_t idx = (uint32_t)index;
349 uint32_t cnt = (count > (size_t)(len - idx)) ? (len - idx) : (uint32_t)count;
350 if (cnt == 0) return true;
351
352 memmove(s->data + idx, s->data + idx + cnt, len - idx - cnt + 1);
353 s->length = len - cnt;
354 return true;
355}
356
357/* -------------------------------------------------------------------------
358 * Printf-style helpers
359 * ---------------------------------------------------------------------- */
360
361cstr* cstr_format(const char* format, ...) {
362 if (CSTR_UNLIKELY(!format)) return NULL;
363
364 va_list a, a2;
365 va_start(a, format);
366 va_copy(a2, a);
367 int need = vsnprintf(NULL, 0, format, a2);
368 va_end(a2);
369
370 if (CSTR_UNLIKELY(need < 0 || (size_t)need > CSTR_MAX_SIZE)) {
371 va_end(a);
372 return NULL;
373 }
374
375 cstr* s = cstr_init((size_t)need);
376 if (CSTR_UNLIKELY(!s)) {
377 va_end(a);
378 return NULL;
379 }
380
381 vsnprintf(s->data, (size_t)need + 1, format, a);
382 va_end(a);
383 s->length = (uint32_t)need;
384 return s;
385}
386
387bool cstr_append_fmt(cstr* s, const char* format, ...) {
388 va_list a, a2;
389 va_start(a, format);
390 va_copy(a2, a);
391 int n = vsnprintf(NULL, 0, format, a2);
392 va_end(a2);
393
394 if (CSTR_UNLIKELY(n < 0)) {
395 va_end(a);
396 return false;
397 }
398
399 uint32_t new_len = s->length + (uint32_t)n;
400 if (CSTR_UNLIKELY(!cstr_ensure_cap(s, (size_t)new_len + 1))) {
401 va_end(a);
402 return false;
403 }
404
405 vsnprintf(s->data + s->length, (size_t)n + 1, format, a);
406 va_end(a);
407 s->length = new_len;
408 return true;
409}
410
411/* -------------------------------------------------------------------------
412 * Copy / assign
413 * ---------------------------------------------------------------------- */
414
415bool cstr_copy(cstr* dest, const cstr* src) {
416 if (dest == src) return true;
417 uint32_t src_len = src->length;
418 if (CSTR_UNLIKELY(!cstr_ensure_cap(dest, (size_t)src_len + 1))) return false;
419 memcpy(dest->data, src->data, (size_t)src_len + 1);
420 dest->length = src_len;
421 return true;
422}
423
424/* -------------------------------------------------------------------------
425 * Remove helpers
426 * ---------------------------------------------------------------------- */
427
428size_t cstr_remove_all(cstr* s, const char* substr) {
429 if (!*substr) return 0;
430 size_t sub_len = strlen(substr);
431 char* d = s->data;
432 char *w = d, *r = d;
433 const char* end = d + s->length;
434 size_t count = 0;
435
436 while (r < end) {
437 size_t rem = (size_t)(end - r);
438 if (rem >= sub_len && memcmp(r, substr, sub_len) == 0) {
439 r += sub_len;
440 count++;
441 } else {
442 *w++ = *r++;
443 }
444 }
445 *w = '\0';
446 s->length = (uint32_t)(w - d);
447 return count;
448}
449
450size_t cstr_remove_all_cstr(cstr* s, const cstr* substr) {
451 uint32_t sub_len = substr->length;
452 if (sub_len == 0) return 0;
453 const char* sub = substr->data;
454 char* d = s->data;
455 char *w = d, *r = d;
456 const char* end = d + s->length;
457 size_t count = 0;
458
459 while (r < end) {
460 size_t rem = (size_t)(end - r);
461 if ((uint32_t)rem >= sub_len && memcmp(r, sub, sub_len) == 0) {
462 r += sub_len;
463 count++;
464 } else {
465 *w++ = *r++;
466 }
467 }
468 *w = '\0';
469 s->length = (uint32_t)(w - d);
470 return count;
471}
472
473void cstr_remove_char(cstr* s, char c) {
474 char* d = s->data;
475 char* w = d;
476 const char* end = d + s->length;
477 while (d < end) {
478 if (*d != c) *w++ = *d;
479 d++;
480 }
481 *w = '\0';
482 s->length = (uint32_t)(w - s->data);
483}
484
485void cstr_remove_substr(cstr* s, size_t start, size_t slen) {
486 uint32_t len = s->length;
487 if (CSTR_UNLIKELY(start >= len || slen == 0)) return;
488 if (slen > len - start) slen = len - start;
489 char* d = s->data;
490 size_t tail = len - start - slen;
491 if (tail > 0)
492 memmove(d + start, d + start + slen, tail + 1);
493 else
494 d[start] = '\0';
495 s->length = len - (uint32_t)slen;
496}
497
/* -------------------------------------------------------------------------
 * Search - needle-in-haystack without memmem
 *
 * Strategy:
 *   needle_len == 0  -> trivially found at the start of the haystack
 *   needle_len == 1  -> memchr
 *   needle_len >= 2  -> memchr scan for the first needle byte; on each
 *                       candidate compare the last needle byte, then the
 *                       bytes in between (a short loop for needles of up
 *                       to 9 bytes, memcmp for longer ones)
 *
 * No skip table is built: every candidate position comes from memchr.
 *
 * Parameters:
 *   hs, hlen - haystack bytes and length (need not be NUL-terminated)
 *   nd, nlen - needle bytes and length
 * Returns a pointer to the first match inside hs, or NULL when there is
 * none (including when the needle is longer than the haystack).
 * ---------------------------------------------------------------------- */
static const char* cstr_search(const char* hs, size_t hlen, const char* nd, size_t nlen) {
    if (nlen == 0) return hs;
    if (hlen < nlen) return NULL;
    if (nlen == 1) return (const char*)memchr(hs, (unsigned char)nd[0], hlen);

    const unsigned char first = (unsigned char)nd[0];
    const unsigned char last = (unsigned char)nd[nlen - 1];
    const size_t inner = nlen - 2;                   /* bytes between first and last */
    const char* const last_start = hs + hlen - nlen; /* last position a match can begin */
    const char* cur = hs;

    while (cur <= last_start) {
        /* Jump to the next occurrence of the first needle byte. */
        cur = (const char*)memchr(cur, first, (size_t)(last_start - cur + 1));
        if (!cur) return NULL;

        /* Cheap rejection: compare the last byte before touching the middle. */
        if ((unsigned char)cur[nlen - 1] == last) {
            if (nlen > 9) {
                if (memcmp(cur + 1, nd + 1, inner) == 0) return cur;
            } else {
                size_t i = 0;
                while (i < inner && cur[1 + i] == nd[1 + i]) i++;
                if (i == inner) return cur;
            }
        }
        cur++;
    }

    return NULL;
}
565
566/* -------------------------------------------------------------------------
567 * Public find / rfind
568 * ---------------------------------------------------------------------- */
569
570int cstr_find(const cstr* s, const char* substr) {
571 size_t nlen = strlen(substr);
572 const char* found = cstr_search(s->data, s->length, substr, nlen);
573 return found ? (int)(found - s->data) : CSTR_NPOS;
574}
575
576int cstr_find_cstr(const cstr* s, const cstr* sub) {
577 const char* found = cstr_search(s->data, s->length, sub->data, sub->length);
578 return found ? (int)(found - s->data) : CSTR_NPOS;
579}
580
581int cstr_rfind(const cstr* s, const char* substr) {
582 size_t nlen = strlen(substr);
583 if (nlen == 0 || nlen > s->length) return CSTR_NPOS;
584
585 const char* hs = s->data;
586 size_t hlen = s->length;
587 const char* last = NULL;
588 const char* p = hs;
589
590 /* Walk forward collecting last match — memchr makes each step fast. */
591 while ((p = cstr_search(p, hlen - (size_t)(p - hs), substr, nlen)) != NULL) {
592 last = p;
593 p++;
594 if ((size_t)(p - hs) + nlen > hlen) break;
595 }
596 return last ? (int)(last - hs) : CSTR_NPOS;
597}
598
599int cstr_rfind_cstr(const cstr* s, const cstr* sub) {
600 if (sub->length == 0) return (int)s->length;
601 if (sub->length > s->length) return CSTR_NPOS;
602
603 const char* hs = s->data;
604 size_t hlen = s->length;
605 const char* last = NULL;
606 const char* p = hs;
607
608 while ((p = cstr_search(p, hlen - (size_t)(p - hs), sub->data, sub->length)) != NULL) {
609 last = p;
610 p++;
611 if ((size_t)(p - hs) + sub->length > hlen) break;
612 }
613 return last ? (int)(last - hs) : CSTR_NPOS;
614}
615
616/* -------------------------------------------------------------------------
617 * Comparison
618 * ---------------------------------------------------------------------- */
619
620int cstr_cmp(const cstr* s1, const cstr* s2) {
621 if (!s1 && !s2) return 0;
622 if (!s1) return -1;
623 if (!s2) return 1;
624 return strcmp(s1->data, s2->data);
625}
626
627int cstr_ncmp(const cstr* s1, const cstr* s2, size_t n) {
628 if (!s1 && !s2) return 0;
629 if (!s1) return -1;
630 if (!s2) return 1;
631 return strncmp(s1->data, s2->data, n);
632}
633
634/* -------------------------------------------------------------------------
635 * starts_with / ends_with
636 * ---------------------------------------------------------------------- */
637
638bool cstr_starts_with(const cstr* s, const char* prefix) {
639 size_t plen = strlen(prefix);
640 if (plen == 0) return true;
641 if (plen > (size_t)s->length) return false;
642 return memcmp(s->data, prefix, plen) == 0;
643}
644
645bool cstr_starts_with_cstr(const cstr* s, const cstr* prefix) {
646 uint32_t plen = prefix->length;
647 if (plen == 0) return true;
648 if (plen > s->length) return false;
649 return memcmp(s->data, prefix->data, plen) == 0;
650}
651
652bool cstr_ends_with(const cstr* s, const char* suffix) {
653 size_t slen = strlen(suffix);
654 if (slen == 0) return true;
655 if (slen > (size_t)s->length) return false;
656 return memcmp(s->data + s->length - slen, suffix, slen) == 0;
657}
658
659bool cstr_ends_with_cstr(const cstr* s, const cstr* suffix) {
660 uint32_t slen = suffix->length;
661 if (slen == 0) return true;
662 if (slen > s->length) return false;
663 return memcmp(s->data + s->length - slen, suffix->data, slen) == 0;
664}
665
666/* -------------------------------------------------------------------------
667 * Count occurrences
668 * ---------------------------------------------------------------------- */
669
670size_t cstr_count_substr(const cstr* s, const char* substr) {
671 size_t nlen = strlen(substr);
672 if (nlen == 0 || nlen > s->length) return 0;
673
674 size_t count = 0;
675 const char* p = s->data;
676 size_t rem = s->length;
677
678 while ((p = cstr_search(p, rem, substr, nlen)) != NULL) {
679 count++;
680 p += nlen;
681 rem = s->length - (size_t)(p - s->data);
682 if (rem < nlen) break;
683 }
684 return count;
685}
686
687size_t cstr_count_substr_cstr(const cstr* s, const cstr* sub) {
688 if (sub->length == 0 || sub->length > s->length) return 0;
689 size_t nlen = sub->length;
690
691 size_t count = 0;
692 const char* p = s->data;
693 size_t rem = s->length;
694
695 while ((p = cstr_search(p, rem, sub->data, nlen)) != NULL) {
696 count++;
697 p += nlen;
698 rem = s->length - (size_t)(p - s->data);
699 if (rem < nlen) break;
700 }
701 return count;
702}
703
704/* -------------------------------------------------------------------------
705 * Case conversion
706 * ---------------------------------------------------------------------- */
707
708void cstr_lower(cstr* s) {
709 char* d = s->data;
710 for (uint32_t i = 0, n = s->length; i < n; i++) {
711 unsigned char c = (unsigned char)d[i];
712 /* Branch-free ASCII fast path: sets bit 5 for A-Z. */
713 if ((unsigned)(c - 'A') <= 25u) d[i] = (char)(c | 0x20u);
714 }
715}
716
717void cstr_upper(cstr* s) {
718 char* d = s->data;
719 for (uint32_t i = 0, n = s->length; i < n; i++) {
720 unsigned char c = (unsigned char)d[i];
721 /* Branch-free ASCII: clears bit 5 for a-z. */
722 if ((unsigned)(c - 'a') <= 25u) d[i] = (char)(c & ~0x20u);
723 }
724}
725
726bool cstr_snakecase(cstr* s) {
727 uint32_t orig = s->length;
728 if (orig == 0) return true;
729
730 /* Count how many underscores we'll need to insert. */
731 const char* d = s->data;
732 uint32_t extra = 0;
733 for (uint32_t i = 1; i < orig; i++) {
734 if ((unsigned)((unsigned char)d[i] - 'A') <= 25u) extra++;
735 }
736 if (extra == 0) {
737 cstr_lower(s);
738 return true;
739 }
740
741 uint32_t new_len = orig + extra;
742 if (CSTR_UNLIKELY(!cstr_ensure_cap(s, new_len + 1))) return false;
743
744 /* Right-to-left expansion (avoids second pass). */
745 d = s->data; /* pointer may have changed after ensure_cap */
746 char* w = s->data + new_len;
747 *w-- = '\0';
748
749 for (uint32_t i = orig; i > 0;) {
750 i--;
751 unsigned char c = (unsigned char)d[i];
752 if (i > 0 && (unsigned)(c - 'A') <= 25u) {
753 *w-- = (char)(c | 0x20u);
754 *w-- = '_';
755 } else {
756 *w-- = (char)((unsigned)(c - 'A') <= 25u ? (c | 0x20u) : c);
757 }
758 }
759 s->length = new_len;
760 return true;
761}
762
763void cstr_camelcase(cstr* s) {
764 uint32_t len = s->length;
765 if (len == 0) return;
766 char* d = s->data;
767 uint32_t r = 0, w = 0;
768
769 /* Skip leading separators; first real char → lower. */
770 while (r < len && (d[r] == '_' || isspace((unsigned char)d[r]))) r++;
771 if (r < len) {
772 unsigned char c = (unsigned char)d[r++];
773 d[w++] = (char)((unsigned)(c - 'A') <= 25u ? (c | 0x20u) : c);
774 }
775
776 bool cap = false;
777 while (r < len) {
778 unsigned char c = (unsigned char)d[r++];
779 if (c == '_' || isspace(c)) {
780 cap = true;
781 continue;
782 }
783 if (cap) {
784 d[w++] = (char)toupper(c);
785 cap = false;
786 } else {
787 d[w++] = (char)tolower(c);
788 }
789 }
790 d[w] = '\0';
791 s->length = w;
792}
793
794void cstr_pascalcase(cstr* s) {
795 uint32_t len = s->length;
796 if (len == 0) return;
797 char* d = s->data;
798 uint32_t r = 0, w = 0;
799
800 while (r < len && (d[r] == '_' || isspace((unsigned char)d[r]))) r++;
801
802 bool new_word = true;
803 while (r < len) {
804 unsigned char c = (unsigned char)d[r++];
805 if (c == '_' || isspace(c)) {
806 new_word = true;
807 continue;
808 }
809 d[w++] = new_word ? (char)toupper(c) : (char)tolower(c);
810 new_word = false;
811 }
812 d[w] = '\0';
813 s->length = w;
814}
815
816void cstr_titlecase(cstr* s) {
817 uint32_t len = s->length;
818 char* d = s->data;
819 bool cap = true;
820 for (uint32_t i = 0; i < len; i++) {
821 unsigned char c = (unsigned char)d[i];
822 if (isspace(c)) {
823 cap = true;
824 } else if (cap) {
825 d[i] = (char)toupper(c);
826 cap = false;
827 } else {
828 d[i] = (char)tolower(c);
829 }
830 }
831}
832
833/* -------------------------------------------------------------------------
834 * Trim
835 * ---------------------------------------------------------------------- */
836
837void cstr_trim(cstr* s) {
838 uint32_t len = s->length;
839 if (len == 0) return;
840 char* d = s->data;
841
842 uint32_t start = 0, end = len - 1;
843 while (start < len && isspace((unsigned char)d[start])) start++;
844 while (end > start && isspace((unsigned char)d[end])) end--;
845
846 uint32_t new_len = (start > end) ? 0 : (end - start + 1);
847 if (new_len && start) memmove(d, d + start, new_len);
848 d[new_len] = '\0';
849 s->length = new_len;
850}
851
852void cstr_rtrim(cstr* s) {
853 uint32_t len = s->length;
854 if (len == 0) return;
855 char* d = s->data;
856 uint32_t e = len;
857 while (e > 0 && isspace((unsigned char)d[e - 1])) e--;
858 d[e] = '\0';
859 s->length = e;
860}
861
862void cstr_ltrim(cstr* s) {
863 uint32_t len = s->length;
864 if (len == 0) return;
865 char* d = s->data;
866 uint32_t start = 0;
867 while (start < len && isspace((unsigned char)d[start])) start++;
868 if (start == 0) return;
869 uint32_t new_len = len - start;
870 memmove(d, d + start, new_len + 1);
871 s->length = new_len;
872}
873
874void cstr_trim_chars(cstr* s, const char* chars) {
875 uint32_t len = s->length;
876 if (len == 0 || *chars == '\0') return;
877 char* d = s->data;
878
879 uint32_t start = 0;
880 while (start < len && strchr(chars, d[start])) start++;
881 if (start == len) {
882 s->length = 0;
883 d[0] = '\0';
884 return;
885 }
886
887 uint32_t end = len - 1;
888 while (end > start && strchr(chars, d[end])) end--;
889
890 uint32_t new_len = end - start + 1;
891 if (start) memmove(d, d + start, new_len);
892 d[new_len] = '\0';
893 s->length = new_len;
894}
895
896/* -------------------------------------------------------------------------
897 * Substrings
898 * ---------------------------------------------------------------------- */
899
900cstr* cstr_substr(const cstr* s, size_t start, size_t length) {
901 uint32_t slen = s->length;
902 if (CSTR_UNLIKELY(start > slen)) return NULL;
903 uint32_t avail = slen - (uint32_t)start;
904 uint32_t copy = (length > avail) ? avail : (uint32_t)length;
905 return cstr_new_len(s->data + start, copy);
906}
907
908/* -------------------------------------------------------------------------
909 * Replace (first occurrence) — builds result in one allocation
910 * ---------------------------------------------------------------------- */
911
912cstr* cstr_replace(const cstr* s, const char* old_str, const char* new_str) {
913 size_t old_len = strlen(old_str);
914 if (old_len == 0) return cstr_new_len(s->data, s->length);
915
916 const char* found = cstr_search(s->data, s->length, old_str, old_len);
917 if (!found) return cstr_new_len(s->data, s->length);
918
919 size_t new_len = strlen(new_str);
920 size_t prefix_len = (size_t)(found - s->data);
921 size_t suffix_len = s->length - prefix_len - old_len;
922 size_t result_len = prefix_len + new_len + suffix_len;
923
924 cstr* r = cstr_init(result_len);
925 if (CSTR_UNLIKELY(!r)) return NULL;
926
927 char* d = r->data;
928 memcpy(d, s->data, prefix_len);
929 memcpy(d + prefix_len, new_str, new_len);
930 memcpy(d + prefix_len + new_len, found + old_len, suffix_len);
931 d[result_len] = '\0';
932 r->length = (uint32_t)result_len;
933 return r;
934}
935
936/* -------------------------------------------------------------------------
937 * Replace all — stack-allocated offset table (heap fallback for > 64 hits)
938 * ---------------------------------------------------------------------- */
939
940#define RA_STACK_CAP 64
941
942cstr* cstr_replace_all(const cstr* s, const char* old_sub, const char* new_sub) {
943 size_t old_len = strlen(old_sub);
944 if (old_len == 0) return cstr_new_len(s->data, s->length);
945
946 size_t new_len = strlen(new_sub);
947 const char* hs = s->data;
948 size_t hlen = s->length;
949
950 /* Collect match offsets. */
951 size_t stack_offs[RA_STACK_CAP];
952 size_t* offs = stack_offs;
953 size_t offs_cap = RA_STACK_CAP;
954 size_t count = 0;
955
956 const char* p = hs;
957 size_t rem = hlen;
958
959 while ((p = cstr_search(p, rem, old_sub, old_len)) != NULL) {
960 if (CSTR_UNLIKELY(count >= offs_cap)) {
961 /* FIX: Guard against integer overflow before multiplying. */
962 if (CSTR_UNLIKELY(offs_cap > SIZE_MAX / 2 / sizeof(size_t))) goto oom;
963
964 size_t new_cap = offs_cap * 2;
965 size_t* no;
966 if (offs == stack_offs) {
967 no = (size_t*)malloc(new_cap * sizeof(size_t));
968 if (CSTR_UNLIKELY(!no)) goto oom;
969 memcpy(no, stack_offs, count * sizeof(size_t));
970 } else {
971 no = (size_t*)realloc(offs, new_cap * sizeof(size_t));
972 if (CSTR_UNLIKELY(!no)) goto oom;
973 }
974 offs = no;
975 offs_cap = new_cap;
976 }
977 offs[count++] = (size_t)(p - hs);
978 p += old_len;
979 rem = hlen - (size_t)(p - hs);
980 }
981
982 if (count == 0) {
983 if (offs != stack_offs) free(offs);
984 return cstr_new_len(hs, hlen);
985 }
986
987 /* Compute exact output length. */
988 size_t result_len;
989 if (new_len >= old_len)
990 result_len = hlen + count * (new_len - old_len);
991 else
992 result_len = hlen - count * (old_len - new_len);
993
994 {
995 cstr* r = cstr_init(result_len);
996 if (CSTR_UNLIKELY(!r)) goto oom;
997
998 char* dst = r->data;
999 size_t write_pos = 0;
1000 size_t src_pos = 0;
1001
1002 for (size_t i = 0; i < count; i++) {
1003 size_t gap = offs[i] - src_pos;
1004 if (gap) {
1005 memcpy(dst + write_pos, hs + src_pos, gap);
1006 write_pos += gap;
1007 }
1008 if (new_len) {
1009 memcpy(dst + write_pos, new_sub, new_len);
1010 write_pos += new_len;
1011 }
1012 src_pos = offs[i] + old_len;
1013 }
1014 size_t tail = hlen - src_pos;
1015 if (tail) {
1016 memcpy(dst + write_pos, hs + src_pos, tail);
1017 write_pos += tail;
1018 }
1019
1020 dst[write_pos] = '\0';
1021 r->length = (uint32_t)write_pos;
1022
1023 if (offs != stack_offs) free(offs);
1024 return r;
1025 }
1026
1027oom:
1028 if (offs != stack_offs) free(offs);
1029 return NULL;
1030}
1031
1032#undef RA_STACK_CAP
1033
1034/* -------------------------------------------------------------------------
1035 * Split & join
1036 * ---------------------------------------------------------------------- */
1037
1038cstr** cstr_split(const cstr* s, const char* delim, size_t* count_out) {
1039 *count_out = 0;
1040 if (!delim || !*delim) {
1041 cstr** r = (cstr**)malloc(sizeof(cstr*));
1042 if (!r) return NULL;
1043 r[0] = cstr_new_len(s->data, s->length);
1044 if (!r[0]) {
1045 free(r);
1046 return NULL;
1047 }
1048 *count_out = 1;
1049 return r;
1050 }
1051
1052 size_t dlen = strlen(delim);
1053 size_t cap = 8;
1054 cstr** result = (cstr**)malloc(cap * sizeof(cstr*));
1055 if (!result) return NULL;
1056
1057 const char* start = s->data;
1058 const char* end = s->data + s->length;
1059 size_t count = 0;
1060
1061 while (1) {
1062 const char* found = cstr_search(start, (size_t)(end - start), delim, dlen);
1063 const char* tok_end = found ? found : end;
1064
1065 if (CSTR_UNLIKELY(count >= cap)) {
1066 cap *= 2;
1067 cstr** tmp = (cstr**)realloc(result, cap * sizeof(cstr*));
1068 if (!tmp) goto split_err;
1069 result = tmp;
1070 }
1071
1072 result[count] = cstr_new_len(start, (size_t)(tok_end - start));
1073 if (!result[count]) goto split_err;
1074 count++;
1075
1076 if (!found) break;
1077 start = found + dlen;
1078 }
1079
1080 *count_out = count;
1081 return result;
1082
1083split_err:
1084 for (size_t i = 0; i < count; i++) cstr_free(result[i]);
1085 free(result);
1086 return NULL;
1087}
1088
1089cstr* cstr_join(const cstr** strings, size_t count, const char* delim) {
1090 if (!strings || count == 0) return cstr_new_len("", 0);
1091
1092 size_t dlen = delim ? strlen(delim) : 0;
1093 size_t total = 0;
1094 for (size_t i = 0; i < count; i++) {
1095 if (CSTR_UNLIKELY(!strings[i])) return NULL;
1096 total += strings[i]->length;
1097 if (i + 1 < count) total += dlen;
1098 }
1099
1100 cstr* r = cstr_init(total);
1101 if (!r) return NULL;
1102
1103 char* d = r->data;
1104 size_t pos = 0;
1105 for (size_t i = 0; i < count; i++) {
1106 uint32_t len = strings[i]->length;
1107 if (len) {
1108 memcpy(d + pos, strings[i]->data, len);
1109 pos += len;
1110 }
1111 if (dlen && i + 1 < count) {
1112 memcpy(d + pos, delim, dlen);
1113 pos += dlen;
1114 }
1115 }
1116 d[pos] = '\0';
1117 r->length = (uint32_t)pos;
1118 return r;
1119}
1120
1121/* -------------------------------------------------------------------------
1122 * Reverse
1123 * ---------------------------------------------------------------------- */
1124
1125cstr* cstr_reverse(const cstr* s) {
1126 uint32_t len = s->length;
1127 cstr* r = cstr_init(len);
1128 if (!r) return NULL;
1129 char* dst = r->data;
1130 const char* src = s->data;
1131 for (uint32_t i = 0; i < len; i++) dst[i] = src[len - 1 - i];
1132 dst[len] = '\0';
1133 r->length = len;
1134 return r;
1135}
1136
1137void cstr_reverse_inplace(cstr* s) {
1138 uint32_t len = s->length;
1139 if (len < 2) return;
1140 char* d = s->data;
1141 for (uint32_t i = 0, j = len - 1; i < j; i++, j--) {
1142 char t = d[i];
1143 d[i] = d[j];
1144 d[j] = t;
1145 }
1146}
void cstr_remove_char(cstr *s, char c)
Remove a specific character from every position.
Definition cstr.c:473
cstr * cstr_join(const cstr **strings, size_t count, const char *delim)
Join an array of cstr pointers with a delimiter.
Definition cstr.c:1089
void cstr_debug(const cstr *s)
Print debug information about a cstr to stderr.
Definition cstr.c:188
bool cstr_remove(cstr *s, size_t index, size_t count)
Remove count characters starting at index.
Definition cstr.c:344
cstr * cstr_new(const char *input)
Create a new cstr from a C string.
Definition cstr.c:146
cstr * cstr_new_len(const char *data, size_t length)
Create a new cstr from a buffer of known length (no strlen needed).
Definition cstr.c:151
bool cstr_append_char(cstr *s, char c)
Append a single character.
Definition cstr.c:264
bool cstr_reserve(cstr *s, size_t capacity)
Ensure at least capacity usable bytes are available (NUL extra).
Definition cstr.c:205
void cstr_shrink_to_fit(cstr *s)
Shrink heap allocation to fit the current length (frees wasted memory).
Definition cstr.c:207
void cstr_remove_substr(cstr *s, size_t start, size_t slen)
Remove a run of slen bytes starting at start.
Definition cstr.c:485
void cstr_free(cstr *s)
Free a heap-allocated cstr and its storage. Safe to call with NULL.
Definition cstr.c:179
int cstr_cmp(const cstr *s1, const cstr *s2)
Lexicographic compare. NULL < non-NULL; two NULLs are equal.
Definition cstr.c:620
void cstr_drop(cstr *s)
Release only the internal heap buffer of an embedded cstr (one created via cstr_init_inplace)....
Definition cstr.c:170
cstr * cstr_init(size_t initial_capacity)
Create a new heap-allocated cstr with a given initial capacity.
Definition cstr.c:127
High-performance C string with Small String Optimization (SSO).
bool cstr_append_cstr(cstr *s, const cstr *append) CSTR_NONNULL(1
Append another cstr.
CSTR_INLINE bool cstr_is_heap(const cstr *s) CSTR_PURE
Definition cstr.h:139
CSTR_INLINE uint32_t cstr_heap_cap(const cstr *s) CSTR_PURE
Definition cstr.h:143
#define CSTR_NPOS
Definition cstr.h:86
size_t cstr_count_substr(const cstr *s, const char *substr) CSTR_NONNULL(1
size_t cstr_remove_all(cstr *s, const char *substr) CSTR_NONNULL(1
Remove all occurrences of substr (in-place, single pass).
#define CSTR_MAX_LEN
Definition cstr.h:77
bool cstr_prepend(cstr *s, const char *prepend) CSTR_NONNULL(1
Prepend a NUL-terminated C string.
#define CSTR_SSO_CAP
Definition cstr.h:80
bool cstr_insert(cstr *s, size_t index, const char *insert) CSTR_NONNULL(1
Insert a C string at byte offset index.
bool cstr_append(cstr *s, const char *CSTR_RESTRICT append) CSTR_NONNULL(1
Append a NUL-terminated C string.
bool cstr_insert_cstr(cstr *s, size_t index, const cstr *insert) CSTR_NONNULL(1
Insert a cstr at byte offset index.
int cstr_rfind(const cstr *s, const char *substr) CSTR_NONNULL(1
CSTR_INLINE void cstr_init_inplace(cstr *s)
Initialize an already-allocated cstr in SSO mode (no heap).
Definition cstr.h:225
bool cstr_prepend_fast(cstr *s, const char *prepend) CSTR_NONNULL(1
Prepend without capacity check — caller guarantees space.
bool cstr_prepend_cstr(cstr *s, const cstr *prepend) CSTR_NONNULL(1
Prepend another cstr.
cstr ** cstr_split(const cstr *s, const char *delim, size_t *count_out) CSTR_NONNULL(1
Split on delimiter. Returns array of cstr* (each must be freed), terminated by setting *count_out....
int cstr_find(const cstr *s, const char *substr) CSTR_NONNULL(1
Find first occurrence of substr. Uses optimised search (no memmem).
bool cstr_ncat(cstr *dest, const cstr *src, size_t n) CSTR_NONNULL(1
Append at most n chars from src.
#define CSTR_HEAP_FLAG
Definition cstr.h:83
bool cstr_copy(cstr *dest, const cstr *src) CSTR_NONNULL(1
Deep copy src into dest (dest is overwritten).
A dynamically resizable C string with SSO.
Definition cstr.h:104
uint32_t length
Definition cstr.h:106
char * data
Definition cstr.h:105
uint32_t capacity
Definition cstr.h:107
char buf[CSTR_SSO_CAP]
Definition cstr.h:108