solidc
Robust collection of general-purpose cross-platform C libraries and data structures designed for rapid and safe development in C
Loading...
Searching...
No Matches
str_slice.h
1#pragma once
2
3#if defined(__cplusplus)
4extern "C" {
5#endif
6
#include <limits.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
13
14// ─── Types ────────────────────────────────────────────────────────────────────
15
/*
 * StrSlice: a borrowed, read-only window onto bytes owned by someone else.
 *
 * The view is not guaranteed to be NUL-terminated and never frees `data`.
 * The backing buffer must stay alive for as long as any slice refers to it.
 */
typedef struct {
    const char* data;  // first byte of the view (points into an external buffer)
    size_t len;        // length of the view in bytes
} StrSlice;
23
// Status codes reported by slice operations; errno is never used for these.
typedef enum {
    SS_OK = 0,         // success
    SS_NULL = 1,       // null data pointer
    SS_BOUNDS = 2,     // indices fall outside the slice
    SS_NOT_FOUND = 3,  // substring not found
    SS_OVERFLOW = 4,   // value does not fit the target type
    SS_INVALID = 5,    // malformed input (e.g. "1.2.3", bare "e", "maybe")
} StrSliceErr;
33
34// ─── Construction ─────────────────────────────────────────────────────────────
35
36// Wrap a pointer + explicit length. Does NOT check for null terminator.
37static inline StrSlice ss_from(const char* data, size_t len) { return (StrSlice){.data = data, .len = len}; }
38
39// Wrap a null-terminated C string (measures with strlen at call time).
40static inline StrSlice ss_from_cstr(const char* cstr) {
41 if (!cstr) return (StrSlice){0};
42 return (StrSlice){.data = cstr, .len = strlen(cstr)};
43}
44
// Wrap a string literal without calling strlen: the length comes from sizeof.
// The argument MUST be a string literal. Pasting it between two empty literals
// turns any other argument (e.g. a `char*`, whose sizeof would silently yield
// the pointer size minus one) into a compile error instead of a wrong length.
// Usage: StrSlice s = SS_LIT("hello");
#define SS_LIT(literal) ((StrSlice){.data = ("" literal ""), .len = sizeof(literal) - 1})
48
49// Empty slice (len == 0, data may be NULL).
50static inline StrSlice ss_empty(void) { return (StrSlice){0}; }
51
52// Print a slice to stdout (for debugging).
53static inline void ss_print(StrSlice s) {
54 if (s.data) printf("%.*s", (int)s.len, s.data);
55}
56
57static inline void ss_println(StrSlice s) {
58 ss_print(s);
59 printf("\n");
60}
61
62// ─── Validity ─────────────────────────────────────────────────────────────────
63
64// A slice is valid if it has a non-null data pointer or zero length (empty view).
65static inline bool ss_is_valid(StrSlice s) {
66 // A zero-length slice with a non-null pointer is valid (empty view).
67 // A non-zero length with a null pointer is always invalid.
68 return s.len == 0 || s.data != NULL;
69}
70
71// A slice is empty if its length is zero, regardless of the data pointer.
72static inline bool ss_is_empty(StrSlice s) { return s.len == 0; }
73
74// Convert to a NUL-terminated owned string. Caller must free() the result.
75// Returns NULL if the slice is invalid (e.g. non-null pointer with positive length).
76static inline char* ss_to_owned_cstr(StrSlice s) {
77 if (!ss_is_valid(s)) return NULL;
78 return strndup(s.data, s.len);
79}
80
81// ─── Sub-slicing ──────────────────────────────────────────────────────────────
82
83// Returns a sub-slice [start, start+len).
84// Sets *err on bounds violation; returns ss_empty() on error.
85static inline StrSlice ss_slice(StrSlice s, size_t start, size_t len, StrSliceErr* err) {
86 if (!ss_is_valid(s)) {
87 if (err) *err = SS_NULL;
88 return ss_empty();
89 }
90 if (start + len > s.len) {
91 if (err) *err = SS_BOUNDS;
92 return ss_empty();
93 }
94 if (err) *err = SS_OK;
95 return (StrSlice){.data = s.data + start, .len = len};
96}
97
98// Chop off the first `n` bytes.
99static inline StrSlice ss_skip(StrSlice s, size_t n) {
100 if (n >= s.len) return ss_empty();
101 return (StrSlice){.data = s.data + n, .len = s.len - n};
102}
103
104// Keep only the first `n` bytes.
105static inline StrSlice ss_take(StrSlice s, size_t n) {
106 if (n > s.len) n = s.len;
107 return (StrSlice){.data = s.data, .len = n};
108}
109
110// ─── Comparison ───────────────────────────────────────────────────────────────
111
112static inline bool ss_equal(StrSlice a, StrSlice b) {
113 return a.len == b.len && (a.data == b.data || memcmp(a.data, b.data, a.len) == 0);
114}
115
116// Case-insensitive ASCII equality.
117static inline bool ss_equal_nocase(StrSlice a, StrSlice b) {
118 if (a.len != b.len) return false;
119 for (size_t i = 0; i < a.len; ++i) {
120 unsigned char ca = (unsigned char)a.data[i];
121 unsigned char cb = (unsigned char)b.data[i];
122 if ((ca | 32u) != (cb | 32u)) return false;
123 }
124 return true;
125}
126
127static inline bool ss_starts_with(StrSlice s, StrSlice prefix) {
128 return s.len >= prefix.len && memcmp(s.data, prefix.data, prefix.len) == 0;
129}
130
131static inline bool ss_ends_with(StrSlice s, StrSlice suffix) {
132 return s.len >= suffix.len && memcmp(s.data + s.len - suffix.len, suffix.data, suffix.len) == 0;
133}
134
135// ─── Search ───────────────────────────────────────────────────────────────────
136
137// Returns the byte offset of the first occurrence of `needle`, or (size_t)-1.
138static inline size_t ss_find(StrSlice haystack, StrSlice needle) {
139 if (needle.len == 0) return 0;
140 if (needle.len > haystack.len) return (size_t)-1;
141 size_t limit = haystack.len - needle.len;
142 for (size_t i = 0; i <= limit; ++i) {
143 if (memcmp(haystack.data + i, needle.data, needle.len) == 0) return i;
144 }
145 return (size_t)-1;
146}
147
148static inline bool ss_contains(StrSlice s, StrSlice needle) { return ss_find(s, needle) != (size_t)-1; }
149
150// Split at the first occurrence of `sep`.
151// On success: *head = everything before sep, *tail = everything after sep.
152// Returns SS_NOT_FOUND (and leaves *head/*tail unchanged) if sep absent.
153static inline StrSliceErr ss_split_on(StrSlice s, StrSlice sep, StrSlice* head, StrSlice* tail) {
154 size_t pos = ss_find(s, sep);
155 if (pos == (size_t)-1) return SS_NOT_FOUND;
156 *head = ss_take(s, pos);
157 *tail = ss_skip(s, pos + sep.len);
158 return SS_OK;
159}
160
161// ─── Trimming ─────────────────────────────────────────────────────────────────
162
// Whitespace predicate used by ss_trim(): space, tab, LF and CR only.
static inline bool _ss_is_space(char c) {
    switch (c) {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
            return true;
        default:
            return false;
    }
}
164
165static inline StrSlice ss_trim(StrSlice s) {
166 size_t lo = 0, hi = s.len;
167 while (lo < hi && _ss_is_space(s.data[lo])) ++lo;
168 while (hi > lo && _ss_is_space(s.data[hi - 1])) --hi;
169 return (StrSlice){.data = s.data + lo, .len = hi - lo};
170}
171
172// ─── Access ───────────────────────────────────────────────────────────────────
173
174// Safe single-byte fetch. Returns false on out-of-bounds.
175static inline bool ss_get(StrSlice s, size_t i, char* out) {
176 if (i >= s.len) return false;
177 *out = s.data[i];
178 return true;
179}
180
181/*
182 * Parses an optional sign followed by decimal digits.
183 * Stops at the first non-digit after the sign.
184 *
185 * Returns:
186 * SS_OK — *out is set to the parsed value
187 * SS_NOT_FOUND — no digits found (empty slice, sign with no digits)
188 * SS_OVERFLOW — value exceeds [INT_MIN, INT_MAX]
189 * SS_NULL — out is NULL
190 *
191 * Call ss_trim() beforehand if leading whitespace is possible.
192 */
193static inline StrSliceErr ss_to_int(StrSlice s, int* out) {
194 if (!out) return SS_NULL;
195
196 size_t i = 0;
197 bool neg = false;
198
199 if (i < s.len && s.data[i] == '-') {
200 neg = true;
201 ++i;
202 } else if (i < s.len && s.data[i] == '+') {
203 ++i;
204 }
205
206 if (i >= s.len || s.data[i] < '0' || s.data[i] > '9') return SS_NOT_FOUND;
207
208 // Accumulate into unsigned to avoid signed-overflow UB (C11 §6.5),
209 // then range-check before the final cast.
210 unsigned int acc = 0;
211 for (; i < s.len; ++i) {
212 char c = s.data[i];
213 if (c < '0' || c > '9') break;
214 unsigned int d = (unsigned int)(c - '0');
215 // Would acc*10+d wrap past UINT_MAX?
216 if (acc > (UINT_MAX - d) / 10u) return SS_OVERFLOW;
217 acc = acc * 10u + d;
218 }
219
220 if (neg) {
221 // INT_MIN = -(INT_MAX + 1); the +1u is safe in unsigned arithmetic.
222 if (acc > (unsigned int)INT_MAX + 1u) return SS_OVERFLOW;
223 *out = (acc == (unsigned int)INT_MAX + 1u) ? INT_MIN : -(int)acc;
224 } else {
225 if (acc > (unsigned int)INT_MAX) return SS_OVERFLOW;
226 *out = (int)acc;
227 }
228 return SS_OK;
229}
230
231/*
232 * Parses: [sign] digit* ['.' digit*] [('e'|'E') [sign] digit+]
233 *
234 * Accumulates the mantissa as a 64-bit integer (exact for up to 19 significant
235 * digits) then applies the combined decimal exponent in a single step, which
236 * avoids the rounding drift that builds up when multiplying by 0.1 per digit.
237 *
238 * Returns:
239 * SS_OK — *out is set
240 * SS_NOT_FOUND — no digits found
241 * SS_INVALID — exponent marker with no digits following ("1e" "1e+")
242 * SS_NULL — out is NULL
243 *
244 * Overflow/underflow of the final double maps to ±HUGE_VAL / 0.0 respectively
245 * (IEEE 754 behaviour); no SS_OVERFLOW is raised since those are valid doubles.
246 */
247static inline StrSliceErr ss_to_double(StrSlice s, double* out) {
248 if (!out) return SS_NULL;
249
250 size_t i = 0;
251 bool neg = false;
252
253 if (i < s.len && s.data[i] == '-') {
254 neg = true;
255 ++i;
256 } else if (i < s.len && s.data[i] == '+') {
257 ++i;
258 }
259
260 uint64_t mantissa = 0;
261 int dec_shift = 0; // net decimal places (positive = divide)
262 bool seen_dot = false;
263 bool has_digits = false;
264 bool saturated = false; // mantissa too wide; extra digits are dropped
265
266 for (; i < s.len; ++i) {
267 char c = s.data[i];
268 if (c >= '0' && c <= '9') {
269 has_digits = true;
270 if (!saturated) {
271 uint64_t d = (uint64_t)(c - '0');
272 if (mantissa > (UINT64_MAX - d) / 10ull) {
273 // Mantissa full. Integer digits still shift the scale;
274 // fractional digits beyond this point are simply dropped.
275 saturated = true;
276 if (!seen_dot) ++dec_shift;
277 } else {
278 mantissa = mantissa * 10ull + d;
279 if (seen_dot) --dec_shift;
280 }
281 } else if (!seen_dot) {
282 ++dec_shift; // track magnitude of overflowing integer part
283 }
284 } else if (c == '.' && !seen_dot) {
285 seen_dot = true;
286 } else {
287 break;
288 }
289 }
290
291 if (!has_digits) return SS_NOT_FOUND;
292
293 // Optional exponent.
294 int exp_shift = 0;
295 if (i < s.len && (s.data[i] == 'e' || s.data[i] == 'E')) {
296 ++i;
297 bool exp_neg = false;
298 if (i < s.len && s.data[i] == '-') {
299 exp_neg = true;
300 ++i;
301 } else if (i < s.len && s.data[i] == '+') {
302 ++i;
303 }
304
305 // Exponent marker with no digits is malformed.
306 if (i >= s.len || s.data[i] < '0' || s.data[i] > '9') return SS_INVALID;
307
308 for (; i < s.len && s.data[i] >= '0' && s.data[i] <= '9'; ++i) {
309 if (exp_shift < 100000) // cap before int overflow; range check below
310 exp_shift = exp_shift * 10 + (s.data[i] - '0');
311 }
312 if (exp_neg) exp_shift = -exp_shift;
313 }
314
315 int total_exp = dec_shift + exp_shift;
316
317 // Build result = mantissa × 10^total_exp.
318 // Powers up to ±22 are exact in IEEE 754 double; beyond that we iterate.
319 // The range of finite doubles is roughly 10^±308, so cap the loop.
320 static const double _p10[23] = {
321 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11,
322 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22,
323 };
324 double result = (double)mantissa;
325 if (total_exp != 0) {
326 int abs_exp = total_exp < 0 ? -total_exp : total_exp;
327 if (abs_exp > 308) abs_exp = 308; // clamp; IEEE will give ±inf / 0
328 double scale = (abs_exp <= 22) ? _p10[abs_exp] : ({
329 double str = 1.0;
330 for (int j = 0; j < abs_exp; ++j) str *= 10.0;
331 str;
332 });
333 result = (total_exp < 0) ? result / scale : result * scale;
334 }
335
336 *out = neg ? -result : result;
337 return SS_OK;
338}
339
340/*
341 * Recognises the common human-readable boolean vocabulary:
342 *
343 * true : "true", "yes", "on", "1"
344 * false : "false", "no", "off", "0"
345 *
346 * All string forms are matched case-insensitively.
347 * Anything else returns SS_INVALID — the caller knows the input was garbage.
348 */
349static inline StrSliceErr ss_to_bool(StrSlice s, bool* out) {
350 if (!out) return SS_NULL;
351
352 if (ss_equal_nocase(s, SS_LIT("true")) || ss_equal_nocase(s, SS_LIT("yes")) || ss_equal_nocase(s, SS_LIT("on")) ||
353 ss_equal(s, SS_LIT("1"))) {
354 *out = true;
355 return SS_OK;
356 }
357
358 if (ss_equal_nocase(s, SS_LIT("false")) || ss_equal_nocase(s, SS_LIT("no")) || ss_equal_nocase(s, SS_LIT("off")) ||
359 ss_equal(s, SS_LIT("0"))) {
360 *out = false;
361 return SS_OK;
362 }
363 return SS_INVALID;
364}
365
366#if defined(__cplusplus)
367}
368#endif
A dynamically resizable C string with SSO.
Definition cstr.h:104