|
solidc
Robust collection of general-purpose cross-platform C libraries and data structures designed for rapid and safe development in C
|
A robust, idiomatic C API wrapping the PCRE2 library. More...
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <pcre2.h>
Go to the source code of this file.
Classes | |
| struct | regex_span_t |
| struct | regex_match_t |
Macros | |
| #define | REGEX_MAX_GROUPS 64 |
| #define | REGEX_FLAG_CASELESS (PCRE2_CASELESS) |
| #define | REGEX_FLAG_MULTILINE (PCRE2_MULTILINE) |
| #define | REGEX_FLAG_DOTALL (PCRE2_DOTALL) |
| #define | REGEX_FLAG_EXTENDED (PCRE2_EXTENDED) |
| #define | REGEX_FLAG_UTF (PCRE2_UTF) |
| #define | REGEX_FLAG_UCP (PCRE2_UCP) |
| #define | REGEX_FLAG_UNGREEDY (PCRE2_UNGREEDY) |
| #define | REGEX_FLAG_ANCHORED (PCRE2_ANCHORED) |
Typedefs | |
| typedef uint32_t | regex_flags_t |
| typedef struct regex_s | regex_t |
| typedef struct regex_ctx_s | regex_ctx_t |
| typedef struct regex_iter_s | regex_iter_t |
Enumerations | |
| enum | regex_status_t { REGEX_OK = 0 , REGEX_NO_MATCH = 1 , REGEX_ERROR = -1 , REGEX_ERROR_NOMEM = -2 , REGEX_ERROR_ARGS = -3 , REGEX_ERROR_LIMIT = -4 } |
Functions | |
| regex_status_t | regex_compile (const char *pattern, regex_flags_t flags, regex_t **out, char *errbuf, size_t errbuf_len) |
| regex_t * | regex_retain (regex_t *re) |
| void | regex_free (regex_t *re) |
| regex_status_t | regex_ctx_create (regex_ctx_t **out) |
| void | regex_ctx_free (regex_ctx_t *ctx) |
| regex_status_t | regex_exec (const regex_t *re, regex_ctx_t *ctx, const char *subject, size_t len, size_t offset, regex_match_t *match) |
| regex_status_t | regex_match (const regex_t *re, regex_ctx_t *ctx, const char *subject, regex_match_t *match) |
| bool | regex_is_match (const regex_t *re, regex_ctx_t *ctx, const char *subject, size_t len) |
| regex_status_t | regex_iter_init (regex_t *re, regex_ctx_t *ctx, const char *subject, size_t len, regex_iter_t **out) |
| regex_status_t | regex_iter_next (regex_iter_t *iter, regex_match_t *match) |
| void | regex_iter_free (regex_iter_t *iter) |
| regex_status_t | regex_sub (const regex_t *re, regex_ctx_t *ctx, const char *subject, size_t subject_len, const char *replacement, char *out_buf, size_t *out_len) |
| regex_status_t | regex_gsub (const regex_t *re, regex_ctx_t *ctx, const char *subject, size_t subject_len, const char *replacement, char *out_buf, size_t *out_len) |
| uint32_t | regex_group_count (const regex_t *re) |
| const char * | regex_pattern (const regex_t *re) |
| void | regex_strerror (regex_status_t status, char *buf, size_t buf_len) |
A robust, idiomatic C API wrapping the PCRE2 library.
Design principles:
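Thread safety: a compiled regex_t is safe for concurrent use by multiple threads; a regex_ctx_t or regex_iter_t must not be shared across threads.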
Definition in file regex.h.
| #define REGEX_FLAG_ANCHORED (PCRE2_ANCHORED) |
| #define REGEX_FLAG_CASELESS (PCRE2_CASELESS) |
| #define REGEX_FLAG_DOTALL (PCRE2_DOTALL) |
| #define REGEX_FLAG_EXTENDED (PCRE2_EXTENDED) |
| #define REGEX_FLAG_MULTILINE (PCRE2_MULTILINE) |
| #define REGEX_FLAG_UCP (PCRE2_UCP) |
| #define REGEX_FLAG_UNGREEDY (PCRE2_UNGREEDY) |
| #define REGEX_FLAG_UTF (PCRE2_UTF) |
| #define REGEX_MAX_GROUPS 64 |
Maximum number of capture groups (including group 0) supported by this API. PCRE2 itself supports up to 65535; we cap at a practical value to allow stack allocation of regex_match_t.
| typedef struct regex_ctx_s regex_ctx_t |
A per-thread execution context holding PCRE2 match data.
Avoids repeated allocation of the PCRE2 match-data block on the hot path. Create one per thread with regex_ctx_create(); destroy with regex_ctx_free(). Must NOT be shared across threads.
| typedef uint32_t regex_flags_t |
| typedef struct regex_iter_s regex_iter_t |
Iterator for finding all non-overlapping matches of a pattern.
Obtain via regex_iter_init(); advance with regex_iter_next(); release with regex_iter_free(). Must not be shared across threads.
| typedef struct regex_s regex_t |
An opaque, ref-counted compiled regular expression.
Obtain via regex_compile(); release via regex_free(). Safe for concurrent use by multiple threads once compiled.
| enum regex_status_t |
Result codes returned by all regex_* functions.
| regex_status_t regex_compile | ( | const char * | pattern, |
| regex_flags_t | flags, | ||
| regex_t ** | out, | ||
| char * | errbuf, | ||
| size_t | errbuf_len | ||
| ) |
Compiles a NUL-terminated pattern string into a reusable regex_t.
| pattern | NUL-terminated UTF-8 pattern string. Must not be NULL. |
| flags | Bitwise OR of REGEX_FLAG_* constants, or 0 for no flags (REGEX_FLAG_NONE is not among the macros documented for this header). |
| out | On success, written with a pointer to the new regex_t. On failure, written with NULL. Must not be NULL. |
| errbuf | Optional buffer to receive a human-readable error message. May be NULL if not needed. |
| errbuf_len | Capacity of errbuf in bytes. Ignored if errbuf is NULL. |
Definition at line 93 of file regex.c.
References regex_compile(), REGEX_ERROR, REGEX_ERROR_ARGS, REGEX_ERROR_LIMIT, REGEX_ERROR_NOMEM, REGEX_MAX_GROUPS, and REGEX_OK.
Referenced by regex_compile().
| regex_status_t regex_ctx_create | ( | regex_ctx_t ** | out | ) |
Creates a per-thread execution context for use with regex_exec.
Allocating a context pre-allocates the PCRE2 match data block, avoiding per-call heap traffic on the hot path. A single context may be reused across calls to regex_exec with different patterns, provided each pattern's capture group count (including group 0) does not exceed REGEX_MAX_GROUPS.
| out | Written with a pointer to the new context on success. Must not be NULL. |
Definition at line 183 of file regex.c.
References regex_ctx_create(), REGEX_ERROR_ARGS, REGEX_ERROR_NOMEM, REGEX_MAX_GROUPS, and REGEX_OK.
Referenced by regex_ctx_create().
| void regex_ctx_free | ( | regex_ctx_t * | ctx | ) |
Destroys an execution context created by regex_ctx_create.
| ctx | Context to destroy, or NULL (no-op). |
Definition at line 208 of file regex.c.
References regex_ctx_free().
Referenced by regex_ctx_free().
| regex_status_t regex_exec | ( | const regex_t * | re, |
| regex_ctx_t * | ctx, | ||
| const char * | subject, | ||
| size_t | len, | ||
| size_t | offset, | ||
| regex_match_t * | match | ||
| ) |
Executes the compiled pattern against a byte subject at a given offset.
| re | Compiled pattern. Must not be NULL. |
| ctx | Per-thread context. Must not be NULL. |
| subject | Subject byte string. Need not be NUL-terminated. |
| len | Byte length of the subject. |
| offset | Byte offset within subject at which to start matching. |
| match | Receives match spans on success. Must not be NULL. |
Definition at line 220 of file regex.c.
References REGEX_ERROR, REGEX_ERROR_ARGS, regex_exec(), REGEX_NO_MATCH, and REGEX_OK.
Referenced by regex_exec(), and regex_match().
| void regex_free | ( | regex_t * | re | ) |
Decrements the reference count and frees the regex if it reaches zero.
| re | Pointer to a compiled regex_t, or NULL (no-op). |
Definition at line 166 of file regex.c.
References regex_free().
Referenced by regex_free(), and regex_iter_free().
| uint32_t regex_group_count | ( | const regex_t * | re | ) |
Returns the number of capturing groups in the compiled pattern, excluding group 0 (the whole match).
| re | Compiled pattern. Must not be NULL. |
Definition at line 380 of file regex.c.
References regex_group_count().
Referenced by regex_group_count().
| regex_status_t regex_gsub | ( | const regex_t * | re, |
| regex_ctx_t * | ctx, | ||
| const char * | subject, | ||
| size_t | subject_len, | ||
| const char * | replacement, | ||
| char * | out_buf, | ||
| size_t * | out_len | ||
| ) |
Replaces all non-overlapping matches of re in subject with replacement.
Semantics identical to regex_sub except that every match is substituted.
| re | Compiled pattern. Must not be NULL. |
| ctx | Per-thread context. Must not be NULL. |
| subject | Subject string (need not be NUL-terminated). |
| subject_len | Byte length of the subject. |
| replacement | NUL-terminated replacement string. Must not be NULL. |
| out_buf | Buffer to receive the result. Must not be NULL. |
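| out_len | In: capacity of out_buf in bytes. Out: bytes written (excluding the NUL terminator), or, as for regex_sub, the required size (including NUL) when the buffer is too small. |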
Definition at line 371 of file regex.c.
References regex_gsub().
Referenced by regex_gsub().
| bool regex_is_match | ( | const regex_t * | re, |
| regex_ctx_t * | ctx, | ||
| const char * | subject, | ||
| size_t | len | ||
| ) |
Returns true if the pattern matches anywhere within the subject string.
Convenience predicate; does not surface match spans.
| re | Compiled pattern. Must not be NULL. |
| ctx | Per-thread context. Must not be NULL. |
| subject | NUL-terminated subject string. Must not be NULL. |
| len | Byte length of the subject. |
Definition at line 250 of file regex.c.
References regex_is_match().
Referenced by regex_is_match().
| void regex_iter_free | ( | regex_iter_t * | iter | ) |
Frees the iterator and releases its reference to the compiled pattern.
| iter | Iterator to free, or NULL (no-op). |
Definition at line 321 of file regex.c.
References regex_free(), and regex_iter_free().
Referenced by regex_iter_free().
| regex_status_t regex_iter_init | ( | regex_t * | re, |
| regex_ctx_t * | ctx, | ||
| const char * | subject, | ||
| size_t | len, | ||
| regex_iter_t ** | out | ||
| ) |
Initialises an iterator that yields successive non-overlapping matches.
The iterator holds a reference to re (via regex_retain) and a pointer to subject; the caller must ensure subject remains valid for the iterator's lifetime.
| re | Compiled pattern. Must not be NULL. |
| ctx | Per-thread context. Must not be NULL. |
| subject | Subject byte string (need not be NUL-terminated). |
| len | Byte length of the subject. |
| out | Written with the new iterator on success. Must not be NULL. |
Definition at line 264 of file regex.c.
References REGEX_ERROR_ARGS, REGEX_ERROR_NOMEM, regex_iter_init(), REGEX_OK, and regex_retain().
Referenced by regex_iter_init().
| regex_status_t regex_iter_next | ( | regex_iter_t * | iter, |
| regex_match_t * | match | ||
| ) |
Advances the iterator and writes the next match into match.
| iter | Iterator obtained from regex_iter_init. Must not be NULL. |
| match | Receives the next match. Must not be NULL. |
Definition at line 287 of file regex.c.
References regex_span_t::end, regex_match_t::group, REGEX_ERROR, REGEX_ERROR_ARGS, regex_iter_next(), REGEX_NO_MATCH, and REGEX_OK.
Referenced by regex_iter_next().
| regex_status_t regex_match | ( | const regex_t * | re, |
| regex_ctx_t * | ctx, | ||
| const char * | subject, | ||
| regex_match_t * | match | ||
| ) |
Convenience wrapper: match a NUL-terminated string from its beginning.
Equivalent to regex_exec(re, ctx, subject, strlen(subject), 0, match).
| re | Compiled pattern. Must not be NULL. |
| ctx | Per-thread context. Must not be NULL. |
| subject | NUL-terminated subject string. Must not be NULL. |
| match | Receives match spans on success. Must not be NULL. |
Definition at line 243 of file regex.c.
References REGEX_ERROR_ARGS, regex_exec(), and regex_match().
Referenced by regex_match().
| const char * regex_pattern | ( | const regex_t * | re | ) |
Returns the original pattern string used to compile the regex.
| re | Compiled pattern. Must not be NULL. |
Definition at line 387 of file regex.c.
References regex_pattern().
Referenced by regex_pattern().
| regex_t * regex_retain | ( | regex_t * | re | ) |
Increments the reference count of a compiled regex.
| re | A non-NULL regex_t pointer previously obtained from regex_compile. |
Definition at line 159 of file regex.c.
References regex_retain().
Referenced by regex_iter_init(), and regex_retain().
| void regex_strerror | ( | regex_status_t | status, |
| char * | buf, | ||
| size_t | buf_len | ||
| ) |
Writes a human-readable description of a status code into buf.
| status | A regex_status_t value. |
| buf | Destination buffer. Must not be NULL. |
| buf_len | Capacity of buf in bytes. |
Definition at line 394 of file regex.c.
References REGEX_ERROR, REGEX_ERROR_ARGS, REGEX_ERROR_LIMIT, REGEX_ERROR_NOMEM, REGEX_NO_MATCH, REGEX_OK, and regex_strerror().
Referenced by regex_strerror().
| regex_status_t regex_sub | ( | const regex_t * | re, |
| regex_ctx_t * | ctx, | ||
| const char * | subject, | ||
| size_t | subject_len, | ||
| const char * | replacement, | ||
| char * | out_buf, | ||
| size_t * | out_len | ||
| ) |
Replaces the first match of re in subject with replacement.
Replacement may contain $0..$9 or ${name} back-references. The result is written into out_buf; out_len is updated with the used length (excluding the NUL terminator). If the buffer is too small the function returns REGEX_ERROR and writes the required size (including NUL) into out_len.
| re | Compiled pattern. Must not be NULL. |
| ctx | Per-thread context. Must not be NULL. |
| subject | Subject string (need not be NUL-terminated). |
| subject_len | Byte length of the subject. |
| replacement | NUL-terminated replacement string. Must not be NULL. |
| out_buf | Buffer to receive the result. Must not be NULL. |
| out_len | In: capacity of out_buf in bytes. Out: bytes written (excluding the NUL terminator), or the required size (including NUL) when the buffer is too small. |
Definition at line 366 of file regex.c.
References regex_sub().
Referenced by regex_sub().