Unicode handling library for C with UTF-8 encoding support. More...

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

Include dependency graph for unicode.h:

This graph shows which files directly or indirectly include this file:

Classes
struct	utf8_string
	Represents a mutable UTF-8 encoded string with metadata. More...

Macros
#define	UNICODE_VERSION 0x0100

#define	UNICODE_MAX_CODEPOINT 0x10FFFF

#define	UNICODE_MAX_UTF8_BYTES 4

Typedefs
typedef struct utf8_string	utf8_string
	Represents a mutable UTF-8 encoded string with metadata.

Functions
void	ucp_to_utf8 (uint32_t codepoint, char utf8[UTF8_MAX_LEN])
	Converts a Unicode codepoint to its UTF-8 byte sequence.

uint32_t	utf8_to_codepoint (const char *utf8)
	Decodes a UTF-8 byte sequence to its Unicode codepoint.

size_t	utf8_count_codepoints (const char *utf8)
	Counts the number of Unicode codepoints in a UTF-8 string.

size_t	utf8_valid_byte_count (const char *s)
	Counts the number of valid UTF-8 bytes in a string.

size_t	utf8_char_length (const char *str)
	Determines the byte length of a UTF-8 character from its first byte.

bool	is_valid_codepoint (uint32_t codepoint)
	Validates whether a codepoint is within the valid Unicode range.

bool	is_valid_utf8 (const char *utf8)
	Comprehensively validates a UTF-8 encoded string.

bool	is_codepoint_whitespace (uint32_t codepoint)
	Checks if a codepoint represents whitespace.

bool	is_utf8_whitespace (const char *utf8)
	Checks if a UTF-8 character represents whitespace.

bool	is_codepoint_digit (uint32_t codepoint)
	Checks if a codepoint represents a digit.

bool	is_utf8_digit (const char *utf8)
	Checks if a UTF-8 character represents a digit.

bool	is_codepoint_alpha (uint32_t codepoint)
	Checks if a codepoint represents an alphabetic character.

bool	is_utf8_alpha (const char *utf8)
	Checks if a UTF-8 character represents an alphabetic character.

bool	is_codepoint_alnum (uint32_t codepoint)
	Checks if a codepoint represents an alphanumeric character.

bool	is_utf8_alnum (const char *utf8)
	Checks if a UTF-8 character represents an alphanumeric character.

bool	is_codepoint_punct (uint32_t codepoint)
	Checks if a codepoint represents a punctuation character.

bool	is_utf8_punct (const char *utf8)
	Checks if a UTF-8 character represents a punctuation character.

utf8_string *	utf8_new (const char *data)
	Creates a new utf8_string object from a C string.

utf8_string *	utf8_new_with_capacity (size_t capacity)
	Creates an empty utf8_string with preallocated capacity.

void	utf8_free (utf8_string *s)
	Frees all resources associated with a utf8_string.

char *	utf8_copy (const char *data)
	Creates a copy of a UTF-8 string containing only valid UTF-8 sequences.

utf8_string *	utf8_clone (const utf8_string *s)
	Duplicates a utf8_string object.

const char *	utf8_data (const utf8_string *s)
	Returns a pointer to the internal UTF-8 data buffer.

void	utf8_print (const utf8_string *s)
	Prints the UTF-8 string content to stdout followed by a newline.

void	utf8_print_info (const utf8_string *s)
	Prints metadata about the UTF-8 string to stdout.

void	utf8_print_codepoints (const utf8_string *s)
	Prints the Unicode codepoints in U+XXXX format to stdout.

int	utf8_index_of (const utf8_string *s, const char *utf8)
	Finds the byte index of the first occurrence of a substring.

int	utf8_last_index_of (const utf8_string *s, const char *utf8)
	Finds the byte index of the last occurrence of a substring.

bool	utf8_starts_with (const char *str, const char *prefix)
	Checks if a string starts with a given prefix.

bool	utf8_ends_with (const char *str, const char *suffix)
	Checks if a string ends with a given suffix.

bool	utf8_contains (const char *str, const char *substr)
	Checks if a string contains a substring.

int	utf8_compare (const char *s1, const char *s2)
	Compares two UTF-8 strings lexicographically.

bool	utf8_equals (const char *s1, const char *s2)
	Compares two UTF-8 strings for equality.

bool	utf8_append (utf8_string *s, const char *data)
	Appends UTF-8 data to the end of a utf8_string.

char *	utf8_substr (const utf8_string *s, size_t index, size_t utf8_byte_len)
	Extracts a substring by byte range.

bool	utf8_insert (utf8_string *s, size_t index, const char *data)
	Inserts UTF-8 data at a specific byte index.

bool	utf8_remove (utf8_string *s, size_t index, size_t count)
	Removes a specified number of codepoints starting at a byte index.

bool	utf8_replace (utf8_string *s, const char *old_str, const char *new_str)
	Replaces the first occurrence of a substring with another string.

size_t	utf8_replace_all (utf8_string *s, const char *old_str, const char *new_str)
	Replaces all occurrences of a substring with another string.

bool	utf8_reverse (utf8_string *s)
	Reverses a UTF-8 string by codepoints.

utf8_string *	utf8_concat (const utf8_string *s1, const utf8_string *s2)
	Concatenates two utf8_string objects into a new string.

void	utf8_ltrim (char *str)
	Removes leading whitespace from a UTF-8 string in-place.

void	utf8_rtrim (char *str)
	Removes trailing whitespace from a UTF-8 string in-place.

void	utf8_trim (char *str)
	Removes leading and trailing whitespace from a UTF-8 string in-place.

void	utf8_trim_chars (char *str, const char *c)
	Removes leading and trailing characters from a UTF-8 string in-place.

void	utf8_trim_char (char *str, char c)
	Removes leading and trailing occurrences of a single character.

void	utf8_tolower (char *str)
	Converts all uppercase characters to lowercase in-place.

void	utf8_toupper (char *str)
	Converts all lowercase characters to uppercase in-place.

utf8_string **	utf8_split (const utf8_string *str, const char *delim, size_t *num_parts)
	Splits a UTF-8 string into parts using a delimiter.

void	utf8_split_free (utf8_string **str, size_t size)
	Frees an array of utf8_string objects returned by utf8_split().

void	utf8_array_remove (utf8_string **array, size_t size, size_t index)
	Removes an element from a utf8_string array and frees it.

long	utf8_writeto (const utf8_string *s, const char *filename)
	Writes a utf8_string to a file.

utf8_string *	utf8_readfrom (const char *filename)
	Reads a UTF-8 string from a file.

bool	regex_match (const char *str, const char *pattern)
	Tests if a string matches a regular expression pattern.

Detailed Description

Unicode handling library for C with UTF-8 encoding support.

This library provides comprehensive UTF-8 string manipulation capabilities including encoding/decoding, validation, searching, transformation, and character classification using Unicode standards.

UTF-8 is a variable-length encoding that uses 1-4 bytes per codepoint:

1 byte: U+0000 to U+007F (ASCII compatible)
2 bytes: U+0080 to U+07FF
3 bytes: U+0800 to U+FFFF
4 bytes: U+10000 to U+10FFFF

Note: All functions that modify strings in-place require null-terminated input. Thread safety depends on the underlying C library (particularly its locale functions).

Version: 1.0

Definition in file unicode.h.

Macro Definition Documentation

◆ UNICODE_MAX_CODEPOINT

#define UNICODE_MAX_CODEPOINT 0x10FFFF

Maximum valid Unicode codepoint (last valid: U+10FFFF).

Definition at line 35 of file unicode.h.

◆ UNICODE_MAX_UTF8_BYTES

#define UNICODE_MAX_UTF8_BYTES 4

Maximum number of bytes in a UTF-8 encoded character.

Definition at line 38 of file unicode.h.

◆ UNICODE_VERSION

#define UNICODE_VERSION 0x0100

Unicode version supported by this library.

Definition at line 32 of file unicode.h.

Typedef Documentation

◆ utf8_string

typedef struct utf8_string utf8_string

Represents a mutable UTF-8 encoded string with metadata.

This structure maintains both the UTF-8 byte data and precomputed statistics about the string for efficient operations.

Function Documentation

◆ is_codepoint_alnum()

bool is_codepoint_alnum ( uint32_t codepoint )

Checks if a codepoint represents an alphanumeric character.

Parameters

codepoint The codepoint to test.

Returns: true if alphanumeric.

Checks if a Unicode codepoint represents an alphanumeric character.

Parameters

codepoint The Unicode codepoint to test.

Returns: true if the codepoint is alphanumeric.

Definition at line 373 of file unicode.c.

References is_codepoint_alnum().

Referenced by is_codepoint_alnum(), and is_utf8_alnum().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ is_codepoint_alpha()

bool is_codepoint_alpha ( uint32_t codepoint )

Checks if a codepoint represents an alphabetic character.

Parameters

codepoint The codepoint to test.

Returns: true if alphabetic per current locale.

Checks if a Unicode codepoint represents an alphabetic character.

Parameters

codepoint The Unicode codepoint to test.

Returns: true if the codepoint is alphabetic per the current locale.

Definition at line 352 of file unicode.c.

References is_codepoint_alpha().

Referenced by is_codepoint_alpha(), and is_utf8_alpha().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ is_codepoint_digit()

bool is_codepoint_digit ( uint32_t codepoint )

Checks if a codepoint represents a digit.

Parameters

codepoint The codepoint to test.

Returns: true if digit per current locale.

Checks if a Unicode codepoint represents a digit.

Parameters

codepoint The Unicode codepoint to test.

Returns: true if the codepoint is a digit character per the current locale.

Definition at line 331 of file unicode.c.

References is_codepoint_digit().

Referenced by is_codepoint_digit(), and is_utf8_digit().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ is_codepoint_punct()

bool is_codepoint_punct ( uint32_t codepoint )

Checks if a codepoint represents a punctuation character.

Parameters

codepoint The codepoint to test.

Returns: true if punctuation per current locale.

Checks if a Unicode codepoint represents a punctuation character.

Parameters

codepoint The Unicode codepoint to test.

Returns: true if the codepoint is punctuation per the current locale.

Definition at line 394 of file unicode.c.

References is_codepoint_punct().

Referenced by is_codepoint_punct(), and is_utf8_punct().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ is_codepoint_whitespace()

bool is_codepoint_whitespace ( uint32_t codepoint )

Checks if a codepoint represents whitespace.

Parameters

codepoint The codepoint to test.

Returns: true if whitespace per current locale.

Checks if a Unicode codepoint represents whitespace.

Parameters

codepoint The Unicode codepoint to test.

Returns: true if the codepoint is a whitespace character per the current locale.

Note: Uses iswspace() which respects the current locale settings.

Definition at line 310 of file unicode.c.

References is_codepoint_whitespace().

Referenced by is_codepoint_whitespace(), and is_utf8_whitespace().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ is_utf8_alnum()

bool is_utf8_alnum ( const char * utf8 )

Checks if a UTF-8 character represents an alphanumeric character.

Parameters

utf8	Pointer to UTF-8 character. Must not be NULL.

Returns: true if alphanumeric, false otherwise or on error.

Checks if a UTF-8 character represents an alphanumeric character.

Parameters

utf8	Pointer to a UTF-8 encoded character. Must not be NULL.

Returns: true if the character is alphanumeric, false otherwise or on error.

Definition at line 381 of file unicode.c.

References is_codepoint_alnum(), is_utf8_alnum(), and utf8_to_codepoint().

Referenced by is_utf8_alnum().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ is_utf8_alpha()

bool is_utf8_alpha ( const char * utf8 )

Checks if a UTF-8 character represents an alphabetic character.

Parameters

utf8	Pointer to UTF-8 character. Must not be NULL.

Returns: true if alphabetic, false otherwise or on error.

Checks if a UTF-8 character represents an alphabetic character.

Parameters

utf8	Pointer to a UTF-8 encoded character. Must not be NULL.

Returns: true if the character is alphabetic, false otherwise or on error.

Definition at line 360 of file unicode.c.

References is_codepoint_alpha(), is_utf8_alpha(), and utf8_to_codepoint().

Referenced by is_utf8_alpha().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ is_utf8_digit()

bool is_utf8_digit ( const char * utf8 )

Checks if a UTF-8 character represents a digit.

Parameters

utf8	Pointer to UTF-8 character. Must not be NULL.

Returns: true if digit, false otherwise or on error.

Checks if a UTF-8 character represents a digit.

Parameters

utf8	Pointer to a UTF-8 encoded character. Must not be NULL.

Returns: true if the character is a digit, false otherwise or on error.

Definition at line 339 of file unicode.c.

References is_codepoint_digit(), is_utf8_digit(), and utf8_to_codepoint().

Referenced by is_utf8_digit().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ is_utf8_punct()

bool is_utf8_punct ( const char * utf8 )

Checks if a UTF-8 character represents a punctuation character.

Parameters

utf8	Pointer to UTF-8 character. Must not be NULL.

Returns: true if punctuation, false otherwise or on error.

Checks if a UTF-8 character represents a punctuation character.

Parameters

utf8	Pointer to a UTF-8 encoded character. Must not be NULL.

Returns: true if the character is punctuation, false otherwise or on error.

Definition at line 402 of file unicode.c.

References is_codepoint_punct(), is_utf8_punct(), and utf8_to_codepoint().

Referenced by is_utf8_punct().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ is_utf8_whitespace()

bool is_utf8_whitespace ( const char * utf8 )

Checks if a UTF-8 character represents whitespace.

Parameters

utf8	Pointer to UTF-8 character. Must not be NULL.

Returns: true if whitespace, false otherwise or on error.

Checks if a UTF-8 character represents whitespace.

Parameters

utf8	Pointer to a UTF-8 encoded character. Must not be NULL.

Returns: true if the character is whitespace, false otherwise or on error.

Definition at line 318 of file unicode.c.

References is_codepoint_whitespace(), is_utf8_whitespace(), and utf8_to_codepoint().

Referenced by is_utf8_whitespace(), utf8_ltrim(), and utf8_rtrim().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ is_valid_codepoint()

bool is_valid_codepoint ( uint32_t codepoint )

Validates whether a codepoint is within the valid Unicode range.

Parameters

codepoint The codepoint to validate.

Returns: true if in range [0, 0x10FFFF], false otherwise.

Validates whether a codepoint is within the valid Unicode range.

Parameters

codepoint The Unicode codepoint to validate.

Returns: true if codepoint is in range [0, 0x10FFFF], false otherwise.

Note: This does NOT check for surrogate pairs or other invalid ranges.

Definition at line 240 of file unicode.c.

References is_valid_codepoint(), and UNICODE_MAX_CODEPOINT.

Referenced by is_valid_codepoint().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ is_valid_utf8()

bool is_valid_utf8 ( const char * utf8 )

Comprehensively validates a UTF-8 encoded string.

Parameters

utf8	Null-terminated UTF-8 string. NULL returns false.

Returns: true if entire string is valid UTF-8, false otherwise.

Note: Checks structure, overlong encodings, surrogates, and range.

Comprehensively validates a UTF-8 encoded string.

Validation checks include:

Correct UTF-8 byte sequence structure
Valid continuation bytes (10xxxxxx)
No overlong encodings (shortest form requirement)
No surrogate pairs (0xD800-0xDFFF)
Codepoints within valid range (0x0-0x10FFFF)

Parameters

utf8	The null-terminated UTF-8 string to validate. NULL returns false.

Returns: true if the entire string is valid UTF-8, false otherwise.

Note: An empty string is considered valid.

Definition at line 256 of file unicode.c.

References is_valid_utf8().

Referenced by is_valid_utf8().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ regex_match()

bool regex_match	(	const char *	str,
		const char *	pattern
	)

Tests if a string matches a regular expression pattern.

Parameters

str	Null-terminated UTF-8 string to test. Must not be NULL.
pattern	Regular expression pattern. Must not be NULL.

Returns: true if str matches pattern, false otherwise.

Note: Pattern syntax depends on the underlying regex implementation.

◆ ucp_to_utf8()

void ucp_to_utf8	(	uint32_t	codepoint,
		char	utf8[UTF8_MAX_LEN]
	)

Converts a Unicode codepoint to its UTF-8 byte sequence.

Parameters

codepoint	The Unicode codepoint to encode (must be <= 0x10FFFF).
utf8	Output buffer (minimum 5 bytes) that receives a null-terminated UTF-8 string.

Note: Invalid codepoints result in utf8[0] = '\0'.

Converts a Unicode codepoint to its UTF-8 byte sequence representation.

UTF-8 encoding uses 1-4 bytes per codepoint:

1 byte: U+0000 to U+007F (0xxxxxxx)
2 bytes: U+0080 to U+07FF (110xxxxx 10xxxxxx)
3 bytes: U+0800 to U+FFFF (1110xxxx 10xxxxxx 10xxxxxx)
4 bytes: U+10000 to U+10FFFF (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx)

Parameters

codepoint	The Unicode codepoint to convert (must be <= 0x10FFFF).
utf8	Output buffer that receives the UTF-8 bytes. Must be at least 5 bytes (4 bytes for max UTF-8 sequence + 1 for null terminator). Buffer is always null-terminated on success.

Note: Invalid codepoints (> 0x10FFFF) result in no output and utf8[0] = '\0'. This function does not validate surrogate pairs (0xD800-0xDFFF).

Definition at line 27 of file unicode.c.

References ucp_to_utf8().

Referenced by ucp_to_utf8(), utf8_tolower(), and utf8_toupper().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ utf8_append()

bool utf8_append	(	utf8_string *	s,
		const char *	data
	)

Appends UTF-8 data to the end of a utf8_string.

Parameters

s	The utf8_string to append to. Must not be NULL.
data	Null-terminated UTF-8 string to append. NULL is safely ignored.

Returns: true on success, false on allocation failure.

Appends UTF-8 data to the end of a utf8_string.

Parameters

s	The utf8_string to append to. Must not be NULL.
data	The null-terminated UTF-8 string to append. NULL is safely ignored.

Returns: true on success, false on allocation failure.

Definition at line 617 of file unicode.c.

References utf8_string::count, utf8_string::data, utf8_string::length, utf8_append(), utf8_count_codepoints(), and utf8_valid_byte_count().

Referenced by utf8_append().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ utf8_array_remove()

void utf8_array_remove	(	utf8_string **	array,
		size_t	size,
		size_t	index
	)

Removes an element from a utf8_string array and frees it.

Parameters

array	The array of utf8_string pointers. Must not be NULL.
size	The current size of the array.
index	The index to remove. Must be < size.

Removes an element from a utf8_string array and frees it.

Elements after the removed index are shifted down.

Parameters

array	The array of utf8_string pointers. Must not be NULL.
size	The current size of the array.
index	The index to remove. Must be < size.

Definition at line 1318 of file unicode.c.

References utf8_array_remove(), and utf8_free().

Referenced by utf8_array_remove().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ utf8_char_length()

size_t utf8_char_length ( const char * str )

Determines the byte length of a UTF-8 character from its first byte.

Parameters

str	Pointer to the first byte. Must not be NULL.

Returns: Byte length (1-4), or 0 if invalid.

Determines the byte length of a UTF-8 character from its first byte.

UTF-8 character lengths by leading byte:

0x00-0x7F: 1 byte (ASCII)
0xC0-0xDF: 2 bytes
0xE0-0xEF: 3 bytes
0xF0-0xF7: 4 bytes

Parameters

str	Pointer to the first byte of a UTF-8 character. Must not be NULL.

Returns: The byte length (1-4), or 0 if the byte is invalid UTF-8.

Note: This only examines the leading byte and does not validate the full sequence.

Definition at line 214 of file unicode.c.

References utf8_char_length().

Referenced by utf8_char_length(), utf8_ltrim(), utf8_print_codepoints(), utf8_remove(), utf8_reverse(), utf8_rtrim(), utf8_split(), utf8_tolower(), utf8_toupper(), and utf8_trim_chars().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ utf8_clone()

utf8_string * utf8_clone ( const utf8_string * s )

Duplicates a utf8_string object.

Parameters

s	The utf8_string to duplicate. Must not be NULL.

Returns: Newly allocated copy, or NULL on failure.

Note: Caller must free using utf8_free().

Duplicates a utf8_string object.

Parameters

s	The utf8_string to duplicate. Must not be NULL.

Returns: A newly allocated copy, or NULL on allocation failure.

Note: Caller must free the returned object using utf8_free().

Definition at line 1448 of file unicode.c.

References utf8_string::data, utf8_clone(), and utf8_new().

Referenced by utf8_clone().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ utf8_compare()

int utf8_compare	(	const char *	s1,
		const char *	s2
	)

Compares two UTF-8 strings lexicographically.

Parameters

s1	First UTF-8 string. NULL is treated as empty.
s2	Second UTF-8 string. NULL is treated as empty.

Returns: 0 if equal, <0 if s1 < s2, >0 if s1 > s2.

Compares two UTF-8 strings lexicographically.

Parameters

s1	First UTF-8 string. NULL is treated as empty.
s2	Second UTF-8 string. NULL is treated as empty.

Returns: 0 if equal, <0 if s1 < s2, >0 if s1 > s2.

Definition at line 1414 of file unicode.c.

References utf8_compare().

Referenced by utf8_compare().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ utf8_concat()

utf8_string * utf8_concat	(	const utf8_string *	s1,
		const utf8_string *	s2
	)

Concatenates two utf8_string objects into a new string.

Parameters

s1	First utf8_string. Must not be NULL.
s2	Second utf8_string. Must not be NULL.

Returns: Newly allocated utf8_string containing s1 + s2, or NULL on error.

Note: Caller must free using utf8_free().

Concatenates two utf8_string objects into a new string.

Parameters

s1	First utf8_string. Must not be NULL.
s2	Second utf8_string. Must not be NULL.

Returns: A newly allocated utf8_string containing s1 + s2, or NULL on error.

Note: Caller must free the returned object using utf8_free().

Definition at line 1463 of file unicode.c.

References utf8_string::count, utf8_string::data, utf8_string::length, utf8_concat(), and utf8_new_with_capacity().

Referenced by utf8_concat().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ utf8_contains()

bool utf8_contains	(	const char *	str,
		const char *	substr
	)

Checks if a string contains a substring.

Parameters

str	Null-terminated UTF-8 string. Must not be NULL.
substr	The substring to search for. Must not be NULL.

Returns: true if substr is found in str, false otherwise.

Checks if a string contains a substring.

Parameters

str	The null-terminated UTF-8 string to search in. Must not be NULL.
substr	The substring to search for. Must not be NULL.

Returns: true if substr is found in str, false otherwise.

Definition at line 1400 of file unicode.c.

References utf8_contains().

Referenced by utf8_contains().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ utf8_copy()

char * utf8_copy ( const char * data )

Creates a copy of a UTF-8 string containing only valid UTF-8 sequences.

Parameters

data	Null-terminated UTF-8 string. NULL returns NULL.

Returns: Newly allocated copy, or NULL on failure.

Note: Caller must free using free().

Creates a copy of a UTF-8 string containing only valid UTF-8 sequences.

Parameters

data	The null-terminated UTF-8 string to copy. NULL returns NULL.

Returns: A newly allocated copy of the valid UTF-8 portion, or NULL on allocation failure.

Note: Caller must free the returned string using free().

Definition at line 416 of file unicode.c.

References utf8_copy(), and utf8_valid_byte_count().

Referenced by utf8_copy(), and utf8_new().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ utf8_count_codepoints()

size_t utf8_count_codepoints ( const char * s )

Counts the number of Unicode codepoints in a UTF-8 string.

Parameters

s	Null-terminated UTF-8 string. NULL returns 0.

Returns: Number of codepoints (characters, not bytes).

Counts the number of Unicode codepoints in a UTF-8 string.

A codepoint is counted by identifying UTF-8 leading bytes, which are any byte that does NOT match the continuation byte pattern (10xxxxxx). This includes:

ASCII bytes (0xxxxxxx)
2-byte sequence leaders (110xxxxx)
3-byte sequence leaders (1110xxxx)
4-byte sequence leaders (11110xxx)

Parameters

s	The null-terminated UTF-8 string. NULL returns 0.

Returns: The number of Unicode codepoints (characters) in the string.

Note: Invalid UTF-8 sequences may result in incorrect counts.

Definition at line 133 of file unicode.c.

References utf8_count_codepoints().

Referenced by utf8_append(), utf8_count_codepoints(), utf8_insert(), utf8_new(), utf8_replace(), utf8_replace_all(), and utf8_split().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ utf8_data()

const char * utf8_data ( const utf8_string * s )

Returns a pointer to the internal UTF-8 data buffer.

Parameters

s	The utf8_string object. Must not be NULL.

Returns: Pointer to internal null-terminated UTF-8 string, or NULL if s is NULL.

Note: The pointer is owned by the utf8_string and should not be freed.

Returns a pointer to the internal UTF-8 data buffer.

Parameters

s	The utf8_string object. Must not be NULL.

Returns: Pointer to the internal null-terminated UTF-8 string, or NULL if s is NULL.

Note: The returned pointer is owned by the utf8_string and should not be freed.

Definition at line 437 of file unicode.c.

References utf8_string::data, and utf8_data().

Referenced by utf8_data().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ utf8_ends_with()

bool utf8_ends_with	(	const char *	str,
		const char *	suffix
	)

Checks if a string ends with a given suffix.

Parameters

str	Null-terminated UTF-8 string. Must not be NULL.
suffix	The suffix to test for. Must not be NULL.

Returns: true if str ends with suffix, false otherwise.

Checks if a string ends with a given suffix.

Parameters

str	The null-terminated UTF-8 string to check. Must not be NULL.
suffix	The suffix to test for. Must not be NULL.

Returns: true if str ends with suffix, false otherwise.

Definition at line 1375 of file unicode.c.

References utf8_ends_with(), and utf8_valid_byte_count().

Referenced by utf8_ends_with().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ utf8_equals()

bool utf8_equals	(	const char *	s1,
		const char *	s2
	)

Compares two UTF-8 strings for equality.

Parameters

s1	First UTF-8 string. Must not be NULL.
s2	Second UTF-8 string. Must not be NULL.

Returns: true if strings are byte-for-byte identical, false otherwise.

Compares two UTF-8 strings for equality.

Parameters

s1	First UTF-8 string. Must not be NULL.
s2	Second UTF-8 string. Must not be NULL.

Returns: true if strings are byte-for-byte identical, false otherwise.

Definition at line 1434 of file unicode.c.

References utf8_equals().

Referenced by utf8_equals().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ utf8_free()

void utf8_free ( utf8_string * s )

Frees all resources associated with a utf8_string.

Parameters

s	The utf8_string to free. NULL is safely ignored.

Frees all resources associated with a utf8_string.

Parameters

s	The utf8_string to free. NULL is safely ignored.

Note: After calling this function, the pointer s is invalid and should not be used.

Definition at line 505 of file unicode.c.

References utf8_string::data, and utf8_free().

Referenced by utf8_array_remove(), utf8_free(), and utf8_split_free().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ utf8_index_of()

int utf8_index_of	(	const utf8_string *	s,
		const char *	utf8
	)

Finds the byte index of the first occurrence of a substring.

Parameters

s	The utf8_string to search in. Must not be NULL.
utf8	The substring to search for. Must not be NULL.

Returns: Byte index of first occurrence, or -1 if not found or on error.

Finds the byte index of the first occurrence of a substring.

Parameters

s	The utf8_string to search in. Must not be NULL.
utf8	The substring to search for. Must not be NULL.

Returns: The byte index of the first occurrence, or -1 if not found or on error.

Definition at line 568 of file unicode.c.

References utf8_string::data, and utf8_index_of().

Referenced by utf8_index_of().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ utf8_insert()

bool utf8_insert	(	utf8_string *	s,
		size_t	index,
		const char *	data
	)

Inserts UTF-8 data at a specific byte index.

Parameters

s	The utf8_string to modify. Must not be NULL.
index	The byte index at which to insert.
data	Null-terminated UTF-8 string to insert. NULL is safely ignored.

Returns: true on success, false on allocation failure or invalid index.

Inserts UTF-8 data at a specific byte index.

Parameters

s	The utf8_string to modify. Must not be NULL.
index	The byte index at which to insert.
data	The null-terminated UTF-8 string to insert. NULL is safely ignored.

Returns: true on success, false on allocation failure or invalid index.

Definition at line 673 of file unicode.c.

References utf8_string::count, utf8_string::data, utf8_string::length, utf8_count_codepoints(), utf8_insert(), and utf8_valid_byte_count().

Referenced by utf8_insert().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ utf8_last_index_of()

int utf8_last_index_of	(	const utf8_string *	s,
		const char *	utf8
	)

Finds the byte index of the last occurrence of a substring.

Parameters

s	The utf8_string to search in. Must not be NULL.
utf8	The substring to search for. Must not be NULL.

Returns: Byte index of last occurrence, or -1 if not found or on error.

Finds the byte index of the last occurrence of a substring.

Parameters

s	The utf8_string to search in. Must not be NULL.
utf8	The substring to search for. Must not be NULL.

Returns: The byte index of the last occurrence, or -1 if not found or on error.

Definition at line 587 of file unicode.c.

References utf8_string::data, utf8_string::length, and utf8_last_index_of().

Referenced by utf8_last_index_of().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ utf8_ltrim()

void utf8_ltrim ( char * str )

Removes leading whitespace from a UTF-8 string in-place.

Parameters

str	Null-terminated UTF-8 string to modify. NULL is safely ignored.

Removes leading whitespace from a UTF-8 string in-place.

Whitespace is determined by is_utf8_whitespace(), which uses locale settings.

Parameters

str	The null-terminated UTF-8 string to modify. NULL is safely ignored.

Definition at line 930 of file unicode.c.

References is_utf8_whitespace(), utf8_char_length(), and utf8_ltrim().

Referenced by utf8_ltrim(), and utf8_trim().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ utf8_new()

utf8_string * utf8_new ( const char * data )

Creates a new utf8_string object from a C string.

Parameters

data	Null-terminated UTF-8 string. NULL returns NULL.

Returns: Newly allocated utf8_string, or NULL on failure.

Note: Caller must free using utf8_free().

Creates a new utf8_string object from a C string.

The string is validated and only valid UTF-8 bytes are stored.

Parameters

data	The null-terminated UTF-8 string. NULL returns NULL.

Returns: A newly allocated utf8_string object, or NULL on allocation failure or NULL input.

Note: Caller must free the returned object using utf8_free().

Definition at line 453 of file unicode.c.

References utf8_string::count, utf8_string::data, utf8_string::length, utf8_copy(), utf8_count_codepoints(), utf8_new(), and utf8_valid_byte_count().

Referenced by utf8_clone(), utf8_new(), utf8_readfrom(), and utf8_split().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ utf8_new_with_capacity()

utf8_string * utf8_new_with_capacity ( size_t capacity )

Creates an empty utf8_string with preallocated capacity.

Parameters

capacity Initial byte capacity to allocate.

Returns: Newly allocated empty utf8_string, or NULL on failure.

Note: Caller must free using utf8_free().

Creates an empty utf8_string with preallocated capacity.

Parameters

capacity The initial byte capacity to allocate.

Returns: A newly allocated empty utf8_string, or NULL on allocation failure.

Note: Caller must free the returned object using utf8_free().

Definition at line 481 of file unicode.c.

References utf8_string::count, utf8_string::data, utf8_string::length, and utf8_new_with_capacity().

Referenced by utf8_concat(), and utf8_new_with_capacity().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ utf8_print()

void utf8_print ( const utf8_string * s )

Prints the UTF-8 string content to stdout followed by a newline.

Parameters

s	The utf8_string to print. Must not be NULL.

Prints the UTF-8 string content to stdout followed by a newline.

Parameters

s	The utf8_string to print. Must not be NULL.

Definition at line 519 of file unicode.c.

References utf8_string::data, and utf8_print().

Referenced by utf8_print().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ utf8_print_codepoints()

void utf8_print_codepoints ( const utf8_string * s )

Prints the Unicode codepoints in U+XXXX format to stdout.

Parameters

s	The utf8_string to print. Must not be NULL.

Prints the Unicode codepoints of the string in U+XXXX format.

Parameters

s	The utf8_string to print. Must not be NULL.

Definition at line 544 of file unicode.c.

References utf8_string::data, utf8_char_length(), utf8_print_codepoints(), and utf8_to_codepoint().

Referenced by utf8_print_codepoints().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ utf8_print_info()

void utf8_print_info ( const utf8_string * s )

Prints metadata about the UTF-8 string to stdout.

Parameters

s	The utf8_string to inspect. Must not be NULL.

Prints metadata about the UTF-8 string to stdout.

Parameters

s	The utf8_string to inspect. Must not be NULL.

Definition at line 531 of file unicode.c.

References utf8_string::count, utf8_string::length, and utf8_print_info().

Referenced by utf8_print_info().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ utf8_readfrom()

utf8_string * utf8_readfrom ( const char * filename )

Reads a UTF-8 string from a file.

Parameters

filename The file path to read from. Must not be NULL.

Returns: Newly allocated utf8_string containing file contents, or NULL on error.

Note: Caller must free using utf8_free().

Reads a UTF-8 string from a file.

Parameters

filename The file path to read from. Must not be NULL.

Returns: A newly allocated utf8_string containing the file contents, or NULL on error.

Note: Caller must free the returned object using utf8_free().

Definition at line 882 of file unicode.c.

References utf8_new(), and utf8_readfrom().

Referenced by utf8_readfrom().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ utf8_remove()

bool utf8_remove	(	utf8_string *	s,
		size_t	index,
		size_t	count
	)

Removes a specified number of codepoints starting at a byte index.

Parameters

s	The utf8_string to modify. Must not be NULL.
index	The starting byte index.
count	The number of codepoints (not bytes) to remove.

Returns: true on success, false if parameters are invalid.

Removes a specified number of codepoints starting at a byte index.

Parameters

s	The utf8_string to modify. Must not be NULL.
index	The starting byte index.
count	The number of codepoints (not bytes) to remove.

Returns: true on success, false if parameters are invalid.

Definition at line 702 of file unicode.c.

References utf8_string::count, utf8_string::data, utf8_string::length, utf8_char_length(), and utf8_remove().

Referenced by utf8_remove().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ utf8_replace()

bool utf8_replace	(	utf8_string *	s,
		const char *	old_str,
		const char *	new_str
	)

Replaces the first occurrence of a substring with another string.

Parameters

s	The utf8_string to modify. Must not be NULL.
old_str	The substring to find and replace. Must not be NULL.
new_str	The replacement string. Must not be NULL.

Returns: true if replacement occurred, false if old_str not found or on error.

Replaces the first occurrence of a substring with another string.

Parameters

s	The utf8_string to modify. Must not be NULL.
old_str	The substring to find and replace. Must not be NULL.
new_str	The replacement string. Must not be NULL.

Returns: true if replacement occurred, false if old_str not found or on error.

Definition at line 734 of file unicode.c.

References utf8_string::count, utf8_string::data, utf8_string::length, utf8_count_codepoints(), utf8_replace(), and utf8_valid_byte_count().

Referenced by utf8_replace().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ utf8_replace_all()

size_t utf8_replace_all	(	utf8_string *	s,
		const char *	old_str,
		const char *	new_str
	)

Replaces all occurrences of a substring with another string.

Parameters

s	The utf8_string to modify. Must not be NULL.
old_str	The substring to find and replace. Must not be NULL or empty.
new_str	The replacement string. Must not be NULL (can be empty).

Returns: Number of replacements made, or 0 if none found or on error.

Replaces all occurrences of a substring with another string.

Parameters

s	The utf8_string to modify. Must not be NULL.
old_str	The substring to find and replace. Must not be NULL or empty.
new_str	The replacement string. Must not be NULL (can be empty for deletion).

Returns: The number of replacements made, or 0 if none found or on error.

Definition at line 773 of file unicode.c.

References utf8_string::count, utf8_string::data, utf8_string::length, utf8_count_codepoints(), utf8_replace_all(), and utf8_valid_byte_count().

Referenced by utf8_replace_all().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ utf8_reverse()

bool utf8_reverse ( utf8_string * s )

Reverses a UTF-8 string by codepoints.

Parameters

s	The utf8_string to reverse in-place. Must not be NULL.

Returns: true on success, false on allocation failure.

Reverses a UTF-8 string by codepoints (not bytes).

Each complete UTF-8 character is treated as an atomic unit.

Parameters

s	The utf8_string to reverse in-place. Must not be NULL.

Returns: true on success, false on allocation failure.

Definition at line 821 of file unicode.c.

References utf8_string::data, utf8_string::length, utf8_char_length(), and utf8_reverse().

Referenced by utf8_reverse().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ utf8_rtrim()

void utf8_rtrim ( char * str )

Removes trailing whitespace from a UTF-8 string in-place.

Parameters

str	Null-terminated UTF-8 string to modify. NULL is safely ignored.

Removes trailing whitespace from a UTF-8 string in-place.

Whitespace is determined by is_utf8_whitespace(), which uses locale settings.

Parameters

str	The null-terminated UTF-8 string to modify. NULL is safely ignored.

Definition at line 957 of file unicode.c.

References is_utf8_whitespace(), utf8_char_length(), and utf8_rtrim().

Referenced by utf8_rtrim(), and utf8_trim().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ utf8_split()

utf8_string ** utf8_split	(	const utf8_string *	str,
		const char *	delim,
		size_t *	num_parts
	)

Splits a UTF-8 string into parts using a delimiter.

Parameters

str	The utf8_string to split. Must not be NULL.
delim	The delimiter string. Must not be NULL or empty.
num_parts	Output parameter receiving the number of parts. Must not be NULL.

Returns: Array of utf8_string pointers, or NULL on error.

Note: Caller must free using utf8_split_free().

Splits a UTF-8 string into parts using a delimiter.

The string is divided at each occurrence of the delimiter. Empty parts (when the delimiter appears consecutively) are included in the result.

Parameters

str	The utf8_string to split. Must not be NULL.
delim	The delimiter string. Must not be NULL or empty.
num_parts	Output parameter receiving the number of parts. Must not be NULL.

Returns: Array of utf8_string pointers, or NULL on error. Caller must free using utf8_split_free().

Definition at line 1241 of file unicode.c.

References utf8_string::count, utf8_string::data, utf8_string::length, utf8_char_length(), utf8_count_codepoints(), utf8_new(), utf8_split(), utf8_starts_with(), and utf8_valid_byte_count().

Referenced by utf8_split().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ utf8_split_free()

void utf8_split_free	(	utf8_string **	str,
		size_t	size
	)

Frees an array of utf8_string objects returned by utf8_split().

Parameters

str	The array of utf8_string pointers. NULL is safely ignored.
size	The number of elements in the array.

Frees an array of utf8_string objects returned by utf8_split().

Parameters

str	The array of utf8_string pointers. NULL is safely ignored.
size	The number of elements in the array.

Definition at line 1336 of file unicode.c.

References utf8_free(), and utf8_split_free().

Referenced by utf8_split_free().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ utf8_starts_with()

bool utf8_starts_with	(	const char *	str,
		const char *	prefix
	)

Checks if a string starts with a given prefix.

Parameters

str	Null-terminated UTF-8 string. Must not be NULL.
prefix	The prefix to test for. Must not be NULL.

Returns: true if str starts with prefix, false otherwise.

Checks if a string starts with a given prefix.

Parameters

str	The null-terminated UTF-8 string to check. Must not be NULL.
prefix	The prefix to test for. Must not be NULL.

Returns: true if str starts with prefix, false otherwise.

Definition at line 1354 of file unicode.c.

References utf8_starts_with(), and utf8_valid_byte_count().

Referenced by utf8_split(), and utf8_starts_with().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ utf8_substr()

char * utf8_substr	(	const utf8_string *	s,
		size_t	index,
		size_t	utf8_byte_len
	)

Extracts a substring by byte range.

Parameters

s	The source utf8_string. Must not be NULL.
index	The starting byte index.
utf8_byte_len	The number of bytes to extract.

Returns: Newly allocated substring, or NULL on failure.

Note: Caller must free using free().

Extracts a substring by byte range.

Parameters

s	The source utf8_string. Must not be NULL.
index	The starting byte index.
utf8_byte_len	The number of bytes to extract.

Returns: A newly allocated substring, or NULL on allocation failure or invalid params.

Note: Caller must free the returned string using free(). Does not validate that the byte range aligns with UTF-8 character boundaries.

Definition at line 648 of file unicode.c.

References utf8_string::data, utf8_string::length, and utf8_substr().

Referenced by utf8_substr().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ utf8_to_codepoint()

uint32_t utf8_to_codepoint ( const char * utf8 )

Decodes a UTF-8 byte sequence to its Unicode codepoint.

Parameters

utf8	Pointer to UTF-8 encoded byte sequence. Must not be NULL.

Returns: The decoded codepoint, or 0xFFFD (replacement char) if invalid.

Note: Only examines the first complete UTF-8 sequence.

Decodes a UTF-8 byte sequence to its Unicode codepoint.

This function validates the UTF-8 sequence for:

Correct continuation byte patterns (10xxxxxx)
Overlong encodings (rejected: each codepoint must use the shortest possible byte sequence)
Surrogate code points (0xD800-0xDFFF, invalid in UTF-8)
Out-of-range values (> 0x10FFFF)

Parameters

utf8	Pointer to UTF-8 encoded byte sequence. Must not be NULL.

Returns: The decoded Unicode codepoint, or 0xFFFD (replacement character) if the sequence is invalid or malformed.

Note: The function only examines the first complete UTF-8 sequence.

Definition at line 69 of file unicode.c.

References utf8_to_codepoint().

Referenced by is_utf8_alnum(), is_utf8_alpha(), is_utf8_digit(), is_utf8_punct(), is_utf8_whitespace(), utf8_print_codepoints(), utf8_to_codepoint(), utf8_tolower(), utf8_toupper(), and utf8_trim_chars().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ utf8_tolower()

void utf8_tolower ( char * str )

Converts all uppercase characters to lowercase in-place.

Parameters

str	Null-terminated UTF-8 string to modify. NULL is safely ignored.

Note: Uses locale-aware conversion. May change string length.

Converts all uppercase characters in a UTF-8 string to lowercase in-place.

Uses locale-aware conversion via towlower().

Parameters

str	The null-terminated UTF-8 string to modify. NULL is safely ignored.

Note: May change string length if case conversion alters byte count.

Definition at line 1144 of file unicode.c.

References ucp_to_utf8(), utf8_char_length(), utf8_to_codepoint(), utf8_tolower(), and utf8_valid_byte_count().

Referenced by utf8_tolower().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ utf8_toupper()

void utf8_toupper ( char * str )

Converts all lowercase characters to uppercase in-place.

Parameters

str	Null-terminated UTF-8 string to modify. NULL is safely ignored.

Note: Uses locale-aware conversion. May change string length.

Converts all lowercase characters in a UTF-8 string to uppercase in-place.

Uses locale-aware conversion via towupper().

Parameters

str	The null-terminated UTF-8 string to modify. NULL is safely ignored.

Note: May change string length if case conversion alters byte count.

Definition at line 1191 of file unicode.c.

References ucp_to_utf8(), utf8_char_length(), utf8_to_codepoint(), utf8_toupper(), and utf8_valid_byte_count().

Referenced by utf8_toupper().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ utf8_trim()

void utf8_trim ( char * str )

Removes leading and trailing whitespace from a UTF-8 string in-place.

Parameters

str	Null-terminated UTF-8 string to modify. NULL is safely ignored.

Removes leading and trailing whitespace from a UTF-8 string in-place.

Parameters

str	The null-terminated UTF-8 string to modify. NULL is safely ignored.

Definition at line 988 of file unicode.c.

References utf8_ltrim(), utf8_rtrim(), and utf8_trim().

Referenced by utf8_trim().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ utf8_trim_char()

void utf8_trim_char	(	char *	str,
		char	c
	)

Removes leading and trailing occurrences of a single character.

Parameters

str	Null-terminated UTF-8 string to modify. NULL is safely ignored.
c	The ASCII character to trim (single-byte only).

Removes leading and trailing occurrences of a single character.

Parameters

str	The null-terminated UTF-8 string to modify. NULL is safely ignored.
c	The ASCII character to trim (only works for single-byte chars).

Note: This function handles single-byte (ASCII) characters only.

Definition at line 1114 of file unicode.c.

References utf8_trim_char().

Referenced by utf8_trim_char().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ utf8_trim_chars()

void utf8_trim_chars	(	char *	str,
		const char *	chars
	)

Removes leading and trailing characters from a UTF-8 string in-place.

Parameters

str	Null-terminated UTF-8 string to modify. NULL is safely ignored.
chars	String containing codepoints to trim. NULL is safely ignored.

Removes leading and trailing characters from a UTF-8 string in-place.

Any codepoint appearing in the 'chars' string will be trimmed from both ends.

Parameters

str	The null-terminated UTF-8 string to modify. NULL is safely ignored.
chars	String containing codepoints to trim. NULL is safely ignored.

Definition at line 1004 of file unicode.c.

References utf8_char_length(), utf8_to_codepoint(), and utf8_trim_chars().

Referenced by utf8_trim_chars().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ utf8_valid_byte_count()

size_t utf8_valid_byte_count ( const char * s )

Counts the number of valid UTF-8 bytes in a string.

Parameters

s	Null-terminated string. NULL returns 0.

Returns: Total bytes in all valid UTF-8 sequences.

Note: Invalid sequences are skipped (counted as 0 bytes).

Counts the number of valid UTF-8 bytes in a string.

This function validates each UTF-8 sequence by checking:

Correct leading byte patterns
Presence of all expected continuation bytes
Proper continuation byte format (10xxxxxx)

Invalid sequences are skipped (counted as 0 bytes), allowing partial processing of corrupted UTF-8 data.

Parameters

s	The null-terminated string to validate. NULL returns 0.

Returns: The total number of bytes in all valid UTF-8 sequences.

Note: This does NOT validate codepoint ranges or overlong encodings.

Definition at line 162 of file unicode.c.

References utf8_valid_byte_count().

Referenced by utf8_append(), utf8_copy(), utf8_ends_with(), utf8_insert(), utf8_new(), utf8_replace(), utf8_replace_all(), utf8_split(), utf8_starts_with(), utf8_tolower(), utf8_toupper(), and utf8_valid_byte_count().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ utf8_writeto()

long utf8_writeto	(	const utf8_string *	s,
		const char *	filename
	)

Writes a utf8_string to a file.

Parameters

s	The utf8_string to write. Must not be NULL.
filename	The file path. Existing files are overwritten. Must not be NULL.

Returns: Number of bytes written, or -1 on error.

Writes a utf8_string to a file.

Parameters

s	The utf8_string to write. Must not be NULL.
filename	The file path. Existing files are overwritten. Must not be NULL.

Returns: The number of bytes written, or -1 on error.

Definition at line 855 of file unicode.c.

References utf8_string::data, utf8_string::length, and utf8_writeto().

Referenced by utf8_writeto().

Here is the call graph for this function:

Here is the caller graph for this function:

Classes

Macros

Typedefs

Functions

Detailed Description

Macro Definition Documentation

◆ UNICODE_MAX_CODEPOINT

◆ UNICODE_MAX_UTF8_BYTES

◆ UNICODE_VERSION

Typedef Documentation

◆ utf8_string

Function Documentation

◆ is_codepoint_alnum()

◆ is_codepoint_alpha()

◆ is_codepoint_digit()

◆ is_codepoint_punct()

◆ is_codepoint_whitespace()

◆ is_utf8_alnum()

◆ is_utf8_alpha()

◆ is_utf8_digit()

◆ is_utf8_punct()

◆ is_utf8_whitespace()

◆ is_valid_codepoint()

◆ is_valid_utf8()

◆ regex_match()

◆ ucp_to_utf8()

◆ utf8_append()

◆ utf8_array_remove()

◆ utf8_char_length()

◆ utf8_clone()

◆ utf8_compare()

◆ utf8_concat()

◆ utf8_contains()

◆ utf8_copy()

◆ utf8_count_codepoints()

◆ utf8_data()

◆ utf8_ends_with()

◆ utf8_equals()

◆ utf8_free()

◆ utf8_index_of()

◆ utf8_insert()

◆ utf8_last_index_of()

◆ utf8_ltrim()

◆ utf8_new()

◆ utf8_new_with_capacity()

◆ utf8_print()

◆ utf8_print_codepoints()

◆ utf8_print_info()

◆ utf8_readfrom()

◆ utf8_remove()

◆ utf8_replace()

◆ utf8_replace_all()

◆ utf8_reverse()

◆ utf8_rtrim()

◆ utf8_split()

◆ utf8_split_free()

◆ utf8_starts_with()

◆ utf8_substr()

◆ utf8_to_codepoint()

◆ utf8_tolower()

◆ utf8_toupper()

◆ utf8_trim()

◆ utf8_trim_char()

◆ utf8_trim_chars()

◆ utf8_valid_byte_count()

◆ utf8_writeto()