diff options
Diffstat (limited to 'doc/unistr.texi')
-rw-r--r-- | doc/unistr.texi | 102 |
1 file changed, 93 insertions, 9 deletions
diff --git a/doc/unistr.texi b/doc/unistr.texi index 60f1daa..da0f4da 100644 --- a/doc/unistr.texi +++ b/doc/unistr.texi @@ -35,31 +35,61 @@ The following functions perform conversions between the different forms of Unico @deftypefun {uint16_t *} u8_to_u16 (const uint8_t *@var{s}, size_t @var{n}, uint16_t *@var{resultbuf}, size_t *@var{lengthp}) Converts an UTF-8 string to an UTF-16 string. + +The @var{resultbuf} and @var{lengthp} arguments are as described in +chapter @ref{Conventions}. @end deftypefun @deftypefun {uint32_t *} u8_to_u32 (const uint8_t *@var{s}, size_t @var{n}, uint32_t *@var{resultbuf}, size_t *@var{lengthp}) Converts an UTF-8 string to an UTF-32 string. + +The @var{resultbuf} and @var{lengthp} arguments are as described in +chapter @ref{Conventions}. @end deftypefun @deftypefun {uint8_t *} u16_to_u8 (const uint16_t *@var{s}, size_t @var{n}, uint8_t *@var{resultbuf}, size_t *@var{lengthp}) Converts an UTF-16 string to an UTF-8 string. + +The @var{resultbuf} and @var{lengthp} arguments are as described in +chapter @ref{Conventions}. @end deftypefun @deftypefun {uint32_t *} u16_to_u32 (const uint16_t *@var{s}, size_t @var{n}, uint32_t *@var{resultbuf}, size_t *@var{lengthp}) Converts an UTF-16 string to an UTF-32 string. + +The @var{resultbuf} and @var{lengthp} arguments are as described in +chapter @ref{Conventions}. @end deftypefun @deftypefun {uint8_t *} u32_to_u8 (const uint32_t *@var{s}, size_t @var{n}, uint8_t *@var{resultbuf}, size_t *@var{lengthp}) Converts an UTF-32 string to an UTF-8 string. + +The @var{resultbuf} and @var{lengthp} arguments are as described in +chapter @ref{Conventions}. @end deftypefun @deftypefun {uint16_t *} u32_to_u16 (const uint32_t *@var{s}, size_t @var{n}, uint16_t *@var{resultbuf}, size_t *@var{lengthp}) Converts an UTF-32 string to an UTF-16 string. + +The @var{resultbuf} and @var{lengthp} arguments are as described in +chapter @ref{Conventions}. 
@end deftypefun @node Elementary string functions @section Elementary string functions +@menu +* Iterating:: +* Creating Unicode strings:: +* Copying Unicode strings:: +* Comparing Unicode strings:: +* Searching for a character:: +* Counting characters:: +@end menu + +@node Iterating +@subsection Iterating over a Unicode string + @cindex iterating The following functions inspect and return details about the first character in a Unicode string. @@ -75,9 +105,9 @@ This function is similar to @posixfunc{mblen}, except that it operates on a Unicode string and that @var{s} must not be NULL. @end deftypefun -@deftypefun int u8_mbtouc_unsafe (ucs4_t *@var{puc}, const uint8_t *@var{s}, size_t @var{n}) -@deftypefunx int u16_mbtouc_unsafe (ucs4_t *@var{puc}, const uint16_t *@var{s}, size_t @var{n}) -@deftypefunx int u32_mbtouc_unsafe (ucs4_t *@var{puc}, const uint32_t *@var{s}, size_t @var{n}) +@deftypefun int u8_mbtouc (ucs4_t *@var{puc}, const uint8_t *@var{s}, size_t @var{n}) +@deftypefunx int u16_mbtouc (ucs4_t *@var{puc}, const uint16_t *@var{s}, size_t @var{n}) +@deftypefunx int u32_mbtouc (ucs4_t *@var{puc}, const uint32_t *@var{s}, size_t @var{n}) Returns the length (number of units) of the first character in @var{s}, putting its @code{ucs4_t} representation in @code{*@var{puc}}. Upon failure, @code{*@var{puc}} is set to @code{0xfffd}, and an appropriate number of units @@ -85,17 +115,21 @@ is returned. The number of available units, @var{n}, must be > 0. +This function fails if an invalid sequence of units is encountered at the +beginning of @var{s}, or if additional units (after the @var{n} provided units) +would be needed to form a character. + This function is similar to @posixfunc{mbtowc}, except that it operates on a Unicode string, @var{puc} and @var{s} must not be NULL, @var{n} must be > 0, and the NUL character is not treated specially. 
@end deftypefun -@deftypefun int u8_mbtouc (ucs4_t *@var{puc}, const uint8_t *@var{s}, size_t @var{n}) -@deftypefunx int u16_mbtouc (ucs4_t *@var{puc}, const uint16_t *@var{s}, size_t @var{n}) -@deftypefunx int u32_mbtouc (ucs4_t *@var{puc}, const uint32_t *@var{s}, size_t @var{n}) -This function is like @code{u8_mbtouc_unsafe}, except that it will detect an -invalid UTF-8 character, even if the library is compiled without -@option{--enable-safety}. +@deftypefun int u8_mbtouc_unsafe (ucs4_t *@var{puc}, const uint8_t *@var{s}, size_t @var{n}) +@deftypefunx int u16_mbtouc_unsafe (ucs4_t *@var{puc}, const uint16_t *@var{s}, size_t @var{n}) +@deftypefunx int u32_mbtouc_unsafe (ucs4_t *@var{puc}, const uint32_t *@var{s}, size_t @var{n}) +This function is identical to @code{u8_mbtouc}/@code{u16_mbtouc}/@code{u32_mbtouc}. +Earlier versions of this function performed fewer range-checks on the sequence +of units. @end deftypefun @deftypefun int u8_mbtoucr (ucs4_t *@var{puc}, const uint8_t *@var{s}, size_t @var{n}) @@ -112,6 +146,9 @@ This function is similar to @code{u8_mbtouc}, except that the return value gives more details about the failure, similar to @posixfunc{mbrtowc}. @end deftypefun +@node Creating Unicode strings +@subsection Creating Unicode strings one character at a time + The following function stores a Unicode character as a Unicode string in memory. @@ -127,6 +164,9 @@ Unicode strings, @var{s} must not be NULL, and the argument @var{n} must be specified. @end deftypefun +@node Copying Unicode strings +@subsection Copying Unicode strings + @cindex copying The following functions copy Unicode strings in memory. @@ -161,6 +201,9 @@ This function is similar to @posixfunc{memset}, except that it operates on Unicode strings. @end deftypefun +@node Comparing Unicode strings +@subsection Comparing Unicode strings + @cindex comparing The following function compares two Unicode strings of the same length. 
@@ -191,6 +234,9 @@ This function is similar to the gnulib function @func{memcmp2}, except that it operates on Unicode strings. @end deftypefun +@node Searching for a character +@subsection Searching for a character in a Unicode string + @cindex searching, for a character The following function searches for a given Unicode character. @@ -205,6 +251,9 @@ This function is similar to @posixfunc{memchr}, except that it operates on Unicode strings. @end deftypefun +@node Counting characters +@subsection Counting the characters in a Unicode string + @cindex counting The following function counts the number of Unicode characters. @@ -233,6 +282,20 @@ Makes a freshly allocated copy of @var{s}, of length @var{n}. @node Elementary string functions on NUL terminated strings @section Elementary string functions on NUL terminated strings +@menu +* Iterating over a NUL terminated Unicode string:: +* Length:: +* Copying a NUL terminated Unicode string:: +* Comparing NUL terminated Unicode strings:: +* Duplicating a NUL terminated Unicode string:: +* Searching for a character in a NUL terminated Unicode string:: +* Searching for a substring:: +* Tokenizing:: +@end menu + +@node Iterating over a NUL terminated Unicode string +@subsection Iterating over a NUL terminated Unicode string + The following functions inspect and return details about the first character in a Unicode string. @@ -273,6 +336,9 @@ Puts the character's @code{ucs4_t} representation in @code{*@var{puc}}. Note that this function works only on well-formed Unicode strings. @end deftypefun +@node Length +@subsection Length of a NUL terminated Unicode string + The following functions determine the length of a Unicode string. @deftypefun size_t u8_strlen (const uint8_t *@var{s}) @@ -293,6 +359,9 @@ This function is similar to @posixfunc{strnlen} and @posixfunc{wcsnlen}, except that it operates on Unicode strings. 
@end deftypefun +@node Copying a NUL terminated Unicode string +@subsection Copying a NUL terminated Unicode string + @cindex copying The following functions copy portions of Unicode strings in memory. @@ -355,6 +424,9 @@ This function is similar to @posixfunc{strncat} and @posixfunc{wcsncat}, except that it operates on Unicode strings. @end deftypefun +@node Comparing NUL terminated Unicode strings +@subsection Comparing NUL terminated Unicode strings + @cindex comparing The following functions compare two Unicode strings. @@ -396,6 +468,9 @@ This function is similar to @posixfunc{strncmp} and @posixfunc{wcsncmp}, except that it operates on Unicode strings. @end deftypefun +@node Duplicating a NUL terminated Unicode string +@subsection Duplicating a NUL terminated Unicode string + @cindex duplicating The following function allocates a duplicate of a Unicode string. @@ -408,6 +483,9 @@ This function is similar to @posixfunc{strdup} and @posixfunc{wcsdup}, except that it operates on Unicode strings. @end deftypefun +@node Searching for a character in a NUL terminated Unicode string +@subsection Searching for a character in a NUL terminated Unicode string + @cindex searching, for a character The following functions search for a given Unicode character. @@ -461,6 +539,9 @@ This function is similar to @posixfunc{strpbrk} and @posixfunc{wcspbrk}, except that it operates on Unicode strings. @end deftypefun +@node Searching for a substring +@subsection Searching for a substring in a NUL terminated Unicode string + @cindex searching, for a substring The following functions search whether a given Unicode string is a substring of another Unicode string. @@ -486,6 +567,9 @@ Tests whether @var{str} starts with @var{prefix}. Tests whether @var{str} ends with @var{suffix}. @end deftypefun +@node Tokenizing +@subsection Tokenizing a NUL terminated Unicode string + The following function does one step in tokenizing a Unicode string. 
@deftypefun {uint8_t *} u8_strtok (uint8_t *@var{str}, const uint8_t *@var{delim}, uint8_t **@var{ptr}) |