summaryrefslogtreecommitdiff
path: root/doc/uniconv.texi
diff options
context:
space:
mode:
Diffstat (limited to 'doc/uniconv.texi')
-rw-r--r--doc/uniconv.texi157
1 files changed, 157 insertions, 0 deletions
diff --git a/doc/uniconv.texi b/doc/uniconv.texi
new file mode 100644
index 0000000..07cfa1b
--- /dev/null
+++ b/doc/uniconv.texi
@@ -0,0 +1,157 @@
+@node uniconv.h
+@chapter Conversions between Unicode and encodings @code{<uniconv.h>}
+
+This include file declares functions for converting between Unicode strings
+and @code{char *} strings in locale encoding or in other specified encodings.
+
+@cindex locale encoding
+The following function returns the locale encoding.
+
+@deftypefun {const char *} locale_charset ()
+Determines the current locale's character encoding, and canonicalizes it
+into one of the canonical names listed in @file{config.charset}.
+If the canonical name cannot be determined, the result is a non-canonical
+name.
+
+The result must not be freed; it is statically allocated.
+
+The result of this function can be used as an argument to the @code{iconv_open}
+function in GNU libc, in GNU libiconv, or in the gnulib provided wrapper
+around the native @code{iconv_open} function. It may not work as an argument
+to the native @code{iconv_open} function directly.
+@end deftypefun
+
+The handling of unconvertible characters during the conversions can be
+parametrized through the following enumeration type:
+
+@deftp Type {enum iconv_ilseq_handler}
+This type specifies how unconvertible characters in the input are handled.
+@end deftp
+
+@deftypevr Constant {enum iconv_ilseq_handler} iconveh_error
+This handler causes the function to return with @code{errno} set to
+@code{EILSEQ}.
+@end deftypevr
+
+@deftypevr Constant {enum iconv_ilseq_handler} iconveh_question_mark
+This handler produces one question mark @samp{?} per unconvertible character.
+@end deftypevr
+
+@deftypevr Constant {enum iconv_ilseq_handler} iconveh_escape_sequence
+This handler produces an escape sequence @code{\u@var{xxxx}} or
+@code{\U@var{xxxxxxxx}} for each unconvertible character.
+@end deftypevr
+
+@cindex converting
+The following functions convert between strings in a specified encoding and
+Unicode strings.
+
+@deftypefun {uint8_t *} u8_conv_from_encoding (const char *@var{fromcode}, enum iconv_ilseq_handler @var{handler}, const char *@var{src}, size_t @var{srclen}, size_t *@var{offsets}, uint8_t *@var{resultbuf}, size_t *@var{lengthp})
+@deftypefunx {uint16_t *} u16_conv_from_encoding (const char *@var{fromcode}, enum iconv_ilseq_handler @var{handler}, const char *@var{src}, size_t @var{srclen}, size_t *@var{offsets}, uint16_t *@var{resultbuf}, size_t *@var{lengthp})
+@deftypefunx {uint32_t *} u32_conv_from_encoding (const char *@var{fromcode}, enum iconv_ilseq_handler @var{handler}, const char *@var{src}, size_t @var{srclen}, size_t *@var{offsets}, uint32_t *@var{resultbuf}, size_t *@var{lengthp})
+Converts an entire string, possibly including NUL bytes, from one encoding
+to UTF-8 encoding.
+
+Converts a memory region given in encoding @var{fromcode}. @var{fromcode} is
+as for the @code{iconv_open} function.
+
+The input is in the memory region between @var{src} (inclusive) and
+@code{@var{src} + @var{srclen}} (exclusive).
+
+If @var{offsets} is not NULL, it should point to an array of @var{srclen}
+integers; this array is filled with offsets into the result, i.e@. the
+character starting at @code{@var{src}[i]} corresponds to the character starting
+at @code{@var{result}[@var{offsets}[i]]}, and other offsets are set to
+@code{(size_t)(-1)}.
+
+@code{@var{resultbuf}} and @code{*@var{lengthp}} should be a scratch
+buffer and its size, or @code{@var{resultbuf}} can be NULL.
+
+May erase the contents of the memory at @code{@var{resultbuf}}.
+
+If successful: The resulting Unicode string (non-NULL) is returned and
+its length stored in @code{*@var{lengthp}}. The resulting string is
+@code{@var{resultbuf}} if no dynamic memory allocation was necessary,
+or a freshly allocated memory block otherwise.
+
+In case of error: NULL is returned and @code{errno} is set.
+Particular @code{errno} values: @code{EINVAL}, @code{EILSEQ}, @code{ENOMEM}.
+@end deftypefun
+
+@deftypefun {char *} u8_conv_to_encoding (const char *@var{tocode}, enum iconv_ilseq_handler @var{handler}, const uint8_t *@var{src}, size_t @var{srclen}, size_t *@var{offsets}, char *@var{resultbuf}, size_t *@var{lengthp})
+@deftypefunx {char *} u16_conv_to_encoding (const char *@var{tocode}, enum iconv_ilseq_handler @var{handler}, const uint16_t *@var{src}, size_t @var{srclen}, size_t *@var{offsets}, char *@var{resultbuf}, size_t *@var{lengthp})
+@deftypefunx {char *} u32_conv_to_encoding (const char *@var{tocode}, enum iconv_ilseq_handler @var{handler}, const uint32_t *@var{src}, size_t @var{srclen}, size_t *@var{offsets}, char *@var{resultbuf}, size_t *@var{lengthp})
+Converts an entire Unicode string, possibly including NUL units, from UTF-8
+encoding to a given encoding.
+
+Converts a memory region to encoding @var{tocode}. @var{tocode} is as for
+the @code{iconv_open} function.
+
+The input is in the memory region between @var{src} (inclusive) and
+@code{@var{src} + @var{srclen}} (exclusive).
+
+If @var{offsets} is not NULL, it should point to an array of @var{srclen}
+integers; this array is filled with offsets into the result, i.e@. the
+character starting at @code{@var{src}[i]} corresponds to the character starting
+at @code{@var{result}[@var{offsets}[i]]}, and other offsets are set to
+@code{(size_t)(-1)}.
+
+@code{@var{resultbuf}} and @code{*@var{lengthp}} should be a scratch
+buffer and its size, or @code{@var{resultbuf}} can be NULL.
+
+May erase the contents of the memory at @code{@var{resultbuf}}.
+
+If successful: The resulting Unicode string (non-NULL) is returned and
+its length stored in @code{*@var{lengthp}}. The resulting string is
+@code{@var{resultbuf}} if no dynamic memory allocation was necessary,
+or a freshly allocated memory block otherwise.
+
+In case of error: NULL is returned and @code{errno} is set.
+Particular @code{errno} values: @code{EINVAL}, @code{EILSEQ}, @code{ENOMEM}.
+@end deftypefun
+
+The following functions convert between NUL terminated strings in a specified
+encoding and NUL terminated Unicode strings.
+
+@deftypefun {uint8_t *} u8_strconv_from_encoding (const char *@var{string}, const char *@var{fromcode}, enum iconv_ilseq_handler @var{handler})
+@deftypefunx {uint16_t *} u16_strconv_from_encoding (const char *@var{string}, const char *@var{fromcode}, enum iconv_ilseq_handler @var{handler})
+@deftypefunx {uint32_t *} u32_strconv_from_encoding (const char *@var{string}, const char *@var{fromcode}, enum iconv_ilseq_handler @var{handler})
+Converts a NUL terminated string from a given encoding.
+
+The result is @code{malloc} allocated, or NULL (with @var{errno} set) in case of error.
+
+Particular @code{errno} values: @code{EILSEQ}, @code{ENOMEM}.
+@end deftypefun
+
+@deftypefun {char *} u8_strconv_to_encoding (const uint8_t *@var{string}, const char *@var{tocode}, enum iconv_ilseq_handler @var{handler})
+@deftypefunx {char *} u16_strconv_to_encoding (const uint16_t *@var{string}, const char *@var{tocode}, enum iconv_ilseq_handler @var{handler})
+@deftypefunx {char *} u32_strconv_to_encoding (const uint32_t *@var{string}, const char *@var{tocode}, enum iconv_ilseq_handler @var{handler})
+Converts a NUL terminated string to a given encoding.
+
+The result is @code{malloc} allocated, or NULL (with @code{errno} set) in case of error.
+
+Particular @code{errno} values: @code{EILSEQ}, @code{ENOMEM}.
+@end deftypefun
+
+The following functions are shorthands that convert between NUL terminated
+strings in locale encoding and NUL terminated Unicode strings.
+
+@deftypefun {uint8_t *} u8_strconv_from_locale (const char *@var{string})
+@deftypefunx {uint16_t *} u16_strconv_from_locale (const char *@var{string})
+@deftypefunx {uint32_t *} u32_strconv_from_locale (const char *@var{string})
+Converts a NUL terminated string from the locale encoding.
+
+The result is @code{malloc} allocated, or NULL (with @code{errno} set) in case of error.
+
+Particular @code{errno} values: @code{ENOMEM}.
+@end deftypefun
+
+@deftypefun {char *} u8_strconv_to_locale (const uint8_t *@var{string})
+@deftypefunx {char *} u16_strconv_to_locale (const uint16_t *@var{string})
+@deftypefunx {char *} u32_strconv_to_locale (const uint32_t *@var{string})
+Converts a NUL terminated string to the locale encoding.
+
+The result is @code{malloc} allocated, or NULL (with @code{errno} set) in case of error.
+
+Particular @code{errno} values: @code{ENOMEM}.
+@end deftypefun