summaryrefslogtreecommitdiff
path: root/lib/unistr
diff options
context:
space:
mode:
author Andreas Rottmann <a.rottmann@gmx.at> 2009-09-14 12:32:44 +0200
committer Andreas Rottmann <a.rottmann@gmx.at> 2009-09-14 12:32:44 +0200
commit fa095a4504cbe668e4244547e2c141597bea4ecf (patch)
tree 06135820a286ffec47804e75fbf8a147e92acd2e /lib/unistr
Imported Upstream version 0.9.1 (upstream/0.9.1)
Diffstat (limited to 'lib/unistr')
-rw-r--r--lib/unistr/u-cmp2.h32
-rw-r--r--lib/unistr/u-cpy-alloc.h39
-rw-r--r--lib/unistr/u-cpy.h32
-rw-r--r--lib/unistr/u-endswith.h28
-rw-r--r--lib/unistr/u-move.h44
-rw-r--r--lib/unistr/u-set.h39
-rw-r--r--lib/unistr/u-startswith.h30
-rw-r--r--lib/unistr/u-stpcpy.h24
-rw-r--r--lib/unistr/u-stpncpy.h30
-rw-r--r--lib/unistr/u-strcat.h26
-rw-r--r--lib/unistr/u-strcoll.h81
-rw-r--r--lib/unistr/u-strcpy.h26
-rw-r--r--lib/unistr/u-strcspn.h54
-rw-r--r--lib/unistr/u-strdup.h40
-rw-r--r--lib/unistr/u-strlen.h26
-rw-r--r--lib/unistr/u-strncat.h28
-rw-r--r--lib/unistr/u-strncpy.h32
-rw-r--r--lib/unistr/u-strnlen.h26
-rw-r--r--lib/unistr/u-strpbrk.h46
-rw-r--r--lib/unistr/u-strspn.h54
-rw-r--r--lib/unistr/u-strstr.h49
-rw-r--r--lib/unistr/u-strtok.h52
-rw-r--r--lib/unistr/u16-check.c51
-rw-r--r--lib/unistr/u16-chr.c56
-rw-r--r--lib/unistr/u16-cmp.c54
-rw-r--r--lib/unistr/u16-cmp2.c28
-rw-r--r--lib/unistr/u16-cpy-alloc.c25
-rw-r--r--lib/unistr/u16-cpy.c25
-rw-r--r--lib/unistr/u16-endswith.c27
-rw-r--r--lib/unistr/u16-mblen.c49
-rw-r--r--lib/unistr/u16-mbsnlen.c39
-rw-r--r--lib/unistr/u16-mbtouc-aux.c51
-rw-r--r--lib/unistr/u16-mbtouc-unsafe-aux.c55
-rw-r--r--lib/unistr/u16-mbtouc-unsafe.c66
-rw-r--r--lib/unistr/u16-mbtouc.c61
-rw-r--r--lib/unistr/u16-mbtoucr.c54
-rw-r--r--lib/unistr/u16-move.c25
-rw-r--r--lib/unistr/u16-next.c37
-rw-r--r--lib/unistr/u16-prev.c53
-rw-r--r--lib/unistr/u16-set.c26
-rw-r--r--lib/unistr/u16-startswith.c25
-rw-r--r--lib/unistr/u16-stpcpy.c25
-rw-r--r--lib/unistr/u16-stpncpy.c25
-rw-r--r--lib/unistr/u16-strcat.c26
-rw-r--r--lib/unistr/u16-strchr.c63
-rw-r--r--lib/unistr/u16-strcmp.c50
-rw-r--r--lib/unistr/u16-strcoll.c33
-rw-r--r--lib/unistr/u16-strcpy.c25
-rw-r--r--lib/unistr/u16-strcspn.c28
-rw-r--r--lib/unistr/u16-strdup.c26
-rw-r--r--lib/unistr/u16-strlen.c25
-rw-r--r--lib/unistr/u16-strmblen.c43
-rw-r--r--lib/unistr/u16-strmbtouc.c50
-rw-r--r--lib/unistr/u16-strncat.c26
-rw-r--r--lib/unistr/u16-strncmp.c54
-rw-r--r--lib/unistr/u16-strncpy.c25
-rw-r--r--lib/unistr/u16-strnlen.c25
-rw-r--r--lib/unistr/u16-strpbrk.c27
-rw-r--r--lib/unistr/u16-strrchr.c64
-rw-r--r--lib/unistr/u16-strspn.c29
-rw-r--r--lib/unistr/u16-strstr.c28
-rw-r--r--lib/unistr/u16-strtok.c27
-rw-r--r--lib/unistr/u16-to-u32.c125
-rw-r--r--lib/unistr/u16-to-u8.c136
-rw-r--r--lib/unistr/u16-uctomb-aux.c58
-rw-r--r--lib/unistr/u16-uctomb.c72
-rw-r--r--lib/unistr/u32-check.c39
-rw-r--r--lib/unistr/u32-chr.c32
-rw-r--r--lib/unistr/u32-cmp.c40
-rw-r--r--lib/unistr/u32-cmp2.c28
-rw-r--r--lib/unistr/u32-cpy-alloc.c25
-rw-r--r--lib/unistr/u32-cpy.c25
-rw-r--r--lib/unistr/u32-endswith.c27
-rw-r--r--lib/unistr/u32-mblen.c37
-rw-r--r--lib/unistr/u32-mbsnlen.c27
-rw-r--r--lib/unistr/u32-mbtouc-unsafe.c48
-rw-r--r--lib/unistr/u32-mbtouc.c43
-rw-r--r--lib/unistr/u32-mbtoucr.c39
-rw-r--r--lib/unistr/u32-move.c25
-rw-r--r--lib/unistr/u32-next.c39
-rw-r--r--lib/unistr/u32-prev.c39
-rw-r--r--lib/unistr/u32-set.c26
-rw-r--r--lib/unistr/u32-startswith.c25
-rw-r--r--lib/unistr/u32-stpcpy.c25
-rw-r--r--lib/unistr/u32-stpncpy.c25
-rw-r--r--lib/unistr/u32-strcat.c26
-rw-r--r--lib/unistr/u32-strchr.c36
-rw-r--r--lib/unistr/u32-strcmp.c36
-rw-r--r--lib/unistr/u32-strcoll.c33
-rw-r--r--lib/unistr/u32-strcpy.c25
-rw-r--r--lib/unistr/u32-strcspn.c51
-rw-r--r--lib/unistr/u32-strdup.c26
-rw-r--r--lib/unistr/u32-strlen.c25
-rw-r--r--lib/unistr/u32-strmblen.c36
-rw-r--r--lib/unistr/u32-strmbtouc.c39
-rw-r--r--lib/unistr/u32-strncat.c26
-rw-r--r--lib/unistr/u32-strncmp.c40
-rw-r--r--lib/unistr/u32-strncpy.c25
-rw-r--r--lib/unistr/u32-strnlen.c25
-rw-r--r--lib/unistr/u32-strpbrk.c50
-rw-r--r--lib/unistr/u32-strrchr.c38
-rw-r--r--lib/unistr/u32-strspn.c50
-rw-r--r--lib/unistr/u32-strstr.c26
-rw-r--r--lib/unistr/u32-strtok.c27
-rw-r--r--lib/unistr/u32-to-u16.c130
-rw-r--r--lib/unistr/u32-to-u8.c130
-rw-r--r--lib/unistr/u32-uctomb.c47
-rw-r--r--lib/unistr/u8-check.c105
-rw-r--r--lib/unistr/u8-chr.c87
-rw-r--r--lib/unistr/u8-cmp.c30
-rw-r--r--lib/unistr/u8-cmp2.c28
-rw-r--r--lib/unistr/u8-cpy-alloc.c25
-rw-r--r--lib/unistr/u8-cpy.c25
-rw-r--r--lib/unistr/u8-endswith.c27
-rw-r--r--lib/unistr/u8-mblen.c98
-rw-r--r--lib/unistr/u8-mbsnlen.c39
-rw-r--r--lib/unistr/u8-mbtouc-aux.c158
-rw-r--r--lib/unistr/u8-mbtouc-unsafe-aux.c168
-rw-r--r--lib/unistr/u8-mbtouc-unsafe.c179
-rw-r--r--lib/unistr/u8-mbtouc.c168
-rw-r--r--lib/unistr/u8-mbtoucr.c285
-rw-r--r--lib/unistr/u8-move.c25
-rw-r--r--lib/unistr/u8-next.c37
-rw-r--r--lib/unistr/u8-prev.c93
-rw-r--r--lib/unistr/u8-set.c44
-rw-r--r--lib/unistr/u8-startswith.c25
-rw-r--r--lib/unistr/u8-stpcpy.c44
-rw-r--r--lib/unistr/u8-stpncpy.c44
-rw-r--r--lib/unistr/u8-strcat.c29
-rw-r--r--lib/unistr/u8-strchr.c100
-rw-r--r--lib/unistr/u8-strcmp.c30
-rw-r--r--lib/unistr/u8-strcoll.c33
-rw-r--r--lib/unistr/u8-strcpy.c29
-rw-r--r--lib/unistr/u8-strcspn.c28
-rw-r--r--lib/unistr/u8-strdup.c40
-rw-r--r--lib/unistr/u8-strlen.c29
-rw-r--r--lib/unistr/u8-strmblen.c96
-rw-r--r--lib/unistr/u8-strmbtouc.c129
-rw-r--r--lib/unistr/u8-strncat.c29
-rw-r--r--lib/unistr/u8-strncmp.c30
-rw-r--r--lib/unistr/u8-strncpy.c29
-rw-r--r--lib/unistr/u8-strnlen.c44
-rw-r--r--lib/unistr/u8-strpbrk.c27
-rw-r--r--lib/unistr/u8-strrchr.c101
-rw-r--r--lib/unistr/u8-strspn.c29
-rw-r--r--lib/unistr/u8-strstr.c28
-rw-r--r--lib/unistr/u8-strtok.c27
-rw-r--r--lib/unistr/u8-to-u16.c136
-rw-r--r--lib/unistr/u8-to-u32.c125
-rw-r--r--lib/unistr/u8-uctomb-aux.c69
-rw-r--r--lib/unistr/u8-uctomb.c88
151 files changed, 7360 insertions, 0 deletions
diff --git a/lib/unistr/u-cmp2.h b/lib/unistr/u-cmp2.h
new file mode 100644
index 0000000..ae3750c
--- /dev/null
+++ b/lib/unistr/u-cmp2.h
@@ -0,0 +1,32 @@
+/* Compare pieces of UTF-8/UTF-16/UTF-32 strings.
+ Copyright (C) 2009 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2009.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* Compare the N1 units at S1 with the N2 units at S2.  The common prefix
+   (MIN (n1, n2) units) is compared with U_CMP; if it is equal, the shorter
+   piece orders first.  Returns U_CMP's result, or -1, 0 or 1 on a tie.  */
+int
+FUNC (const UNIT *s1, size_t n1, const UNIT *s2, size_t n2)
+{
+  size_t common = MIN (n1, n2);
+  int order = U_CMP (s1, s2, common);
+
+  if (order != 0)
+    return order;
+
+  /* Equal prefixes: only the lengths can still tell the pieces apart.  */
+  return (n1 > n2) - (n1 < n2);
+}
diff --git a/lib/unistr/u-cpy-alloc.h b/lib/unistr/u-cpy-alloc.h
new file mode 100644
index 0000000..dace3e2
--- /dev/null
+++ b/lib/unistr/u-cpy-alloc.h
@@ -0,0 +1,39 @@
+/* Copy piece of UTF-8/UTF-16/UTF-32 string.
+ Copyright (C) 1999, 2002, 2006-2007, 2009 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <stdlib.h>
+#include <string.h>
+
+/* Return a freshly malloc'ed copy of the N units at S, or NULL if the
+   allocation fails or N * sizeof (UNIT) does not fit in size_t.  For N == 0
+   a 1-byte block is returned and S is not read (it may even be NULL).  The
+   caller owns the result and must free() it.  */
+UNIT *
+FUNC (const UNIT *s, size_t n)
+{
+  UNIT *dest;
+
+  /* Refuse sizes whose byte count would wrap and under-allocate.  */
+  if (n > (size_t) -1 / sizeof (UNIT))
+    return NULL;
+
+  dest = (UNIT *) malloc (n > 0 ? n * sizeof (UNIT) : 1);
+  /* memcpy must not be given a possibly-NULL S, even for zero bytes.  */
+  if (dest != NULL && n > 0)
+    memcpy (dest, s, n * sizeof (UNIT));
+  return dest;
+}
diff --git a/lib/unistr/u-cpy.h b/lib/unistr/u-cpy.h
new file mode 100644
index 0000000..c660eae
--- /dev/null
+++ b/lib/unistr/u-cpy.h
@@ -0,0 +1,32 @@
+/* Copy piece of UTF-8/UTF-16/UTF-32 string.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <string.h>
+
+/* Copy N units from SRC to DEST and return DEST.  The byte count is
+   N * sizeof (UNIT) and the copy is done with memcpy, so the two regions
+   must not overlap.  */
+UNIT *
+FUNC (UNIT *dest, const UNIT *src, size_t n)
+{
+  size_t nbytes = n * sizeof (UNIT);
+
+  /* Equivalent to copying unit by unit; memcpy is used instead of a
+     hand-written loop.  */
+  memcpy ((char *) dest, (const char *) src, nbytes);
+  return dest;
+}
diff --git a/lib/unistr/u-endswith.h b/lib/unistr/u-endswith.h
new file mode 100644
index 0000000..739bfbb
--- /dev/null
+++ b/lib/unistr/u-endswith.h
@@ -0,0 +1,28 @@
+/* Substring test for UTF-8/UTF-16/UTF-32 strings.
+ Copyright (C) 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* Return true if the NUL-terminated string STR ends with the NUL-terminated
+   string SUFFIX, comparing the last U_STRLEN (suffix) units with U_CMP.  */
+bool
+FUNC (const UNIT *str, const UNIT *suffix)
+{
+  size_t total = U_STRLEN (str);
+  size_t tail = U_STRLEN (suffix);
+
+  /* A suffix longer than STR can never match.  */
+  return tail <= total && U_CMP (str + (total - tail), suffix, tail) == 0;
+}
diff --git a/lib/unistr/u-move.h b/lib/unistr/u-move.h
new file mode 100644
index 0000000..77b6788
--- /dev/null
+++ b/lib/unistr/u-move.h
@@ -0,0 +1,44 @@
+/* Copy piece of UTF-8/UTF-16/UTF-32 string.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <string.h>
+
+/* Copy N units from SRC to DEST and return DEST.  The two ranges may
+   overlap: the copy is delegated to memmove, with a byte count of
+   N * sizeof (UNIT).  */
+UNIT *
+FUNC (UNIT *dest, const UNIT *src, size_t n)
+{
+#if 0
+  /* Disabled portable fallback, kept to show what memmove does here:
+     copy forwards when DEST lies below SRC, backwards when it lies above,
+     and do nothing when both are the same address.  */
+  size_t i;
+
+  if (dest < src)
+    for (i = 0; i < n; i++)
+      dest[i] = src[i];
+  else if (dest > src)
+    for (i = n; i > 0; i--)
+      dest[i - 1] = src[i - 1];
+#else
+  size_t nbytes = n * sizeof (UNIT);
+
+  memmove ((char *) dest, (const char *) src, nbytes);
+#endif
+  return dest;
+}
diff --git a/lib/unistr/u-set.h b/lib/unistr/u-set.h
new file mode 100644
index 0000000..a093e7f
--- /dev/null
+++ b/lib/unistr/u-set.h
@@ -0,0 +1,39 @@
+/* Fill UTF-8/UTF-16/UTF-32 string.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <errno.h>
+
+/* Store the character UC into each of the first N units of S and return S.
+   UC must be representable as a single unit (IS_SINGLE_UNIT); otherwise,
+   when N > 0, nothing is written, errno is set to EILSEQ and NULL is
+   returned.  With N == 0 the call returns S without examining UC.  */
+UNIT *
+FUNC (UNIT *s, ucs4_t uc, size_t n)
+{
+  size_t i;
+
+  if (n == 0)
+    return s;
+  if (!IS_SINGLE_UNIT (uc))
+    {
+      errno = EILSEQ;
+      return NULL;
+    }
+  for (i = 0; i < n; i++)
+    s[i] = uc;
+  return s;
+}
diff --git a/lib/unistr/u-startswith.h b/lib/unistr/u-startswith.h
new file mode 100644
index 0000000..0486ef8
--- /dev/null
+++ b/lib/unistr/u-startswith.h
@@ -0,0 +1,30 @@
+/* Substring test for UTF-8/UTF-16/UTF-32 strings.
+ Copyright (C) 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* Return true if the NUL-terminated string STR begins with the
+   NUL-terminated string PREFIX.  An empty PREFIX always matches.  */
+bool
+FUNC (const UNIT *str, const UNIT *prefix)
+{
+  size_t i;
+
+  /* If STR ends first, its terminator differs from the nonzero prefix[i].  */
+  for (i = 0; prefix[i] != 0; i++)
+    if (str[i] != prefix[i])
+      return false;
+  return true;
+}
diff --git a/lib/unistr/u-stpcpy.h b/lib/unistr/u-stpcpy.h
new file mode 100644
index 0000000..b13e816
--- /dev/null
+++ b/lib/unistr/u-stpcpy.h
@@ -0,0 +1,24 @@
+/* Copy UTF-8/UTF-16/UTF-32 string.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* Copy SRC to DEST, NUL included; return a pointer to the NUL in DEST.  */
+UNIT *
+FUNC (UNIT *dest, const UNIT *src)
+{
+  while ((*dest = *src++) != 0)
+    dest++;
+  return dest;
+}
diff --git a/lib/unistr/u-stpncpy.h b/lib/unistr/u-stpncpy.h
new file mode 100644
index 0000000..09bf434
--- /dev/null
+++ b/lib/unistr/u-stpncpy.h
@@ -0,0 +1,30 @@
+/* Copy UTF-8/UTF-16/UTF-32 string.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* Copy at most N units of the NUL-terminated string SRC to DEST, filling
+   the rest of the N units with zeros, like stpncpy.  Returns a pointer to
+   the first zero unit written into DEST, or DEST + N if SRC has N or more
+   units and therefore no terminator was stored.  */
+UNIT *
+FUNC (UNIT *dest, const UNIT *src, size_t n)
+{
+  UNIT *pad;
+
+  /* On exit DEST points at the copied terminator, or at the end of the
+     N-unit area if no terminator fit.  That is the return value.  */
+  for (; n > 0 && (*dest = *src) != 0; src++, dest++, n--)
+    ;
+
+  /* Zero-fill through a separate pointer so that DEST keeps marking the end
+     of the copied text.  The padding is rarely useful, but it is here for
+     consistency with strncpy and wcsncpy.  */
+  for (pad = dest; n > 0; n--)
+    *pad++ = 0;
+
+  return dest;
+}
diff --git a/lib/unistr/u-strcat.h b/lib/unistr/u-strcat.h
new file mode 100644
index 0000000..e86bbf9
--- /dev/null
+++ b/lib/unistr/u-strcat.h
@@ -0,0 +1,26 @@
+/* Concatenate UTF-8/UTF-16/UTF-32 strings.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* Append SRC (terminator included) to the end of DEST; return DEST.  */
+UNIT *
+FUNC (UNIT *dest, const UNIT *src)
+{
+  UNIT *tail = dest + U_STRLEN (dest);
+  while ((*tail++ = *src++) != 0)
+    continue;
+  return dest;
+}
diff --git a/lib/unistr/u-strcoll.h b/lib/unistr/u-strcoll.h
new file mode 100644
index 0000000..af404a0
--- /dev/null
+++ b/lib/unistr/u-strcoll.h
@@ -0,0 +1,81 @@
+/* Compare UTF-8/UTF-16/UTF-32 strings using the collation rules of the current
+ locale.
+ Copyright (C) 2009 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2009.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* Compare S1 and S2 with strcoll, after converting both to the locale
+   encoding with U_STRCONV_TO_LOCALE.
+
+   On success errno is left at the value it had on entry.  On failure errno
+   is set, but a usable result is still returned for callers that ignore
+   errno:
+     - only S2 fails to convert: -1;
+     - only S1 fails to convert: 1;
+     - neither converts, or strcoll itself reports an error: the result of
+       U_STRCMP (s1, s2).  */
+int
+FUNC (const UNIT *s1, const UNIT *s2)
+{
+  int saved_errno = errno;
+  int order;
+  char *loc1 = U_STRCONV_TO_LOCALE (s1);
+  char *loc2;
+
+  if (loc1 == NULL)
+    {
+      /* S1 is not convertible; its errno is the one that gets reported,
+         whatever happens to S2.  */
+      saved_errno = errno;
+      loc2 = U_STRCONV_TO_LOCALE (s2);
+      if (loc2 == NULL)
+        order = U_STRCMP (s1, s2);
+      else
+        {
+          free (loc2);
+          order = 1;
+        }
+    }
+  else if ((loc2 = U_STRCONV_TO_LOCALE (s2)) == NULL)
+    {
+      /* S1 converted, S2 did not.  */
+      saved_errno = errno;
+      free (loc1);
+      order = -1;
+    }
+  else
+    {
+      int coll_errno;
+
+      /* strcoll has no error return value, so clear errno first and look
+         at it afterwards, before free() gets a chance to touch it.  */
+      errno = 0;
+      order = strcoll (loc1, loc2);
+      coll_errno = errno;
+      free (loc1);
+      free (loc2);
+      if (coll_errno != 0)
+        {
+          saved_errno = coll_errno;
+          order = U_STRCMP (s1, s2);
+        }
+    }
+
+  errno = saved_errno;
+  return order;
+}
diff --git a/lib/unistr/u-strcpy.h b/lib/unistr/u-strcpy.h
new file mode 100644
index 0000000..153f60e
--- /dev/null
+++ b/lib/unistr/u-strcpy.h
@@ -0,0 +1,26 @@
+/* Copy UTF-8/UTF-16/UTF-32 string.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* Copy the string SRC (terminator included) to DEST; return DEST.  */
+UNIT *
+FUNC (UNIT *dest, const UNIT *src)
+{
+  UNIT *out = dest;
+  while ((*out++ = *src++) != 0)
+    continue;
+  return dest;
+}
diff --git a/lib/unistr/u-strcspn.h b/lib/unistr/u-strcspn.h
new file mode 100644
index 0000000..de32656
--- /dev/null
+++ b/lib/unistr/u-strcspn.h
@@ -0,0 +1,54 @@
+/* Search for some characters in UTF-8/UTF-16/UTF-32 string.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* Return the number of units at the start of STR that precede the first
+   character occurring in REJECT.  If no such character is found, or if
+   U_STRMBTOUC reports an error (negative count) while scanning STR, the
+   result is U_STRLEN (str).  */
+size_t
+FUNC (const UNIT *str, const UNIT *reject)
+{
+  ucs4_t uc;
+  int count;
+  const UNIT *ptr;
+
+  /* Shortcut 1: an empty REJECT rejects nothing.  */
+  if (reject[0] == 0)
+    return U_STRLEN (str);
+
+  /* Shortcut 2: REJECT consists of exactly one character, so a single
+     U_STRCHR search answers the question.  */
+  count = U_STRMBTOUC (&uc, reject);
+  if (count >= 0 && reject[count] == 0)
+    {
+      const UNIT *hit = U_STRCHR (str, uc);
+
+      return hit != NULL ? (size_t) (hit - str) : U_STRLEN (str);
+    }
+
+  /* General case: decode STR character by character.  A count of 0 is
+     treated as the end of STR; a negative count ends the scan.  */
+  for (ptr = str; (count = U_STRMBTOUC (&uc, ptr)) >= 0; ptr += count)
+    if (count == 0 || U_STRCHR (reject, uc))
+      return ptr - str;
+
+  return U_STRLEN (str);
+}
diff --git a/lib/unistr/u-strdup.h b/lib/unistr/u-strdup.h
new file mode 100644
index 0000000..71e527a
--- /dev/null
+++ b/lib/unistr/u-strdup.h
@@ -0,0 +1,40 @@
+/* Copy UTF-8/UTF-16/UTF-32 string.
+ Copyright (C) 1999, 2002, 2006-2007 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <stdlib.h>
+#include <string.h>
+
+/* Return a freshly malloc'ed copy of the NUL-terminated string S, including
+   its terminator, or NULL if malloc fails.  The caller owns the result.  */
+UNIT *
+FUNC (const UNIT *s)
+{
+  size_t nbytes = (U_STRLEN (s) + 1) * sizeof (UNIT);
+  UNIT *copy = (UNIT *) malloc (nbytes);
+
+  if (copy == NULL)
+    return NULL;
+
+  memcpy ((char *) copy, (const char *) s, nbytes);
+  return copy;
+}
diff --git a/lib/unistr/u-strlen.h b/lib/unistr/u-strlen.h
new file mode 100644
index 0000000..51dcae0
--- /dev/null
+++ b/lib/unistr/u-strlen.h
@@ -0,0 +1,26 @@
+/* Determine length of UTF-8/UTF-16/UTF-32 string.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* Return the number of units in S before the terminating zero unit.  */
+size_t
+FUNC (const UNIT *s)
+{
+  size_t len = 0;
+  while (s[len] != 0)
+    len++;
+  return len;
+}
diff --git a/lib/unistr/u-strncat.h b/lib/unistr/u-strncat.h
new file mode 100644
index 0000000..40b442e
--- /dev/null
+++ b/lib/unistr/u-strncat.h
@@ -0,0 +1,28 @@
+/* Concatenate UTF-8/UTF-16/UTF-32 strings.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* Append at most N units of the NUL-terminated string SRC to the
+   NUL-terminated string DEST and always terminate the result, so DEST needs
+   room for one unit more than what is appended.  Returns DEST.  */
+UNIT *
+FUNC (UNIT *dest, const UNIT *src, size_t n)
+{
+  UNIT *tail = dest + U_STRLEN (dest);
+  size_t i;
+
+  for (i = 0; i < n; i++)
+    if ((tail[i] = src[i]) == 0)
+      return dest;            /* SRC's own terminator was copied.  */
+
+  /* N units were appended without meeting a terminator; add one.  */
+  tail[n] = 0;
+  return dest;
+}
diff --git a/lib/unistr/u-strncpy.h b/lib/unistr/u-strncpy.h
new file mode 100644
index 0000000..3d441b5
--- /dev/null
+++ b/lib/unistr/u-strncpy.h
@@ -0,0 +1,32 @@
+/* Copy UTF-8/UTF-16/UTF-32 string.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* Copy at most N units of the NUL-terminated string SRC to DEST and return
+   DEST.  If SRC is shorter than N units the remainder of DEST is filled
+   with zeros; if it is not, DEST is left without a terminator.  */
+UNIT *
+FUNC (UNIT *dest, const UNIT *src, size_t n)
+{
+  size_t i = 0;
+
+  /* Copy units up to and including the terminator, but no more than N.  */
+  while (i < n && (dest[i] = src[i]) != 0)
+    i++;
+
+  /* Zero-fill whatever is left.  This behavior is rarely useful, but it is
+     here for consistency with strncpy and wcsncpy.  */
+  while (i < n)
+    dest[i++] = 0;
+
+  return dest;
+}
diff --git a/lib/unistr/u-strnlen.h b/lib/unistr/u-strnlen.h
new file mode 100644
index 0000000..6a1d2ad
--- /dev/null
+++ b/lib/unistr/u-strnlen.h
@@ -0,0 +1,26 @@
+/* Determine bounded length of UTF-8/UTF-16/UTF-32 string.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* Length of S in units, scanning at most MAXLEN units for the zero.  */
+size_t
+FUNC (const UNIT *s, size_t maxlen)
+{
+  size_t len = 0;
+  while (len < maxlen && s[len] != 0)
+    len++;
+  return len;
+}
diff --git a/lib/unistr/u-strpbrk.h b/lib/unistr/u-strpbrk.h
new file mode 100644
index 0000000..2ff4618
--- /dev/null
+++ b/lib/unistr/u-strpbrk.h
@@ -0,0 +1,46 @@
+/* Search for some characters in UTF-8/UTF-16/UTF-32 string.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* Return a pointer to the first character of STR that occurs in ACCEPT, or
+   NULL if there is none.  The scan of STR also stops, yielding NULL, when
+   U_STRMBTOUC returns a count <= 0 (treated as end of string or error).
+   The result points into STR with its const qualifier cast away.  */
+UNIT *
+FUNC (const UNIT *str, const UNIT *accept)
+{
+  ucs4_t uc;
+  int count;
+  const UNIT *ptr;
+
+  /* Shortcut 1: an empty ACCEPT can match nothing.  */
+  if (accept[0] == 0)
+    return NULL;
+
+  /* Shortcut 2: ACCEPT is exactly one character; let U_STRCHR find it.  */
+  count = U_STRMBTOUC (&uc, accept);
+  if (count >= 0 && accept[count] == 0)
+    return U_STRCHR (str, uc);
+
+  /* General case: decode STR character by character and look each one up
+     in ACCEPT.  */
+  for (ptr = str; (count = U_STRMBTOUC (&uc, ptr)) > 0; ptr += count)
+    if (U_STRCHR (accept, uc))
+      return (UNIT *) ptr;
+
+  return NULL;
+}
diff --git a/lib/unistr/u-strspn.h b/lib/unistr/u-strspn.h
new file mode 100644
index 0000000..6502ce4
--- /dev/null
+++ b/lib/unistr/u-strspn.h
@@ -0,0 +1,54 @@
+/* Search for some characters in UTF-8/UTF-16/UTF-32 string.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* Return the length, in units, of the initial segment of STR that consists
+   only of characters occurring in ACCEPT.  */
+size_t
+FUNC (const UNIT *str, const UNIT *accept)
+{
+  const UNIT *cursor = str;
+  ucs4_t uc;
+  int count;
+
+  /* Nothing is accepted, so the segment is empty.  */
+  if (accept[0] == 0)
+    return 0;
+
+  count = U_STRMBTOUC (&uc, accept);
+  if (count >= 0 && accept[count] == 0)
+    {
+      /* ACCEPT ends right after its first decoded character: compare its
+         COUNT units against STR repeatedly, without decoding STR.  */
+      while (*cursor != 0 && U_CMP (cursor, accept, count) == 0)
+        cursor += count;
+      return cursor - str;
+    }
+
+  /* General case: decode STR character by character.  */
+  for (;;)
+    {
+      count = U_STRMBTOUC (&uc, cursor);
+      if (count < 0)
+        /* Decoding failed: the whole string length is reported.  */
+        return U_STRLEN (str);
+      if (count == 0 || !U_STRCHR (accept, uc))
+        /* Terminator reached, or a character outside ACCEPT.  */
+        return cursor - str;
+      cursor += count;
+    }
+}
diff --git a/lib/unistr/u-strstr.h b/lib/unistr/u-strstr.h
new file mode 100644
index 0000000..55b5a31
--- /dev/null
+++ b/lib/unistr/u-strstr.h
@@ -0,0 +1,49 @@
+/* Substring test for UTF-8/UTF-16/UTF-32 strings.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* Return a pointer to the first occurrence of NEEDLE in HAYSTACK, comparing
+   unit by unit, or NULL if there is none.  An empty NEEDLE matches at
+   HAYSTACK itself.  */
+UNIT *
+FUNC (const UNIT *haystack, const UNIT *needle)
+{
+  const UNIT *start;
+  UNIT lead = needle[0];
+
+  /* Empty needle.  */
+  if (lead == 0)
+    return (UNIT *) haystack;
+
+  /* One-unit needle: delegate to the character search.  */
+  if (needle[1] == 0)
+    return U_STRCHR (haystack, lead);
+
+  /* Try every position whose unit equals the needle's first unit.  */
+  for (start = haystack; *start != 0; start++)
+    {
+      const UNIT *h;
+      const UNIT *n;
+
+      if (*start != lead)
+        continue;
+
+      /* Compare the remaining units.  *n is non-zero whenever it is
+         compared, so reaching HAYSTACK's terminator shows up as a mismatch
+         and the scan never reads past it.  */
+      h = start + 1;
+      n = needle + 1;
+      while (*h == *n)
+        {
+          h++;
+          n++;
+          if (*n == 0)
+            return (UNIT *) start;
+        }
+    }
+
+  return NULL;
+}
diff --git a/lib/unistr/u-strtok.h b/lib/unistr/u-strtok.h
new file mode 100644
index 0000000..7ed57d6
--- /dev/null
+++ b/lib/unistr/u-strtok.h
@@ -0,0 +1,52 @@
+/* Tokenize UTF-8/UTF-16/UTF-32 string.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* Split a string into tokens separated by characters from DELIM.
+   On the first call STR is the string to tokenize; on later calls STR is
+   NULL and scanning resumes from the position saved in *PTR.
+   Returns a pointer to the next token, which is NUL-terminated in place,
+   or NULL when no token is left.  *PTR is set to NULL once the end of the
+   string has been reached.  */
+UNIT *
+FUNC (UNIT *str, const UNIT *delim, UNIT **ptr)
+{
+  if (str == NULL)
+    {
+      str = *ptr;
+      if (str == NULL)
+        return NULL; /* reminder that end of token sequence has been reached */
+    }
+
+  /* Skip leading delimiters.  */
+  str += U_STRSPN (str, delim);
+
+  /* Found a token?  */
+  if (*str == 0)
+    {
+      *ptr = NULL;
+      return NULL;
+    }
+
+  /* Move past the token.  */
+  {
+    UNIT *token_end = U_STRPBRK (str, delim);
+    if (token_end)
+      {
+        /* A delimiter character may occupy more than one unit, so resuming
+           at token_end + 1 could land in the middle of it.  Measure the
+           delimiter run while it is still intact and resume after it; the
+           next call would skip these delimiters anyway.  */
+        UNIT *token_next = token_end + U_STRSPN (token_end, delim);
+        if (token_next == token_end)
+          /* Always advance past the unit that is about to be zeroed.  */
+          token_next = token_end + 1;
+
+        /* NUL-terminate the token.  */
+        *token_end = 0;
+        *ptr = token_next;
+      }
+    else
+      *ptr = NULL;
+  }
+
+  return str;
+}
diff --git a/lib/unistr/u16-check.c b/lib/unistr/u16-check.c
new file mode 100644
index 0000000..380cec2
--- /dev/null
+++ b/lib/unistr/u16-check.c
@@ -0,0 +1,51 @@
+/* Check UTF-16 string.
+ Copyright (C) 2002, 2006-2007 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+/* Check whether the N units starting at S form well-formed UTF-16.
+   Returns NULL if they do, otherwise a pointer to the first unit that
+   starts an invalid or incomplete sequence.  */
+const uint16_t *
+u16_check (const uint16_t *s, size_t n)
+{
+  const uint16_t *s_end = s + n;
+
+  while (s < s_end)
+    {
+      /* Keep in sync with unistr.h and utf16-ucs4.c.  */
+      uint16_t c = *s;
+
+      if (c < 0xd800 || c >= 0xe000)
+        {
+          /* Not a surrogate: a complete character in one unit.  */
+          s++;
+          continue;
+        }
+      if (c < 0xdc00)
+        {
+          /* High surrogate: a low surrogate must follow.  The remaining
+             length is computed as a pointer difference so that no pointer
+             beyond one-past-the-end is ever formed.  */
+          if (s_end - s >= 2
+              && s[1] >= 0xdc00 && s[1] < 0xe000)
+            {
+              s += 2;
+              continue;
+            }
+        }
+      /* invalid or incomplete multibyte character */
+      return s;
+    }
+  return NULL;
+}
diff --git a/lib/unistr/u16-chr.c b/lib/unistr/u16-chr.c
new file mode 100644
index 0000000..2d7d797
--- /dev/null
+++ b/lib/unistr/u16-chr.c
@@ -0,0 +1,56 @@
+/* Search character in piece of UTF-16 string.
+ Copyright (C) 1999, 2002, 2006-2007 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+/* Search the N units starting at S for the character UC.
+   Returns a pointer to its first occurrence, or NULL if it is not found.  */
+uint16_t *
+u16_chr (const uint16_t *s, size_t n, ucs4_t uc)
+{
+  if (uc < 0x10000)
+    {
+      /* UC fits in a single unit: scan for that unit.  */
+      const uint16_t unit = uc;
+      const uint16_t *stop = s + n;
+
+      while (s != stop)
+        {
+          if (*s == unit)
+            return (uint16_t *) s;
+          s++;
+        }
+    }
+  else
+    {
+      uint16_t pair[2];
+
+      /* UC needs two units: encode it, then look for the pair.  Only the
+         first N - 1 positions can hold the start of a two-unit match.  */
+      if (u16_uctomb_aux (pair, uc, 2) == 2 && n > 1)
+        {
+          const uint16_t *last = s + (n - 1);
+
+          for (; s != last; s++)
+            if (s[0] == pair[0] && s[1] == pair[1])
+              return (uint16_t *) s;
+        }
+    }
+  return NULL;
+}
diff --git a/lib/unistr/u16-cmp.c b/lib/unistr/u16-cmp.c
new file mode 100644
index 0000000..0130d27
--- /dev/null
+++ b/lib/unistr/u16-cmp.c
@@ -0,0 +1,54 @@
+/* Compare pieces of UTF-16 strings.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+/* Compare the first N units of S1 and S2.
+   Returns 0 if they are equal, a negative value if S1 sorts first and a
+   positive value if S2 sorts first.  */
+int
+u16_cmp (const uint16_t *s1, const uint16_t *s2, size_t n)
+{
+  size_t i;
+
+  /* UTF-16 unit order is not code point order: a surrogate (part of a
+     character >= U+10000) must sort after every non-surrogate unit, even
+     though units 0xE000..0xFFFF are numerically larger.  */
+  for (i = 0; i < n; i++)
+    {
+      uint16_t a = s1[i];
+      uint16_t b = s2[i];
+      int a_is_surrogate;
+      int b_is_surrogate;
+
+      if (a == b)
+        continue;
+
+      a_is_surrogate = (a >= 0xd800 && a < 0xe000);
+      b_is_surrogate = (b >= 0xd800 && b < 0xe000);
+
+      /* Exactly one side is a surrogate: that side is the greater one.  */
+      if (a_is_surrogate != b_is_surrogate)
+        return a_is_surrogate ? 1 : -1;
+
+      /* Same class: the numeric order of the units decides.  */
+      return (int) a - (int) b;
+    }
+  return 0;
+}
diff --git a/lib/unistr/u16-cmp2.c b/lib/unistr/u16-cmp2.c
new file mode 100644
index 0000000..766dcd2
--- /dev/null
+++ b/lib/unistr/u16-cmp2.c
@@ -0,0 +1,28 @@
+/* Compare pieces of UTF-16 strings.
+ Copyright (C) 2009 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2009.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#include "minmax.h"
+
+/* Instantiate the generic template in u-cmp2.h for UTF-16: the generated
+ function is named u16_cmp2, works on uint16_t units, and has U_CMP bound
+ to u16_cmp. */
+#define FUNC u16_cmp2
+#define UNIT uint16_t
+#define U_CMP u16_cmp
+#include "u-cmp2.h"
diff --git a/lib/unistr/u16-cpy-alloc.c b/lib/unistr/u16-cpy-alloc.c
new file mode 100644
index 0000000..047977e
--- /dev/null
+++ b/lib/unistr/u16-cpy-alloc.c
@@ -0,0 +1,25 @@
+/* Copy piece of UTF-16 string.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+/* Instantiate the generic template in u-cpy-alloc.h for UTF-16: the
+ generated function is named u16_cpy_alloc and works on uint16_t units. */
+#define FUNC u16_cpy_alloc
+#define UNIT uint16_t
+#include "u-cpy-alloc.h"
diff --git a/lib/unistr/u16-cpy.c b/lib/unistr/u16-cpy.c
new file mode 100644
index 0000000..13e04b8
--- /dev/null
+++ b/lib/unistr/u16-cpy.c
@@ -0,0 +1,25 @@
+/* Copy piece of UTF-16 string.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+/* Instantiate the generic template in u-cpy.h for UTF-16: the generated
+ function is named u16_cpy and works on uint16_t units. */
+#define FUNC u16_cpy
+#define UNIT uint16_t
+#include "u-cpy.h"
diff --git a/lib/unistr/u16-endswith.c b/lib/unistr/u16-endswith.c
new file mode 100644
index 0000000..d9abf46
--- /dev/null
+++ b/lib/unistr/u16-endswith.c
@@ -0,0 +1,27 @@
+/* Substring test for UTF-16 strings.
+ Copyright (C) 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+/* Instantiate the generic template in u-endswith.h for UTF-16: the
+ generated function is named u16_endswith, works on uint16_t units, and
+ has U_STRLEN and U_CMP bound to u16_strlen and u16_cmp. */
+#define FUNC u16_endswith
+#define UNIT uint16_t
+#define U_STRLEN u16_strlen
+#define U_CMP u16_cmp
+#include "u-endswith.h"
diff --git a/lib/unistr/u16-mblen.c b/lib/unistr/u16-mblen.c
new file mode 100644
index 0000000..6bb35ac
--- /dev/null
+++ b/lib/unistr/u16-mblen.c
@@ -0,0 +1,49 @@
+/* Look at first character in UTF-16 string.
+ Copyright (C) 1999-2000, 2002, 2006-2007 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+/* Return the length, in units, of the first character in the N units
+   starting at S: 0 for the NUL character, 1 or 2 otherwise, and -1 if the
+   range is empty or starts with an invalid or incomplete sequence.  */
+int
+u16_mblen (const uint16_t *s, size_t n)
+{
+  uint16_t first;
+
+  /* An empty range holds no character.  */
+  if (n == 0)
+    return -1;
+
+  /* Keep in sync with unistr.h and utf16-ucs4.c.  */
+  first = s[0];
+
+  /* Not a surrogate: a single unit, reported as 0 when it is NUL.  */
+  if (!(first >= 0xd800 && first < 0xe000))
+    return first == 0 ? 0 : 1;
+
+#if CONFIG_UNICODE_SAFETY
+  /* Strict mode: require a high surrogate followed by a low surrogate.  */
+  if (first < 0xdc00 && n >= 2 && s[1] >= 0xdc00 && s[1] < 0xe000)
+    return 2;
+#else
+  /* Lenient mode: any surrogate with a following unit counts as a pair.  */
+  if (n >= 2)
+    return 2;
+#endif
+
+  /* invalid or incomplete multibyte character */
+  return -1;
+}
diff --git a/lib/unistr/u16-mbsnlen.c b/lib/unistr/u16-mbsnlen.c
new file mode 100644
index 0000000..881958e
--- /dev/null
+++ b/lib/unistr/u16-mbsnlen.c
@@ -0,0 +1,39 @@
+/* Count characters in UTF-16 string.
+ Copyright (C) 2007 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2007.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+/* Count the characters in the N units starting at S.  */
+size_t
+u16_mbsnlen (const uint16_t *s, size_t n)
+{
+  size_t total;
+
+  for (total = 0; n > 0; total++)
+    {
+      int step = u16_mblen (s, n);
+
+      /* A NUL unit (0) or an invalid sequence (negative) still counts as
+         one character occupying one unit.  */
+      if (step < 1)
+        step = 1;
+      s += step;
+      n -= step;
+    }
+  return total;
+}
diff --git a/lib/unistr/u16-mbtouc-aux.c b/lib/unistr/u16-mbtouc-aux.c
new file mode 100644
index 0000000..5f35b86
--- /dev/null
+++ b/lib/unistr/u16-mbtouc-aux.c
@@ -0,0 +1,51 @@
+/* Conversion UTF-16 to UCS-4.
+ Copyright (C) 2001-2002, 2006-2007, 2009 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2001.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#if defined IN_LIBUNISTRING || HAVE_INLINE
+
+/* Decode a surrogate sequence at S (at most N units) into *PUC and return
+   the number of units consumed.  On an invalid or incomplete sequence,
+   *PUC is set to U+FFFD and 1 is returned.
+   NOTE(review): non-surrogate units are not special-cased here; presumably
+   the caller handles them before calling -- confirm against unistr.h.  */
+int
+u16_mbtouc_aux (ucs4_t *puc, const uint16_t *s, size_t n)
+{
+  uint16_t hi = s[0];
+
+  /* A pair needs a unit below 0xdc00 first and a second unit to read.  */
+  if (hi < 0xdc00 && n >= 2)
+    {
+      uint16_t lo = s[1];
+
+      if (lo >= 0xdc00 && lo < 0xe000)
+        {
+          *puc = 0x10000 + ((hi - 0xd800) << 10) + (lo - 0xdc00);
+          return 2;
+        }
+    }
+
+  /* invalid or incomplete multibyte character */
+  *puc = 0xfffd;
+  return 1;
+}
+
+#endif
diff --git a/lib/unistr/u16-mbtouc-unsafe-aux.c b/lib/unistr/u16-mbtouc-unsafe-aux.c
new file mode 100644
index 0000000..9906e30
--- /dev/null
+++ b/lib/unistr/u16-mbtouc-unsafe-aux.c
@@ -0,0 +1,55 @@
+/* Conversion UTF-16 to UCS-4.
+ Copyright (C) 2001-2002, 2006-2007, 2009 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2001.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#if defined IN_LIBUNISTRING || HAVE_INLINE
+
+/* Decode a surrogate sequence at S (at most N units) into *PUC and return
+ the number of units consumed.
+ When CONFIG_UNICODE_SAFETY is true, both units are range-checked and a
+ failed check yields U+FFFD with a return value of 1. When it is false,
+ the checks are compiled out and any two available units are combined.
+ NOTE(review): non-surrogate units are not special-cased here; presumably
+ the caller handles them before calling -- confirm against unistr.h. */
+int
+u16_mbtouc_unsafe_aux (ucs4_t *puc, const uint16_t *s, size_t n)
+{
+ uint16_t c = *s;
+
+ /* The conditional 'if' below guards the following block only in safe
+ mode; otherwise the block is entered unconditionally. */
+#if CONFIG_UNICODE_SAFETY
+ if (c < 0xdc00)
+#endif
+ {
+ if (n >= 2)
+ {
+ /* Same pattern: the low-surrogate check exists only in safe mode. */
+#if CONFIG_UNICODE_SAFETY
+ if (s[1] >= 0xdc00 && s[1] < 0xe000)
+#endif
+ {
+ *puc = 0x10000 + ((c - 0xd800) << 10) + (s[1] - 0xdc00);
+ return 2;
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ }
+ }
+ /* Every failure path ends here: report U+FFFD and consume one unit. */
+ *puc = 0xfffd;
+ return 1;
+}
+
+#endif
diff --git a/lib/unistr/u16-mbtouc-unsafe.c b/lib/unistr/u16-mbtouc-unsafe.c
new file mode 100644
index 0000000..cc858d8
--- /dev/null
+++ b/lib/unistr/u16-mbtouc-unsafe.c
@@ -0,0 +1,66 @@
+/* Look at first character in UTF-16 string.
+ Copyright (C) 1999-2002, 2006-2007, 2009 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2001.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#if defined IN_LIBUNISTRING
+/* Tell unistr.h to declare u16_mbtouc_unsafe as 'extern', not
+ 'static inline'. */
+# include "unistring-notinline.h"
+#endif
+
+/* Specification. */
+#include "unistr.h"
+
+#if !HAVE_INLINE
+
+/* Decode the first character of the N units starting at S into *PUC and
+ return the number of units consumed (1 or 2).
+ When CONFIG_UNICODE_SAFETY is true, surrogate pairs are range-checked
+ and a failed check yields U+FFFD with a return value of 1. When it is
+ false, the checks are compiled out and any surrogate that has a
+ following unit is combined with it.
+ *S is read unconditionally, even when N is 0. */
+int
+u16_mbtouc_unsafe (ucs4_t *puc, const uint16_t *s, size_t n)
+{
+ uint16_t c = *s;
+
+ /* Non-surrogate unit: it is the character itself. */
+ if (c < 0xd800 || c >= 0xe000)
+ {
+ *puc = c;
+ return 1;
+ }
+ /* The conditional 'if' below guards the following block only in safe
+ mode; otherwise the block is entered unconditionally. */
+#if CONFIG_UNICODE_SAFETY
+ if (c < 0xdc00)
+#endif
+ {
+ if (n >= 2)
+ {
+ /* Same pattern: the low-surrogate check exists only in safe mode. */
+#if CONFIG_UNICODE_SAFETY
+ if (s[1] >= 0xdc00 && s[1] < 0xe000)
+#endif
+ {
+ *puc = 0x10000 + ((c - 0xd800) << 10) + (s[1] - 0xdc00);
+ return 2;
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ }
+ }
+ /* Every failure path ends here: report U+FFFD and consume one unit. */
+ *puc = 0xfffd;
+ return 1;
+}
+
+#endif
diff --git a/lib/unistr/u16-mbtouc.c b/lib/unistr/u16-mbtouc.c
new file mode 100644
index 0000000..2691db8
--- /dev/null
+++ b/lib/unistr/u16-mbtouc.c
@@ -0,0 +1,61 @@
+/* Look at first character in UTF-16 string.
+ Copyright (C) 1999-2002, 2006-2007, 2009 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2001.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#if defined IN_LIBUNISTRING
+/* Tell unistr.h to declare u16_mbtouc as 'extern', not 'static inline'. */
+# include "unistring-notinline.h"
+#endif
+
+/* Specification. */
+#include "unistr.h"
+
+#if !HAVE_INLINE
+
+/* Decode the first character of the N units starting at S into *PUC and
+   return the number of units consumed (1 or 2).  An invalid or incomplete
+   sequence yields U+FFFD and a return value of 1.
+   *S is read unconditionally, even when N is 0.  */
+int
+u16_mbtouc (ucs4_t *puc, const uint16_t *s, size_t n)
+{
+  const uint16_t lead = s[0];
+
+  /* Non-surrogate unit: it is the character itself.  */
+  if (!(lead >= 0xd800 && lead < 0xe000))
+    {
+      *puc = lead;
+      return 1;
+    }
+
+  /* High surrogate with a unit available after it.  */
+  if (lead < 0xdc00 && n >= 2)
+    {
+      const uint16_t trail = s[1];
+
+      if (trail >= 0xdc00 && trail < 0xe000)
+        {
+          *puc = 0x10000 + ((lead - 0xd800) << 10) + (trail - 0xdc00);
+          return 2;
+        }
+    }
+
+  /* invalid or incomplete multibyte character */
+  *puc = 0xfffd;
+  return 1;
+}
+
+#endif
diff --git a/lib/unistr/u16-mbtoucr.c b/lib/unistr/u16-mbtoucr.c
new file mode 100644
index 0000000..a1bd8ee
--- /dev/null
+++ b/lib/unistr/u16-mbtoucr.c
@@ -0,0 +1,54 @@
+/* Look at first character in UTF-16 string, returning an error code.
+ Copyright (C) 1999-2002, 2006-2007 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2001.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+/* Decode the first character of the N units starting at S into *PUC.
+   Returns the number of units consumed (1 or 2) on success.  On failure
+   *PUC is set to U+FFFD and the result is -1 for an invalid sequence or
+   -2 for an incomplete one.  */
+int
+u16_mbtoucr (ucs4_t *puc, const uint16_t *s, size_t n)
+{
+  const uint16_t lead = s[0];
+  int failure;
+
+  /* Non-surrogate unit: it is the character itself.  */
+  if (lead < 0xd800 || lead >= 0xe000)
+    {
+      *puc = lead;
+      return 1;
+    }
+
+  if (lead >= 0xdc00)
+    /* A low surrogate cannot start a character.  */
+    failure = -1;
+  else if (n < 2)
+    /* incomplete multibyte character */
+    failure = -2;
+  else if (s[1] >= 0xdc00 && s[1] < 0xe000)
+    {
+      *puc = 0x10000 + ((lead - 0xd800) << 10) + (s[1] - 0xdc00);
+      return 2;
+    }
+  else
+    /* invalid multibyte character */
+    failure = -1;
+
+  *puc = 0xfffd;
+  return failure;
+}
diff --git a/lib/unistr/u16-move.c b/lib/unistr/u16-move.c
new file mode 100644
index 0000000..2bf8c61
--- /dev/null
+++ b/lib/unistr/u16-move.c
@@ -0,0 +1,25 @@
+/* Copy piece of UTF-16 string.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+/* Instantiate the generic template in u-move.h for UTF-16: the generated
+ function is named u16_move and works on uint16_t units. */
+#define FUNC u16_move
+#define UNIT uint16_t
+#include "u-move.h"
diff --git a/lib/unistr/u16-next.c b/lib/unistr/u16-next.c
new file mode 100644
index 0000000..7c49f72
--- /dev/null
+++ b/lib/unistr/u16-next.c
@@ -0,0 +1,37 @@
+/* Iterate over next character in UTF-16 string.
+ Copyright (C) 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+/* Decode the character at S into *PUC using u16_strmbtouc and return a
+   pointer to the unit after it.  Returns NULL when u16_strmbtouc reports
+   0 or a negative count; in the negative case *PUC is set to U+FFFD.  */
+const uint16_t *
+u16_next (ucs4_t *puc, const uint16_t *s)
+{
+  int count = u16_strmbtouc (puc, s);
+
+  if (count > 0)
+    return s + count;
+
+  /* A negative count signals a decoding failure.  */
+  if (count < 0)
+    *puc = 0xfffd;
+  return NULL;
+}
diff --git a/lib/unistr/u16-prev.c b/lib/unistr/u16-prev.c
new file mode 100644
index 0000000..3beecf0
--- /dev/null
+++ b/lib/unistr/u16-prev.c
@@ -0,0 +1,53 @@
+/* Iterate over previous character in UTF-16 string.
+ Copyright (C) 2002, 2006-2007 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+/* Decode the character that ends just before S into *PUC, never reading
+ before START. Returns a pointer to the first unit of that character,
+ or NULL if S equals START or no character could be decoded; *PUC is
+ left untouched when NULL is returned. */
+const uint16_t *
+u16_prev (ucs4_t *puc, const uint16_t *s, const uint16_t *start)
+{
+ /* Keep in sync with unistr.h and utf16-ucs4.c. */
+ if (s != start)
+ {
+ uint16_t c_1 = s[-1];
+
+ /* Non-surrogate unit: it is the character itself. */
+ if (c_1 < 0xd800 || c_1 >= 0xe000)
+ {
+ *puc = c_1;
+ return s - 1;
+ }
+ /* In safe mode the previous unit must be a low surrogate; otherwise
+ this test is compiled out and only the bounds check below applies. */
+#if CONFIG_UNICODE_SAFETY
+ if (c_1 >= 0xdc00)
+#endif
+ if (s - 1 != start)
+ {
+ uint16_t c_2 = s[-2];
+
+ /* In safe mode the unit before it must be a high surrogate. */
+#if CONFIG_UNICODE_SAFETY
+ if (c_2 >= 0xd800 && c_2 < 0xdc00)
+#endif
+ {
+ *puc = 0x10000 + ((c_2 - 0xd800) << 10) + (c_1 - 0xdc00);
+ return s - 2;
+ }
+ }
+ }
+ return NULL;
+}
diff --git a/lib/unistr/u16-set.c b/lib/unistr/u16-set.c
new file mode 100644
index 0000000..9ef307f
--- /dev/null
+++ b/lib/unistr/u16-set.c
@@ -0,0 +1,26 @@
+/* Fill UTF-16 string.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+/* Instantiate the generic template in u-set.h for UTF-16: the generated
+   function is named u16_set and works on uint16_t units.
+   IS_SINGLE_UNIT (uc) is true when UC is below 0x10000 and outside the
+   surrogate range 0xd800..0xdfff.  The parameter is parenthesized so that
+   the macro also expands correctly for non-trivial argument expressions.  */
+#define FUNC u16_set
+#define UNIT uint16_t
+#define IS_SINGLE_UNIT(uc) ((uc) < 0xd800 || ((uc) < 0x10000 && (uc) >= 0xe000))
+#include "u-set.h"
diff --git a/lib/unistr/u16-startswith.c b/lib/unistr/u16-startswith.c
new file mode 100644
index 0000000..2f39d74
--- /dev/null
+++ b/lib/unistr/u16-startswith.c
@@ -0,0 +1,25 @@
+/* Substring test for UTF-16 strings.
+ Copyright (C) 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+/* Instantiate the generic template in u-startswith.h for UTF-16: the
+ generated function is named u16_startswith and works on uint16_t units. */
+#define FUNC u16_startswith
+#define UNIT uint16_t
+#include "u-startswith.h"
diff --git a/lib/unistr/u16-stpcpy.c b/lib/unistr/u16-stpcpy.c
new file mode 100644
index 0000000..9207edc
--- /dev/null
+++ b/lib/unistr/u16-stpcpy.c
@@ -0,0 +1,25 @@
+/* Copy UTF-16 string.
+ Copyright (C) 1999, 2002, 2006-2007 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u16_stpcpy
+#define UNIT uint16_t
+#include "u-stpcpy.h"
diff --git a/lib/unistr/u16-stpncpy.c b/lib/unistr/u16-stpncpy.c
new file mode 100644
index 0000000..30ef7e3
--- /dev/null
+++ b/lib/unistr/u16-stpncpy.c
@@ -0,0 +1,25 @@
+/* Copy UTF-16 string.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u16_stpncpy
+#define UNIT uint16_t
+#include "u-stpncpy.h"
diff --git a/lib/unistr/u16-strcat.c b/lib/unistr/u16-strcat.c
new file mode 100644
index 0000000..bb88f1b
--- /dev/null
+++ b/lib/unistr/u16-strcat.c
@@ -0,0 +1,26 @@
+/* Concatenate UTF-16 strings.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u16_strcat
+#define UNIT uint16_t
+#define U_STRLEN u16_strlen
+#include "u-strcat.h"
diff --git a/lib/unistr/u16-strchr.c b/lib/unistr/u16-strchr.c
new file mode 100644
index 0000000..673152f
--- /dev/null
+++ b/lib/unistr/u16-strchr.c
@@ -0,0 +1,63 @@
+/* Search character in UTF-16 string.
+ Copyright (C) 1999, 2002, 2006-2007 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+/* Search the NUL-terminated UTF-16 string S for the first occurrence of the
+   character UC.
+   Return a pointer to the first unit of the match, or NULL if there is none.
+   A UC below 0x10000 is compared against single units, so UC == 0 yields a
+   pointer to the terminating unit of S.
+   S is not modified; the result merely drops the const qualifier.  */
+uint16_t *
+u16_strchr (const uint16_t *s, ucs4_t uc)
+{
+  if (uc < 0x10000)
+    {
+      const uint16_t unit = uc;
+
+      /* Advance to the first unit equal to UNIT, giving up at the terminator
+         (which is itself tested against UNIT first).  */
+      while (*s != unit)
+        {
+          if (*s == 0)
+            return NULL;
+          s++;
+        }
+      return (uint16_t *) s;
+    }
+  else
+    {
+      uint16_t pair[2];
+
+      /* Only a result of 2 from u16_uctomb_aux (presumably: UC was stored in
+         PAIR as two units) leads to a search; anything else means "not found",
+         and so does an empty S.  */
+      if (u16_uctomb_aux (pair, uc, 2) != 2 || *s == 0)
+        return NULL;
+
+      /* Slide a two-unit window over S until its second unit is the
+         terminator.  */
+      for (; s[1] != 0; s++)
+        if (s[0] == pair[0] && s[1] == pair[1])
+          return (uint16_t *) s;
+      return NULL;
+    }
+}
diff --git a/lib/unistr/u16-strcmp.c b/lib/unistr/u16-strcmp.c
new file mode 100644
index 0000000..c161c65
--- /dev/null
+++ b/lib/unistr/u16-strcmp.c
@@ -0,0 +1,50 @@
+/* Compare UTF-16 strings.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+/* Compare the NUL-terminated UTF-16 strings S1 and S2.
+   Return a negative value, zero, or a positive value if S1 sorts before,
+   equal to, or after S2.
+   Note that the UTF-16 encoding does NOT preserve lexicographic order: a
+   surrogate unit belongs to a pair, which must compare greater than any
+   single non-surrogate unit, even though 0xe000..0xffff are numerically larger.  */
+int
+u16_strcmp (const uint16_t *s1, const uint16_t *s2)
+{
+  uint16_t a;
+  uint16_t b;
+  int a_is_surrogate;
+  int b_is_surrogate;
+
+  /* Skip the common prefix, stopping at the terminator of S1.  */
+  do
+    {
+      a = *s1++;
+      b = *s2++;
+    }
+  while (a != 0 && a == b);
+
+  a_is_surrogate = (a >= 0xd800 && a < 0xe000);
+  b_is_surrogate = (b >= 0xd800 && b < 0xe000);
+  if (a_is_surrogate != b_is_surrogate)
+    return a_is_surrogate ? 1 : -1;
+  return (int) a - (int) b;
+}
diff --git a/lib/unistr/u16-strcoll.c b/lib/unistr/u16-strcoll.c
new file mode 100644
index 0000000..280ba83
--- /dev/null
+++ b/lib/unistr/u16-strcoll.c
@@ -0,0 +1,33 @@
+/* Compare UTF-16 strings using the collation rules of the current locale.
+ Copyright (C) 2009 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2009.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "uniconv.h"
+
+#define FUNC u16_strcoll
+#define UNIT uint16_t
+#define U_STRCMP u16_strcmp
+#define U_STRCONV_TO_LOCALE u16_strconv_to_locale
+#include "u-strcoll.h"
diff --git a/lib/unistr/u16-strcpy.c b/lib/unistr/u16-strcpy.c
new file mode 100644
index 0000000..92c3e7e
--- /dev/null
+++ b/lib/unistr/u16-strcpy.c
@@ -0,0 +1,25 @@
+/* Copy UTF-16 string.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u16_strcpy
+#define UNIT uint16_t
+#include "u-strcpy.h"
diff --git a/lib/unistr/u16-strcspn.c b/lib/unistr/u16-strcspn.c
new file mode 100644
index 0000000..2f5ba43
--- /dev/null
+++ b/lib/unistr/u16-strcspn.c
@@ -0,0 +1,28 @@
+/* Search for some characters in UTF-16 string.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u16_strcspn
+#define UNIT uint16_t
+#define U_STRLEN u16_strlen
+#define U_STRMBTOUC u16_strmbtouc
+#define U_STRCHR u16_strchr
+#include "u-strcspn.h"
diff --git a/lib/unistr/u16-strdup.c b/lib/unistr/u16-strdup.c
new file mode 100644
index 0000000..22242c2
--- /dev/null
+++ b/lib/unistr/u16-strdup.c
@@ -0,0 +1,26 @@
+/* Copy UTF-16 string.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u16_strdup
+#define UNIT uint16_t
+#define U_STRLEN u16_strlen
+#include "u-strdup.h"
diff --git a/lib/unistr/u16-strlen.c b/lib/unistr/u16-strlen.c
new file mode 100644
index 0000000..9c05541
--- /dev/null
+++ b/lib/unistr/u16-strlen.c
@@ -0,0 +1,25 @@
+/* Determine length of UTF-16 string.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u16_strlen
+#define UNIT uint16_t
+#include "u-strlen.h"
diff --git a/lib/unistr/u16-strmblen.c b/lib/unistr/u16-strmblen.c
new file mode 100644
index 0000000..e7f3625
--- /dev/null
+++ b/lib/unistr/u16-strmblen.c
@@ -0,0 +1,43 @@
+/* Look at first character in UTF-16 string.
+ Copyright (C) 1999-2000, 2002, 2006-2007 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+/* Return the length in units of the first character of the NUL-terminated
+   UTF-16 string S: 0 if S is empty, 1 for a non-surrogate unit, 2 for a
+   surrogate with an acceptable second unit, -1 otherwise.  "Acceptable" is a
+   low surrogate after a high one if CONFIG_UNICODE_SAFETY, else any unit
+   but the terminator.  Keep in sync with unistr.h and utf16-ucs4.c.  */
+int
+u16_strmblen (const uint16_t *s)
+{
+  const uint16_t first = s[0];
+  int two_units;
+
+  if (!(first >= 0xd800 && first < 0xe000))
+    return (first == 0 ? 0 : 1);
+#if CONFIG_UNICODE_SAFETY
+  two_units = (first < 0xdc00 && s[1] >= 0xdc00 && s[1] < 0xe000);
+#else
+  two_units = (s[1] != 0);
+#endif
+  /* -1 stands for an invalid or incomplete multibyte character.  */
+  return two_units ? 2 : -1;
+}
diff --git a/lib/unistr/u16-strmbtouc.c b/lib/unistr/u16-strmbtouc.c
new file mode 100644
index 0000000..78d482e
--- /dev/null
+++ b/lib/unistr/u16-strmbtouc.c
@@ -0,0 +1,50 @@
+/* Look at first character in UTF-16 string.
+ Copyright (C) 1999-2000, 2002, 2006-2007 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+/* Decode the first character of the NUL-terminated UTF-16 string S into *PUC.
+   Return 0 (storing 0) if S is empty, 1 if S starts with a non-surrogate
+   unit, 2 if it starts with a surrogate and an acceptable second unit: a low
+   surrogate after a high one if CONFIG_UNICODE_SAFETY, else any unit but the
+   terminator.  Otherwise (invalid or incomplete multibyte character) return
+   -1 and leave *PUC untouched.
+   Keep in sync with unistr.h and utf16-ucs4.c.  */
+int
+u16_strmbtouc (ucs4_t *puc, const uint16_t *s)
+{
+  const uint16_t first = s[0];
+  int acceptable;
+
+  if (!(first >= 0xd800 && first < 0xe000))
+    {
+      *puc = first;
+      return (first == 0 ? 0 : 1);
+    }
+#if CONFIG_UNICODE_SAFETY
+  acceptable = (first < 0xdc00 && s[1] >= 0xdc00 && s[1] < 0xe000);
+#else
+  acceptable = (s[1] != 0);
+#endif
+  if (!acceptable)
+    return -1;
+  *puc = 0x10000 + ((first - 0xd800) << 10) + (s[1] - 0xdc00);
+  return 2;
+}
diff --git a/lib/unistr/u16-strncat.c b/lib/unistr/u16-strncat.c
new file mode 100644
index 0000000..9e14388
--- /dev/null
+++ b/lib/unistr/u16-strncat.c
@@ -0,0 +1,26 @@
+/* Concatenate UTF-16 strings.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u16_strncat
+#define UNIT uint16_t
+#define U_STRLEN u16_strlen
+#include "u-strncat.h"
diff --git a/lib/unistr/u16-strncmp.c b/lib/unistr/u16-strncmp.c
new file mode 100644
index 0000000..7278b6f
--- /dev/null
+++ b/lib/unistr/u16-strncmp.c
@@ -0,0 +1,54 @@
+/* Compare UTF-16 strings.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+/* Compare at most N units of the UTF-16 strings S1 and S2, stopping early at
+   a terminating NUL unit.
+   Return a negative value, zero, or a positive value if the compared part of
+   S1 sorts before, equal to, or after that of S2; N == 0 yields zero.
+   Note that the UTF-16 encoding does NOT preserve lexicographic order: a
+   surrogate unit belongs to a pair, which must compare greater than any
+   single non-surrogate unit, even though 0xe000..0xffff are numerically
+   larger.  */
+int
+u16_strncmp (const uint16_t *s1, const uint16_t *s2, size_t n)
+{
+  size_t i;
+
+  for (i = 0; i < n; i++)
+    {
+      const uint16_t a = s1[i];
+      const uint16_t b = s2[i];
+
+      if (a == 0 || a != b)
+        {
+          /* First difference, or the common end of both strings.  */
+          const int a_is_surrogate = (a >= 0xd800 && a < 0xe000);
+          const int b_is_surrogate = (b >= 0xd800 && b < 0xe000);
+
+          if (a_is_surrogate != b_is_surrogate)
+            return a_is_surrogate ? 1 : -1;
+          return (int) a - (int) b;
+        }
+    }
+  /* The first N units are equal.  */
+  return 0;
+}
diff --git a/lib/unistr/u16-strncpy.c b/lib/unistr/u16-strncpy.c
new file mode 100644
index 0000000..65676aa
--- /dev/null
+++ b/lib/unistr/u16-strncpy.c
@@ -0,0 +1,25 @@
+/* Copy UTF-16 string.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u16_strncpy
+#define UNIT uint16_t
+#include "u-strncpy.h"
diff --git a/lib/unistr/u16-strnlen.c b/lib/unistr/u16-strnlen.c
new file mode 100644
index 0000000..1cd80c4
--- /dev/null
+++ b/lib/unistr/u16-strnlen.c
@@ -0,0 +1,25 @@
+/* Determine bounded length of UTF-16 string.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u16_strnlen
+#define UNIT uint16_t
+#include "u-strnlen.h"
diff --git a/lib/unistr/u16-strpbrk.c b/lib/unistr/u16-strpbrk.c
new file mode 100644
index 0000000..6360581
--- /dev/null
+++ b/lib/unistr/u16-strpbrk.c
@@ -0,0 +1,27 @@
+/* Search for some characters in UTF-16 string.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u16_strpbrk
+#define UNIT uint16_t
+#define U_STRMBTOUC u16_strmbtouc
+#define U_STRCHR u16_strchr
+#include "u-strpbrk.h"
diff --git a/lib/unistr/u16-strrchr.c b/lib/unistr/u16-strrchr.c
new file mode 100644
index 0000000..b3c7ab0
--- /dev/null
+++ b/lib/unistr/u16-strrchr.c
@@ -0,0 +1,64 @@
+/* Search character in UTF-16 string.
+ Copyright (C) 1999, 2002, 2006-2007 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+/* Search the NUL-terminated UTF-16 string S for the last occurrence of the
+   character UC.
+   Return a pointer to the first unit of that occurrence, or NULL if there is
+   none.  A UC below 0x10000 is compared against single units, so UC == 0
+   yields a pointer to the terminating unit of S.
+   S is not modified; the result merely drops the const qualifier.  */
+uint16_t *
+u16_strrchr (const uint16_t *s, ucs4_t uc)
+{
+  /* Calling u16_strlen and then searching from the other end would cause more
+     memory accesses.  Avoid that, at the cost of a few more comparisons:
+     walk forward once and remember the most recent match.  */
+  const uint16_t *last = NULL;
+
+  if (uc < 0x10000)
+    {
+      const uint16_t unit = uc;
+
+      for (; *s != 0; s++)
+        if (*s == unit)
+          last = s;
+      /* The terminator takes part in the comparison as well.  */
+      if (unit == 0)
+        last = s;
+    }
+  else
+    {
+      uint16_t pair[2];
+
+      /* Only a result of 2 from u16_uctomb_aux (presumably: UC was stored in
+         PAIR as two units) leads to a search, and only if S is nonempty.  */
+      if (u16_uctomb_aux (pair, uc, 2) == 2 && *s != 0)
+        {
+          /* FIXME: Maybe walking the string via u16_mblen is a win?  */
+          for (; s[1] != 0; s++)
+            if (s[0] == pair[0] && s[1] == pair[1])
+              last = s;
+        }
+    }
+
+  return (uint16_t *) last;
+}
diff --git a/lib/unistr/u16-strspn.c b/lib/unistr/u16-strspn.c
new file mode 100644
index 0000000..04ba8e6
--- /dev/null
+++ b/lib/unistr/u16-strspn.c
@@ -0,0 +1,29 @@
+/* Search for some characters in UTF-16 string.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u16_strspn
+#define UNIT uint16_t
+#define U_STRLEN u16_strlen
+#define U_STRMBTOUC u16_strmbtouc
+#define U_CMP u16_cmp
+#define U_STRCHR u16_strchr
+#include "u-strspn.h"
diff --git a/lib/unistr/u16-strstr.c b/lib/unistr/u16-strstr.c
new file mode 100644
index 0000000..bc61233
--- /dev/null
+++ b/lib/unistr/u16-strstr.c
@@ -0,0 +1,28 @@
+/* Substring test for UTF-16 strings.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+/* FIXME: Maybe walking the string via u16_mblen is a win? */
+
+#define FUNC u16_strstr
+#define UNIT uint16_t
+#define U_STRCHR u16_strchr
+#include "u-strstr.h"
diff --git a/lib/unistr/u16-strtok.c b/lib/unistr/u16-strtok.c
new file mode 100644
index 0000000..dd9b247
--- /dev/null
+++ b/lib/unistr/u16-strtok.c
@@ -0,0 +1,27 @@
+/* Tokenize UTF-16 string.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u16_strtok
+#define UNIT uint16_t
+#define U_STRSPN u16_strspn
+#define U_STRPBRK u16_strpbrk
+#include "u-strtok.h"
diff --git a/lib/unistr/u16-to-u32.c b/lib/unistr/u16-to-u32.c
new file mode 100644
index 0000000..2f39dcd
--- /dev/null
+++ b/lib/unistr/u16-to-u32.c
@@ -0,0 +1,125 @@
+/* Convert UTF-16 string to UTF-32 string.
+ Copyright (C) 2002, 2006-2007 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u16_to_u32
+#define SRC_UNIT uint16_t
+#define DST_UNIT uint32_t
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* Convert the N units of the UTF-16 string S to UTF-32.
+   If RESULTBUF is non-NULL, it is the initial output buffer and *LENGTHP its
+   capacity in units; when it is NULL or too small, the output is built in
+   malloc'ed memory instead.
+   On success, store the number of output units in *LENGTHP and return the
+   output: RESULTBUF, or malloc'ed memory that the caller must free.  The
+   return value is non-NULL even if the output is empty.
+   On failure, return NULL with errno set to EILSEQ (u16_mbtouc returned a
+   negative count) or ENOMEM, leaving *LENGTHP unchanged.  */
+DST_UNIT *
+FUNC (const SRC_UNIT *s, size_t n, DST_UNIT *resultbuf, size_t *lengthp)
+{
+  const SRC_UNIT *s_end = s + n;
+  /* Output string accumulator.
+     Invariants:
+     result is either == resultbuf or == NULL or malloc-allocated.
+     If length > 0, then result != NULL.  */
+  DST_UNIT *result = resultbuf;
+  size_t allocated = (resultbuf != NULL ? *lengthp : 0);
+  size_t length = 0;
+
+  while (s < s_end)
+    {
+      ucs4_t uc;
+      int count;
+
+      /* Fetch a Unicode character from the input string.  */
+      count = u16_mbtouc (&uc, s, s_end - s);
+      if (count < 0)
+        {
+          if (!(result == resultbuf || result == NULL))
+            free (result);
+          errno = EILSEQ;
+          return NULL;
+        }
+      s += count;
+
+      /* Store it in the output string, growing the buffer first if needed.  */
+      if (length + 1 > allocated)
+        {
+          DST_UNIT *memory;
+
+          allocated = (allocated > 0 ? 2 * allocated : 12);
+          if (length + 1 > allocated)
+            allocated = length + 1;
+          if (allocated > (size_t) -1 / sizeof (DST_UNIT))
+            /* The size in bytes would wrap around: same as out of memory.  */
+            memory = NULL;
+          else if (result == resultbuf || result == NULL)
+            memory = (DST_UNIT *) malloc (allocated * sizeof (DST_UNIT));
+          else
+            memory =
+              (DST_UNIT *) realloc (result, allocated * sizeof (DST_UNIT));
+
+          if (memory == NULL)
+            {
+              if (!(result == resultbuf || result == NULL))
+                free (result);
+              errno = ENOMEM;
+              return NULL;
+            }
+          if (result == resultbuf && length > 0)
+            memcpy ((char *) memory, (char *) result,
+                    length * sizeof (DST_UNIT));
+          result = memory;
+        }
+      result[length++] = uc;
+    }
+
+  if (length == 0)
+    {
+      if (result == NULL)
+        {
+          /* Return a non-NULL value.  NULL means error.  */
+          result = (DST_UNIT *) malloc (1);
+          if (result == NULL)
+            {
+              errno = ENOMEM;
+              return NULL;
+            }
+        }
+    }
+  else if (result != resultbuf && length < allocated)
+    {
+      /* Shrink the allocated memory if possible.  */
+      DST_UNIT *memory;
+
+      memory = (DST_UNIT *) realloc (result, length * sizeof (DST_UNIT));
+      if (memory != NULL)
+        result = memory;
+    }
+
+  *lengthp = length;
+  return result;
+}
diff --git a/lib/unistr/u16-to-u8.c b/lib/unistr/u16-to-u8.c
new file mode 100644
index 0000000..38b27ec
--- /dev/null
+++ b/lib/unistr/u16-to-u8.c
@@ -0,0 +1,136 @@
+/* Convert UTF-16 string to UTF-8 string.
+ Copyright (C) 2002, 2006-2007 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u16_to_u8
+#define SRC_UNIT uint16_t
+#define DST_UNIT uint8_t
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* Convert the UTF-16 string S of N units to UTF-8.
+   If RESULTBUF is non-NULL, it is tried first as the output buffer, with
+   *LENGTHP giving its capacity in units; once it proves too small, the
+   output continues in malloc-allocated memory instead.
+   On success, store the number of output units in *LENGTHP and return the
+   output buffer (RESULTBUF or allocated memory, never NULL).
+   On failure, return NULL with errno set: EILSEQ if u16_mbtouc or
+   u8_uctomb rejects a character, ENOMEM if memory cannot be obtained.  */
+DST_UNIT *
+FUNC (const SRC_UNIT *s, size_t n, DST_UNIT *resultbuf, size_t *lengthp)
+{
+  const SRC_UNIT *const limit = s + n;
+  /* Output accumulator.  Invariants:
+     out is either == resultbuf or == NULL or malloc-allocated.
+     If used > 0, then out != NULL.  */
+  DST_UNIT *out = resultbuf;
+  size_t capacity = (resultbuf != NULL ? *lengthp : 0);
+  size_t used = 0;
+  /* errno value to report when leaving through the 'fail' label.  */
+  int err;
+
+  while (s < limit)
+    {
+      ucs4_t uc;
+      int units;
+
+      /* Decode one character from the input.  */
+      units = u16_mbtouc (&uc, s, limit - s);
+      if (units < 0)
+        {
+          err = EILSEQ;
+          goto fail;
+        }
+      s += units;
+
+      /* Encode it at the end of the output.  */
+      units = u8_uctomb (out + used, uc, capacity - used);
+      if (units == -1)
+        {
+          err = EILSEQ;
+          goto fail;
+        }
+      if (units == -2)
+        {
+          /* No room left: enlarge the buffer and encode once more.  */
+          DST_UNIT *grown;
+
+          /* Double the capacity (12 units to start with), and make sure
+             that at least 6 units are free beyond those already used.  */
+          capacity = (capacity > 0 ? 2 * capacity : 12);
+          if (capacity < used + 6)
+            capacity = used + 6;
+          /* The caller's buffer must not be handed to realloc.  */
+          if (out == resultbuf || out == NULL)
+            grown = (DST_UNIT *) malloc (capacity * sizeof (DST_UNIT));
+          else
+            grown = (DST_UNIT *) realloc (out, capacity * sizeof (DST_UNIT));
+          if (grown == NULL)
+            {
+              err = ENOMEM;
+              goto fail;
+            }
+          /* When leaving the caller's buffer, carry over its contents.  */
+          if (out == resultbuf && used > 0)
+            memcpy ((char *) grown, (char *) out, used * sizeof (DST_UNIT));
+          out = grown;
+          units = u8_uctomb (out + used, uc, capacity - used);
+          if (units < 0)
+            abort ();
+        }
+      used += units;
+    }
+
+  if (used == 0)
+    {
+      if (out == NULL)
+        {
+          /* Return a non-NULL value.  NULL means error.  */
+          out = (DST_UNIT *) malloc (1);
+          if (out == NULL)
+            {
+              errno = ENOMEM;
+              return NULL;
+            }
+        }
+    }
+  else if (out != resultbuf && used < capacity)
+    {
+      /* Shrink the allocated memory if possible.  */
+      DST_UNIT *trimmed;
+
+      trimmed = (DST_UNIT *) realloc (out, used * sizeof (DST_UNIT));
+      if (trimmed != NULL)
+        out = trimmed;
+    }
+
+  *lengthp = used;
+  return out;
+
+ fail:
+  /* Release the output unless it is the caller's buffer or absent.  */
+  if (!(out == resultbuf || out == NULL))
+    free (out);
+  errno = err;
+  return NULL;
+}
diff --git a/lib/unistr/u16-uctomb-aux.c b/lib/unistr/u16-uctomb-aux.c
new file mode 100644
index 0000000..384452b
--- /dev/null
+++ b/lib/unistr/u16-uctomb-aux.c
@@ -0,0 +1,58 @@
+/* Conversion UCS-4 to UTF-16.
+ Copyright (C) 2002, 2006-2007 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+/* Store the character UC as UTF-16 at S, where N units are available.
+   Return 1 or 2, the number of units stored; -1 if UC is a surrogate
+   code point or is >= 0x110000; -2 if nothing was stored because N is
+   too small.  A UC below 0xd800 always yields -2 here.  */
+int
+u16_uctomb_aux (uint16_t *s, ucs4_t uc, int n)
+{
+  /* The case uc < 0xd800 with n >= 1 is already handled by the caller,
+     so what remains of that range is reported as lack of room.  */
+  if (uc < 0xd800)
+    return -2;
+
+  /* Surrogates and values >= 0x110000 are rejected.  */
+  if (uc < 0xe000 || uc >= 0x110000)
+    return -1;
+
+  if (uc < 0x10000)
+    {
+      /* One unit suffices.  */
+      if (n < 1)
+        return -2;
+      s[0] = uc;
+      return 1;
+    }
+  else
+    {
+      /* Two units are needed: a high and a low surrogate.  */
+      const ucs4_t offset = uc - 0x10000;
+
+      if (n < 2)
+        return -2;
+      s[0] = 0xd800 + (offset >> 10);
+      s[1] = 0xdc00 + (offset & 0x3ff);
+      return 2;
+    }
+}
diff --git a/lib/unistr/u16-uctomb.c b/lib/unistr/u16-uctomb.c
new file mode 100644
index 0000000..6ac5ada
--- /dev/null
+++ b/lib/unistr/u16-uctomb.c
@@ -0,0 +1,72 @@
+/* Store a character in UTF-16 string.
+ Copyright (C) 2002, 2005-2006, 2009 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#if defined IN_LIBUNISTRING
+/* Tell unistr.h to declare u16_uctomb as 'extern', not 'static inline'. */
+# include "unistring-notinline.h"
+#endif
+
+/* Specification. */
+#include "unistr.h"
+
+#if !HAVE_INLINE
+
+/* Store the character UC, encoded as UTF-16, at S, where N units are
+   available.
+   Return value:
+     1 or 2   the number of units stored,
+     -1       UC is a surrogate code point or is >= 0x110000,
+     -2       N is smaller than the number of units needed.
+   Nothing is written to S unless the return value is positive.  */
+int
+u16_uctomb (uint16_t *s, ucs4_t uc, int n)
+{
+  int needed;
+
+  /* Determine how many units the encoding of UC takes.  */
+  if (uc < 0xd800 || (uc >= 0xe000 && uc < 0x10000))
+    needed = 1;
+  else if (uc >= 0x10000 && uc < 0x110000)
+    needed = 2;
+  else
+    /* A surrogate, or a value >= 0x110000.  */
+    return -1;
+
+  /* Check the available room before storing anything.  */
+  if (n < needed)
+    return -2;
+
+  if (needed == 1)
+    {
+      /* UC fits into one unit as is.  */
+      s[0] = uc;
+    }
+  else
+    {
+      /* Subtract 0x10000, then put the upper 10 bits of the remainder
+         into a high surrogate and the lower 10 bits into a low one.  */
+      uc -= 0x10000;
+      s[0] = 0xd800 + (uc >> 10);
+      s[1] = 0xdc00 + (uc & 0x3ff);
+    }
+
+  return needed;
+}
+
+#endif
diff --git a/lib/unistr/u32-check.c b/lib/unistr/u32-check.c
new file mode 100644
index 0000000..87e9127
--- /dev/null
+++ b/lib/unistr/u32-check.c
@@ -0,0 +1,39 @@
+/* Check UTF-32 string.
+ Copyright (C) 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+/* Check whether the N units starting at S are all valid Unicode characters.
+   Return NULL if they are; otherwise return a pointer to the first unit
+   that is a surrogate (0xd800..0xdfff) or is >= 0x110000.  */
+const uint32_t *
+u32_check (const uint32_t *s, size_t n)
+{
+  size_t i;
+
+  for (i = 0; i < n; i++)
+    {
+      uint32_t unit = s[i];
+
+      if ((unit >= 0xd800 && unit < 0xe000) || unit >= 0x110000)
+        return s + i;
+    }
+  return NULL;
+}
diff --git a/lib/unistr/u32-chr.c b/lib/unistr/u32-chr.c
new file mode 100644
index 0000000..5a594b2
--- /dev/null
+++ b/lib/unistr/u32-chr.c
@@ -0,0 +1,32 @@
+/* Search character in piece of UTF-32 string.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+/* Return a pointer to the first of the N units at S that equals UC,
+   or NULL if there is none.  */
+uint32_t *
+u32_chr (const uint32_t *s, size_t n, ucs4_t uc)
+{
+  const uint32_t *end = s + n;
+  while (s != end && *s != uc)
+    s++;
+  return (s != end ? (uint32_t *) s : NULL);
+}
diff --git a/lib/unistr/u32-cmp.c b/lib/unistr/u32-cmp.c
new file mode 100644
index 0000000..a273b10
--- /dev/null
+++ b/lib/unistr/u32-cmp.c
@@ -0,0 +1,40 @@
+/* Compare pieces of UTF-32 strings.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+/* Compare the first N units of S1 and S2, like memcmp does for bytes.
+   Return 0 if they are all equal.  Otherwise return -1 or 1 according to
+   whether, at the first position where they differ, the unit of S1 is
+   less than or greater than the unit of S2.  */
+int
+u32_cmp (const uint32_t *s1, const uint32_t *s2, size_t n)
+{
+  for (; n > 0; s1++, s2++, n--)
+    {
+      uint32_t uc1 = *s1;
+      uint32_t uc2 = *s2;
+      /* Compare as unsigned values rather than subtracting after a cast to
+         int: units >= 0x80000000 would yield the wrong sign or overflow.  */
+      if (uc1 != uc2)
+        return (uc1 < uc2 ? -1 : 1);
+    }
+  return 0;
+}
diff --git a/lib/unistr/u32-cmp2.c b/lib/unistr/u32-cmp2.c
new file mode 100644
index 0000000..6924c48
--- /dev/null
+++ b/lib/unistr/u32-cmp2.c
@@ -0,0 +1,28 @@
+/* Compare pieces of UTF-32 strings.
+ Copyright (C) 2009 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2009.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#include "minmax.h"
+
+#define FUNC u32_cmp2
+#define UNIT uint32_t
+#define U_CMP u32_cmp
+#include "u-cmp2.h"
diff --git a/lib/unistr/u32-cpy-alloc.c b/lib/unistr/u32-cpy-alloc.c
new file mode 100644
index 0000000..f9c6960
--- /dev/null
+++ b/lib/unistr/u32-cpy-alloc.c
@@ -0,0 +1,25 @@
+/* Copy piece of UTF-32 string.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u32_cpy_alloc
+#define UNIT uint32_t
+#include "u-cpy-alloc.h"
diff --git a/lib/unistr/u32-cpy.c b/lib/unistr/u32-cpy.c
new file mode 100644
index 0000000..2493b65
--- /dev/null
+++ b/lib/unistr/u32-cpy.c
@@ -0,0 +1,25 @@
+/* Copy piece of UTF-32 string.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u32_cpy
+#define UNIT uint32_t
+#include "u-cpy.h"
diff --git a/lib/unistr/u32-endswith.c b/lib/unistr/u32-endswith.c
new file mode 100644
index 0000000..d77fa8c
--- /dev/null
+++ b/lib/unistr/u32-endswith.c
@@ -0,0 +1,27 @@
+/* Substring test for UTF-32 strings.
+ Copyright (C) 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u32_endswith
+#define UNIT uint32_t
+#define U_STRLEN u32_strlen
+#define U_CMP u32_cmp
+#include "u-endswith.h"
diff --git a/lib/unistr/u32-mblen.c b/lib/unistr/u32-mblen.c
new file mode 100644
index 0000000..a48c8b2
--- /dev/null
+++ b/lib/unistr/u32-mblen.c
@@ -0,0 +1,37 @@
+/* Look at first character in UTF-32 string.
+ Copyright (C) 2002, 2006-2007 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+/* Return the number of units occupied by the first character of the N
+   units at S: 1 in general, 0 if that character is NUL.  Return -1 if
+   N is 0 or if, with CONFIG_UNICODE_SAFETY enabled, the first unit is a
+   surrogate or is >= 0x110000.  */
+int
+u32_mblen (const uint32_t *s, size_t n)
+{
+  if (n == 0)
+    return -1;
+#if CONFIG_UNICODE_SAFETY
+  if ((*s >= 0xd800 && *s < 0xe000) || *s >= 0x110000)
+    return -1;
+#endif
+  return (*s == 0 ? 0 : 1);
+}
diff --git a/lib/unistr/u32-mbsnlen.c b/lib/unistr/u32-mbsnlen.c
new file mode 100644
index 0000000..b7a5d18
--- /dev/null
+++ b/lib/unistr/u32-mbsnlen.c
@@ -0,0 +1,27 @@
+/* Count characters in UTF-32 string.
+ Copyright (C) 2007 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2007.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+size_t
+u32_mbsnlen (const uint32_t *s, size_t n)
+{
+ return n; /* the number of characters is the number of units N; S is not read */
+}
diff --git a/lib/unistr/u32-mbtouc-unsafe.c b/lib/unistr/u32-mbtouc-unsafe.c
new file mode 100644
index 0000000..4bd9e81
--- /dev/null
+++ b/lib/unistr/u32-mbtouc-unsafe.c
@@ -0,0 +1,48 @@
+/* Look at first character in UTF-32 string.
+ Copyright (C) 2002, 2006-2007, 2009 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#if defined IN_LIBUNISTRING
+/* Tell unistr.h to declare u32_mbtouc_unsafe as 'extern', not
+ 'static inline'. */
+# include "unistring-notinline.h"
+#endif
+
+/* Specification. */
+#include "unistr.h"
+
+#if !HAVE_INLINE
+
+/* Store the first unit of S in *PUC and always return 1.  N is not
+   examined.  With CONFIG_UNICODE_SAFETY enabled, a unit that is a
+   surrogate or is >= 0x110000 is replaced by 0xfffd; otherwise the unit
+   is passed through unchecked.  */
+int
+u32_mbtouc_unsafe (ucs4_t *puc, const uint32_t *s, size_t n)
+{
+  ucs4_t result = *s;
+
+#if CONFIG_UNICODE_SAFETY
+  if ((result >= 0xd800 && result < 0xe000) || result >= 0x110000)
+    result = 0xfffd;
+#endif
+  *puc = result;
+  return 1;
+}
+
+#endif
diff --git a/lib/unistr/u32-mbtouc.c b/lib/unistr/u32-mbtouc.c
new file mode 100644
index 0000000..4eeef58
--- /dev/null
+++ b/lib/unistr/u32-mbtouc.c
@@ -0,0 +1,43 @@
+/* Look at first character in UTF-32 string.
+ Copyright (C) 2002, 2006-2007, 2009 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#if defined IN_LIBUNISTRING
+/* Tell unistr.h to declare u32_mbtouc as 'extern', not 'static inline'. */
+# include "unistring-notinline.h"
+#endif
+
+/* Specification. */
+#include "unistr.h"
+
+#if !HAVE_INLINE
+
+/* Store the first unit of S in *PUC, or 0xfffd in its place if the unit
+   is a surrogate or is >= 0x110000.  Always return 1.  N is not
+   examined.  */
+int
+u32_mbtouc (ucs4_t *puc, const uint32_t *s, size_t n)
+{
+  const uint32_t unit = *s;
+  const int valid = unit < 0xd800 || (unit >= 0xe000 && unit < 0x110000);
+
+  *puc = (valid ? unit : 0xfffd);
+  return 1;
+}
+
+#endif
diff --git a/lib/unistr/u32-mbtoucr.c b/lib/unistr/u32-mbtoucr.c
new file mode 100644
index 0000000..7d7993c
--- /dev/null
+++ b/lib/unistr/u32-mbtoucr.c
@@ -0,0 +1,39 @@
+/* Look at first character in UTF-32 string, returning an error code.
+ Copyright (C) 2002, 2006-2007 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+/* Fetch the first unit of S.  If it is a valid character, store it in *PUC
+   and return 1.  Otherwise (a surrogate, or a value >= 0x110000) store
+   0xfffd in *PUC and return -1.  N is not examined.  */
+int
+u32_mbtoucr (ucs4_t *puc, const uint32_t *s, size_t n)
+{
+  uint32_t unit = *s;
+
+  if ((unit >= 0xd800 && unit < 0xe000) || unit >= 0x110000)
+    {
+      /* invalid multibyte character */
+      *puc = 0xfffd;
+      return -1;
+    }
+  *puc = unit;
+  return 1;
+}
diff --git a/lib/unistr/u32-move.c b/lib/unistr/u32-move.c
new file mode 100644
index 0000000..d6c08f4
--- /dev/null
+++ b/lib/unistr/u32-move.c
@@ -0,0 +1,25 @@
+/* Copy piece of UTF-32 string.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u32_move
+#define UNIT uint32_t
+#include "u-move.h"
diff --git a/lib/unistr/u32-next.c b/lib/unistr/u32-next.c
new file mode 100644
index 0000000..c529295
--- /dev/null
+++ b/lib/unistr/u32-next.c
@@ -0,0 +1,39 @@
+/* Iterate over next character in UTF-32 string.
+ Copyright (C) 2002, 2006-2007 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+/* Fetch the character at S into *PUC by means of u32_strmbtouc.  If that
+   reports a positive unit count, return the position just past those
+   units.  Otherwise return NULL; with CONFIG_UNICODE_SAFETY enabled, a
+   negative count additionally sets *PUC to 0xfffd.  */
+const uint32_t *
+u32_next (ucs4_t *puc, const uint32_t *s)
+{
+  int units = u32_strmbtouc (puc, s);
+
+  if (units > 0)
+    return s + units;
+#if CONFIG_UNICODE_SAFETY
+  if (units < 0)
+    *puc = 0xfffd;
+#endif
+  return NULL;
+}
diff --git a/lib/unistr/u32-prev.c b/lib/unistr/u32-prev.c
new file mode 100644
index 0000000..bc20c93
--- /dev/null
+++ b/lib/unistr/u32-prev.c
@@ -0,0 +1,39 @@
+/* Iterate over previous character in UTF-32 string.
+ Copyright (C) 2002, 2006-2007 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+/* Step back from S to the preceding unit, unless S is already at START.
+   Store that unit in *PUC and return its position.  Return NULL, leaving
+   *PUC untouched, if S == START or (with CONFIG_UNICODE_SAFETY enabled)
+   the unit is a surrogate or is >= 0x110000.  */
+const uint32_t *
+u32_prev (ucs4_t *puc, const uint32_t *s, const uint32_t *start)
+{
+  if (s == start)
+    return NULL;
+  s--;
+#if CONFIG_UNICODE_SAFETY
+  if ((*s >= 0xd800 && *s < 0xe000) || *s >= 0x110000)
+    return NULL;
+#endif
+  *puc = *s;
+  return s;
+}
diff --git a/lib/unistr/u32-set.c b/lib/unistr/u32-set.c
new file mode 100644
index 0000000..de10e09
--- /dev/null
+++ b/lib/unistr/u32-set.c
@@ -0,0 +1,26 @@
+/* Fill UTF-32 string.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u32_set
+#define UNIT uint32_t
+#define IS_SINGLE_UNIT(uc) ((uc) < 0xd800 || ((uc) >= 0xe000 && (uc) < 0x110000)) /* UC is neither a surrogate nor >= 0x110000 */
+#include "u-set.h"
diff --git a/lib/unistr/u32-startswith.c b/lib/unistr/u32-startswith.c
new file mode 100644
index 0000000..0f9a185
--- /dev/null
+++ b/lib/unistr/u32-startswith.c
@@ -0,0 +1,25 @@
+/* Substring test for UTF-32 strings.
+ Copyright (C) 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u32_startswith
+#define UNIT uint32_t
+#include "u-startswith.h"
diff --git a/lib/unistr/u32-stpcpy.c b/lib/unistr/u32-stpcpy.c
new file mode 100644
index 0000000..5df8233
--- /dev/null
+++ b/lib/unistr/u32-stpcpy.c
@@ -0,0 +1,25 @@
+/* Copy UTF-32 string.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u32_stpcpy
+#define UNIT uint32_t
+#include "u-stpcpy.h"
diff --git a/lib/unistr/u32-stpncpy.c b/lib/unistr/u32-stpncpy.c
new file mode 100644
index 0000000..19a0894
--- /dev/null
+++ b/lib/unistr/u32-stpncpy.c
@@ -0,0 +1,25 @@
+/* Copy UTF-32 string.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u32_stpncpy
+#define UNIT uint32_t
+#include "u-stpncpy.h"
diff --git a/lib/unistr/u32-strcat.c b/lib/unistr/u32-strcat.c
new file mode 100644
index 0000000..cd4737f
--- /dev/null
+++ b/lib/unistr/u32-strcat.c
@@ -0,0 +1,26 @@
+/* Concatenate UTF-32 strings.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u32_strcat
+#define UNIT uint32_t
+#define U_STRLEN u32_strlen
+#include "u-strcat.h"
diff --git a/lib/unistr/u32-strchr.c b/lib/unistr/u32-strchr.c
new file mode 100644
index 0000000..57a3a3c
--- /dev/null
+++ b/lib/unistr/u32-strchr.c
@@ -0,0 +1,36 @@
+/* Search character in UTF-32 string.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+uint32_t *
+u32_strchr (const uint32_t *s, ucs4_t uc)
+{
+  /* Return the first unit of S equal to UC (the terminating 0 counts as a
+     match when UC is 0), or NULL if the terminator is reached first.  */
+  const uint32_t *p = s;
+  while (*p != uc)
+    {
+      if (*p == 0)
+        return NULL;
+      p++;
+    }
+  return (uint32_t *) p;
+}
diff --git a/lib/unistr/u32-strcmp.c b/lib/unistr/u32-strcmp.c
new file mode 100644
index 0000000..b82d5ed
--- /dev/null
+++ b/lib/unistr/u32-strcmp.c
@@ -0,0 +1,36 @@
+/* Compare UTF-32 strings.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+int
+u32_strcmp (const uint32_t *s1, const uint32_t *s2)
+{
+  /* Compare S1 and S2 unit by unit; the result is < 0, 0 or > 0.  */
+  for (; *s1 != 0 && *s1 == *s2; s1++, s2++)
+    continue;
+  /* Well-formed units have at most 31 bits, so their difference fits in an
+     int and is returned as before.  Ill-formed units with bit 31 set would
+     make that subtraction overflow or report the wrong sign, so those are
+     ordered by an unsigned comparison instead.  */
+  if (((*s1 | *s2) >> 31) != 0)
+    return (*s1 > *s2) - (*s1 < *s2);
+  return (int) *s1 - (int) *s2;
+}
diff --git a/lib/unistr/u32-strcoll.c b/lib/unistr/u32-strcoll.c
new file mode 100644
index 0000000..d5e1482
--- /dev/null
+++ b/lib/unistr/u32-strcoll.c
@@ -0,0 +1,33 @@
+/* Compare UTF-32 strings using the collation rules of the current locale.
+ Copyright (C) 2009 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2009.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "uniconv.h"
+
+#define FUNC u32_strcoll
+#define UNIT uint32_t
+#define U_STRCMP u32_strcmp
+#define U_STRCONV_TO_LOCALE u32_strconv_to_locale
+#include "u-strcoll.h"
diff --git a/lib/unistr/u32-strcpy.c b/lib/unistr/u32-strcpy.c
new file mode 100644
index 0000000..e500100
--- /dev/null
+++ b/lib/unistr/u32-strcpy.c
@@ -0,0 +1,25 @@
+/* Copy UTF-32 string.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u32_strcpy
+#define UNIT uint32_t
+#include "u-strcpy.h"
diff --git a/lib/unistr/u32-strcspn.c b/lib/unistr/u32-strcspn.c
new file mode 100644
index 0000000..f93ab2e
--- /dev/null
+++ b/lib/unistr/u32-strcspn.c
@@ -0,0 +1,51 @@
+/* Search for some characters in UTF-32 string.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u32_strcspn
+#define UNIT uint32_t
+#define U_STRLEN u32_strlen
+#define U_STRCHR u32_strchr
+
+size_t
+FUNC (const UNIT *str, const UNIT *reject)
+{
+  /* Length of the longest prefix of STR containing no unit of REJECT.  */
+  const UNIT *scan = str;
+
+  if (reject[0] == 0)
+    /* Nothing is rejected: the whole string qualifies.  */
+    return U_STRLEN (str);
+  if (reject[1] == 0)
+    {
+      /* Single rejected unit: compare directly, no U_STRCHR call.  */
+      ucs4_t only = reject[0];
+      while (*scan != 0 && *scan != only)
+        scan++;
+    }
+  else
+    {
+      /* General case: look each unit up in REJECT.  */
+      while (*scan != 0 && !U_STRCHR (reject, *scan))
+        scan++;
+    }
+  return scan - str;
+}
diff --git a/lib/unistr/u32-strdup.c b/lib/unistr/u32-strdup.c
new file mode 100644
index 0000000..66e9393
--- /dev/null
+++ b/lib/unistr/u32-strdup.c
@@ -0,0 +1,26 @@
+/* Copy UTF-32 string.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u32_strdup
+#define UNIT uint32_t
+#define U_STRLEN u32_strlen
+#include "u-strdup.h"
diff --git a/lib/unistr/u32-strlen.c b/lib/unistr/u32-strlen.c
new file mode 100644
index 0000000..eb64cc2
--- /dev/null
+++ b/lib/unistr/u32-strlen.c
@@ -0,0 +1,25 @@
+/* Determine length of UTF-32 string.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u32_strlen
+#define UNIT uint32_t
+#include "u-strlen.h"
diff --git a/lib/unistr/u32-strmblen.c b/lib/unistr/u32-strmblen.c
new file mode 100644
index 0000000..4dde3ff
--- /dev/null
+++ b/lib/unistr/u32-strmblen.c
@@ -0,0 +1,36 @@
+/* Look at first character in UTF-32 string.
+ Copyright (C) 2002, 2006-2007 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+int
+u32_strmblen (const uint32_t *s)
+{
+  /* Length in units of the first character of S: 0 at the terminator, 1
+     otherwise, or -1 for an invalid unit if CONFIG_UNICODE_SAFETY is set.  */
+  const uint32_t first = s[0];
+
+#if CONFIG_UNICODE_SAFETY
+  /* Reject surrogates and anything beyond U+10FFFF.  */
+  if ((first >= 0xd800 && first < 0xe000) || first >= 0x110000)
+    return -1;
+#endif
+  return first != 0;
+}
diff --git a/lib/unistr/u32-strmbtouc.c b/lib/unistr/u32-strmbtouc.c
new file mode 100644
index 0000000..e94ac30
--- /dev/null
+++ b/lib/unistr/u32-strmbtouc.c
@@ -0,0 +1,39 @@
+/* Look at first character in UTF-32 string.
+ Copyright (C) 2002, 2006-2007 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+int
+u32_strmbtouc (ucs4_t *puc, const uint32_t *s)
+{
+  /* Store the first character of S in *PUC and return its length in units:
+     0 for the terminator, 1 otherwise.  With CONFIG_UNICODE_SAFETY, an
+     invalid unit yields -1 and leaves *PUC untouched.  */
+  const uint32_t first = s[0];
+
+#if CONFIG_UNICODE_SAFETY
+  /* Reject surrogates and anything beyond U+10FFFF.  */
+  if ((first >= 0xd800 && first < 0xe000) || first >= 0x110000)
+    return -1;
+#endif
+
+  *puc = first;
+  return first != 0;
+}
diff --git a/lib/unistr/u32-strncat.c b/lib/unistr/u32-strncat.c
new file mode 100644
index 0000000..86f895b
--- /dev/null
+++ b/lib/unistr/u32-strncat.c
@@ -0,0 +1,26 @@
+/* Concatenate UTF-32 strings.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u32_strncat
+#define UNIT uint32_t
+#define U_STRLEN u32_strlen
+#include "u-strncat.h"
diff --git a/lib/unistr/u32-strncmp.c b/lib/unistr/u32-strncmp.c
new file mode 100644
index 0000000..026ce50
--- /dev/null
+++ b/lib/unistr/u32-strncmp.c
@@ -0,0 +1,40 @@
+/* Compare UTF-32 strings.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+int
+u32_strncmp (const uint32_t *s1, const uint32_t *s2, size_t n)
+{
+  /* Compare at most N units of S1 and S2; the result is < 0, 0 or > 0.  */
+  for (; n > 0; s1++, s2++, n--)
+    {
+      uint32_t uc1 = *s1;
+      uint32_t uc2 = *s2;
+      if (uc1 != 0 && uc1 == uc2)
+        continue;
+      /* Units with bit 31 set are ill-formed; order them unsigned, because
+         (int) uc1 - (int) uc2 could overflow or report the wrong sign.  */
+      if (((uc1 | uc2) >> 31) != 0)
+        return (uc1 > uc2) - (uc1 < uc2);
+      return (int) uc1 - (int) uc2;
+    }
+  return 0;
+}
diff --git a/lib/unistr/u32-strncpy.c b/lib/unistr/u32-strncpy.c
new file mode 100644
index 0000000..c8f409b
--- /dev/null
+++ b/lib/unistr/u32-strncpy.c
@@ -0,0 +1,25 @@
+/* Copy UTF-32 string.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u32_strncpy
+#define UNIT uint32_t
+#include "u-strncpy.h"
diff --git a/lib/unistr/u32-strnlen.c b/lib/unistr/u32-strnlen.c
new file mode 100644
index 0000000..2ead8f2
--- /dev/null
+++ b/lib/unistr/u32-strnlen.c
@@ -0,0 +1,25 @@
+/* Determine bounded length of UTF-32 string.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u32_strnlen
+#define UNIT uint32_t
+#include "u-strnlen.h"
diff --git a/lib/unistr/u32-strpbrk.c b/lib/unistr/u32-strpbrk.c
new file mode 100644
index 0000000..9579a1b
--- /dev/null
+++ b/lib/unistr/u32-strpbrk.c
@@ -0,0 +1,50 @@
+/* Search for some characters in UTF-32 string.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u32_strpbrk
+#define UNIT uint32_t
+#define U_STRCHR u32_strchr
+
+UNIT *
+FUNC (const UNIT *str, const UNIT *accept)
+{
+  /* Return the first unit of STR that occurs in ACCEPT, or NULL if none.  */
+  const UNIT *scan = str;
+
+  if (accept[0] == 0)
+    /* An empty set can never match.  */
+    return NULL;
+  if (accept[1] == 0)
+    {
+      /* Single accepted unit: compare directly, no U_STRCHR call.  */
+      ucs4_t only = accept[0];
+      while (*scan != 0 && *scan != only)
+        scan++;
+    }
+  else
+    {
+      /* General case: look each unit up in ACCEPT.  */
+      while (*scan != 0 && !U_STRCHR (accept, *scan))
+        scan++;
+    }
+  return *scan != 0 ? (UNIT *) scan : NULL;
+}
diff --git a/lib/unistr/u32-strrchr.c b/lib/unistr/u32-strrchr.c
new file mode 100644
index 0000000..b4641f3
--- /dev/null
+++ b/lib/unistr/u32-strrchr.c
@@ -0,0 +1,38 @@
+/* Search character in UTF-32 string.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+uint32_t *
+u32_strrchr (const uint32_t *s, ucs4_t uc)
+{
+  /* Return the last unit of S equal to UC (the terminating 0 matches when
+     UC is 0), or NULL if none.  One forward pass records the latest match;
+     calling u32_strlen and searching backwards costs more memory accesses.  */
+  const uint32_t *last = NULL;
+  const uint32_t *p = s;
+  do
+    {
+      if (*p == uc)
+        last = p;
+    }
+  while (*p++ != 0);
+  return (uint32_t *) last;
+}
diff --git a/lib/unistr/u32-strspn.c b/lib/unistr/u32-strspn.c
new file mode 100644
index 0000000..6f80202
--- /dev/null
+++ b/lib/unistr/u32-strspn.c
@@ -0,0 +1,50 @@
+/* Search for some characters in UTF-32 string.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u32_strspn
+#define UNIT uint32_t
+#define U_STRCHR u32_strchr
+
+size_t
+FUNC (const UNIT *str, const UNIT *accept)
+{
+  /* Length of the longest prefix of STR made only of units from ACCEPT.  */
+  const UNIT *scan = str;
+
+  if (accept[0] == 0)
+    /* No unit is acceptable, so the prefix is empty.  */
+    return 0;
+  if (accept[1] == 0)
+    {
+      /* Single accepted unit: compare directly, no U_STRCHR call.  */
+      ucs4_t only = accept[0];
+      while (*scan != 0 && *scan == only)
+        scan++;
+    }
+  else
+    {
+      /* General case: look each unit up in ACCEPT.  */
+      while (*scan != 0 && U_STRCHR (accept, *scan))
+        scan++;
+    }
+  return scan - str;
+}
diff --git a/lib/unistr/u32-strstr.c b/lib/unistr/u32-strstr.c
new file mode 100644
index 0000000..285746d
--- /dev/null
+++ b/lib/unistr/u32-strstr.c
@@ -0,0 +1,26 @@
+/* Substring test for UTF-32 strings.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u32_strstr
+#define UNIT uint32_t
+#define U_STRCHR u32_strchr
+#include "u-strstr.h"
diff --git a/lib/unistr/u32-strtok.c b/lib/unistr/u32-strtok.c
new file mode 100644
index 0000000..3f21919
--- /dev/null
+++ b/lib/unistr/u32-strtok.c
@@ -0,0 +1,27 @@
+/* Tokenize UTF-32 string.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u32_strtok
+#define UNIT uint32_t
+#define U_STRSPN u32_strspn
+#define U_STRPBRK u32_strpbrk
+#include "u-strtok.h"
diff --git a/lib/unistr/u32-to-u16.c b/lib/unistr/u32-to-u16.c
new file mode 100644
index 0000000..3a32162
--- /dev/null
+++ b/lib/unistr/u32-to-u16.c
@@ -0,0 +1,130 @@
+/* Convert UTF-32 string to UTF-16 string.
+ Copyright (C) 2002, 2006-2007 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u32_to_u16
+#define SRC_UNIT uint32_t
+#define DST_UNIT uint16_t
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+DST_UNIT *
+FUNC (const SRC_UNIT *s, size_t n, DST_UNIT *resultbuf, size_t *lengthp)
+{
+  /* Convert the N units starting at S to UTF-16.
+     A non-NULL RESULTBUF is tried first, with *LENGTHP giving its capacity
+     in units; when it is NULL or too small, the result is built in
+     malloc'ed memory instead.  On success the result (RESULTBUF or the
+     malloc'ed block) is returned and its length is stored in *LENGTHP.  On
+     failure NULL is returned with errno set to EILSEQ (u16_uctomb rejected
+     a unit) or ENOMEM, and memory allocated here is freed.  */
+  const SRC_UNIT *s_end = s + n;
+  /* Output string accumulator.  */
+  DST_UNIT *result = resultbuf;
+  size_t allocated = (resultbuf != NULL ? *lengthp : 0);
+  size_t length = 0;
+  /* Invariants:
+     result is either == resultbuf or == NULL or malloc-allocated.
+     If length > 0, then result != NULL.  */
+
+  while (s < s_end)
+    {
+      /* Fetch a Unicode character from the input string.  No validation is
+         needed here, because u16_uctomb will verify uc anyway.  */
+      ucs4_t uc = *s++;
+      /* Offer u16_uctomb at most 2 units, which the growth step below
+         already treats as enough for any character.  Passing
+         allocated - length itself would narrow a size_t to the function's
+         space argument (NOTE(review): assumed to be int, as in u32_uctomb)
+         and could make a huge caller buffer look full.  */
+      size_t room = allocated - length;
+      int count = u16_uctomb (result + length, uc, room < 2 ? (int) room : 2);
+
+      if (count == -1)
+        {
+          if (!(result == resultbuf || result == NULL))
+            free (result);
+          errno = EILSEQ;
+          return NULL;
+        }
+      if (count == -2)
+        {
+          /* Out of room: at least double the capacity and make sure the
+             pending character fits.  */
+          DST_UNIT *memory;
+
+          allocated = (allocated > 0 ? 2 * allocated : 12);
+          if (length + 2 > allocated)
+            allocated = length + 2;
+          if (result == resultbuf || result == NULL)
+            memory = (DST_UNIT *) malloc (allocated * sizeof (DST_UNIT));
+          else
+            memory =
+              (DST_UNIT *) realloc (result, allocated * sizeof (DST_UNIT));
+
+          if (memory == NULL)
+            {
+              if (!(result == resultbuf || result == NULL))
+                free (result);
+              errno = ENOMEM;
+              return NULL;
+            }
+          /* Leaving the caller's buffer: carry over what was written.  */
+          if (result == resultbuf && length > 0)
+            memcpy ((char *) memory, (char *) result,
+                    length * sizeof (DST_UNIT));
+          result = memory;
+          /* allocated >= length + 2 now, so 2 units really are free.  */
+          count = u16_uctomb (result + length, uc, 2);
+          if (count < 0)
+            abort ();
+        }
+      length += count;
+    }
+
+  if (length == 0)
+    {
+      if (result == NULL)
+        {
+          /* Return a non-NULL value.  NULL means error.  */
+          result = (DST_UNIT *) malloc (1);
+          if (result == NULL)
+            {
+              errno = ENOMEM;
+              return NULL;
+            }
+        }
+    }
+  else if (result != resultbuf && length < allocated)
+    {
+      /* Shrink the allocated memory if possible.  */
+      DST_UNIT *memory;
+
+      memory = (DST_UNIT *) realloc (result, length * sizeof (DST_UNIT));
+      if (memory != NULL)
+        result = memory;
+    }
+
+  *lengthp = length;
+  return result;
+}
diff --git a/lib/unistr/u32-to-u8.c b/lib/unistr/u32-to-u8.c
new file mode 100644
index 0000000..4dce896
--- /dev/null
+++ b/lib/unistr/u32-to-u8.c
@@ -0,0 +1,130 @@
+/* Convert UTF-32 string to UTF-8 string.
+ Copyright (C) 2002, 2006-2007 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u32_to_u8
+#define SRC_UNIT uint32_t
+#define DST_UNIT uint8_t
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+DST_UNIT *
+FUNC (const SRC_UNIT *s, size_t n, DST_UNIT *resultbuf, size_t *lengthp)
+{
+  /* Convert the N units starting at S to UTF-8.
+     A non-NULL RESULTBUF is tried first, with *LENGTHP giving its capacity
+     in units; when it is NULL or too small, the result is built in
+     malloc'ed memory instead.  On success the result (RESULTBUF or the
+     malloc'ed block) is returned and its length is stored in *LENGTHP.  On
+     failure NULL is returned with errno set to EILSEQ (u8_uctomb rejected
+     a unit) or ENOMEM, and memory allocated here is freed.  */
+  const SRC_UNIT *s_end = s + n;
+  /* Output string accumulator.  */
+  DST_UNIT *result = resultbuf;
+  size_t allocated = (resultbuf != NULL ? *lengthp : 0);
+  size_t length = 0;
+  /* Invariants:
+     result is either == resultbuf or == NULL or malloc-allocated.
+     If length > 0, then result != NULL.  */
+
+  while (s < s_end)
+    {
+      /* Fetch a Unicode character from the input string.  No validation is
+         needed here, because u8_uctomb will verify uc anyway.  */
+      ucs4_t uc = *s++;
+      /* Offer u8_uctomb at most 6 units, which the growth step below
+         already treats as enough for any character.  Passing
+         allocated - length itself would narrow a size_t to the function's
+         space argument (NOTE(review): assumed to be int, as in u32_uctomb)
+         and could make a huge caller buffer look full.  */
+      size_t room = allocated - length;
+      int count = u8_uctomb (result + length, uc, room < 6 ? (int) room : 6);
+
+      if (count == -1)
+        {
+          if (!(result == resultbuf || result == NULL))
+            free (result);
+          errno = EILSEQ;
+          return NULL;
+        }
+      if (count == -2)
+        {
+          /* Out of room: at least double the capacity and make sure the
+             pending character fits.  */
+          DST_UNIT *memory;
+
+          allocated = (allocated > 0 ? 2 * allocated : 12);
+          if (length + 6 > allocated)
+            allocated = length + 6;
+          if (result == resultbuf || result == NULL)
+            memory = (DST_UNIT *) malloc (allocated * sizeof (DST_UNIT));
+          else
+            memory =
+              (DST_UNIT *) realloc (result, allocated * sizeof (DST_UNIT));
+
+          if (memory == NULL)
+            {
+              if (!(result == resultbuf || result == NULL))
+                free (result);
+              errno = ENOMEM;
+              return NULL;
+            }
+          /* Leaving the caller's buffer: carry over what was written.  */
+          if (result == resultbuf && length > 0)
+            memcpy ((char *) memory, (char *) result,
+                    length * sizeof (DST_UNIT));
+          result = memory;
+          /* allocated >= length + 6 now, so 6 units really are free.  */
+          count = u8_uctomb (result + length, uc, 6);
+          if (count < 0)
+            abort ();
+        }
+      length += count;
+    }
+
+  if (length == 0)
+    {
+      if (result == NULL)
+        {
+          /* Return a non-NULL value.  NULL means error.  */
+          result = (DST_UNIT *) malloc (1);
+          if (result == NULL)
+            {
+              errno = ENOMEM;
+              return NULL;
+            }
+        }
+    }
+  else if (result != resultbuf && length < allocated)
+    {
+      /* Shrink the allocated memory if possible.  */
+      DST_UNIT *memory;
+
+      memory = (DST_UNIT *) realloc (result, length * sizeof (DST_UNIT));
+      if (memory != NULL)
+        result = memory;
+    }
+
+  *lengthp = length;
+  return result;
+}
diff --git a/lib/unistr/u32-uctomb.c b/lib/unistr/u32-uctomb.c
new file mode 100644
index 0000000..583b3b6
--- /dev/null
+++ b/lib/unistr/u32-uctomb.c
@@ -0,0 +1,47 @@
+/* Store a character in UTF-32 string.
+ Copyright (C) 2002, 2005-2006, 2009 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#if defined IN_LIBUNISTRING
+/* Tell unistr.h to declare u32_uctomb as 'extern', not 'static inline'. */
+# include "unistring-notinline.h"
+#endif
+
+/* Specification. */
+#include "unistr.h"
+
+#if !HAVE_INLINE
+
+int
+u32_uctomb (uint32_t *s, ucs4_t uc, int n)
+{
+  /* Store UC at S, which has room for N units.  Returns 1 on success, -1 if
+     UC is a surrogate or lies beyond U+10FFFF (checked first), and -2 if N
+     leaves no room; S is written only on success.  */
+  int is_surrogate = (uc >= 0xd800 && uc < 0xe000);
+
+  if (is_surrogate || uc >= 0x110000)
+    return -1;
+  if (n <= 0)
+    return -2;
+
+  *s = uc;
+  return 1;
+}
+
+#endif
diff --git a/lib/unistr/u8-check.c b/lib/unistr/u8-check.c
new file mode 100644
index 0000000..53897fc
--- /dev/null
+++ b/lib/unistr/u8-check.c
@@ -0,0 +1,105 @@
+/* Check UTF-8 string.
+ Copyright (C) 2002, 2006-2007 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+/* Check whether the N bytes starting at S are well-formed UTF-8.
+   Returns NULL if they are; otherwise returns a pointer to the first byte
+   of the first invalid or incomplete multibyte character.
+
+   Accepted sequences are the 1- to 4-byte forms only: overlong encodings,
+   surrogates (ED A0..BF xx) and code points above U+10FFFF are rejected,
+   as are the obsolete 5- and 6-byte forms (lead bytes 0xF8..0xFF).  */
+const uint8_t *
+u8_check (const uint8_t *s, size_t n)
+{
+  const uint8_t *s_end = s + n;
+
+  while (s < s_end)
+    {
+      /* Keep in sync with unistr.h and utf8-ucs4.c.  */
+      uint8_t c = *s;
+      /* Bytes left, including the lead byte.  Comparing this count instead
+         of computing s + k avoids forming a pointer more than one element
+         past the end of the buffer, which is undefined behaviour.  */
+      size_t avail = (size_t) (s_end - s);
+
+      if (c < 0x80)
+        {
+          s++;
+          continue;
+        }
+      if (c >= 0xc2)
+        {
+          if (c < 0xe0)
+            {
+              if (avail >= 2
+                  && (s[1] ^ 0x80) < 0x40)
+                {
+                  s += 2;
+                  continue;
+                }
+            }
+          else if (c < 0xf0)
+            {
+              if (avail >= 3
+                  && (s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
+                  /* Reject overlong encodings (E0 80..9F).  */
+                  && (c >= 0xe1 || s[1] >= 0xa0)
+                  /* Reject surrogates (ED A0..BF).  */
+                  && (c != 0xed || s[1] < 0xa0))
+                {
+                  s += 3;
+                  continue;
+                }
+            }
+          else if (c < 0xf8)
+            {
+              if (avail >= 4
+                  && (s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
+                  && (s[3] ^ 0x80) < 0x40
+                  /* Reject overlong encodings (F0 80..8F).  */
+                  && (c >= 0xf1 || s[1] >= 0x90)
+                  /* Reject code points above U+10FFFF.  */
+                  && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90)))
+                {
+                  s += 4;
+                  continue;
+                }
+            }
+        }
+      /* invalid or incomplete multibyte character */
+      return s;
+    }
+  return NULL;
+}
diff --git a/lib/unistr/u8-chr.c b/lib/unistr/u8-chr.c
new file mode 100644
index 0000000..2c4d768
--- /dev/null
+++ b/lib/unistr/u8-chr.c
@@ -0,0 +1,87 @@
+/* Search character in piece of UTF-8 string.
+ Copyright (C) 1999, 2002, 2006-2007 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+/* Search the N bytes starting at S for the first occurrence of the UTF-8
+   encoding of UC.  Returns a pointer to its first byte, or NULL if it does
+   not occur (or if UC has no 1- to 4-byte encoding).  */
+uint8_t *
+u8_chr (const uint8_t *s, size_t n, ucs4_t uc)
+{
+  uint8_t needle[6];
+  size_t needle_len;
+  size_t pos;
+
+  if (uc < 0x80)
+    {
+      /* ASCII: the encoding is the byte itself.  */
+      needle[0] = uc;
+      needle_len = 1;
+    }
+  else
+    {
+      int count = u8_uctomb_aux (needle, uc, 6);
+
+      /* Only 2-, 3- and 4-byte encodings are searched for.  */
+      if (count < 2 || count > 4)
+        return NULL;
+      needle_len = count;
+    }
+
+  /* The haystack must be able to hold the whole encoding.  */
+  if (n < needle_len)
+    return NULL;
+
+  for (pos = 0; pos <= n - needle_len; pos++)
+    {
+      size_t k = 0;
+
+      /* Compare byte by byte, stopping at the first mismatch.  */
+      while (k < needle_len && s[pos + k] == needle[k])
+        k++;
+      if (k == needle_len)
+        return (uint8_t *) (s + pos);
+    }
+  return NULL;
+}
diff --git a/lib/unistr/u8-cmp.c b/lib/unistr/u8-cmp.c
new file mode 100644
index 0000000..77b2402
--- /dev/null
+++ b/lib/unistr/u8-cmp.c
@@ -0,0 +1,30 @@
+/* Compare pieces of UTF-8 strings.
+ Copyright (C) 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#include <string.h>
+
+int
+u8_cmp (const uint8_t *s1, const uint8_t *s2, size_t n)
+{
+  /* Comparing UTF-8 byte by byte yields the same ordering as comparing the
+     encoded code points, so a plain memory comparison is sufficient.  */
+  int order = memcmp (s1, s2, n);
+
+  return order;
+}
diff --git a/lib/unistr/u8-cmp2.c b/lib/unistr/u8-cmp2.c
new file mode 100644
index 0000000..99cee9f
--- /dev/null
+++ b/lib/unistr/u8-cmp2.c
@@ -0,0 +1,28 @@
+/* Compare pieces of UTF-8 strings.
+ Copyright (C) 2009 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2009.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#include "minmax.h"
+
+/* Instantiate the shared template u-cmp2.h for UTF-8: the function is named
+   u8_cmp2, operates on uint8_t units and uses u8_cmp as its comparison
+   primitive.  (The function body itself lives in u-cmp2.h.)  */
+#define FUNC u8_cmp2
+#define UNIT uint8_t
+#define U_CMP u8_cmp
+#include "u-cmp2.h"
diff --git a/lib/unistr/u8-cpy-alloc.c b/lib/unistr/u8-cpy-alloc.c
new file mode 100644
index 0000000..b2dcc5d
--- /dev/null
+++ b/lib/unistr/u8-cpy-alloc.c
@@ -0,0 +1,25 @@
+/* Copy piece of UTF-8 string.
+ Copyright (C) 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+/* Instantiate the shared template u-cpy-alloc.h for UTF-8: the function is
+   named u8_cpy_alloc and operates on uint8_t units.  (The function body
+   itself lives in u-cpy-alloc.h.)  */
+#define FUNC u8_cpy_alloc
+#define UNIT uint8_t
+#include "u-cpy-alloc.h"
diff --git a/lib/unistr/u8-cpy.c b/lib/unistr/u8-cpy.c
new file mode 100644
index 0000000..bf3a55b
--- /dev/null
+++ b/lib/unistr/u8-cpy.c
@@ -0,0 +1,25 @@
+/* Copy piece of UTF-8 string.
+ Copyright (C) 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+/* Instantiate the shared template u-cpy.h for UTF-8: the function is named
+   u8_cpy and operates on uint8_t units.  (The function body itself lives in
+   u-cpy.h.)  */
+#define FUNC u8_cpy
+#define UNIT uint8_t
+#include "u-cpy.h"
diff --git a/lib/unistr/u8-endswith.c b/lib/unistr/u8-endswith.c
new file mode 100644
index 0000000..e40613a
--- /dev/null
+++ b/lib/unistr/u8-endswith.c
@@ -0,0 +1,27 @@
+/* Substring test for UTF-8 strings.
+ Copyright (C) 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+/* Instantiate the shared template u-endswith.h for UTF-8: the function is
+   named u8_endswith, operates on uint8_t units and is built on u8_strlen
+   and u8_cmp.  (The function body itself lives in u-endswith.h.)  */
+#define FUNC u8_endswith
+#define UNIT uint8_t
+#define U_STRLEN u8_strlen
+#define U_CMP u8_cmp
+#include "u-endswith.h"
diff --git a/lib/unistr/u8-mblen.c b/lib/unistr/u8-mblen.c
new file mode 100644
index 0000000..1981bef
--- /dev/null
+++ b/lib/unistr/u8-mblen.c
@@ -0,0 +1,98 @@
+/* Look at first character in UTF-8 string.
+ Copyright (C) 1999-2000, 2002, 2006-2007 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+/* Return the length, in bytes, of the first character of the UTF-8 string
+ S, of which at most N bytes may be examined.
+ Returns 0 if the first byte is NUL, 1 for any other ASCII byte, 2 to 4
+ for a multibyte character, and -1 if N is 0 or the character is invalid
+ or incomplete.
+ When CONFIG_UNICODE_SAFETY is not set, only the lead byte and the number
+ of available bytes are examined; the continuation bytes are not
+ validated. */
+int
+u8_mblen (const uint8_t *s, size_t n)
+{
+ if (n > 0)
+ {
+ /* Keep in sync with unistr.h and utf8-ucs4.c. */
+ uint8_t c = *s;
+
+ if (c < 0x80)
+ return (c != 0 ? 1 : 0);
+ /* Lead bytes 0x80..0xc1 are continuation bytes or would start an
+ overlong 2-byte form; they fall through to the error return. */
+ if (c >= 0xc2)
+ {
+ if (c < 0xe0)
+ {
+ if (n >= 2
+#if CONFIG_UNICODE_SAFETY
+ && (s[1] ^ 0x80) < 0x40
+#endif
+ )
+ return 2;
+ }
+ else if (c < 0xf0)
+ {
+ if (n >= 3
+#if CONFIG_UNICODE_SAFETY
+ && (s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
+ && (c >= 0xe1 || s[1] >= 0xa0)
+ && (c != 0xed || s[1] < 0xa0)
+#endif
+ )
+ return 3;
+ }
+ else if (c < 0xf8)
+ {
+ if (n >= 4
+#if CONFIG_UNICODE_SAFETY
+ && (s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
+ && (s[3] ^ 0x80) < 0x40
+ && (c >= 0xf1 || s[1] >= 0x90)
+#if 1
+ && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90))
+#endif
+#endif
+ )
+ return 4;
+ }
+ /* The 5- and 6-byte forms below are compiled out. */
+#if 0
+ else if (c < 0xfc)
+ {
+ if (n >= 5
+#if CONFIG_UNICODE_SAFETY
+ && (s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
+ && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
+ && (c >= 0xf9 || s[1] >= 0x88)
+#endif
+ )
+ return 5;
+ }
+ else if (c < 0xfe)
+ {
+ if (n >= 6
+#if CONFIG_UNICODE_SAFETY
+ && (s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
+ && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
+ && (s[5] ^ 0x80) < 0x40
+ && (c >= 0xfd || s[1] >= 0x84)
+#endif
+ )
+ return 6;
+ }
+#endif
+ }
+ }
+ /* invalid or incomplete multibyte character */
+ return -1;
+}
diff --git a/lib/unistr/u8-mbsnlen.c b/lib/unistr/u8-mbsnlen.c
new file mode 100644
index 0000000..29af297
--- /dev/null
+++ b/lib/unistr/u8-mbsnlen.c
@@ -0,0 +1,39 @@
+/* Count characters in UTF-8 string.
+ Copyright (C) 2007 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2007.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+/* Count the characters in the N bytes starting at S.  A NUL byte and each
+   byte that starts an invalid or incomplete sequence count as one
+   character of length one.  */
+size_t
+u8_mbsnlen (const uint8_t *s, size_t n)
+{
+  size_t total = 0;
+
+  for (; n > 0; total++)
+    {
+      int step = u8_mblen (s, n);
+
+      /* u8_mblen reports 0 or a negative value when it cannot advance;
+         step over a single byte in that case.  */
+      if (step < 1)
+        step = 1;
+      s += step;
+      n -= step;
+    }
+  return total;
+}
diff --git a/lib/unistr/u8-mbtouc-aux.c b/lib/unistr/u8-mbtouc-aux.c
new file mode 100644
index 0000000..53d02bf
--- /dev/null
+++ b/lib/unistr/u8-mbtouc-aux.c
@@ -0,0 +1,158 @@
+/* Conversion UTF-8 to UCS-4.
+ Copyright (C) 2001-2002, 2006-2007, 2009 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2001.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#if defined IN_LIBUNISTRING || HAVE_INLINE
+
+/* Decode the multibyte character at S (N bytes available) into *PUC and
+   return the number of bytes consumed.
+   For an invalid sequence, *PUC is set to U+FFFD and 1 is returned; for a
+   sequence cut short by N, *PUC is set to U+FFFD and N is returned.
+   Lead bytes below 0xc2 (ASCII included) are treated as invalid here.  */
+int
+u8_mbtouc_aux (ucs4_t *puc, const uint8_t *s, size_t n)
+{
+  const uint8_t lead = *s;
+  /* Length announced by the lead byte; 0 if it cannot start a multibyte
+     sequence.  */
+  size_t want;
+
+  if (lead < 0xc2 || lead >= 0xf8)
+    want = 0;
+  else if (lead < 0xe0)
+    want = 2;
+  else if (lead < 0xf0)
+    want = 3;
+  else
+    want = 4;
+
+  if (want != 0 && n < want)
+    {
+      /* incomplete multibyte character */
+      *puc = 0xfffd;
+      return n;
+    }
+
+  switch (want)
+    {
+    case 2:
+      {
+        unsigned int t1 = s[1] ^ 0x80;
+
+        if (t1 < 0x40)
+          {
+            *puc = ((unsigned int) (lead & 0x1f) << 6) | t1;
+            return 2;
+          }
+      }
+      break;
+
+    case 3:
+      {
+        unsigned int t1 = s[1] ^ 0x80;
+        unsigned int t2 = s[2] ^ 0x80;
+
+        if (t1 < 0x40 && t2 < 0x40
+            && (lead >= 0xe1 || s[1] >= 0xa0)
+            && (lead != 0xed || s[1] < 0xa0))
+          {
+            *puc = ((unsigned int) (lead & 0x0f) << 12) | (t1 << 6) | t2;
+            return 3;
+          }
+      }
+      break;
+
+    case 4:
+      {
+        unsigned int t1 = s[1] ^ 0x80;
+        unsigned int t2 = s[2] ^ 0x80;
+        unsigned int t3 = s[3] ^ 0x80;
+
+        if (t1 < 0x40 && t2 < 0x40 && t3 < 0x40
+            && (lead >= 0xf1 || s[1] >= 0x90)
+            && (lead < 0xf4 || (lead == 0xf4 && s[1] < 0x90)))
+          {
+            *puc = ((unsigned int) (lead & 0x07) << 18)
+                   | (t1 << 12) | (t2 << 6) | t3;
+            return 4;
+          }
+      }
+      break;
+
+    default:
+      break;
+    }
+
+  /* invalid multibyte character */
+  *puc = 0xfffd;
+  return 1;
+}
+
+#endif
diff --git a/lib/unistr/u8-mbtouc-unsafe-aux.c b/lib/unistr/u8-mbtouc-unsafe-aux.c
new file mode 100644
index 0000000..43e4a36
--- /dev/null
+++ b/lib/unistr/u8-mbtouc-unsafe-aux.c
@@ -0,0 +1,168 @@
+/* Conversion UTF-8 to UCS-4.
+ Copyright (C) 2001-2002, 2006-2007, 2009 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2001.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#if defined IN_LIBUNISTRING || HAVE_INLINE
+
+/* Decode the multibyte character at S (N bytes available) into *PUC and
+ return the number of bytes consumed.
+ For an invalid sequence, *PUC is set to U+FFFD and 1 is returned; for a
+ sequence cut short by N, *PUC is set to U+FFFD and N is returned.
+ The validity checks on the continuation bytes are only compiled in when
+ CONFIG_UNICODE_SAFETY is set; otherwise any bytes are accepted once the
+ lead byte and the available length match.
+ The first byte is read without looking at N, and lead bytes below 0xc2
+ (ASCII included) take the invalid path -- presumably the caller handles
+ ASCII and guarantees N > 0; confirm against unistr.h. */
+int
+u8_mbtouc_unsafe_aux (ucs4_t *puc, const uint8_t *s, size_t n)
+{
+ uint8_t c = *s;
+
+ if (c >= 0xc2)
+ {
+ if (c < 0xe0)
+ {
+ if (n >= 2)
+ {
+#if CONFIG_UNICODE_SAFETY
+ if ((s[1] ^ 0x80) < 0x40)
+#endif
+ {
+ *puc = ((unsigned int) (c & 0x1f) << 6)
+ | (unsigned int) (s[1] ^ 0x80);
+ return 2;
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return n;
+ }
+ }
+ else if (c < 0xf0)
+ {
+ if (n >= 3)
+ {
+#if CONFIG_UNICODE_SAFETY
+ if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
+ && (c >= 0xe1 || s[1] >= 0xa0)
+ && (c != 0xed || s[1] < 0xa0))
+#endif
+ {
+ *puc = ((unsigned int) (c & 0x0f) << 12)
+ | ((unsigned int) (s[1] ^ 0x80) << 6)
+ | (unsigned int) (s[2] ^ 0x80);
+ return 3;
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return n;
+ }
+ }
+ else if (c < 0xf8)
+ {
+ if (n >= 4)
+ {
+#if CONFIG_UNICODE_SAFETY
+ if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
+ && (s[3] ^ 0x80) < 0x40
+ && (c >= 0xf1 || s[1] >= 0x90)
+#if 1
+ && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90))
+#endif
+ )
+#endif
+ {
+ *puc = ((unsigned int) (c & 0x07) << 18)
+ | ((unsigned int) (s[1] ^ 0x80) << 12)
+ | ((unsigned int) (s[2] ^ 0x80) << 6)
+ | (unsigned int) (s[3] ^ 0x80);
+ return 4;
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return n;
+ }
+ }
+ /* The 5- and 6-byte forms below are compiled out. */
+#if 0
+ else if (c < 0xfc)
+ {
+ if (n >= 5)
+ {
+#if CONFIG_UNICODE_SAFETY
+ if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
+ && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
+ && (c >= 0xf9 || s[1] >= 0x88))
+#endif
+ {
+ *puc = ((unsigned int) (c & 0x03) << 24)
+ | ((unsigned int) (s[1] ^ 0x80) << 18)
+ | ((unsigned int) (s[2] ^ 0x80) << 12)
+ | ((unsigned int) (s[3] ^ 0x80) << 6)
+ | (unsigned int) (s[4] ^ 0x80);
+ return 5;
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return n;
+ }
+ }
+ else if (c < 0xfe)
+ {
+ if (n >= 6)
+ {
+#if CONFIG_UNICODE_SAFETY
+ if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
+ && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
+ && (s[5] ^ 0x80) < 0x40
+ && (c >= 0xfd || s[1] >= 0x84))
+#endif
+ {
+ *puc = ((unsigned int) (c & 0x01) << 30)
+ | ((unsigned int) (s[1] ^ 0x80) << 24)
+ | ((unsigned int) (s[2] ^ 0x80) << 18)
+ | ((unsigned int) (s[3] ^ 0x80) << 12)
+ | ((unsigned int) (s[4] ^ 0x80) << 6)
+ | (unsigned int) (s[5] ^ 0x80);
+ return 6;
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return n;
+ }
+ }
+#endif
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 1;
+}
+
+#endif
diff --git a/lib/unistr/u8-mbtouc-unsafe.c b/lib/unistr/u8-mbtouc-unsafe.c
new file mode 100644
index 0000000..4661569
--- /dev/null
+++ b/lib/unistr/u8-mbtouc-unsafe.c
@@ -0,0 +1,179 @@
+/* Look at first character in UTF-8 string.
+ Copyright (C) 1999-2002, 2006-2007, 2009 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2001.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#if defined IN_LIBUNISTRING
+/* Tell unistr.h to declare u8_mbtouc_unsafe as 'extern', not
+ 'static inline'. */
+# include "unistring-notinline.h"
+#endif
+
+/* Specification. */
+#include "unistr.h"
+
+#if !HAVE_INLINE
+
+/* Decode the first character of the UTF-8 string S (N bytes available)
+ into *PUC and return the number of bytes consumed.
+ An ASCII byte is returned as is with length 1.  For an invalid sequence,
+ *PUC is set to U+FFFD and 1 is returned; for a sequence cut short by N,
+ *PUC is set to U+FFFD and N is returned.
+ The validity checks on the continuation bytes are only compiled in when
+ CONFIG_UNICODE_SAFETY is set; otherwise any bytes are accepted once the
+ lead byte and the available length match.
+ The first byte is read without looking at N -- presumably callers
+ guarantee N > 0; confirm against the specification in unistr.h. */
+int
+u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n)
+{
+ uint8_t c = *s;
+
+ if (c < 0x80)
+ {
+ *puc = c;
+ return 1;
+ }
+ else if (c >= 0xc2)
+ {
+ if (c < 0xe0)
+ {
+ if (n >= 2)
+ {
+#if CONFIG_UNICODE_SAFETY
+ if ((s[1] ^ 0x80) < 0x40)
+#endif
+ {
+ *puc = ((unsigned int) (c & 0x1f) << 6)
+ | (unsigned int) (s[1] ^ 0x80);
+ return 2;
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return n;
+ }
+ }
+ else if (c < 0xf0)
+ {
+ if (n >= 3)
+ {
+#if CONFIG_UNICODE_SAFETY
+ if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
+ && (c >= 0xe1 || s[1] >= 0xa0)
+ && (c != 0xed || s[1] < 0xa0))
+#endif
+ {
+ *puc = ((unsigned int) (c & 0x0f) << 12)
+ | ((unsigned int) (s[1] ^ 0x80) << 6)
+ | (unsigned int) (s[2] ^ 0x80);
+ return 3;
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return n;
+ }
+ }
+ else if (c < 0xf8)
+ {
+ if (n >= 4)
+ {
+#if CONFIG_UNICODE_SAFETY
+ if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
+ && (s[3] ^ 0x80) < 0x40
+ && (c >= 0xf1 || s[1] >= 0x90)
+#if 1
+ && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90))
+#endif
+ )
+#endif
+ {
+ *puc = ((unsigned int) (c & 0x07) << 18)
+ | ((unsigned int) (s[1] ^ 0x80) << 12)
+ | ((unsigned int) (s[2] ^ 0x80) << 6)
+ | (unsigned int) (s[3] ^ 0x80);
+ return 4;
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return n;
+ }
+ }
+ /* The 5- and 6-byte forms below are compiled out. */
+#if 0
+ else if (c < 0xfc)
+ {
+ if (n >= 5)
+ {
+#if CONFIG_UNICODE_SAFETY
+ if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
+ && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
+ && (c >= 0xf9 || s[1] >= 0x88))
+#endif
+ {
+ *puc = ((unsigned int) (c & 0x03) << 24)
+ | ((unsigned int) (s[1] ^ 0x80) << 18)
+ | ((unsigned int) (s[2] ^ 0x80) << 12)
+ | ((unsigned int) (s[3] ^ 0x80) << 6)
+ | (unsigned int) (s[4] ^ 0x80);
+ return 5;
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return n;
+ }
+ }
+ else if (c < 0xfe)
+ {
+ if (n >= 6)
+ {
+#if CONFIG_UNICODE_SAFETY
+ if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
+ && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
+ && (s[5] ^ 0x80) < 0x40
+ && (c >= 0xfd || s[1] >= 0x84))
+#endif
+ {
+ *puc = ((unsigned int) (c & 0x01) << 30)
+ | ((unsigned int) (s[1] ^ 0x80) << 24)
+ | ((unsigned int) (s[2] ^ 0x80) << 18)
+ | ((unsigned int) (s[3] ^ 0x80) << 12)
+ | ((unsigned int) (s[4] ^ 0x80) << 6)
+ | (unsigned int) (s[5] ^ 0x80);
+ return 6;
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return n;
+ }
+ }
+#endif
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 1;
+}
+
+#endif
diff --git a/lib/unistr/u8-mbtouc.c b/lib/unistr/u8-mbtouc.c
new file mode 100644
index 0000000..ff624f1
--- /dev/null
+++ b/lib/unistr/u8-mbtouc.c
@@ -0,0 +1,168 @@
+/* Look at first character in UTF-8 string.
+ Copyright (C) 1999-2002, 2006-2007, 2009 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2001.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#if defined IN_LIBUNISTRING
+/* Tell unistr.h to declare u8_mbtouc as 'extern', not 'static inline'. */
+# include "unistring-notinline.h"
+#endif
+
+/* Specification. */
+#include "unistr.h"
+
+#if !HAVE_INLINE
+
+/* Decode the first character of the UTF-8 string S (N bytes available)
+   into *PUC and return the number of bytes consumed.
+   An ASCII byte is returned as is with length 1.  For an invalid sequence,
+   *PUC is set to U+FFFD and 1 is returned; for a sequence cut short by N,
+   *PUC is set to U+FFFD and N is returned.  */
+int
+u8_mbtouc (ucs4_t *puc, const uint8_t *s, size_t n)
+{
+  const uint8_t lead = *s;
+  /* Length announced by the lead byte; 0 if it cannot start a multibyte
+     sequence.  */
+  size_t want;
+
+  /* ASCII fast path.  */
+  if (lead < 0x80)
+    {
+      *puc = lead;
+      return 1;
+    }
+
+  if (lead < 0xc2 || lead >= 0xf8)
+    want = 0;
+  else if (lead < 0xe0)
+    want = 2;
+  else if (lead < 0xf0)
+    want = 3;
+  else
+    want = 4;
+
+  if (want != 0 && n < want)
+    {
+      /* incomplete multibyte character */
+      *puc = 0xfffd;
+      return n;
+    }
+
+  switch (want)
+    {
+    case 2:
+      {
+        unsigned int t1 = s[1] ^ 0x80;
+
+        if (t1 < 0x40)
+          {
+            *puc = ((unsigned int) (lead & 0x1f) << 6) | t1;
+            return 2;
+          }
+      }
+      break;
+
+    case 3:
+      {
+        unsigned int t1 = s[1] ^ 0x80;
+        unsigned int t2 = s[2] ^ 0x80;
+
+        if (t1 < 0x40 && t2 < 0x40
+            && (lead >= 0xe1 || s[1] >= 0xa0)
+            && (lead != 0xed || s[1] < 0xa0))
+          {
+            *puc = ((unsigned int) (lead & 0x0f) << 12) | (t1 << 6) | t2;
+            return 3;
+          }
+      }
+      break;
+
+    case 4:
+      {
+        unsigned int t1 = s[1] ^ 0x80;
+        unsigned int t2 = s[2] ^ 0x80;
+        unsigned int t3 = s[3] ^ 0x80;
+
+        if (t1 < 0x40 && t2 < 0x40 && t3 < 0x40
+            && (lead >= 0xf1 || s[1] >= 0x90)
+            && (lead < 0xf4 || (lead == 0xf4 && s[1] < 0x90)))
+          {
+            *puc = ((unsigned int) (lead & 0x07) << 18)
+                   | (t1 << 12) | (t2 << 6) | t3;
+            return 4;
+          }
+      }
+      break;
+
+    default:
+      break;
+    }
+
+  /* invalid multibyte character */
+  *puc = 0xfffd;
+  return 1;
+}
+
+#endif
diff --git a/lib/unistr/u8-mbtoucr.c b/lib/unistr/u8-mbtoucr.c
new file mode 100644
index 0000000..dd83352
--- /dev/null
+++ b/lib/unistr/u8-mbtoucr.c
@@ -0,0 +1,285 @@
+/* Look at first character in UTF-8 string, returning an error code.
+ Copyright (C) 1999-2002, 2006-2007 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2001.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+/* Decode the character at the start of the UTF-8 buffer S, of which at
+   most N bytes may be examined, and store the result in *PUC.
+   Return value:
+     1..4  the number of bytes the character occupies; *PUC is the
+           decoded code point.
+     -1    the bytes do not form a valid character; *PUC is 0xfffd.
+     -2    the buffer ends before the character is complete (the bytes
+           that are present passed the checks applied to them); *PUC
+           is 0xfffd.
+   The first byte *S is read before N is consulted, so the caller has
+   to pass N > 0.  */
+int
+u8_mbtoucr (ucs4_t *puc, const uint8_t *s, size_t n)
+{
+ uint8_t c = *s;
+
+ if (c < 0x80) /* single byte: the code point is the byte itself */
+ {
+ *puc = c;
+ return 1;
+ }
+ else if (c >= 0xc2) /* 0x80..0xc1 skip this branch and end up "invalid" */
+ {
+ if (c < 0xe0) /* 0xc2..0xdf: two-byte sequence */
+ {
+ if (n >= 2)
+ {
+ if ((s[1] ^ 0x80) < 0x40) /* true iff s[1] is in 0x80..0xbf */
+ {
+ *puc = ((unsigned int) (c & 0x1f) << 6)
+ | (unsigned int) (s[1] ^ 0x80);
+ return 2;
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return -2;
+ }
+ }
+ else if (c < 0xf0) /* 0xe0..0xef: three-byte sequence */
+ {
+ if (n >= 2)
+ {
+ if ((s[1] ^ 0x80) < 0x40
+ && (c >= 0xe1 || s[1] >= 0xa0) /* no overlong form (value < 0x800) */
+ && (c != 0xed || s[1] < 0xa0)) /* no surrogate 0xd800..0xdfff */
+ {
+ if (n >= 3)
+ {
+ if ((s[2] ^ 0x80) < 0x40)
+ {
+ *puc = ((unsigned int) (c & 0x0f) << 12)
+ | ((unsigned int) (s[1] ^ 0x80) << 6)
+ | (unsigned int) (s[2] ^ 0x80);
+ return 3;
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return -2;
+ }
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return -2;
+ }
+ }
+ else if (c < 0xf8) /* 0xf0..0xf7: four-byte sequence */
+ {
+ if (n >= 2)
+ {
+ if ((s[1] ^ 0x80) < 0x40
+ && (c >= 0xf1 || s[1] >= 0x90) /* no overlong form (value < 0x10000) */
+#if 1
+ && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90)) /* value at most 0x10ffff */
+#endif
+ )
+ {
+ if (n >= 3)
+ {
+ if ((s[2] ^ 0x80) < 0x40)
+ {
+ if (n >= 4)
+ {
+ if ((s[3] ^ 0x80) < 0x40)
+ {
+ *puc = ((unsigned int) (c & 0x07) << 18)
+ | ((unsigned int) (s[1] ^ 0x80) << 12)
+ | ((unsigned int) (s[2] ^ 0x80) << 6)
+ | (unsigned int) (s[3] ^ 0x80);
+ return 4;
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return -2;
+ }
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return -2;
+ }
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return -2;
+ }
+ }
+#if 0 /* the five- and six-byte forms below are compiled out */
+ else if (c < 0xfc)
+ {
+ if (n >= 2)
+ {
+ if ((s[1] ^ 0x80) < 0x40
+ && (c >= 0xf9 || s[1] >= 0x88))
+ {
+ if (n >= 3)
+ {
+ if ((s[2] ^ 0x80) < 0x40)
+ {
+ if (n >= 4)
+ {
+ if ((s[3] ^ 0x80) < 0x40)
+ {
+ if (n >= 5)
+ {
+ if ((s[4] ^ 0x80) < 0x40)
+ {
+ *puc = ((unsigned int) (c & 0x03) << 24)
+ | ((unsigned int) (s[1] ^ 0x80) << 18)
+ | ((unsigned int) (s[2] ^ 0x80) << 12)
+ | ((unsigned int) (s[3] ^ 0x80) << 6)
+ | (unsigned int) (s[4] ^ 0x80);
+ return 5;
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return -2;
+ }
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return -2;
+ }
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return -2;
+ }
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return -2;
+ }
+ }
+ else if (c < 0xfe)
+ {
+ if (n >= 2)
+ {
+ if ((s[1] ^ 0x80) < 0x40
+ && (c >= 0xfd || s[1] >= 0x84))
+ {
+ if (n >= 3)
+ {
+ if ((s[2] ^ 0x80) < 0x40)
+ {
+ if (n >= 4)
+ {
+ if ((s[3] ^ 0x80) < 0x40)
+ {
+ if (n >= 5)
+ {
+ if ((s[4] ^ 0x80) < 0x40)
+ {
+ if (n >= 6)
+ {
+ if ((s[5] ^ 0x80) < 0x40)
+ {
+ *puc = ((unsigned int) (c & 0x01) << 30)
+ | ((unsigned int) (s[1] ^ 0x80) << 24)
+ | ((unsigned int) (s[2] ^ 0x80) << 18)
+ | ((unsigned int) (s[3] ^ 0x80) << 12)
+ | ((unsigned int) (s[4] ^ 0x80) << 6)
+ | (unsigned int) (s[5] ^ 0x80);
+ return 6;
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return -2;
+ }
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return -2;
+ }
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return -2;
+ }
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return -2;
+ }
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return -2;
+ }
+ }
+#endif
+ }
+ /* invalid multibyte character: every path above that did not return
+    ends up here */
+ *puc = 0xfffd;
+ return -1;
+}
diff --git a/lib/unistr/u8-move.c b/lib/unistr/u8-move.c
new file mode 100644
index 0000000..5a30be8
--- /dev/null
+++ b/lib/unistr/u8-move.c
@@ -0,0 +1,25 @@
+/* Copy piece of UTF-8 string.
+ Copyright (C) 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+/* u8_move is not written out here: FUNC and UNIT are set for the UTF-8
+   variant and u-move.h is included, which presumably supplies the
+   function definition in terms of these two macros (that header is a
+   separate file).  */
+#define FUNC u8_move
+#define UNIT uint8_t
+#include "u-move.h"
diff --git a/lib/unistr/u8-next.c b/lib/unistr/u8-next.c
new file mode 100644
index 0000000..ecf4f80
--- /dev/null
+++ b/lib/unistr/u8-next.c
@@ -0,0 +1,37 @@
+/* Iterate over next character in UTF-8 string.
+ Copyright (C) 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+/* Step forward over one character of the NUL-terminated UTF-8 string S.
+   Decoding is delegated to u8_strmbtouc.  If it reports a character of
+   COUNT > 0 bytes, *PUC holds that character and S + COUNT is returned.
+   Otherwise NULL is returned: for COUNT == 0 *PUC is left exactly as
+   u8_strmbtouc set it, and for COUNT < 0 *PUC is set to 0xfffd.  */
+const uint8_t *
+u8_next (ucs4_t *puc, const uint8_t *s)
+{
+ int count;
+
+ count = u8_strmbtouc (puc, s);
+ if (count > 0)
+ return s + count;
+ else
+ {
+ if (count < 0) /* decoding failed: report the replacement character */
+ *puc = 0xfffd;
+ return NULL;
+ }
+}
diff --git a/lib/unistr/u8-prev.c b/lib/unistr/u8-prev.c
new file mode 100644
index 0000000..245d22f
--- /dev/null
+++ b/lib/unistr/u8-prev.c
@@ -0,0 +1,93 @@
+/* Iterate over previous character in UTF-8 string.
+ Copyright (C) 2002, 2006-2007 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+/* Step backward over one character: decode the character that ends just
+   before S, never reading bytes located before START.
+   On success store the character in *PUC and return a pointer to its
+   first byte (one of S - 1 ... S - 4).  Return NULL, without writing
+   *PUC, when S == START or when no 1- to 4-byte sequence ending at S is
+   recognised.
+   With CONFIG_UNICODE_SAFETY the trailing bytes must be continuation
+   bytes (0x80..0xbf) and overlong forms, surrogates and values above
+   0x10ffff are rejected; without it only the candidate lead byte is
+   range-checked.  */
+const uint8_t *
+u8_prev (ucs4_t *puc, const uint8_t *s, const uint8_t *start)
+{
+ /* Keep in sync with unistr.h and utf8-ucs4.c. */
+ if (s != start)
+ {
+ uint8_t c_1 = s[-1];
+
+ if (c_1 < 0x80) /* one-byte character */
+ {
+ *puc = c_1;
+ return s - 1;
+ }
+#if CONFIG_UNICODE_SAFETY
+ if ((c_1 ^ 0x80) < 0x40) /* c_1 must be in 0x80..0xbf to go on */
+#endif
+ if (s - 1 != start)
+ {
+ uint8_t c_2 = s[-2];
+
+ if (c_2 >= 0xc2 && c_2 < 0xe0) /* lead byte of a two-byte sequence */
+ {
+ *puc = ((unsigned int) (c_2 & 0x1f) << 6)
+ | (unsigned int) (c_1 ^ 0x80);
+ return s - 2;
+ }
+#if CONFIG_UNICODE_SAFETY
+ if ((c_2 ^ 0x80) < 0x40)
+#endif
+ if (s - 2 != start)
+ {
+ uint8_t c_3 = s[-3];
+
+ if (c_3 >= 0xe0 && c_3 < 0xf0 /* lead byte of a three-byte sequence */
+#if CONFIG_UNICODE_SAFETY
+ && (c_3 >= 0xe1 || c_2 >= 0xa0) /* no overlong form (value < 0x800) */
+ && (c_3 != 0xed || c_2 < 0xa0) /* no surrogate 0xd800..0xdfff */
+#endif
+ )
+ {
+ *puc = ((unsigned int) (c_3 & 0x0f) << 12)
+ | ((unsigned int) (c_2 ^ 0x80) << 6)
+ | (unsigned int) (c_1 ^ 0x80);
+ return s - 3;
+ }
+#if CONFIG_UNICODE_SAFETY
+ if ((c_3 ^ 0x80) < 0x40)
+#endif
+ if (s - 3 != start)
+ {
+ uint8_t c_4 = s[-4];
+
+ if (c_4 >= 0xf0 && c_4 < 0xf8 /* lead byte of a four-byte sequence */
+#if CONFIG_UNICODE_SAFETY
+ && (c_4 >= 0xf1 || c_3 >= 0x90) /* no overlong form (value < 0x10000) */
+ && (c_4 < 0xf4 || (c_4 == 0xf4 && c_3 < 0x90)) /* value at most 0x10ffff */
+#endif
+ )
+ {
+ *puc = ((unsigned int) (c_4 & 0x07) << 18)
+ | ((unsigned int) (c_3 ^ 0x80) << 12)
+ | ((unsigned int) (c_2 ^ 0x80) << 6)
+ | (unsigned int) (c_1 ^ 0x80);
+ return s - 4;
+ }
+ }
+ }
+ }
+ }
+ return NULL;
+}
diff --git a/lib/unistr/u8-set.c b/lib/unistr/u8-set.c
new file mode 100644
index 0000000..5bb4fd7
--- /dev/null
+++ b/lib/unistr/u8-set.c
@@ -0,0 +1,44 @@
+/* Fill UTF-8 string.
+ Copyright (C) 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+/* u8_set (S, UC, N): fill the N units starting at S with the character
+   UC.  Only a UC below 0x80 (see IS_SINGLE_UNIT) can be replicated: it
+   is written with memset and S is returned.  For any other UC with
+   N > 0, errno is set to EILSEQ and NULL is returned.  When N == 0
+   nothing is written and S is returned whatever UC is.  */
+#define FUNC u8_set
+#define UNIT uint8_t
+#define IS_SINGLE_UNIT(uc) (uc < 0x80)
+
+#include <errno.h>
+#include <string.h>
+
+UNIT *
+FUNC (UNIT *s, ucs4_t uc, size_t n)
+{
+ if (n > 0) /* UC is not examined at all for an empty range */
+ {
+ if (IS_SINGLE_UNIT (uc))
+ memset ((char *) s, uc, n);
+ else
+ {
+ errno = EILSEQ;
+ return NULL;
+ }
+ }
+ return s;
+}
diff --git a/lib/unistr/u8-startswith.c b/lib/unistr/u8-startswith.c
new file mode 100644
index 0000000..671e3d9
--- /dev/null
+++ b/lib/unistr/u8-startswith.c
@@ -0,0 +1,25 @@
+/* Substring test for UTF-8 strings.
+ Copyright (C) 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+/* u8_startswith is not written out here: FUNC and UNIT are set for the
+   UTF-8 variant and u-startswith.h is included, which presumably
+   supplies the function definition in terms of these two macros (that
+   header is a separate file).  */
+#define FUNC u8_startswith
+#define UNIT uint8_t
+#include "u-startswith.h"
diff --git a/lib/unistr/u8-stpcpy.c b/lib/unistr/u8-stpcpy.c
new file mode 100644
index 0000000..a5c15c5
--- /dev/null
+++ b/lib/unistr/u8-stpcpy.c
@@ -0,0 +1,44 @@
+/* Copy UTF-8 string.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* Ensure stpcpy() gets declared. */
+#ifndef _GNU_SOURCE
+# define _GNU_SOURCE 1
+#endif
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+/* u8_stpcpy (DEST, SRC): copy the NUL-terminated string SRC to DEST.
+   When HAVE_STPCPY is set, this forwards to the C library's stpcpy,
+   only casting the uint8_t pointers to char pointers and the result
+   back; stpcpy returns a pointer to the NUL it wrote at the end of
+   DEST.  Otherwise the definition is taken from u-stpcpy.h with FUNC
+   and UNIT set for the UTF-8 variant (that header is a separate file).  */
+#if HAVE_STPCPY
+
+# include <string.h>
+
+uint8_t *
+u8_stpcpy (uint8_t *dest, const uint8_t *src)
+{
+ return (uint8_t *) stpcpy ((char *) dest, (const char *) src);
+}
+
+#else
+
+# define FUNC u8_stpcpy
+# define UNIT uint8_t
+# include "u-stpcpy.h"
+
+#endif
diff --git a/lib/unistr/u8-stpncpy.c b/lib/unistr/u8-stpncpy.c
new file mode 100644
index 0000000..87f254d
--- /dev/null
+++ b/lib/unistr/u8-stpncpy.c
@@ -0,0 +1,44 @@
+/* Copy UTF-8 string.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* Ensure stpncpy() gets declared. */
+#ifndef _GNU_SOURCE
+# define _GNU_SOURCE 1
+#endif
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+/* u8_stpncpy (DEST, SRC, N): bounded copy of the string SRC to DEST.
+   When __GLIBC__ >= 2, this forwards to the C library's stpncpy with N
+   passed through unchanged, only casting the uint8_t pointers to char
+   pointers and the result back.  Otherwise the definition is taken from
+   u-stpncpy.h with FUNC and UNIT set for the UTF-8 variant (that header
+   is a separate file).  */
+#if __GLIBC__ >= 2
+
+# include <string.h>
+
+uint8_t *
+u8_stpncpy (uint8_t *dest, const uint8_t *src, size_t n)
+{
+ return (uint8_t *) stpncpy ((char *) dest, (const char *) src, n);
+}
+
+#else
+
+# define FUNC u8_stpncpy
+# define UNIT uint8_t
+# include "u-stpncpy.h"
+
+#endif
diff --git a/lib/unistr/u8-strcat.c b/lib/unistr/u8-strcat.c
new file mode 100644
index 0000000..25ec5d3
--- /dev/null
+++ b/lib/unistr/u8-strcat.c
@@ -0,0 +1,29 @@
+/* Concatenate UTF-8 strings.
+ Copyright (C) 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#include <string.h>
+/* Append the NUL-terminated string SRC to the NUL-terminated string
+   DEST by forwarding to strcat; the uint8_t pointers are only cast to
+   char pointers.  The value returned is strcat's result cast back,
+   i.e. DEST.  */
+uint8_t *
+u8_strcat (uint8_t *dest, const uint8_t *src)
+{
+ return (uint8_t *) strcat ((char *) dest, (const char *) src);
+}
diff --git a/lib/unistr/u8-strchr.c b/lib/unistr/u8-strchr.c
new file mode 100644
index 0000000..370a793
--- /dev/null
+++ b/lib/unistr/u8-strchr.c
@@ -0,0 +1,100 @@
+/* Search character in UTF-8 string.
+ Copyright (C) 1999, 2002, 2006-2007 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+/* Search the NUL-terminated UTF-8 string S for the character UC.
+   Return a pointer to the first position at which the encoded bytes of
+   UC appear, or NULL if there is none.
+   For UC < 0x80 this is a plain byte scan; because the match test comes
+   before the end-of-string test, UC == 0 finds the terminating NUL
+   itself.  Otherwise UC is encoded into a local buffer with
+   u8_uctomb_aux and S is searched for that 2-, 3- or 4-byte pattern.
+   Any other result from u8_uctomb_aux has no matching case label and
+   therefore yields NULL.  */
+uint8_t *
+u8_strchr (const uint8_t *s, ucs4_t uc)
+{
+ uint8_t c[6];
+
+ if (uc < 0x80)
+ {
+ uint8_t c0 = uc;
+
+ for (;; s++)
+ {
+ if (*s == c0)
+ break;
+ if (*s == 0)
+ goto notfound;
+ }
+ return (uint8_t *) s;
+ }
+ else
+ switch (u8_uctomb_aux (c, uc, 6))
+ {
+ case 2:
+ if (*s == 0) /* empty string: s[1] must not be read */
+ goto notfound;
+ {
+ uint8_t c0 = c[0];
+ uint8_t c1 = c[1];
+
+ for (;; s++)
+ {
+ if (s[1] == 0) /* fewer than 2 bytes left before the NUL */
+ goto notfound;
+ if (*s == c0 && s[1] == c1)
+ break;
+ }
+ return (uint8_t *) s;
+ }
+
+ case 3:
+ if (*s == 0 || s[1] == 0) /* shorter than 2 bytes: s[2] must not be read */
+ goto notfound;
+ {
+ uint8_t c0 = c[0];
+ uint8_t c1 = c[1];
+ uint8_t c2 = c[2];
+
+ for (;; s++)
+ {
+ if (s[2] == 0) /* fewer than 3 bytes left before the NUL */
+ goto notfound;
+ if (*s == c0 && s[1] == c1 && s[2] == c2)
+ break;
+ }
+ return (uint8_t *) s;
+ }
+
+ case 4:
+ if (*s == 0 || s[1] == 0 || s[2] == 0) /* shorter than 3 bytes: s[3] must not be read */
+ goto notfound;
+ {
+ uint8_t c0 = c[0];
+ uint8_t c1 = c[1];
+ uint8_t c2 = c[2];
+ uint8_t c3 = c[3];
+
+ for (;; s++)
+ {
+ if (s[3] == 0) /* fewer than 4 bytes left before the NUL */
+ goto notfound;
+ if (*s == c0 && s[1] == c1 && s[2] == c2 && s[3] == c3)
+ break;
+ }
+ return (uint8_t *) s;
+ }
+ }
+notfound:
+ return NULL;
+}
diff --git a/lib/unistr/u8-strcmp.c b/lib/unistr/u8-strcmp.c
new file mode 100644
index 0000000..58b4b8c
--- /dev/null
+++ b/lib/unistr/u8-strcmp.c
@@ -0,0 +1,30 @@
+/* Compare UTF-8 strings.
+ Copyright (C) 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#include <string.h>
+/* Compare the NUL-terminated strings S1 and S2 by forwarding to strcmp
+   (the uint8_t pointers are only cast to char pointers) and return
+   strcmp's result unchanged.  */
+int
+u8_strcmp (const uint8_t *s1, const uint8_t *s2)
+{
+ /* Use the fact that the UTF-8 encoding preserves lexicographic order. */
+ return strcmp ((const char *) s1, (const char *) s2);
+}
diff --git a/lib/unistr/u8-strcoll.c b/lib/unistr/u8-strcoll.c
new file mode 100644
index 0000000..ca08609
--- /dev/null
+++ b/lib/unistr/u8-strcoll.c
@@ -0,0 +1,33 @@
+/* Compare UTF-8 strings using the collation rules of the current locale.
+ Copyright (C) 2009 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2009.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "uniconv.h"
+/* u8_strcoll is not written out here: the macros below name the UTF-8
+   unit type and the UTF-8 helpers (u8_strcmp, u8_strconv_to_locale),
+   and u-strcoll.h is included, which presumably supplies the function
+   definition in terms of them (that header is a separate file).  */
+#define FUNC u8_strcoll
+#define UNIT uint8_t
+#define U_STRCMP u8_strcmp
+#define U_STRCONV_TO_LOCALE u8_strconv_to_locale
+#include "u-strcoll.h"
diff --git a/lib/unistr/u8-strcpy.c b/lib/unistr/u8-strcpy.c
new file mode 100644
index 0000000..b93c8cb
--- /dev/null
+++ b/lib/unistr/u8-strcpy.c
@@ -0,0 +1,29 @@
+/* Copy UTF-8 string.
+ Copyright (C) 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#include <string.h>
+/* Copy the NUL-terminated string SRC to DEST by forwarding to strcpy;
+   the uint8_t pointers are only cast to char pointers.  The value
+   returned is strcpy's result cast back, i.e. DEST.  */
+uint8_t *
+u8_strcpy (uint8_t *dest, const uint8_t *src)
+{
+ return (uint8_t *) strcpy ((char *) dest, (const char *) src);
+}
diff --git a/lib/unistr/u8-strcspn.c b/lib/unistr/u8-strcspn.c
new file mode 100644
index 0000000..a43b53f
--- /dev/null
+++ b/lib/unistr/u8-strcspn.c
@@ -0,0 +1,28 @@
+/* Search for some characters in UTF-8 string.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+/* u8_strcspn is not written out here: the macros below name the UTF-8
+   unit type and the UTF-8 helpers (u8_strlen, u8_strmbtouc, u8_strchr),
+   and u-strcspn.h is included, which presumably supplies the function
+   definition in terms of them (that header is a separate file).  */
+#define FUNC u8_strcspn
+#define UNIT uint8_t
+#define U_STRLEN u8_strlen
+#define U_STRMBTOUC u8_strmbtouc
+#define U_STRCHR u8_strchr
+#include "u-strcspn.h"
diff --git a/lib/unistr/u8-strdup.c b/lib/unistr/u8-strdup.c
new file mode 100644
index 0000000..feaa872
--- /dev/null
+++ b/lib/unistr/u8-strdup.c
@@ -0,0 +1,40 @@
+/* Copy UTF-8 string.
+ Copyright (C) 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+/* u8_strdup (S): duplicate the NUL-terminated string S.
+   When HAVE_STRDUP is set, this forwards to the C library's strdup,
+   only casting the uint8_t pointer to a char pointer and the result
+   back; whatever strdup returns (including NULL) is passed on as is.
+   Otherwise the definition is taken from u-strdup.h with FUNC, UNIT and
+   U_STRLEN set for the UTF-8 variant (that header is a separate file).  */
+#if HAVE_STRDUP
+
+# include <string.h>
+
+uint8_t *
+u8_strdup (const uint8_t *s)
+{
+ return (uint8_t *) strdup ((const char *) s);
+}
+
+#else
+
+# define FUNC u8_strdup
+# define UNIT uint8_t
+# define U_STRLEN u8_strlen
+# include "u-strdup.h"
+
+#endif
diff --git a/lib/unistr/u8-strlen.c b/lib/unistr/u8-strlen.c
new file mode 100644
index 0000000..cf5b9ce
--- /dev/null
+++ b/lib/unistr/u8-strlen.c
@@ -0,0 +1,29 @@
+/* Determine length of UTF-8 string.
+ Copyright (C) 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#include <string.h>
+/* Return the length of the NUL-terminated string S by forwarding to
+   strlen.  The result therefore counts uint8_t units (bytes) before
+   the terminating NUL, not decoded characters.  */
+size_t
+u8_strlen (const uint8_t *s)
+{
+ return strlen ((const char *) s);
+}
diff --git a/lib/unistr/u8-strmblen.c b/lib/unistr/u8-strmblen.c
new file mode 100644
index 0000000..52db99d
--- /dev/null
+++ b/lib/unistr/u8-strmblen.c
@@ -0,0 +1,96 @@
+/* Look at first character in UTF-8 string.
+ Copyright (C) 1999-2000, 2002, 2006-2007 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+/* Return the length in bytes of the character at the start of the
+   NUL-terminated UTF-8 string S:
+     0      S is at the terminating NUL;
+     1..4   length of the character;
+     -1     invalid or incomplete sequence.
+   With CONFIG_UNICODE_SAFETY the trailing bytes must be continuation
+   bytes (0x80..0xbf) and overlong forms, surrogates and values above
+   0x10ffff are rejected; otherwise the trailing bytes only have to be
+   non-NUL.  In both variants the && chain fails at a NUL byte at the
+   latest, so nothing beyond the terminator is read.  */
+int
+u8_strmblen (const uint8_t *s)
+{
+ /* Keep in sync with unistr.h and utf8-ucs4.c. */
+ uint8_t c = *s;
+
+ if (c < 0x80)
+ return (c != 0 ? 1 : 0);
+ if (c >= 0xc2) /* 0x80..0xc1 skip this branch and return -1 */
+ {
+ if (c < 0xe0) /* 0xc2..0xdf: two-byte sequence */
+ {
+#if CONFIG_UNICODE_SAFETY
+ if ((s[1] ^ 0x80) < 0x40) /* true iff s[1] is in 0x80..0xbf */
+#else
+ if (s[1] != 0)
+#endif
+ return 2;
+ }
+ else if (c < 0xf0) /* 0xe0..0xef: three-byte sequence */
+ {
+#if CONFIG_UNICODE_SAFETY
+ if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
+ && (c >= 0xe1 || s[1] >= 0xa0) /* no overlong form (value < 0x800) */
+ && (c != 0xed || s[1] < 0xa0)) /* no surrogate 0xd800..0xdfff */
+#else
+ if (s[1] != 0 && s[2] != 0)
+#endif
+ return 3;
+ }
+ else if (c < 0xf8) /* 0xf0..0xf7: four-byte sequence */
+ {
+#if CONFIG_UNICODE_SAFETY
+ if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
+ && (s[3] ^ 0x80) < 0x40
+ && (c >= 0xf1 || s[1] >= 0x90) /* no overlong form (value < 0x10000) */
+#if 1
+ && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90)) /* value at most 0x10ffff */
+#endif
+ )
+#else
+ if (s[1] != 0 && s[2] != 0 && s[3] != 0)
+#endif
+ return 4;
+ }
+#if 0 /* the five- and six-byte forms below are compiled out */
+ else if (c < 0xfc)
+ {
+#if CONFIG_UNICODE_SAFETY
+ if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
+ && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
+ && (c >= 0xf9 || s[1] >= 0x88))
+#else
+ if (s[1] != 0 && s[2] != 0 && s[3] != 0 && s[4] != 0)
+#endif
+ return 5;
+ }
+ else if (c < 0xfe)
+ {
+#if CONFIG_UNICODE_SAFETY
+ if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
+ && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
+ && (s[5] ^ 0x80) < 0x40
+ && (c >= 0xfd || s[1] >= 0x84))
+#else
+ if (s[1] != 0 && s[2] != 0 && s[3] != 0 && s[4] != 0 && s[5] != 0)
+#endif
+ return 6;
+ }
+#endif
+ }
+ /* invalid or incomplete multibyte character */
+ return -1;
+}
diff --git a/lib/unistr/u8-strmbtouc.c b/lib/unistr/u8-strmbtouc.c
new file mode 100644
index 0000000..bc2fdb0
--- /dev/null
+++ b/lib/unistr/u8-strmbtouc.c
@@ -0,0 +1,129 @@
+/* Look at first character in UTF-8 string.
+ Copyright (C) 1999-2000, 2002, 2006-2007 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+/* Decode the character at the start of the NUL-terminated UTF-8 string
+   S and store it in *PUC.  Return value:
+     0      S is at the terminating NUL; *PUC is 0;
+     1..4   number of bytes of the character; *PUC is the code point;
+     -1     invalid or incomplete sequence; *PUC is not written.
+   With CONFIG_UNICODE_SAFETY the trailing bytes must be continuation
+   bytes (0x80..0xbf) and overlong forms, surrogates and values above
+   0x10ffff are rejected; otherwise the trailing bytes only have to be
+   non-NUL.  In both variants the && chain fails at a NUL byte at the
+   latest, so nothing beyond the terminator is read.  */
+int
+u8_strmbtouc (ucs4_t *puc, const uint8_t *s)
+{
+ /* Keep in sync with unistr.h and utf8-ucs4.c. */
+ uint8_t c = *s;
+
+ if (c < 0x80)
+ {
+ *puc = c;
+ return (c != 0 ? 1 : 0);
+ }
+ if (c >= 0xc2) /* 0x80..0xc1 skip this branch and return -1 */
+ {
+ if (c < 0xe0) /* 0xc2..0xdf: two-byte sequence */
+ {
+#if CONFIG_UNICODE_SAFETY
+ if ((s[1] ^ 0x80) < 0x40) /* true iff s[1] is in 0x80..0xbf */
+#else
+ if (s[1] != 0)
+#endif
+ {
+ *puc = ((unsigned int) (c & 0x1f) << 6)
+ | (unsigned int) (s[1] ^ 0x80);
+ return 2;
+ }
+ }
+ else if (c < 0xf0) /* 0xe0..0xef: three-byte sequence */
+ {
+#if CONFIG_UNICODE_SAFETY
+ if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
+ && (c >= 0xe1 || s[1] >= 0xa0) /* no overlong form (value < 0x800) */
+ && (c != 0xed || s[1] < 0xa0)) /* no surrogate 0xd800..0xdfff */
+#else
+ if (s[1] != 0 && s[2] != 0)
+#endif
+ {
+ *puc = ((unsigned int) (c & 0x0f) << 12)
+ | ((unsigned int) (s[1] ^ 0x80) << 6)
+ | (unsigned int) (s[2] ^ 0x80);
+ return 3;
+ }
+ }
+ else if (c < 0xf8) /* 0xf0..0xf7: four-byte sequence */
+ {
+#if CONFIG_UNICODE_SAFETY
+ if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
+ && (s[3] ^ 0x80) < 0x40
+ && (c >= 0xf1 || s[1] >= 0x90) /* no overlong form (value < 0x10000) */
+#if 1
+ && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90)) /* value at most 0x10ffff */
+#endif
+ )
+#else
+ if (s[1] != 0 && s[2] != 0 && s[3] != 0)
+#endif
+ {
+ *puc = ((unsigned int) (c & 0x07) << 18)
+ | ((unsigned int) (s[1] ^ 0x80) << 12)
+ | ((unsigned int) (s[2] ^ 0x80) << 6)
+ | (unsigned int) (s[3] ^ 0x80);
+ return 4;
+ }
+ }
+#if 0 /* the five- and six-byte forms below are compiled out */
+ else if (c < 0xfc)
+ {
+#if CONFIG_UNICODE_SAFETY
+ if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
+ && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
+ && (c >= 0xf9 || s[1] >= 0x88))
+#else
+ if (s[1] != 0 && s[2] != 0 && s[3] != 0 && s[4] != 0)
+#endif
+ {
+ *puc = ((unsigned int) (c & 0x03) << 24)
+ | ((unsigned int) (s[1] ^ 0x80) << 18)
+ | ((unsigned int) (s[2] ^ 0x80) << 12)
+ | ((unsigned int) (s[3] ^ 0x80) << 6)
+ | (unsigned int) (s[4] ^ 0x80);
+ return 5;
+ }
+ }
+ else if (c < 0xfe)
+ {
+#if CONFIG_UNICODE_SAFETY
+ if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
+ && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
+ && (s[5] ^ 0x80) < 0x40
+ && (c >= 0xfd || s[1] >= 0x84))
+#else
+ if (s[1] != 0 && s[2] != 0 && s[3] != 0 && s[4] != 0 && s[5] != 0)
+#endif
+ {
+ *puc = ((unsigned int) (c & 0x01) << 30)
+ | ((unsigned int) (s[1] ^ 0x80) << 24)
+ | ((unsigned int) (s[2] ^ 0x80) << 18)
+ | ((unsigned int) (s[3] ^ 0x80) << 12)
+ | ((unsigned int) (s[4] ^ 0x80) << 6)
+ | (unsigned int) (s[5] ^ 0x80);
+ return 6;
+ }
+ }
+#endif
+ }
+ /* invalid or incomplete multibyte character */
+ return -1;
+}
diff --git a/lib/unistr/u8-strncat.c b/lib/unistr/u8-strncat.c
new file mode 100644
index 0000000..d965460
--- /dev/null
+++ b/lib/unistr/u8-strncat.c
@@ -0,0 +1,29 @@
+/* Concatenate UTF-8 strings.
+ Copyright (C) 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#include <string.h>
+/* Append at most N units of SRC to the NUL-terminated DEST; returns strncat's result. */
+uint8_t *
+u8_strncat (uint8_t *dest, const uint8_t *src, size_t n)
+{
+ return (uint8_t *) strncat ((char *) dest, (const char *) src, n);
+}
diff --git a/lib/unistr/u8-strncmp.c b/lib/unistr/u8-strncmp.c
new file mode 100644
index 0000000..5aad7e7
--- /dev/null
+++ b/lib/unistr/u8-strncmp.c
@@ -0,0 +1,30 @@
+/* Compare UTF-8 strings.
+ Copyright (C) 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#include <string.h>
+/* Compare at most N units of S1 and S2; the result is whatever strncmp returns. */
+int
+u8_strncmp (const uint8_t *s1, const uint8_t *s2, size_t n)
+{
+ /* UTF-8 preserves lexicographic order, so comparing the bytes is sufficient. */
+ return strncmp ((const char *) s1, (const char *) s2, n);
+}
diff --git a/lib/unistr/u8-strncpy.c b/lib/unistr/u8-strncpy.c
new file mode 100644
index 0000000..f478fab
--- /dev/null
+++ b/lib/unistr/u8-strncpy.c
@@ -0,0 +1,29 @@
+/* Copy UTF-8 string.
+ Copyright (C) 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#include <string.h>
+/* Copy at most N units of SRC to DEST with strncpy semantics; returns strncpy's result. */
+uint8_t *
+u8_strncpy (uint8_t *dest, const uint8_t *src, size_t n)
+{
+ return (uint8_t *) strncpy ((char *) dest, (const char *) src, n);
+}
diff --git a/lib/unistr/u8-strnlen.c b/lib/unistr/u8-strnlen.c
new file mode 100644
index 0000000..32ce7b8
--- /dev/null
+++ b/lib/unistr/u8-strnlen.c
@@ -0,0 +1,44 @@
+/* Determine bounded length of UTF-8 string.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* Ensure strnlen() gets declared. */
+#ifndef _GNU_SOURCE
+# define _GNU_SOURCE 1
+#endif
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#if __GLIBC__ >= 2
+/* With glibc 2 or newer, forward to the C library's strnlen. */
+# include <string.h>
+/* Length of S in units, looking at no more than MAXLEN units. */
+size_t
+u8_strnlen (const uint8_t *s, size_t maxlen)
+{
+ return strnlen ((const char *) s, maxlen);
+}
+
+#else
+/* Otherwise u-strnlen.h is included with FUNC/UNIT set (its body is not shown here). */
+# define FUNC u8_strnlen
+# define UNIT uint8_t
+# include "u-strnlen.h"
+
+#endif
diff --git a/lib/unistr/u8-strpbrk.c b/lib/unistr/u8-strpbrk.c
new file mode 100644
index 0000000..1538faa
--- /dev/null
+++ b/lib/unistr/u8-strpbrk.c
@@ -0,0 +1,27 @@
+/* Search for some characters in UTF-8 string.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+/* Parameters for u-strpbrk.h, which presumably defines FUNC from them -- confirm there. */
+#define FUNC u8_strpbrk
+#define UNIT uint8_t
+#define U_STRMBTOUC u8_strmbtouc
+#define U_STRCHR u8_strchr
+#include "u-strpbrk.h"
diff --git a/lib/unistr/u8-strrchr.c b/lib/unistr/u8-strrchr.c
new file mode 100644
index 0000000..309f323
--- /dev/null
+++ b/lib/unistr/u8-strrchr.c
@@ -0,0 +1,101 @@
+/* Search character in UTF-8 string.
+ Copyright (C) 1999, 2002, 2006-2007 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+/* Return a pointer to the last occurrence of UC in the NUL-terminated S, or NULL. */
+uint8_t *
+u8_strrchr (const uint8_t *s, ucs4_t uc)
+{
+ /* Scan forward once, remembering the latest match. Calling u8_strlen and then
+ searching from the end would cost more memory accesses, though fewer compares. */
+ uint8_t *result = NULL; /* latest match so far; stays NULL if there is none */
+ uint8_t c[6]; /* receives the UTF-8 units of UC from u8_uctomb_aux */
+
+ if (uc < 0x80) /* UC occupies a single unit */
+ {
+ uint8_t c0 = uc;
+ /* Compare before testing for the end, so that uc == 0 finds the terminator. */
+ for (;; s++)
+ {
+ if (*s == c0)
+ result = (uint8_t *) s;
+ if (*s == 0)
+ break;
+ }
+ }
+ else
+ switch (u8_uctomb_aux (c, uc, 6)) /* number of units, or negative on failure */
+ { /* a negative value selects no case, leaving result NULL */
+ case 2:
+ if (*s) /* at least one unit, so s[1] may be read */
+ {
+ uint8_t c0 = c[0];
+ uint8_t c1 = c[1];
+
+ /* FIXME: Maybe walking the string via u8_mblen is a win? */
+ for (;; s++)
+ {
+ if (s[1] == 0) /* fewer than 2 units left: no further match possible */
+ break;
+ if (*s == c0 && s[1] == c1)
+ result = (uint8_t *) s;
+ }
+ }
+ break;
+
+ case 3:
+ if (*s && s[1]) /* at least two units, so s[2] may be read */
+ {
+ uint8_t c0 = c[0];
+ uint8_t c1 = c[1];
+ uint8_t c2 = c[2];
+
+ /* FIXME: Maybe walking the string via u8_mblen is a win? */
+ for (;; s++)
+ {
+ if (s[2] == 0) /* fewer than 3 units left: no further match possible */
+ break;
+ if (*s == c0 && s[1] == c1 && s[2] == c2)
+ result = (uint8_t *) s;
+ }
+ }
+ break;
+
+ case 4:
+ if (*s && s[1] && s[2]) /* at least three units, so s[3] may be read */
+ {
+ uint8_t c0 = c[0];
+ uint8_t c1 = c[1];
+ uint8_t c2 = c[2];
+ uint8_t c3 = c[3];
+
+ /* FIXME: Maybe walking the string via u8_mblen is a win? */
+ for (;; s++)
+ {
+ if (s[3] == 0) /* fewer than 4 units left: no further match possible */
+ break;
+ if (*s == c0 && s[1] == c1 && s[2] == c2 && s[3] == c3)
+ result = (uint8_t *) s;
+ }
+ }
+ break;
+ }
+ return result;
+}
diff --git a/lib/unistr/u8-strspn.c b/lib/unistr/u8-strspn.c
new file mode 100644
index 0000000..582c825
--- /dev/null
+++ b/lib/unistr/u8-strspn.c
@@ -0,0 +1,29 @@
+/* Search for some characters in UTF-8 string.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+/* Parameters for u-strspn.h, which presumably defines FUNC from them -- confirm there. */
+#define FUNC u8_strspn
+#define UNIT uint8_t
+#define U_STRLEN u8_strlen
+#define U_STRMBTOUC u8_strmbtouc
+#define U_CMP u8_cmp
+#define U_STRCHR u8_strchr
+#include "u-strspn.h"
diff --git a/lib/unistr/u8-strstr.c b/lib/unistr/u8-strstr.c
new file mode 100644
index 0000000..b5cbf24
--- /dev/null
+++ b/lib/unistr/u8-strstr.c
@@ -0,0 +1,28 @@
+/* Substring test for UTF-8 strings.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+/* FIXME: Maybe walking the string via u8_mblen is a win? */
+/* Parameters for u-strstr.h, which presumably defines FUNC from them -- confirm there. */
+#define FUNC u8_strstr
+#define UNIT uint8_t
+#define U_STRCHR u8_strchr
+#include "u-strstr.h"
diff --git a/lib/unistr/u8-strtok.c b/lib/unistr/u8-strtok.c
new file mode 100644
index 0000000..a8d3821
--- /dev/null
+++ b/lib/unistr/u8-strtok.c
@@ -0,0 +1,27 @@
+/* Tokenize UTF-8 string.
+ Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+/* Parameters for u-strtok.h, which presumably defines FUNC from them -- confirm there. */
+#define FUNC u8_strtok
+#define UNIT uint8_t
+#define U_STRSPN u8_strspn
+#define U_STRPBRK u8_strpbrk
+#include "u-strtok.h"
diff --git a/lib/unistr/u8-to-u16.c b/lib/unistr/u8-to-u16.c
new file mode 100644
index 0000000..03c6600
--- /dev/null
+++ b/lib/unistr/u8-to-u16.c
@@ -0,0 +1,136 @@
+/* Convert UTF-8 string to UTF-16 string.
+ Copyright (C) 2002, 2006-2007 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u8_to_u16
+#define SRC_UNIT uint8_t
+#define DST_UNIT uint16_t
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+/* Convert the N units of UTF-8 at S to UTF-16; on failure return NULL with errno set. */
+DST_UNIT *
+FUNC (const SRC_UNIT *s, size_t n, DST_UNIT *resultbuf, size_t *lengthp)
+{
+ const SRC_UNIT *s_end = s + n;
+ /* Output accumulator: RESULTBUF while it suffices, malloc'ed storage after. */
+ DST_UNIT *result;
+ size_t allocated; /* capacity of result, in units */
+ size_t length; /* units stored in result so far */
+
+ if (resultbuf != NULL)
+ {
+ result = resultbuf;
+ allocated = *lengthp; /* on entry *lengthp is taken as RESULTBUF's capacity */
+ }
+ else
+ {
+ result = NULL;
+ allocated = 0;
+ }
+ length = 0;
+ /* Invariants:
+ result is either == resultbuf or == NULL or malloc-allocated.
+ If length > 0, then result != NULL. */
+
+ while (s < s_end)
+ {
+ ucs4_t uc;
+ int count;
+
+ /* Fetch a Unicode character from the input string. */
+ count = u8_mbtouc (&uc, s, s_end - s);
+ if (count < 0)
+ {
+ if (!(result == resultbuf || result == NULL)) /* free only our own block */
+ free (result);
+ errno = EILSEQ;
+ return NULL;
+ }
+ s += count;
+
+ /* Store it in the output string. */
+ count = u16_uctomb (result + length, uc, allocated - length);
+ if (count == -1) /* presumably: uc cannot be encoded (cf. u8_uctomb) -- confirm */
+ {
+ if (!(result == resultbuf || result == NULL))
+ free (result);
+ errno = EILSEQ;
+ return NULL;
+ }
+ if (count == -2) /* presumably: not enough room (cf. u8_uctomb); grow and retry */
+ {
+ DST_UNIT *memory;
+
+ allocated = (allocated > 0 ? 2 * allocated : 12); /* double, or start at 12 */
+ if (length + 2 > allocated) /* make sure two more units fit */
+ allocated = length + 2;
+ if (result == resultbuf || result == NULL) /* nothing of ours to realloc */
+ memory = (DST_UNIT *) malloc (allocated * sizeof (DST_UNIT));
+ else
+ memory =
+ (DST_UNIT *) realloc (result, allocated * sizeof (DST_UNIT));
+
+ if (memory == NULL)
+ {
+ if (!(result == resultbuf || result == NULL))
+ free (result);
+ errno = ENOMEM;
+ return NULL;
+ }
+ if (result == resultbuf && length > 0) /* carry over units already in RESULTBUF */
+ memcpy ((char *) memory, (char *) result,
+ length * sizeof (DST_UNIT));
+ result = memory;
+ count = u16_uctomb (result + length, uc, allocated - length);
+ if (count < 0) /* not expected once the buffer has been enlarged */
+ abort ();
+ }
+ length += count;
+ }
+
+ if (length == 0)
+ {
+ if (result == NULL)
+ {
+ /* Return a non-NULL value. NULL means error. */
+ result = (DST_UNIT *) malloc (1);
+ if (result == NULL)
+ {
+ errno = ENOMEM;
+ return NULL;
+ }
+ }
+ }
+ else if (result != resultbuf && length < allocated)
+ {
+ /* Shrink the allocated memory if possible. */
+ DST_UNIT *memory;
+
+ memory = (DST_UNIT *) realloc (result, length * sizeof (DST_UNIT));
+ if (memory != NULL) /* if shrinking fails, keep the larger block */
+ result = memory;
+ }
+
+ *lengthp = length; /* on success *lengthp receives the number of units produced */
+ return result;
+}
diff --git a/lib/unistr/u8-to-u32.c b/lib/unistr/u8-to-u32.c
new file mode 100644
index 0000000..2a0ad39
--- /dev/null
+++ b/lib/unistr/u8-to-u32.c
@@ -0,0 +1,125 @@
+/* Convert UTF-8 string to UTF-32 string.
+ Copyright (C) 2002, 2006-2007 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u8_to_u32
+#define SRC_UNIT uint8_t
+#define DST_UNIT uint32_t
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+/* Convert the N units of UTF-8 at S to UTF-32; on failure return NULL with errno set. */
+DST_UNIT *
+FUNC (const SRC_UNIT *s, size_t n, DST_UNIT *resultbuf, size_t *lengthp)
+{
+ const SRC_UNIT *s_end = s + n;
+ /* Output accumulator: RESULTBUF while it suffices, malloc'ed storage after. */
+ DST_UNIT *result;
+ size_t allocated; /* capacity of result, in units */
+ size_t length; /* units stored in result so far */
+
+ if (resultbuf != NULL)
+ {
+ result = resultbuf;
+ allocated = *lengthp; /* on entry *lengthp is taken as RESULTBUF's capacity */
+ }
+ else
+ {
+ result = NULL;
+ allocated = 0;
+ }
+ length = 0;
+ /* Invariants:
+ result is either == resultbuf or == NULL or malloc-allocated.
+ If length > 0, then result != NULL. */
+
+ while (s < s_end)
+ {
+ ucs4_t uc;
+ int count;
+
+ /* Fetch a Unicode character from the input string. */
+ count = u8_mbtouc (&uc, s, s_end - s);
+ if (count < 0)
+ {
+ if (!(result == resultbuf || result == NULL)) /* free only our own block */
+ free (result);
+ errno = EILSEQ;
+ return NULL;
+ }
+ s += count;
+
+ /* Store it in the output string. */
+ if (length + 1 > allocated) /* no room for one more unit: grow first */
+ {
+ DST_UNIT *memory;
+
+ allocated = (allocated > 0 ? 2 * allocated : 12); /* double, or start at 12 */
+ if (length + 1 > allocated)
+ allocated = length + 1;
+ if (result == resultbuf || result == NULL) /* nothing of ours to realloc */
+ memory = (DST_UNIT *) malloc (allocated * sizeof (DST_UNIT));
+ else
+ memory =
+ (DST_UNIT *) realloc (result, allocated * sizeof (DST_UNIT));
+
+ if (memory == NULL)
+ {
+ if (!(result == resultbuf || result == NULL))
+ free (result);
+ errno = ENOMEM;
+ return NULL;
+ }
+ if (result == resultbuf && length > 0) /* carry over units already in RESULTBUF */
+ memcpy ((char *) memory, (char *) result,
+ length * sizeof (DST_UNIT));
+ result = memory;
+ }
+ result[length++] = uc; /* one output unit per decoded character */
+ }
+
+ if (length == 0)
+ {
+ if (result == NULL)
+ {
+ /* Return a non-NULL value. NULL means error. */
+ result = (DST_UNIT *) malloc (1);
+ if (result == NULL)
+ {
+ errno = ENOMEM;
+ return NULL;
+ }
+ }
+ }
+ else if (result != resultbuf && length < allocated)
+ {
+ /* Shrink the allocated memory if possible. */
+ DST_UNIT *memory;
+
+ memory = (DST_UNIT *) realloc (result, length * sizeof (DST_UNIT));
+ if (memory != NULL) /* if shrinking fails, keep the larger block */
+ result = memory;
+ }
+
+ *lengthp = length; /* on success *lengthp receives the number of units produced */
+ return result;
+}
diff --git a/lib/unistr/u8-uctomb-aux.c b/lib/unistr/u8-uctomb-aux.c
new file mode 100644
index 0000000..c42fa50
--- /dev/null
+++ b/lib/unistr/u8-uctomb-aux.c
@@ -0,0 +1,69 @@
+/* Conversion UCS-4 to UTF-8.
+ Copyright (C) 2002, 2006-2007 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+/* Encode UC as UTF-8 in S; returns the unit count, -1 (invalid UC) or -2 (N too small). */
+int
+u8_uctomb_aux (uint8_t *s, ucs4_t uc, int n)
+{
+ int count; /* number of units the encoding of UC needs */
+
+ if (uc < 0x80)
+ /* The case n >= 1 is already handled by the caller. */
+ return -2;
+ else if (uc < 0x800)
+ count = 2;
+ else if (uc < 0x10000)
+ {
+ if (uc < 0xd800 || uc >= 0xe000) /* values 0xd800..0xdfff are rejected */
+ count = 3;
+ else
+ return -1;
+ }
+#if 0
+ else if (uc < 0x200000)
+ count = 4;
+ else if (uc < 0x4000000)
+ count = 5;
+ else if (uc <= 0x7fffffff)
+ count = 6;
+#else
+ else if (uc < 0x110000) /* 0x10ffff is the largest value accepted */
+ count = 4;
+#endif
+ else
+ return -1;
+
+ if (n < count)
+ return -2;
+
+ switch (count) /* deliberate fall-through: units are stored last to first */
+ {
+#if 0
+ case 6: s[5] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x4000000;
+ case 5: s[4] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x200000;
+#endif
+ case 4: s[3] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x10000;
+ case 3: s[2] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x800;
+ case 2: s[1] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0xc0;
+ /*case 1:*/ s[0] = uc; /* first unit: the bits OR'ed in above form its prefix */
+ }
+ return count;
+}
diff --git a/lib/unistr/u8-uctomb.c b/lib/unistr/u8-uctomb.c
new file mode 100644
index 0000000..3392166
--- /dev/null
+++ b/lib/unistr/u8-uctomb.c
@@ -0,0 +1,88 @@
+/* Store a character in UTF-8 string.
+ Copyright (C) 2002, 2005-2006, 2009 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#if defined IN_LIBUNISTRING
+/* Tell unistr.h to declare u8_uctomb as 'extern', not 'static inline'. */
+# include "unistring-notinline.h"
+#endif
+
+/* Specification. */
+#include "unistr.h"
+
+#if !HAVE_INLINE
+/* Encode UC as UTF-8 in S; returns the unit count, -1 (invalid UC) or -2 (N too small). */
+int
+u8_uctomb (uint8_t *s, ucs4_t uc, int n)
+{
+ if (uc < 0x80) /* UC fits in a single unit */
+ {
+ if (n > 0)
+ {
+ s[0] = uc;
+ return 1;
+ }
+ /* else return -2, below. */
+ }
+ else
+ {
+ int count; /* number of units the encoding of UC needs */
+
+ if (uc < 0x800)
+ count = 2;
+ else if (uc < 0x10000)
+ {
+ if (uc < 0xd800 || uc >= 0xe000) /* values 0xd800..0xdfff are rejected */
+ count = 3;
+ else
+ return -1;
+ }
+#if 0
+ else if (uc < 0x200000)
+ count = 4;
+ else if (uc < 0x4000000)
+ count = 5;
+ else if (uc <= 0x7fffffff)
+ count = 6;
+#else
+ else if (uc < 0x110000) /* 0x10ffff is the largest value accepted */
+ count = 4;
+#endif
+ else
+ return -1;
+
+ if (n >= count)
+ {
+ switch (count) /* deliberate fall-through: units are stored last to first */
+ {
+#if 0
+ case 6: s[5] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x4000000;
+ case 5: s[4] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x200000;
+#endif
+ case 4: s[3] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x10000;
+ case 3: s[2] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x800;
+ case 2: s[1] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0xc0;
+ /*case 1:*/ s[0] = uc; /* first unit: the bits OR'ed in above form its prefix */
+ }
+ return count;
+ }
+ }
+ return -2; /* S has fewer than the required number of units available */
+}
+
+#endif