diff options
author | Manuel A. Fernandez Montecelo <manuel.montezelo@gmail.com> | 2016-05-27 14:35:40 +0100 |
---|---|---|
committer | Manuel A. Fernandez Montecelo <manuel.montezelo@gmail.com> | 2016-05-27 14:35:40 +0100 |
commit | b1de003dac299705a7f01c997d2b866bafe39926 (patch) | |
tree | 1cc16a3877e945116387a380f7f3023f81fa36e4 /lib/unistr | |
parent | 752fd7247bc223bcea35bd89cf56d1c08ead9ba6 (diff) | |
parent | 3590c846d4c2febbc05b4ad6b14a06edc549e453 (diff) |
Merge tag 'upstream/0.9.6+really0.9.6'
Upstream version 0.9.6+really0.9.6
Diffstat (limited to 'lib/unistr')
151 files changed, 1145 insertions, 432 deletions
diff --git a/lib/unistr/u-cmp2.h b/lib/unistr/u-cmp2.h index 6058c4a..6ee062d 100644 --- a/lib/unistr/u-cmp2.h +++ b/lib/unistr/u-cmp2.h @@ -1,5 +1,5 @@ /* Compare pieces of UTF-8/UTF-16/UTF-32 strings. - Copyright (C) 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2009. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u-cpy-alloc.h b/lib/unistr/u-cpy-alloc.h index f36a8d0..5e3ad56 100644 --- a/lib/unistr/u-cpy-alloc.h +++ b/lib/unistr/u-cpy-alloc.h @@ -1,5 +1,5 @@ /* Copy piece of UTF-8/UTF-16/UTF-32 string. - Copyright (C) 1999, 2002, 2006-2007, 2009-2010 Free Software Foundation, + Copyright (C) 1999, 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. diff --git a/lib/unistr/u-cpy.h b/lib/unistr/u-cpy.h index 6dad952..2f9f997 100644 --- a/lib/unistr/u-cpy.h +++ b/lib/unistr/u-cpy.h @@ -1,5 +1,5 @@ /* Copy piece of UTF-8/UTF-16/UTF-32 string. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u-endswith.h b/lib/unistr/u-endswith.h index 094a87f..cd366c1 100644 --- a/lib/unistr/u-endswith.h +++ b/lib/unistr/u-endswith.h @@ -1,5 +1,5 @@ /* Substring test for UTF-8/UTF-16/UTF-32 strings. - Copyright (C) 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u-move.h b/lib/unistr/u-move.h index d8d58b0..13332a0 100644 --- a/lib/unistr/u-move.h +++ b/lib/unistr/u-move.h @@ -1,5 +1,5 @@ /* Copy piece of UTF-8/UTF-16/UTF-32 string. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u-set.h b/lib/unistr/u-set.h index de78a8e..8cb2737 100644 --- a/lib/unistr/u-set.h +++ b/lib/unistr/u-set.h @@ -1,5 +1,5 @@ /* Fill UTF-8/UTF-16/UTF-32 string. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u-startswith.h b/lib/unistr/u-startswith.h index 1696651..393dad5 100644 --- a/lib/unistr/u-startswith.h +++ b/lib/unistr/u-startswith.h @@ -1,5 +1,5 @@ /* Substring test for UTF-8/UTF-16/UTF-32 strings. - Copyright (C) 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u-stpcpy.h b/lib/unistr/u-stpcpy.h index 483f3c2..bbb3623 100644 --- a/lib/unistr/u-stpcpy.h +++ b/lib/unistr/u-stpcpy.h @@ -1,5 +1,5 @@ /* Copy UTF-8/UTF-16/UTF-32 string. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u-stpncpy.h b/lib/unistr/u-stpncpy.h index 4d6dd3c..545e102 100644 --- a/lib/unistr/u-stpncpy.h +++ b/lib/unistr/u-stpncpy.h @@ -1,5 +1,5 @@ /* Copy UTF-8/UTF-16/UTF-32 string. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u-strcat.h b/lib/unistr/u-strcat.h index 84430fc..4119d39 100644 --- a/lib/unistr/u-strcat.h +++ b/lib/unistr/u-strcat.h @@ -1,5 +1,5 @@ /* Concatenate UTF-8/UTF-16/UTF-32 strings. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u-strcoll.h b/lib/unistr/u-strcoll.h index 9ec5c60..014e11b 100644 --- a/lib/unistr/u-strcoll.h +++ b/lib/unistr/u-strcoll.h @@ -1,6 +1,6 @@ /* Compare UTF-8/UTF-16/UTF-32 strings using the collation rules of the current locale. - Copyright (C) 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2009. This program is free software: you can redistribute it and/or modify it @@ -23,14 +23,19 @@ FUNC (const UNIT *s1, const UNIT *s2) When it fails, it sets errno, but also returns a meaningful return value, for the sake of callers which ignore errno. */ int final_errno = errno; + const char *encoding = locale_charset (); char *sl1; char *sl2; int result; - sl1 = U_STRCONV_TO_LOCALE (s1); + /* Pass iconveh_error here, not iconveh_question_mark. Otherwise the + conversion to locale encoding can do transliteration or map some + characters to question marks, leading to results that depend on the + iconv() implementation and are not obvious. */ + sl1 = U_STRCONV_TO_ENCODING (s1, encoding, iconveh_error); if (sl1 != NULL) { - sl2 = U_STRCONV_TO_LOCALE (s2); + sl2 = U_STRCONV_TO_ENCODING (s2, encoding, iconveh_error); if (sl2 != NULL) { /* Compare sl1 and sl2. */ @@ -41,10 +46,10 @@ FUNC (const UNIT *s1, const UNIT *s2) /* strcoll succeeded. */ free (sl1); free (sl2); - /* The conversion to locale encoding can do transliteration or - map some characters to question marks. Therefore sl1 and sl2 - may be equal when s1 and s2 were in fact different. Return a - nonzero result in this case. */ + /* The conversion to locale encoding can drop Unicode TAG + characters. Therefore sl1 and sl2 may be equal when s1 + and s2 were in fact different. Return a nonzero result + in this case. */ if (result == 0) result = U_STRCMP (s1, s2); } @@ -68,7 +73,7 @@ FUNC (const UNIT *s1, const UNIT *s2) else { final_errno = errno; - sl2 = U_STRCONV_TO_LOCALE (s2); + sl2 = U_STRCONV_TO_ENCODING (s2, encoding, iconveh_error); if (sl2 != NULL) { /* s2 could be converted to locale encoding, s1 not. */ diff --git a/lib/unistr/u-strcpy.h b/lib/unistr/u-strcpy.h index b059aea..aca6847 100644 --- a/lib/unistr/u-strcpy.h +++ b/lib/unistr/u-strcpy.h @@ -1,5 +1,5 @@ /* Copy UTF-8/UTF-16/UTF-32 string. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u-strcspn.h b/lib/unistr/u-strcspn.h index 77b9550..bb287d8 100644 --- a/lib/unistr/u-strcspn.h +++ b/lib/unistr/u-strcspn.h @@ -1,5 +1,5 @@ /* Search for some characters in UTF-8/UTF-16/UTF-32 string. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u-strdup.h b/lib/unistr/u-strdup.h index a92e1ef..80cb454 100644 --- a/lib/unistr/u-strdup.h +++ b/lib/unistr/u-strdup.h @@ -1,5 +1,5 @@ /* Copy UTF-8/UTF-16/UTF-32 string. - Copyright (C) 1999, 2002, 2006-2007, 2009-2010 Free Software Foundation, + Copyright (C) 1999, 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. diff --git a/lib/unistr/u-strlen.h b/lib/unistr/u-strlen.h index a54d614..11d9831 100644 --- a/lib/unistr/u-strlen.h +++ b/lib/unistr/u-strlen.h @@ -1,5 +1,5 @@ /* Determine length of UTF-8/UTF-16/UTF-32 string. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u-strncat.h b/lib/unistr/u-strncat.h index e5c9a04..f202d32 100644 --- a/lib/unistr/u-strncat.h +++ b/lib/unistr/u-strncat.h @@ -1,5 +1,5 @@ /* Concatenate UTF-8/UTF-16/UTF-32 strings. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u-strncpy.h b/lib/unistr/u-strncpy.h index 55a4f67..8354d0c 100644 --- a/lib/unistr/u-strncpy.h +++ b/lib/unistr/u-strncpy.h @@ -1,5 +1,5 @@ /* Copy UTF-8/UTF-16/UTF-32 string. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u-strnlen.h b/lib/unistr/u-strnlen.h index 9bcc3da..bcf1e03 100644 --- a/lib/unistr/u-strnlen.h +++ b/lib/unistr/u-strnlen.h @@ -1,5 +1,5 @@ /* Determine bounded length of UTF-8/UTF-16/UTF-32 string. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u-strpbrk.h b/lib/unistr/u-strpbrk.h index 34aabc5..60653ab 100644 --- a/lib/unistr/u-strpbrk.h +++ b/lib/unistr/u-strpbrk.h @@ -1,5 +1,5 @@ /* Search for some characters in UTF-8/UTF-16/UTF-32 string. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u-strspn.h b/lib/unistr/u-strspn.h index d6669af..d15c46e 100644 --- a/lib/unistr/u-strspn.h +++ b/lib/unistr/u-strspn.h @@ -1,5 +1,5 @@ /* Search for some characters in UTF-8/UTF-16/UTF-32 string. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u-strstr.h b/lib/unistr/u-strstr.h index df32be8..459215e 100644 --- a/lib/unistr/u-strstr.h +++ b/lib/unistr/u-strstr.h @@ -1,6 +1,6 @@ /* Substring test for UTF-8/UTF-16/UTF-32 strings. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. - Written by Bruno Haible <bruno@clisp.org>, 2002. + Copyright (C) 1999, 2002, 2006, 2010-2015 Free Software Foundation, Inc. + Written by Bruno Haible <bruno@clisp.org>, 2002, 2005. This program is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published @@ -24,26 +24,108 @@ FUNC (const UNIT *haystack, const UNIT *needle) if (first == 0) return (UNIT *) haystack; - /* Is needle nearly empty? */ + /* Is needle nearly empty (only one unit)? */ if (needle[1] == 0) return U_STRCHR (haystack, first); - /* Search for needle's first unit. */ - for (; *haystack != 0; haystack++) - if (*haystack == first) +#ifdef U_STRMBTOUC + /* Is needle nearly empty (only one character)? */ + { + ucs4_t first_uc; + int count = U_STRMBTOUC (&first_uc, needle); + if (count > 0 && needle[count] == 0) + return U_STRCHR (haystack, first_uc); + } +#endif + +#if UNIT_IS_UINT8_T + return (uint8_t *) strstr ((const char *) haystack, (const char *) needle); +#else + { + /* Minimizing the worst-case complexity: + Let n = U_STRLEN(haystack), m = U_STRLEN(needle). + The naïve algorithm is O(n*m) worst-case. + The Knuth-Morris-Pratt algorithm is O(n) worst-case but it needs a + memory allocation. + To achieve linear complexity and yet amortize the cost of the + memory allocation, we activate the Knuth-Morris-Pratt algorithm + only once the naïve algorithm has already run for some time; more + precisely, when + - the outer loop count is >= 10, + - the average number of comparisons per outer loop is >= 5, + - the total number of comparisons is >= m. + But we try it only once. If the memory allocation attempt failed, + we don't retry it. */ + bool try_kmp = true; + size_t outer_loop_count = 0; + size_t comparison_count = 0; + size_t last_ccount = 0; /* last comparison count */ + const UNIT *needle_last_ccount = needle; /* = needle + last_ccount */ + + /* Speed up the following searches of needle by caching its first + character. */ + UNIT b = *needle++; + + for (;; haystack++) { - /* Compare with needle's remaining units. */ - const UNIT *hptr = haystack + 1; - const UNIT *nptr = needle + 1; - for (;;) + if (*haystack == 0) + /* No match. */ + return NULL; + + /* See whether it's advisable to use an asymptotically faster + algorithm. */ + if (try_kmp + && outer_loop_count >= 10 + && comparison_count >= 5 * outer_loop_count) { - if (*hptr != *nptr) - break; - hptr++; nptr++; - if (*nptr == 0) - return (UNIT *) haystack; + /* See if needle + comparison_count now reaches the end of + needle. */ + if (needle_last_ccount != NULL) + { + needle_last_ccount += + U_STRNLEN (needle_last_ccount, + comparison_count - last_ccount); + if (*needle_last_ccount == 0) + needle_last_ccount = NULL; + last_ccount = comparison_count; + } + if (needle_last_ccount == NULL) + { + /* Try the Knuth-Morris-Pratt algorithm. */ + const UNIT *result; + bool success = + knuth_morris_pratt (haystack, + needle - 1, U_STRLEN (needle - 1), + &result); + if (success) + return (UNIT *) result; + try_kmp = false; + } } - } - return NULL; + outer_loop_count++; + comparison_count++; + if (*haystack == b) + /* The first character matches. */ + { + const UNIT *rhaystack = haystack + 1; + const UNIT *rneedle = needle; + + for (;; rhaystack++, rneedle++) + { + if (*rneedle == 0) + /* Found a match. */ + return (UNIT *) haystack; + if (*rhaystack == 0) + /* No match. */ + return NULL; + comparison_count++; + if (*rhaystack != *rneedle) + /* Nothing in this round. */ + break; + } + } + } + } +#endif } diff --git a/lib/unistr/u-strtok.h b/lib/unistr/u-strtok.h index 7fdef57..edafa1b 100644 --- a/lib/unistr/u-strtok.h +++ b/lib/unistr/u-strtok.h @@ -1,5 +1,5 @@ /* Tokenize UTF-8/UTF-16/UTF-32 string. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it @@ -40,9 +40,9 @@ FUNC (UNIT *str, const UNIT *delim, UNIT **ptr) UNIT *token_end = U_STRPBRK (str, delim); if (token_end) { + *ptr = token_end + U_STRMBLEN (token_end); /* NUL-terminate the token. */ *token_end = 0; - *ptr = token_end + 1; } else *ptr = NULL; diff --git a/lib/unistr/u16-check.c b/lib/unistr/u16-check.c index 4f18383..ef6e334 100644 --- a/lib/unistr/u16-check.c +++ b/lib/unistr/u16-check.c @@ -1,5 +1,5 @@ /* Check UTF-16 string. - Copyright (C) 2002, 2006-2007, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u16-chr.c b/lib/unistr/u16-chr.c index 13deef4..cbf8dd2 100644 --- a/lib/unistr/u16-chr.c +++ b/lib/unistr/u16-chr.c @@ -1,5 +1,5 @@ /* Search character in piece of UTF-16 string. - Copyright (C) 1999, 2002, 2006-2007, 2009-2010 Free Software Foundation, + Copyright (C) 1999, 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. diff --git a/lib/unistr/u16-cmp.c b/lib/unistr/u16-cmp.c index b71c1c3..01059f8 100644 --- a/lib/unistr/u16-cmp.c +++ b/lib/unistr/u16-cmp.c @@ -1,5 +1,5 @@ /* Compare pieces of UTF-16 strings. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u16-cmp2.c b/lib/unistr/u16-cmp2.c index 8c2a8d6..687356f 100644 --- a/lib/unistr/u16-cmp2.c +++ b/lib/unistr/u16-cmp2.c @@ -1,5 +1,5 @@ /* Compare pieces of UTF-16 strings. - Copyright (C) 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2009. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u16-cpy-alloc.c b/lib/unistr/u16-cpy-alloc.c index 33984f9..567855d 100644 --- a/lib/unistr/u16-cpy-alloc.c +++ b/lib/unistr/u16-cpy-alloc.c @@ -1,5 +1,5 @@ /* Copy piece of UTF-16 string. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u16-cpy.c b/lib/unistr/u16-cpy.c index 614d24c..cdcf6af 100644 --- a/lib/unistr/u16-cpy.c +++ b/lib/unistr/u16-cpy.c @@ -1,5 +1,5 @@ /* Copy piece of UTF-16 string. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u16-endswith.c b/lib/unistr/u16-endswith.c index 27dcd5a..ee7c392 100644 --- a/lib/unistr/u16-endswith.c +++ b/lib/unistr/u16-endswith.c @@ -1,5 +1,5 @@ /* Substring test for UTF-16 strings. - Copyright (C) 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u16-mblen.c b/lib/unistr/u16-mblen.c index 9e7a93a..6fab7e8 100644 --- a/lib/unistr/u16-mblen.c +++ b/lib/unistr/u16-mblen.c @@ -1,5 +1,5 @@ /* Look at first character in UTF-16 string. - Copyright (C) 1999-2000, 2002, 2006-2007, 2009-2010 Free Software + Copyright (C) 1999-2000, 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. diff --git a/lib/unistr/u16-mbsnlen.c b/lib/unistr/u16-mbsnlen.c index 601d81d..68f8d67 100644 --- a/lib/unistr/u16-mbsnlen.c +++ b/lib/unistr/u16-mbsnlen.c @@ -1,5 +1,5 @@ /* Count characters in UTF-16 string. - Copyright (C) 2007, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2007, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2007. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u16-mbtouc-aux.c b/lib/unistr/u16-mbtouc-aux.c index bee77fc..e4fb94b 100644 --- a/lib/unistr/u16-mbtouc-aux.c +++ b/lib/unistr/u16-mbtouc-aux.c @@ -1,5 +1,5 @@ /* Conversion UTF-16 to UCS-4. - Copyright (C) 2001-2002, 2006-2007, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2001-2002, 2006-2007, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2001. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u16-mbtouc-unsafe-aux.c b/lib/unistr/u16-mbtouc-unsafe-aux.c index f2d7225..d616096 100644 --- a/lib/unistr/u16-mbtouc-unsafe-aux.c +++ b/lib/unistr/u16-mbtouc-unsafe-aux.c @@ -1,5 +1,5 @@ /* Conversion UTF-16 to UCS-4. - Copyright (C) 2001-2002, 2006-2007, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2001-2002, 2006-2007, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2001. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u16-mbtouc-unsafe.c b/lib/unistr/u16-mbtouc-unsafe.c index a5a3638..fef8911 100644 --- a/lib/unistr/u16-mbtouc-unsafe.c +++ b/lib/unistr/u16-mbtouc-unsafe.c @@ -1,5 +1,5 @@ /* Look at first character in UTF-16 string. - Copyright (C) 1999-2002, 2006-2007, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999-2002, 2006-2007, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2001. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u16-mbtouc.c b/lib/unistr/u16-mbtouc.c index 26b6089..7855ed4 100644 --- a/lib/unistr/u16-mbtouc.c +++ b/lib/unistr/u16-mbtouc.c @@ -1,5 +1,5 @@ /* Look at first character in UTF-16 string. - Copyright (C) 1999-2002, 2006-2007, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999-2002, 2006-2007, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2001. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u16-mbtoucr.c b/lib/unistr/u16-mbtoucr.c index 00b7b70..65b33b7 100644 --- a/lib/unistr/u16-mbtoucr.c +++ b/lib/unistr/u16-mbtoucr.c @@ -1,5 +1,5 @@ /* Look at first character in UTF-16 string, returning an error code. - Copyright (C) 1999-2002, 2006-2007, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999-2002, 2006-2007, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2001. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u16-move.c b/lib/unistr/u16-move.c index f6efb88..39a6fdd 100644 --- a/lib/unistr/u16-move.c +++ b/lib/unistr/u16-move.c @@ -1,5 +1,5 @@ /* Copy piece of UTF-16 string. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u16-next.c b/lib/unistr/u16-next.c index 8245f11..1621b68 100644 --- a/lib/unistr/u16-next.c +++ b/lib/unistr/u16-next.c @@ -1,5 +1,5 @@ /* Iterate over next character in UTF-16 string. - Copyright (C) 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u16-prev.c b/lib/unistr/u16-prev.c index 4c902ad..e1a6854 100644 --- a/lib/unistr/u16-prev.c +++ b/lib/unistr/u16-prev.c @@ -1,5 +1,5 @@ /* Iterate over previous character in UTF-16 string. - Copyright (C) 2002, 2006-2007, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u16-set.c b/lib/unistr/u16-set.c index 7bad3d6..58644b4 100644 --- a/lib/unistr/u16-set.c +++ b/lib/unistr/u16-set.c @@ -1,5 +1,5 @@ /* Fill UTF-16 string. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u16-startswith.c b/lib/unistr/u16-startswith.c index 22a5d5f..feb1ebd 100644 --- a/lib/unistr/u16-startswith.c +++ b/lib/unistr/u16-startswith.c @@ -1,5 +1,5 @@ /* Substring test for UTF-16 strings. - Copyright (C) 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u16-stpcpy.c b/lib/unistr/u16-stpcpy.c index 59467f7..d49737f 100644 --- a/lib/unistr/u16-stpcpy.c +++ b/lib/unistr/u16-stpcpy.c @@ -1,5 +1,5 @@ /* Copy UTF-16 string. - Copyright (C) 1999, 2002, 2006-2007, 2009-2010 Free Software Foundation, + Copyright (C) 1999, 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. diff --git a/lib/unistr/u16-stpncpy.c b/lib/unistr/u16-stpncpy.c index 49c616e..315dad7 100644 --- a/lib/unistr/u16-stpncpy.c +++ b/lib/unistr/u16-stpncpy.c @@ -1,5 +1,5 @@ /* Copy UTF-16 string. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u16-strcat.c b/lib/unistr/u16-strcat.c index 8b35868..9d933e1 100644 --- a/lib/unistr/u16-strcat.c +++ b/lib/unistr/u16-strcat.c @@ -1,5 +1,5 @@ /* Concatenate UTF-16 strings. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u16-strchr.c b/lib/unistr/u16-strchr.c index 1f8719f..c89c174 100644 --- a/lib/unistr/u16-strchr.c +++ b/lib/unistr/u16-strchr.c @@ -1,5 +1,5 @@ /* Search character in UTF-16 string. - Copyright (C) 1999, 2002, 2006-2007, 2009-2010 Free Software Foundation, + Copyright (C) 1999, 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. diff --git a/lib/unistr/u16-strcmp.c b/lib/unistr/u16-strcmp.c index b781211..f9033ef 100644 --- a/lib/unistr/u16-strcmp.c +++ b/lib/unistr/u16-strcmp.c @@ -1,5 +1,5 @@ /* Compare UTF-16 strings. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u16-strcoll.c b/lib/unistr/u16-strcoll.c index 5a504bf..2a6d757 100644 --- a/lib/unistr/u16-strcoll.c +++ b/lib/unistr/u16-strcoll.c @@ -1,5 +1,5 @@ /* Compare UTF-16 strings using the collation rules of the current locale. - Copyright (C) 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2009. This program is free software: you can redistribute it and/or modify it @@ -29,5 +29,5 @@ #define FUNC u16_strcoll #define UNIT uint16_t #define U_STRCMP u16_strcmp -#define U_STRCONV_TO_LOCALE u16_strconv_to_locale +#define U_STRCONV_TO_ENCODING u16_strconv_to_encoding #include "u-strcoll.h" diff --git a/lib/unistr/u16-strcpy.c b/lib/unistr/u16-strcpy.c index ecde75d..5deb766 100644 --- a/lib/unistr/u16-strcpy.c +++ b/lib/unistr/u16-strcpy.c @@ -1,5 +1,5 @@ /* Copy UTF-16 string. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u16-strcspn.c b/lib/unistr/u16-strcspn.c index b18bb37..52309b3 100644 --- a/lib/unistr/u16-strcspn.c +++ b/lib/unistr/u16-strcspn.c @@ -1,5 +1,5 @@ /* Search for some characters in UTF-16 string. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u16-strdup.c b/lib/unistr/u16-strdup.c index 4a94451..61144d7 100644 --- a/lib/unistr/u16-strdup.c +++ b/lib/unistr/u16-strdup.c @@ -1,5 +1,5 @@ /* Copy UTF-16 string. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u16-strlen.c b/lib/unistr/u16-strlen.c index da613c2..6a96c91 100644 --- a/lib/unistr/u16-strlen.c +++ b/lib/unistr/u16-strlen.c @@ -1,5 +1,5 @@ /* Determine length of UTF-16 string. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u16-strmblen.c b/lib/unistr/u16-strmblen.c index f07ca0b..1004ef1 100644 --- a/lib/unistr/u16-strmblen.c +++ b/lib/unistr/u16-strmblen.c @@ -1,5 +1,5 @@ /* Look at first character in UTF-16 string. - Copyright (C) 1999-2000, 2002, 2006-2007, 2009-2010 Free Software + Copyright (C) 1999-2000, 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. diff --git a/lib/unistr/u16-strmbtouc.c b/lib/unistr/u16-strmbtouc.c index 9aa5d43..89661e7 100644 --- a/lib/unistr/u16-strmbtouc.c +++ b/lib/unistr/u16-strmbtouc.c @@ -1,5 +1,5 @@ /* Look at first character in UTF-16 string. - Copyright (C) 1999-2000, 2002, 2006-2007, 2009-2010 Free Software + Copyright (C) 1999-2000, 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. diff --git a/lib/unistr/u16-strncat.c b/lib/unistr/u16-strncat.c index 7082716..cf1c4f9 100644 --- a/lib/unistr/u16-strncat.c +++ b/lib/unistr/u16-strncat.c @@ -1,5 +1,5 @@ /* Concatenate UTF-16 strings. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u16-strncmp.c b/lib/unistr/u16-strncmp.c index 3ed59c3..c76e7b7 100644 --- a/lib/unistr/u16-strncmp.c +++ b/lib/unistr/u16-strncmp.c @@ -1,5 +1,5 @@ /* Compare UTF-16 strings. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u16-strncpy.c b/lib/unistr/u16-strncpy.c index bd2eb6f..da7ebb8 100644 --- a/lib/unistr/u16-strncpy.c +++ b/lib/unistr/u16-strncpy.c @@ -1,5 +1,5 @@ /* Copy UTF-16 string. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u16-strnlen.c b/lib/unistr/u16-strnlen.c index 49e6d58..4940e02 100644 --- a/lib/unistr/u16-strnlen.c +++ b/lib/unistr/u16-strnlen.c @@ -1,5 +1,5 @@ /* Determine bounded length of UTF-16 string. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u16-strpbrk.c b/lib/unistr/u16-strpbrk.c index 45353fa..60ec122 100644 --- a/lib/unistr/u16-strpbrk.c +++ b/lib/unistr/u16-strpbrk.c @@ -1,5 +1,5 @@ /* Search for some characters in UTF-16 string. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u16-strrchr.c b/lib/unistr/u16-strrchr.c index 3cf3de2..3179a8d 100644 --- a/lib/unistr/u16-strrchr.c +++ b/lib/unistr/u16-strrchr.c @@ -1,5 +1,5 @@ /* Search character in UTF-16 string. - Copyright (C) 1999, 2002, 2006-2007, 2009-2010 Free Software Foundation, + Copyright (C) 1999, 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. diff --git a/lib/unistr/u16-strspn.c b/lib/unistr/u16-strspn.c index ab812eb..4a0194f 100644 --- a/lib/unistr/u16-strspn.c +++ b/lib/unistr/u16-strspn.c @@ -1,5 +1,5 @@ /* Search for some characters in UTF-16 string. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u16-strstr.c b/lib/unistr/u16-strstr.c index 54a74d6..5b87e81 100644 --- a/lib/unistr/u16-strstr.c +++ b/lib/unistr/u16-strstr.c @@ -1,5 +1,5 @@ /* Substring test for UTF-16 strings. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2010-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it @@ -20,9 +20,18 @@ /* Specification. */ #include "unistr.h" +#include "malloca.h" + /* FIXME: Maybe walking the string via u16_mblen is a win? */ -#define FUNC u16_strstr #define UNIT uint16_t + +#define CANON_ELEMENT(c) c +#include "str-kmp.h" + +#define FUNC u16_strstr #define U_STRCHR u16_strchr +#define U_STRMBTOUC u16_strmbtouc +#define U_STRLEN u16_strlen +#define U_STRNLEN u16_strnlen #include "u-strstr.h" diff --git a/lib/unistr/u16-strtok.c b/lib/unistr/u16-strtok.c index f84c465..df36cf7 100644 --- a/lib/unistr/u16-strtok.c +++ b/lib/unistr/u16-strtok.c @@ -1,5 +1,5 @@ /* Tokenize UTF-16 string. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it @@ -24,4 +24,5 @@ #define UNIT uint16_t #define U_STRSPN u16_strspn #define U_STRPBRK u16_strpbrk +#define U_STRMBLEN u16_strmblen #include "u-strtok.h" diff --git a/lib/unistr/u16-to-u32.c b/lib/unistr/u16-to-u32.c index 3544cde..8acee50 100644 --- a/lib/unistr/u16-to-u32.c +++ b/lib/unistr/u16-to-u32.c @@ -1,5 +1,5 @@ /* Convert UTF-16 string to UTF-32 string. - Copyright (C) 2002, 2006-2007, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u16-to-u8.c b/lib/unistr/u16-to-u8.c index f92cc93..3e93a94 100644 --- a/lib/unistr/u16-to-u8.c +++ b/lib/unistr/u16-to-u8.c @@ -1,5 +1,5 @@ /* Convert UTF-16 string to UTF-8 string. - Copyright (C) 2002, 2006-2007, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u16-uctomb-aux.c b/lib/unistr/u16-uctomb-aux.c index e2acc9e..55d7a14 100644 --- a/lib/unistr/u16-uctomb-aux.c +++ b/lib/unistr/u16-uctomb-aux.c @@ -1,5 +1,5 @@ /* Conversion UCS-4 to UTF-16. - Copyright (C) 2002, 2006-2007, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u16-uctomb.c b/lib/unistr/u16-uctomb.c index cbc1abc..b93fa6f 100644 --- a/lib/unistr/u16-uctomb.c +++ b/lib/unistr/u16-uctomb.c @@ -1,5 +1,5 @@ /* Store a character in UTF-16 string. - Copyright (C) 2002, 2005-2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2002, 2005-2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u32-check.c b/lib/unistr/u32-check.c index 8c5f518..53b722b 100644 --- a/lib/unistr/u32-check.c +++ b/lib/unistr/u32-check.c @@ -1,5 +1,5 @@ /* Check UTF-32 string. - Copyright (C) 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u32-chr.c b/lib/unistr/u32-chr.c index 1900288..1470c22 100644 --- a/lib/unistr/u32-chr.c +++ b/lib/unistr/u32-chr.c @@ -1,5 +1,5 @@ /* Search character in piece of UTF-32 string. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u32-cmp.c b/lib/unistr/u32-cmp.c index 36496f7..7799e38 100644 --- a/lib/unistr/u32-cmp.c +++ b/lib/unistr/u32-cmp.c @@ -1,5 +1,5 @@ /* Compare pieces of UTF-32 strings. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u32-cmp2.c b/lib/unistr/u32-cmp2.c index 3247602..b4e0a1b 100644 --- a/lib/unistr/u32-cmp2.c +++ b/lib/unistr/u32-cmp2.c @@ -1,5 +1,5 @@ /* Compare pieces of UTF-32 strings. - Copyright (C) 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2009. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u32-cpy-alloc.c b/lib/unistr/u32-cpy-alloc.c index b9ebefe..3e94298 100644 --- a/lib/unistr/u32-cpy-alloc.c +++ b/lib/unistr/u32-cpy-alloc.c @@ -1,5 +1,5 @@ /* Copy piece of UTF-32 string. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u32-cpy.c b/lib/unistr/u32-cpy.c index 747430b..ab5a451 100644 --- a/lib/unistr/u32-cpy.c +++ b/lib/unistr/u32-cpy.c @@ -1,5 +1,5 @@ /* Copy piece of UTF-32 string. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u32-endswith.c b/lib/unistr/u32-endswith.c index cee0d93..3960c8a 100644 --- a/lib/unistr/u32-endswith.c +++ b/lib/unistr/u32-endswith.c @@ -1,5 +1,5 @@ /* Substring test for UTF-32 strings. - Copyright (C) 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u32-mblen.c b/lib/unistr/u32-mblen.c index 4530fe4..45a9f49 100644 --- a/lib/unistr/u32-mblen.c +++ b/lib/unistr/u32-mblen.c @@ -1,5 +1,5 @@ /* Look at first character in UTF-32 string. - Copyright (C) 2002, 2006-2007, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u32-mbsnlen.c b/lib/unistr/u32-mbsnlen.c index 0a71293..371959e 100644 --- a/lib/unistr/u32-mbsnlen.c +++ b/lib/unistr/u32-mbsnlen.c @@ -1,5 +1,5 @@ /* Count characters in UTF-32 string. - Copyright (C) 2007, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2007, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2007. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u32-mbtouc-unsafe.c b/lib/unistr/u32-mbtouc-unsafe.c index 8dbbfe4..3244811 100644 --- a/lib/unistr/u32-mbtouc-unsafe.c +++ b/lib/unistr/u32-mbtouc-unsafe.c @@ -1,5 +1,5 @@ /* Look at first character in UTF-32 string. - Copyright (C) 2002, 2006-2007, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u32-mbtouc.c b/lib/unistr/u32-mbtouc.c index 05f7c77..5d4de51 100644 --- a/lib/unistr/u32-mbtouc.c +++ b/lib/unistr/u32-mbtouc.c @@ -1,5 +1,5 @@ /* Look at first character in UTF-32 string. - Copyright (C) 2002, 2006-2007, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u32-mbtoucr.c b/lib/unistr/u32-mbtoucr.c index 83f7782..faa5695 100644 --- a/lib/unistr/u32-mbtoucr.c +++ b/lib/unistr/u32-mbtoucr.c @@ -1,5 +1,5 @@ /* Look at first character in UTF-32 string, returning an error code. - Copyright (C) 2002, 2006-2007, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u32-move.c b/lib/unistr/u32-move.c index 6362d85..fc2aab1 100644 --- a/lib/unistr/u32-move.c +++ b/lib/unistr/u32-move.c @@ -1,5 +1,5 @@ /* Copy piece of UTF-32 string. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u32-next.c b/lib/unistr/u32-next.c index 987fb02..8be330e 100644 --- a/lib/unistr/u32-next.c +++ b/lib/unistr/u32-next.c @@ -1,5 +1,5 @@ /* Iterate over next character in UTF-32 string. - Copyright (C) 2002, 2006-2007, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u32-prev.c b/lib/unistr/u32-prev.c index f20b1b7..127029d 100644 --- a/lib/unistr/u32-prev.c +++ b/lib/unistr/u32-prev.c @@ -1,5 +1,5 @@ /* Iterate over previous character in UTF-32 string. - Copyright (C) 2002, 2006-2007, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u32-set.c b/lib/unistr/u32-set.c index 5c517cd..a60f5a8 100644 --- a/lib/unistr/u32-set.c +++ b/lib/unistr/u32-set.c @@ -1,5 +1,5 @@ /* Fill UTF-32 string. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u32-startswith.c b/lib/unistr/u32-startswith.c index 7fcb597..817310f 100644 --- a/lib/unistr/u32-startswith.c +++ b/lib/unistr/u32-startswith.c @@ -1,5 +1,5 @@ /* Substring test for UTF-32 strings. - Copyright (C) 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u32-stpcpy.c b/lib/unistr/u32-stpcpy.c index 3cbfa92..fdd5f5e 100644 --- a/lib/unistr/u32-stpcpy.c +++ b/lib/unistr/u32-stpcpy.c @@ -1,5 +1,5 @@ /* Copy UTF-32 string. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u32-stpncpy.c b/lib/unistr/u32-stpncpy.c index e55891e..124101e 100644 --- a/lib/unistr/u32-stpncpy.c +++ b/lib/unistr/u32-stpncpy.c @@ -1,5 +1,5 @@ /* Copy UTF-32 string. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u32-strcat.c b/lib/unistr/u32-strcat.c index 4e26bb4..5e51c58 100644 --- a/lib/unistr/u32-strcat.c +++ b/lib/unistr/u32-strcat.c @@ -1,5 +1,5 @@ /* Concatenate UTF-32 strings. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u32-strchr.c b/lib/unistr/u32-strchr.c index b5df7b7..355ad27 100644 --- a/lib/unistr/u32-strchr.c +++ b/lib/unistr/u32-strchr.c @@ -1,5 +1,5 @@ /* Search character in UTF-32 string. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u32-strcmp.c b/lib/unistr/u32-strcmp.c index 5f336ce..16f9b1e 100644 --- a/lib/unistr/u32-strcmp.c +++ b/lib/unistr/u32-strcmp.c @@ -1,5 +1,5 @@ /* Compare UTF-32 strings. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u32-strcoll.c b/lib/unistr/u32-strcoll.c index 9748855..7865f4c 100644 --- a/lib/unistr/u32-strcoll.c +++ b/lib/unistr/u32-strcoll.c @@ -1,5 +1,5 @@ /* Compare UTF-32 strings using the collation rules of the current locale. - Copyright (C) 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2009. This program is free software: you can redistribute it and/or modify it @@ -29,5 +29,5 @@ #define FUNC u32_strcoll #define UNIT uint32_t #define U_STRCMP u32_strcmp -#define U_STRCONV_TO_LOCALE u32_strconv_to_locale +#define U_STRCONV_TO_ENCODING u32_strconv_to_encoding #include "u-strcoll.h" diff --git a/lib/unistr/u32-strcpy.c b/lib/unistr/u32-strcpy.c index f3c70e6..9f325bc 100644 --- a/lib/unistr/u32-strcpy.c +++ b/lib/unistr/u32-strcpy.c @@ -1,5 +1,5 @@ /* Copy UTF-32 string. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u32-strcspn.c b/lib/unistr/u32-strcspn.c index 595d32e..963c732 100644 --- a/lib/unistr/u32-strcspn.c +++ b/lib/unistr/u32-strcspn.c @@ -1,5 +1,5 @@ /* Search for some characters in UTF-32 string. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u32-strdup.c b/lib/unistr/u32-strdup.c index da4cc5a..3af4622 100644 --- a/lib/unistr/u32-strdup.c +++ b/lib/unistr/u32-strdup.c @@ -1,5 +1,5 @@ /* Copy UTF-32 string. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u32-strlen.c b/lib/unistr/u32-strlen.c index 95d32ab..b87f55d 100644 --- a/lib/unistr/u32-strlen.c +++ b/lib/unistr/u32-strlen.c @@ -1,5 +1,5 @@ /* Determine length of UTF-32 string. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u32-strmblen.c b/lib/unistr/u32-strmblen.c index 54b78d7..362ea48 100644 --- a/lib/unistr/u32-strmblen.c +++ b/lib/unistr/u32-strmblen.c @@ -1,5 +1,5 @@ /* Look at first character in UTF-32 string. - Copyright (C) 2002, 2006-2007, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u32-strmbtouc.c b/lib/unistr/u32-strmbtouc.c index 4c067b9..cfa89d0 100644 --- a/lib/unistr/u32-strmbtouc.c +++ b/lib/unistr/u32-strmbtouc.c @@ -1,5 +1,5 @@ /* Look at first character in UTF-32 string. - Copyright (C) 2002, 2006-2007, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u32-strncat.c b/lib/unistr/u32-strncat.c index c9260b0..4758f4d 100644 --- a/lib/unistr/u32-strncat.c +++ b/lib/unistr/u32-strncat.c @@ -1,5 +1,5 @@ /* Concatenate UTF-32 strings. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u32-strncmp.c b/lib/unistr/u32-strncmp.c index dcbb935..f4222f6 100644 --- a/lib/unistr/u32-strncmp.c +++ b/lib/unistr/u32-strncmp.c @@ -1,5 +1,5 @@ /* Compare UTF-32 strings. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u32-strncpy.c b/lib/unistr/u32-strncpy.c index 9a54f97..1f4c31b 100644 --- a/lib/unistr/u32-strncpy.c +++ b/lib/unistr/u32-strncpy.c @@ -1,5 +1,5 @@ /* Copy UTF-32 string. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u32-strnlen.c b/lib/unistr/u32-strnlen.c index 8102ac7..df3601f 100644 --- a/lib/unistr/u32-strnlen.c +++ b/lib/unistr/u32-strnlen.c @@ -1,5 +1,5 @@ /* Determine bounded length of UTF-32 string. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u32-strpbrk.c b/lib/unistr/u32-strpbrk.c index 2c4b6bc..c9cca79 100644 --- a/lib/unistr/u32-strpbrk.c +++ b/lib/unistr/u32-strpbrk.c @@ -1,5 +1,5 @@ /* Search for some characters in UTF-32 string. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u32-strrchr.c b/lib/unistr/u32-strrchr.c index f0030e5..242d4b8 100644 --- a/lib/unistr/u32-strrchr.c +++ b/lib/unistr/u32-strrchr.c @@ -1,5 +1,5 @@ /* Search character in UTF-32 string. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u32-strspn.c b/lib/unistr/u32-strspn.c index e2571d5..2541e79 100644 --- a/lib/unistr/u32-strspn.c +++ b/lib/unistr/u32-strspn.c @@ -1,5 +1,5 @@ /* Search for some characters in UTF-32 string. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u32-strstr.c b/lib/unistr/u32-strstr.c index cad06fc..c8abe5b 100644 --- a/lib/unistr/u32-strstr.c +++ b/lib/unistr/u32-strstr.c @@ -1,5 +1,5 @@ /* Substring test for UTF-32 strings. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it @@ -20,7 +20,15 @@ /* Specification. */ #include "unistr.h" -#define FUNC u32_strstr +#include "malloca.h" + #define UNIT uint32_t + +#define CANON_ELEMENT(c) c +#include "str-kmp.h" + +#define FUNC u32_strstr #define U_STRCHR u32_strchr +#define U_STRLEN u32_strlen +#define U_STRNLEN u32_strnlen #include "u-strstr.h" diff --git a/lib/unistr/u32-strtok.c b/lib/unistr/u32-strtok.c index 067122f..f8ef999 100644 --- a/lib/unistr/u32-strtok.c +++ b/lib/unistr/u32-strtok.c @@ -1,5 +1,5 @@ /* Tokenize UTF-32 string. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it @@ -24,4 +24,5 @@ #define UNIT uint32_t #define U_STRSPN u32_strspn #define U_STRPBRK u32_strpbrk +#define U_STRMBLEN u32_strmblen #include "u-strtok.h" diff --git a/lib/unistr/u32-to-u16.c b/lib/unistr/u32-to-u16.c index be32770..e02e782 100644 --- a/lib/unistr/u32-to-u16.c +++ b/lib/unistr/u32-to-u16.c @@ -1,5 +1,5 @@ /* Convert UTF-32 string to UTF-16 string. - Copyright (C) 2002, 2006-2007, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u32-to-u8.c b/lib/unistr/u32-to-u8.c index 36710d3..0c4f4cb 100644 --- a/lib/unistr/u32-to-u8.c +++ b/lib/unistr/u32-to-u8.c @@ -1,5 +1,5 @@ /* Convert UTF-32 string to UTF-8 string. - Copyright (C) 2002, 2006-2007, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u32-uctomb.c b/lib/unistr/u32-uctomb.c index 3ac58b4..18b2f0d 100644 --- a/lib/unistr/u32-uctomb.c +++ b/lib/unistr/u32-uctomb.c @@ -1,5 +1,5 @@ /* Store a character in UTF-32 string. - Copyright (C) 2002, 2005-2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2002, 2005-2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u8-check.c b/lib/unistr/u8-check.c index 368042b..53ece76 100644 --- a/lib/unistr/u8-check.c +++ b/lib/unistr/u8-check.c @@ -1,5 +1,5 @@ /* Check UTF-8 string. - Copyright (C) 2002, 2006-2007, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it @@ -27,7 +27,7 @@ u8_check (const uint8_t *s, size_t n) while (s < s_end) { - /* Keep in sync with unistr.h and utf8-ucs4.c. */ + /* Keep in sync with unistr.h and u8-mbtouc-aux.c. */ uint8_t c = *s; if (c < 0x80) diff --git a/lib/unistr/u8-chr.c b/lib/unistr/u8-chr.c index 435d1be..c7779d2 100644 --- a/lib/unistr/u8-chr.c +++ b/lib/unistr/u8-chr.c @@ -1,5 +1,5 @@ /* Search character in piece of UTF-8 string. - Copyright (C) 1999, 2002, 2006-2007, 2009-2010 Free Software Foundation, + Copyright (C) 1999, 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. @@ -21,68 +21,181 @@ /* Specification. */ #include "unistr.h" +#include <string.h> + uint8_t * u8_chr (const uint8_t *s, size_t n, ucs4_t uc) { - uint8_t c[6]; - if (uc < 0x80) { uint8_t c0 = uc; - for (; n > 0; s++, n--) - { - if (*s == c0) - return (uint8_t *) s; - } + return (uint8_t *) memchr ((const char *) s, c0, n); } - else - switch (u8_uctomb_aux (c, uc, 6)) + + { + uint8_t c[6]; + size_t uc_size; + uc_size = u8_uctomb_aux (c, uc, 6); + + if (n < uc_size) + return NULL; + + /* For multibyte character matching we use a Boyer-Moore like + algorithm that searches for the last byte, skipping multi-byte + jumps, and matches back from there. + + Instead of using a table as is usual for Boyer-Moore, we compare + the candidate last byte s[UC_SIZE-1] with each of the possible + bytes in the UTF-8 representation of UC. If the final byte does + not match, we will perform up to UC_SIZE comparisons per memory + load---but each comparison lets us skip one byte in the input! + + If the final byte matches, the "real" Boyer-Moore algorithm + is approximated. Instead, u8_chr just looks for other cN that + are equal to the final byte and uses those to try realigning to + another possible match. For example, when searching for 0xF0 + 0xAA 0xBB 0xAA it will always skip forward by two bytes, even if + the character in the string was for example 0xF1 0xAA 0xBB 0xAA. + The advantage of this scheme is that the skip count after a failed + match can be computed outside the loop, and that it keeps the + complexity low for a pretty rare case. In particular, since c[0] + is never between 0x80 and 0xBF, c[0] is never equal to c[UC_SIZE-1] + and this is optimal for two-byte UTF-8 characters. */ + switch (uc_size) { case 2: - if (n > 1) - { - uint8_t c0 = c[0]; - uint8_t c1 = c[1]; - - for (n--; n > 0; s++, n--) - { - if (*s == c0 && s[1] == c1) - return (uint8_t *) s; - } - } - break; + { + uint8_t c0 = c[0]; + uint8_t c1 = c[1]; + const uint8_t *end = s + n - 1; + + do + { + /* Here s < end. + Test whether s[0..1] == { c0, c1 }. */ + uint8_t s1 = s[1]; + if (s1 == c1) + { + if (*s == c0) + return (uint8_t *) s; + else + /* Skip the search at s + 1, because s[1] = c1 < c0. */ + s += 2; + } + else + { + if (s1 == c0) + s++; + else + /* Skip the search at s + 1, because s[1] != c0. */ + s += 2; + } + } + while (s < end); + break; + } case 3: - if (n > 2) - { - uint8_t c0 = c[0]; - uint8_t c1 = c[1]; - uint8_t c2 = c[2]; - - for (n -= 2; n > 0; s++, n--) - { - if (*s == c0 && s[1] == c1 && s[2] == c2) - return (uint8_t *) s; - } - } - break; + { + uint8_t c0 = c[0]; + uint8_t c1 = c[1]; + uint8_t c2 = c[2]; + const uint8_t *end = s + n - 2; + size_t skip; + + if (c2 == c1) + skip = 1; + else + skip = 3; + + do + { + /* Here s < end. + Test whether s[0..2] == { c0, c1, c2 }. */ + uint8_t s2 = s[2]; + if (s2 == c2) + { + if (s[1] == c1 && *s == c0) + return (uint8_t *) s; + else + /* If c2 != c1: + Skip the search at s + 1, because s[2] == c2 != c1. + Skip the search at s + 2, because s[2] == c2 < c0. */ + s += skip; + } + else + { + if (s2 == c1) + s++; + else if (s2 == c0) + /* Skip the search at s + 1, because s[2] != c1. */ + s += 2; + else + /* Skip the search at s + 1, because s[2] != c1. + Skip the search at s + 2, because s[2] != c0. */ + s += 3; + } + } + while (s < end); + break; + } case 4: - if (n > 3) - { - uint8_t c0 = c[0]; - uint8_t c1 = c[1]; - uint8_t c2 = c[2]; - uint8_t c3 = c[3]; - - for (n -= 3; n > 0; s++, n--) - { - if (*s == c0 && s[1] == c1 && s[2] == c2 && s[3] == c3) - return (uint8_t *) s; - } - } - break; + { + uint8_t c0 = c[0]; + uint8_t c1 = c[1]; + uint8_t c2 = c[2]; + uint8_t c3 = c[3]; + const uint8_t *end = s + n - 3; + size_t skip; + + if (c3 == c2) + skip = 1; + else if (c3 == c1) + skip = 2; + else + skip = 4; + + do + { + /* Here s < end. + Test whether s[0..3] == { c0, c1, c2, c3 }. */ + uint8_t s3 = s[3]; + if (s3 == c3) + { + if (s[2] == c2 && s[1] == c1 && *s == c0) + return (uint8_t *) s; + else + /* If c3 != c2: + Skip the search at s + 1, because s[3] == c3 != c2. + If c3 != c1: + Skip the search at s + 2, because s[3] == c3 != c1. + Skip the search at s + 3, because s[3] == c3 < c0. */ + s += skip; + } + else + { + if (s3 == c2) + s++; + else if (s3 == c1) + /* Skip the search at s + 1, because s[3] != c2. */ + s += 2; + else if (s3 == c0) + /* Skip the search at s + 1, because s[3] != c2. + Skip the search at s + 2, because s[3] != c1. */ + s += 3; + else + /* Skip the search at s + 1, because s[3] != c2. + Skip the search at s + 2, because s[3] != c1. + Skip the search at s + 3, because s[3] != c0. */ + s += 4; + } + } + while (s < end); + break; + } } - return NULL; + return NULL; + } } diff --git a/lib/unistr/u8-cmp.c b/lib/unistr/u8-cmp.c index d021b1a..402de81 100644 --- a/lib/unistr/u8-cmp.c +++ b/lib/unistr/u8-cmp.c @@ -1,5 +1,5 @@ /* Compare pieces of UTF-8 strings. - Copyright (C) 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u8-cmp2.c b/lib/unistr/u8-cmp2.c index 8a97db4..dbbc8ed 100644 --- a/lib/unistr/u8-cmp2.c +++ b/lib/unistr/u8-cmp2.c @@ -1,5 +1,5 @@ /* Compare pieces of UTF-8 strings. - Copyright (C) 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2009. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u8-cpy-alloc.c b/lib/unistr/u8-cpy-alloc.c index 1884516..1e3910a 100644 --- a/lib/unistr/u8-cpy-alloc.c +++ b/lib/unistr/u8-cpy-alloc.c @@ -1,5 +1,5 @@ /* Copy piece of UTF-8 string. - Copyright (C) 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u8-cpy.c b/lib/unistr/u8-cpy.c index 2f29200..683200c 100644 --- a/lib/unistr/u8-cpy.c +++ b/lib/unistr/u8-cpy.c @@ -1,5 +1,5 @@ /* Copy piece of UTF-8 string. - Copyright (C) 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u8-endswith.c b/lib/unistr/u8-endswith.c index 833b3e1..a835dc4 100644 --- a/lib/unistr/u8-endswith.c +++ b/lib/unistr/u8-endswith.c @@ -1,5 +1,5 @@ /* Substring test for UTF-8 strings. - Copyright (C) 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u8-mblen.c b/lib/unistr/u8-mblen.c index f6a2027..131149b 100644 --- a/lib/unistr/u8-mblen.c +++ b/lib/unistr/u8-mblen.c @@ -1,5 +1,5 @@ /* Look at first character in UTF-8 string. - Copyright (C) 1999-2000, 2002, 2006-2007, 2009-2010 Free Software + Copyright (C) 1999-2000, 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. @@ -26,7 +26,7 @@ u8_mblen (const uint8_t *s, size_t n) { if (n > 0) { - /* Keep in sync with unistr.h and utf8-ucs4.c. */ + /* Keep in sync with unistr.h and u8-mbtouc-aux.c. */ uint8_t c = *s; if (c < 0x80) diff --git a/lib/unistr/u8-mbsnlen.c b/lib/unistr/u8-mbsnlen.c index 9ddc42e..ce13c38 100644 --- a/lib/unistr/u8-mbsnlen.c +++ b/lib/unistr/u8-mbsnlen.c @@ -1,5 +1,5 @@ /* Count characters in UTF-8 string. - Copyright (C) 2007, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2007, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2007. This program is free software: you can redistribute it and/or modify it @@ -33,7 +33,9 @@ u8_mbsnlen (const uint8_t *s, size_t n) characters++; if (count == -2) break; - if (count <= 0) + if (count < 0) + count = u8_mbtouc (&uc, s, n); + else if (count == 0) count = 1; s += count; n -= count; diff --git a/lib/unistr/u8-mbtouc-aux.c b/lib/unistr/u8-mbtouc-aux.c index c997589..834725f 100644 --- a/lib/unistr/u8-mbtouc-aux.c +++ b/lib/unistr/u8-mbtouc-aux.c @@ -1,5 +1,5 @@ /* Conversion UTF-8 to UCS-4. - Copyright (C) 2001-2002, 2006-2007, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2001-2002, 2006-2007, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2001. This program is free software: you can redistribute it and/or modify it @@ -45,21 +45,32 @@ u8_mbtouc_aux (ucs4_t *puc, const uint8_t *s, size_t n) { /* incomplete multibyte character */ *puc = 0xfffd; - return n; + return 1; } } else if (c < 0xf0) { if (n >= 3) { - if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 - && (c >= 0xe1 || s[1] >= 0xa0) - && (c != 0xed || s[1] < 0xa0)) + if ((s[1] ^ 0x80) < 0x40) { - *puc = ((unsigned int) (c & 0x0f) << 12) - | ((unsigned int) (s[1] ^ 0x80) << 6) - | (unsigned int) (s[2] ^ 0x80); - return 3; + if ((s[2] ^ 0x80) < 0x40) + { + if ((c >= 0xe1 || s[1] >= 0xa0) + && (c != 0xed || s[1] < 0xa0)) + { + *puc = ((unsigned int) (c & 0x0f) << 12) + | ((unsigned int) (s[1] ^ 0x80) << 6) + | (unsigned int) (s[2] ^ 0x80); + return 3; + } + /* invalid multibyte character */ + *puc = 0xfffd; + return 3; + } + /* invalid multibyte character */ + *puc = 0xfffd; + return 2; } /* invalid multibyte character */ } @@ -67,26 +78,45 @@ u8_mbtouc_aux (ucs4_t *puc, const uint8_t *s, size_t n) { /* incomplete multibyte character */ *puc = 0xfffd; - return n; + if (n == 1 || (s[1] ^ 0x80) >= 0x40) + return 1; + else + return 2; } } else if (c < 0xf8) { if (n >= 4) { - if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 - && (s[3] ^ 0x80) < 0x40 - && (c >= 0xf1 || s[1] >= 0x90) + if ((s[1] ^ 0x80) < 0x40) + { + if ((s[2] ^ 0x80) < 0x40) + { + if ((s[3] ^ 0x80) < 0x40) + { + if ((c >= 0xf1 || s[1] >= 0x90) #if 1 - && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90)) + && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90)) #endif - ) - { - *puc = ((unsigned int) (c & 0x07) << 18) - | ((unsigned int) (s[1] ^ 0x80) << 12) - | ((unsigned int) (s[2] ^ 0x80) << 6) - | (unsigned int) (s[3] ^ 0x80); - return 4; + ) + { + *puc = ((unsigned int) (c & 0x07) << 18) + | ((unsigned int) (s[1] ^ 0x80) << 12) + | ((unsigned int) (s[2] ^ 0x80) << 6) + | (unsigned int) (s[3] ^ 0x80); + return 4; + } + /* invalid multibyte character */ + *puc = 0xfffd; + return 4; + } + /* invalid multibyte character */ + *puc = 0xfffd; + return 3; + } + /* invalid multibyte character */ + *puc = 0xfffd; + return 2; } /* invalid multibyte character */ } @@ -94,7 +124,12 @@ u8_mbtouc_aux (ucs4_t *puc, const uint8_t *s, size_t n) { /* incomplete multibyte character */ *puc = 0xfffd; - return n; + if (n == 1 || (s[1] ^ 0x80) >= 0x40) + return 1; + else if (n == 2 || (s[2] ^ 0x80) >= 0x40) + return 2; + else + return 3; } } #if 0 @@ -102,16 +137,37 @@ u8_mbtouc_aux (ucs4_t *puc, const uint8_t *s, size_t n) { if (n >= 5) { - if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 - && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40 - && (c >= 0xf9 || s[1] >= 0x88)) + if ((s[1] ^ 0x80) < 0x40) { - *puc = ((unsigned int) (c & 0x03) << 24) - | ((unsigned int) (s[1] ^ 0x80) << 18) - | ((unsigned int) (s[2] ^ 0x80) << 12) - | ((unsigned int) (s[3] ^ 0x80) << 6) - | (unsigned int) (s[4] ^ 0x80); - return 5; + if ((s[2] ^ 0x80) < 0x40) + { + if ((s[3] ^ 0x80) < 0x40) + { + if ((s[4] ^ 0x80) < 0x40) + { + if (c >= 0xf9 || s[1] >= 0x88) + { + *puc = ((unsigned int) (c & 0x03) << 24) + | ((unsigned int) (s[1] ^ 0x80) << 18) + | ((unsigned int) (s[2] ^ 0x80) << 12) + | ((unsigned int) (s[3] ^ 0x80) << 6) + | (unsigned int) (s[4] ^ 0x80); + return 5; + } + /* invalid multibyte character */ + *puc = 0xfffd; + return 5; + } + /* invalid multibyte character */ + *puc = 0xfffd; + return 4; + } + /* invalid multibyte character */ + *puc = 0xfffd; + return 3; + } + /* invalid multibyte character */ + return 2; } /* invalid multibyte character */ } @@ -126,18 +182,44 @@ u8_mbtouc_aux (ucs4_t *puc, const uint8_t *s, size_t n) { if (n >= 6) { - if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 - && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40 - && (s[5] ^ 0x80) < 0x40 - && (c >= 0xfd || s[1] >= 0x84)) + if ((s[1] ^ 0x80) < 0x40) { - *puc = ((unsigned int) (c & 0x01) << 30) - | ((unsigned int) (s[1] ^ 0x80) << 24) - | ((unsigned int) (s[2] ^ 0x80) << 18) - | ((unsigned int) (s[3] ^ 0x80) << 12) - | ((unsigned int) (s[4] ^ 0x80) << 6) - | (unsigned int) (s[5] ^ 0x80); - return 6; + if ((s[2] ^ 0x80) < 0x40) + { + if ((s[3] ^ 0x80) < 0x40) + { + if ((s[4] ^ 0x80) < 0x40) + { + if ((s[5] ^ 0x80) < 0x40) + { + if (c >= 0xfd || s[1] >= 0x84) + { + *puc = ((unsigned int) (c & 0x01) << 30) + | ((unsigned int) (s[1] ^ 0x80) << 24) + | ((unsigned int) (s[2] ^ 0x80) << 18) + | ((unsigned int) (s[3] ^ 0x80) << 12) + | ((unsigned int) (s[4] ^ 0x80) << 6) + | (unsigned int) (s[5] ^ 0x80); + return 6; + } + /* invalid multibyte character */ + *puc = 0xfffd; + return 6; + } + /* invalid multibyte character */ + *puc = 0xfffd; + return 5; + } + /* invalid multibyte character */ + *puc = 0xfffd; + return 4; + } + /* invalid multibyte character */ + *puc = 0xfffd; + return 3; + } + /* invalid multibyte character */ + return 2; } /* invalid multibyte character */ } diff --git a/lib/unistr/u8-mbtouc-unsafe-aux.c b/lib/unistr/u8-mbtouc-unsafe-aux.c index 47590e3..b406d3e 100644 --- a/lib/unistr/u8-mbtouc-unsafe-aux.c +++ b/lib/unistr/u8-mbtouc-unsafe-aux.c @@ -1,5 +1,5 @@ /* Conversion UTF-8 to UCS-4. - Copyright (C) 2001-2002, 2006-2007, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2001-2002, 2006-2007, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2001. This program is free software: you can redistribute it and/or modify it @@ -41,13 +41,15 @@ u8_mbtouc_unsafe_aux (ucs4_t *puc, const uint8_t *s, size_t n) | (unsigned int) (s[1] ^ 0x80); return 2; } +#if CONFIG_UNICODE_SAFETY /* invalid multibyte character */ +#endif } else { /* incomplete multibyte character */ *puc = 0xfffd; - return n; + return 1; } } else if (c < 0xf0) @@ -55,23 +57,39 @@ u8_mbtouc_unsafe_aux (ucs4_t *puc, const uint8_t *s, size_t n) if (n >= 3) { #if CONFIG_UNICODE_SAFETY - if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 - && (c >= 0xe1 || s[1] >= 0xa0) - && (c != 0xed || s[1] < 0xa0)) -#endif + if ((s[1] ^ 0x80) < 0x40) { - *puc = ((unsigned int) (c & 0x0f) << 12) - | ((unsigned int) (s[1] ^ 0x80) << 6) - | (unsigned int) (s[2] ^ 0x80); - return 3; + if ((s[2] ^ 0x80) < 0x40) + { + if ((c >= 0xe1 || s[1] >= 0xa0) + && (c != 0xed || s[1] < 0xa0)) +#endif + { + *puc = ((unsigned int) (c & 0x0f) << 12) + | ((unsigned int) (s[1] ^ 0x80) << 6) + | (unsigned int) (s[2] ^ 0x80); + return 3; + } +#if CONFIG_UNICODE_SAFETY + /* invalid multibyte character */ + *puc = 0xfffd; + return 3; + } + /* invalid multibyte character */ + *puc = 0xfffd; + return 2; } /* invalid multibyte character */ +#endif } else { /* incomplete multibyte character */ *puc = 0xfffd; - return n; + if (n == 1 || (s[1] ^ 0x80) >= 0x40) + return 1; + else + return 2; } } else if (c < 0xf8) @@ -79,28 +97,51 @@ u8_mbtouc_unsafe_aux (ucs4_t *puc, const uint8_t *s, size_t n) if (n >= 4) { #if CONFIG_UNICODE_SAFETY - if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 - && (s[3] ^ 0x80) < 0x40 - && (c >= 0xf1 || s[1] >= 0x90) + if ((s[1] ^ 0x80) < 0x40) + { + if ((s[2] ^ 0x80) < 0x40) + { + if ((s[3] ^ 0x80) < 0x40) + { + if ((c >= 0xf1 || s[1] >= 0x90) #if 1 - && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90)) + && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90)) #endif - ) + ) #endif - { - *puc = ((unsigned int) (c & 0x07) << 18) - | ((unsigned int) (s[1] ^ 0x80) << 12) - | ((unsigned int) (s[2] ^ 0x80) << 6) - | (unsigned int) (s[3] ^ 0x80); - return 4; + { + *puc = ((unsigned int) (c & 0x07) << 18) + | ((unsigned int) (s[1] ^ 0x80) << 12) + | ((unsigned int) (s[2] ^ 0x80) << 6) + | (unsigned int) (s[3] ^ 0x80); + return 4; + } +#if CONFIG_UNICODE_SAFETY + /* invalid multibyte character */ + *puc = 0xfffd; + return 4; + } + /* invalid multibyte character */ + *puc = 0xfffd; + return 3; + } + /* invalid multibyte character */ + *puc = 0xfffd; + return 2; } /* invalid multibyte character */ +#endif } else { /* incomplete multibyte character */ *puc = 0xfffd; - return n; + if (n == 1 || (s[1] ^ 0x80) >= 0x40) + return 1; + else if (n == 2 || (s[2] ^ 0x80) >= 0x40) + return 2; + else + return 3; } } #if 0 @@ -109,19 +150,42 @@ u8_mbtouc_unsafe_aux (ucs4_t *puc, const uint8_t *s, size_t n) if (n >= 5) { #if CONFIG_UNICODE_SAFETY - if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 - && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40 - && (c >= 0xf9 || s[1] >= 0x88)) -#endif + if ((s[1] ^ 0x80) < 0x40) { - *puc = ((unsigned int) (c & 0x03) << 24) - | ((unsigned int) (s[1] ^ 0x80) << 18) - | ((unsigned int) (s[2] ^ 0x80) << 12) - | ((unsigned int) (s[3] ^ 0x80) << 6) - | (unsigned int) (s[4] ^ 0x80); - return 5; + if ((s[2] ^ 0x80) < 0x40) + { + if ((s[3] ^ 0x80) < 0x40) + { + if ((s[4] ^ 0x80) < 0x40) + { + if (c >= 0xf9 || s[1] >= 0x88) +#endif + { + *puc = ((unsigned int) (c & 0x03) << 24) + | ((unsigned int) (s[1] ^ 0x80) << 18) + | ((unsigned int) (s[2] ^ 0x80) << 12) + | ((unsigned int) (s[3] ^ 0x80) << 6) + | (unsigned int) (s[4] ^ 0x80); + return 5; + } +#if CONFIG_UNICODE_SAFETY + /* invalid multibyte character */ + *puc = 0xfffd; + return 5; + } + /* invalid multibyte character */ + *puc = 0xfffd; + return 4; + } + /* invalid multibyte character */ + *puc = 0xfffd; + return 3; + } + /* invalid multibyte character */ + return 2; } /* invalid multibyte character */ +#endif } else { @@ -135,21 +199,49 @@ u8_mbtouc_unsafe_aux (ucs4_t *puc, const uint8_t *s, size_t n) if (n >= 6) { #if CONFIG_UNICODE_SAFETY - if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 - && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40 - && (s[5] ^ 0x80) < 0x40 - && (c >= 0xfd || s[1] >= 0x84)) -#endif + if ((s[1] ^ 0x80) < 0x40) { - *puc = ((unsigned int) (c & 0x01) << 30) - | ((unsigned int) (s[1] ^ 0x80) << 24) - | ((unsigned int) (s[2] ^ 0x80) << 18) - | ((unsigned int) (s[3] ^ 0x80) << 12) - | ((unsigned int) (s[4] ^ 0x80) << 6) - | (unsigned int) (s[5] ^ 0x80); - return 6; + if ((s[2] ^ 0x80) < 0x40) + { + if ((s[3] ^ 0x80) < 0x40) + { + if ((s[4] ^ 0x80) < 0x40) + { + if ((s[5] ^ 0x80) < 0x40) + { + if (c >= 0xfd || s[1] >= 0x84) +#endif + { + *puc = ((unsigned int) (c & 0x01) << 30) + | ((unsigned int) (s[1] ^ 0x80) << 24) + | ((unsigned int) (s[2] ^ 0x80) << 18) + | ((unsigned int) (s[3] ^ 0x80) << 12) + | ((unsigned int) (s[4] ^ 0x80) << 6) + | (unsigned int) (s[5] ^ 0x80); + return 6; + } +#if CONFIG_UNICODE_SAFETY + /* invalid multibyte character */ + *puc = 0xfffd; + return 6; + } + /* invalid multibyte character */ + *puc = 0xfffd; + return 5; + } + /* invalid multibyte character */ + *puc = 0xfffd; + return 4; + } + /* invalid multibyte character */ + *puc = 0xfffd; + return 3; + } + /* invalid multibyte character */ + return 2; } /* invalid multibyte character */ +#endif } else { diff --git a/lib/unistr/u8-mbtouc-unsafe.c b/lib/unistr/u8-mbtouc-unsafe.c index 41583f9..01d12dc 100644 --- a/lib/unistr/u8-mbtouc-unsafe.c +++ b/lib/unistr/u8-mbtouc-unsafe.c @@ -1,5 +1,5 @@ /* Look at first character in UTF-8 string. - Copyright (C) 1999-2002, 2006-2007, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999-2002, 2006-2007, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2001. This program is free software: you can redistribute it and/or modify it @@ -52,13 +52,15 @@ u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n) | (unsigned int) (s[1] ^ 0x80); return 2; } +#if CONFIG_UNICODE_SAFETY /* invalid multibyte character */ +#endif } else { /* incomplete multibyte character */ *puc = 0xfffd; - return n; + return 1; } } else if (c < 0xf0) @@ -66,23 +68,39 @@ u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n) if (n >= 3) { #if CONFIG_UNICODE_SAFETY - if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 - && (c >= 0xe1 || s[1] >= 0xa0) - && (c != 0xed || s[1] < 0xa0)) -#endif + if ((s[1] ^ 0x80) < 0x40) { - *puc = ((unsigned int) (c & 0x0f) << 12) - | ((unsigned int) (s[1] ^ 0x80) << 6) - | (unsigned int) (s[2] ^ 0x80); - return 3; + if ((s[2] ^ 0x80) < 0x40) + { + if ((c >= 0xe1 || s[1] >= 0xa0) + && (c != 0xed || s[1] < 0xa0)) +#endif + { + *puc = ((unsigned int) (c & 0x0f) << 12) + | ((unsigned int) (s[1] ^ 0x80) << 6) + | (unsigned int) (s[2] ^ 0x80); + return 3; + } +#if CONFIG_UNICODE_SAFETY + /* invalid multibyte character */ + *puc = 0xfffd; + return 3; + } + /* invalid multibyte character */ + *puc = 0xfffd; + return 2; } /* invalid multibyte character */ +#endif } else { /* incomplete multibyte character */ *puc = 0xfffd; - return n; + if (n == 1 || (s[1] ^ 0x80) >= 0x40) + return 1; + else + return 2; } } else if (c < 0xf8) @@ -90,28 +108,51 @@ u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n) if (n >= 4) { #if CONFIG_UNICODE_SAFETY - if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 - && (s[3] ^ 0x80) < 0x40 - && (c >= 0xf1 || s[1] >= 0x90) + if ((s[1] ^ 0x80) < 0x40) + { + if ((s[2] ^ 0x80) < 0x40) + { + if ((s[3] ^ 0x80) < 0x40) + { + if ((c >= 0xf1 || s[1] >= 0x90) #if 1 - && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90)) + && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90)) #endif - ) + ) #endif - { - *puc = ((unsigned int) (c & 0x07) << 18) - | ((unsigned int) (s[1] ^ 0x80) << 12) - | ((unsigned int) (s[2] ^ 0x80) << 6) - | (unsigned int) (s[3] ^ 0x80); - return 4; + { + *puc = ((unsigned int) (c & 0x07) << 18) + | ((unsigned int) (s[1] ^ 0x80) << 12) + | ((unsigned int) (s[2] ^ 0x80) << 6) + | (unsigned int) (s[3] ^ 0x80); + return 4; + } +#if CONFIG_UNICODE_SAFETY + /* invalid multibyte character */ + *puc = 0xfffd; + return 4; + } + /* invalid multibyte character */ + *puc = 0xfffd; + return 3; + } + /* invalid multibyte character */ + *puc = 0xfffd; + return 2; } /* invalid multibyte character */ +#endif } else { /* incomplete multibyte character */ *puc = 0xfffd; - return n; + if (n == 1 || (s[1] ^ 0x80) >= 0x40) + return 1; + else if (n == 2 || (s[2] ^ 0x80) >= 0x40) + return 2; + else + return 3; } } #if 0 @@ -120,19 +161,42 @@ u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n) if (n >= 5) { #if CONFIG_UNICODE_SAFETY - if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 - && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40 - && (c >= 0xf9 || s[1] >= 0x88)) -#endif + if ((s[1] ^ 0x80) < 0x40) { - *puc = ((unsigned int) (c & 0x03) << 24) - | ((unsigned int) (s[1] ^ 0x80) << 18) - | ((unsigned int) (s[2] ^ 0x80) << 12) - | ((unsigned int) (s[3] ^ 0x80) << 6) - | (unsigned int) (s[4] ^ 0x80); - return 5; + if ((s[2] ^ 0x80) < 0x40) + { + if ((s[3] ^ 0x80) < 0x40) + { + if ((s[4] ^ 0x80) < 0x40) + { + if (c >= 0xf9 || s[1] >= 0x88) +#endif + { + *puc = ((unsigned int) (c & 0x03) << 24) + | ((unsigned int) (s[1] ^ 0x80) << 18) + | ((unsigned int) (s[2] ^ 0x80) << 12) + | ((unsigned int) (s[3] ^ 0x80) << 6) + | (unsigned int) (s[4] ^ 0x80); + return 5; + } +#if CONFIG_UNICODE_SAFETY + /* invalid multibyte character */ + *puc = 0xfffd; + return 5; + } + /* invalid multibyte character */ + *puc = 0xfffd; + return 4; + } + /* invalid multibyte character */ + *puc = 0xfffd; + return 3; + } + /* invalid multibyte character */ + return 2; } /* invalid multibyte character */ +#endif } else { @@ -146,21 +210,49 @@ u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n) if (n >= 6) { #if CONFIG_UNICODE_SAFETY - if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 - && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40 - && (s[5] ^ 0x80) < 0x40 - && (c >= 0xfd || s[1] >= 0x84)) -#endif + if ((s[1] ^ 0x80) < 0x40) { - *puc = ((unsigned int) (c & 0x01) << 30) - | ((unsigned int) (s[1] ^ 0x80) << 24) - | ((unsigned int) (s[2] ^ 0x80) << 18) - | ((unsigned int) (s[3] ^ 0x80) << 12) - | ((unsigned int) (s[4] ^ 0x80) << 6) - | (unsigned int) (s[5] ^ 0x80); - return 6; + if ((s[2] ^ 0x80) < 0x40) + { + if ((s[3] ^ 0x80) < 0x40) + { + if ((s[4] ^ 0x80) < 0x40) + { + if ((s[5] ^ 0x80) < 0x40) + { + if (c >= 0xfd || s[1] >= 0x84) +#endif + { + *puc = ((unsigned int) (c & 0x01) << 30) + | ((unsigned int) (s[1] ^ 0x80) << 24) + | ((unsigned int) (s[2] ^ 0x80) << 18) + | ((unsigned int) (s[3] ^ 0x80) << 12) + | ((unsigned int) (s[4] ^ 0x80) << 6) + | (unsigned int) (s[5] ^ 0x80); + return 6; + } +#if CONFIG_UNICODE_SAFETY + /* invalid multibyte character */ + *puc = 0xfffd; + return 6; + } + /* invalid multibyte character */ + *puc = 0xfffd; + return 5; + } + /* invalid multibyte character */ + *puc = 0xfffd; + return 4; + } + /* invalid multibyte character */ + *puc = 0xfffd; + return 3; + } + /* invalid multibyte character */ + return 2; } /* invalid multibyte character */ +#endif } else { diff --git a/lib/unistr/u8-mbtouc.c b/lib/unistr/u8-mbtouc.c index 96cd5b7..dc4607f 100644 --- a/lib/unistr/u8-mbtouc.c +++ b/lib/unistr/u8-mbtouc.c @@ -1,5 +1,5 @@ /* Look at first character in UTF-8 string. - Copyright (C) 1999-2002, 2006-2007, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999-2002, 2006-2007, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2001. This program is free software: you can redistribute it and/or modify it @@ -55,21 +55,32 @@ u8_mbtouc (ucs4_t *puc, const uint8_t *s, size_t n) { /* incomplete multibyte character */ *puc = 0xfffd; - return n; + return 1; } } else if (c < 0xf0) { if (n >= 3) { - if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 - && (c >= 0xe1 || s[1] >= 0xa0) - && (c != 0xed || s[1] < 0xa0)) + if ((s[1] ^ 0x80) < 0x40) { - *puc = ((unsigned int) (c & 0x0f) << 12) - | ((unsigned int) (s[1] ^ 0x80) << 6) - | (unsigned int) (s[2] ^ 0x80); - return 3; + if ((s[2] ^ 0x80) < 0x40) + { + if ((c >= 0xe1 || s[1] >= 0xa0) + && (c != 0xed || s[1] < 0xa0)) + { + *puc = ((unsigned int) (c & 0x0f) << 12) + | ((unsigned int) (s[1] ^ 0x80) << 6) + | (unsigned int) (s[2] ^ 0x80); + return 3; + } + /* invalid multibyte character */ + *puc = 0xfffd; + return 3; + } + /* invalid multibyte character */ + *puc = 0xfffd; + return 2; } /* invalid multibyte character */ } @@ -77,26 +88,45 @@ u8_mbtouc (ucs4_t *puc, const uint8_t *s, size_t n) { /* incomplete multibyte character */ *puc = 0xfffd; - return n; + if (n == 1 || (s[1] ^ 0x80) >= 0x40) + return 1; + else + return 2; } } else if (c < 0xf8) { if (n >= 4) { - if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 - && (s[3] ^ 0x80) < 0x40 - && (c >= 0xf1 || s[1] >= 0x90) + if ((s[1] ^ 0x80) < 0x40) + { + if ((s[2] ^ 0x80) < 0x40) + { + if ((s[3] ^ 0x80) < 0x40) + { + if ((c >= 0xf1 || s[1] >= 0x90) #if 1 - && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90)) + && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90)) #endif - ) - { - *puc = ((unsigned int) (c & 0x07) << 18) - | ((unsigned int) (s[1] ^ 0x80) << 12) - | ((unsigned int) (s[2] ^ 0x80) << 6) - | (unsigned int) (s[3] ^ 0x80); - return 4; + ) + { + *puc = ((unsigned int) (c & 0x07) << 18) + | ((unsigned int) (s[1] ^ 0x80) << 12) + | ((unsigned int) (s[2] ^ 0x80) << 6) + | (unsigned int) (s[3] ^ 0x80); + return 4; + } + /* invalid multibyte character */ + *puc = 0xfffd; + return 4; + } + /* invalid multibyte character */ + *puc = 0xfffd; + return 3; + } + /* invalid multibyte character */ + *puc = 0xfffd; + return 2; } /* invalid multibyte character */ } @@ -104,7 +134,12 @@ u8_mbtouc (ucs4_t *puc, const uint8_t *s, size_t n) { /* incomplete multibyte character */ *puc = 0xfffd; - return n; + if (n == 1 || (s[1] ^ 0x80) >= 0x40) + return 1; + else if (n == 2 || (s[2] ^ 0x80) >= 0x40) + return 2; + else + return 3; } } #if 0 @@ -112,16 +147,37 @@ u8_mbtouc (ucs4_t *puc, const uint8_t *s, size_t n) { if (n >= 5) { - if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 - && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40 - && (c >= 0xf9 || s[1] >= 0x88)) + if ((s[1] ^ 0x80) < 0x40) { - *puc = ((unsigned int) (c & 0x03) << 24) - | ((unsigned int) (s[1] ^ 0x80) << 18) - | ((unsigned int) (s[2] ^ 0x80) << 12) - | ((unsigned int) (s[3] ^ 0x80) << 6) - | (unsigned int) (s[4] ^ 0x80); - return 5; + if ((s[2] ^ 0x80) < 0x40) + { + if ((s[3] ^ 0x80) < 0x40) + { + if ((s[4] ^ 0x80) < 0x40) + { + if (c >= 0xf9 || s[1] >= 0x88) + { + *puc = ((unsigned int) (c & 0x03) << 24) + | ((unsigned int) (s[1] ^ 0x80) << 18) + | ((unsigned int) (s[2] ^ 0x80) << 12) + | ((unsigned int) (s[3] ^ 0x80) << 6) + | (unsigned int) (s[4] ^ 0x80); + return 5; + } + /* invalid multibyte character */ + *puc = 0xfffd; + return 5; + } + /* invalid multibyte character */ + *puc = 0xfffd; + return 4; + } + /* invalid multibyte character */ + *puc = 0xfffd; + return 3; + } + /* invalid multibyte character */ + return 2; } /* invalid multibyte character */ } @@ -136,18 +192,44 @@ u8_mbtouc (ucs4_t *puc, const uint8_t *s, size_t n) { if (n >= 6) { - if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 - && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40 - && (s[5] ^ 0x80) < 0x40 - && (c >= 0xfd || s[1] >= 0x84)) + if ((s[1] ^ 0x80) < 0x40) { - *puc = ((unsigned int) (c & 0x01) << 30) - | ((unsigned int) (s[1] ^ 0x80) << 24) - | ((unsigned int) (s[2] ^ 0x80) << 18) - | ((unsigned int) (s[3] ^ 0x80) << 12) - | ((unsigned int) (s[4] ^ 0x80) << 6) - | (unsigned int) (s[5] ^ 0x80); - return 6; + if ((s[2] ^ 0x80) < 0x40) + { + if ((s[3] ^ 0x80) < 0x40) + { + if ((s[4] ^ 0x80) < 0x40) + { + if ((s[5] ^ 0x80) < 0x40) + { + if (c >= 0xfd || s[1] >= 0x84) + { + *puc = ((unsigned int) (c & 0x01) << 30) + | ((unsigned int) (s[1] ^ 0x80) << 24) + | ((unsigned int) (s[2] ^ 0x80) << 18) + | ((unsigned int) (s[3] ^ 0x80) << 12) + | ((unsigned int) (s[4] ^ 0x80) << 6) + | (unsigned int) (s[5] ^ 0x80); + return 6; + } + /* invalid multibyte character */ + *puc = 0xfffd; + return 6; + } + /* invalid multibyte character */ + *puc = 0xfffd; + return 5; + } + /* invalid multibyte character */ + *puc = 0xfffd; + return 4; + } + /* invalid multibyte character */ + *puc = 0xfffd; + return 3; + } + /* invalid multibyte character */ + return 2; } /* invalid multibyte character */ } diff --git a/lib/unistr/u8-mbtoucr.c b/lib/unistr/u8-mbtoucr.c index a749c48..3d8c05f 100644 --- a/lib/unistr/u8-mbtoucr.c +++ b/lib/unistr/u8-mbtoucr.c @@ -1,5 +1,5 @@ /* Look at first character in UTF-8 string, returning an error code. - Copyright (C) 1999-2002, 2006-2007, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999-2002, 2006-2007, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2001. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u8-move.c b/lib/unistr/u8-move.c index 5c48411..5c872ca 100644 --- a/lib/unistr/u8-move.c +++ b/lib/unistr/u8-move.c @@ -1,5 +1,5 @@ /* Copy piece of UTF-8 string. - Copyright (C) 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u8-next.c b/lib/unistr/u8-next.c index 34249aa..8c218a2 100644 --- a/lib/unistr/u8-next.c +++ b/lib/unistr/u8-next.c @@ -1,5 +1,5 @@ /* Iterate over next character in UTF-8 string. - Copyright (C) 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u8-prev.c b/lib/unistr/u8-prev.c index 97a27f5..e01551e 100644 --- a/lib/unistr/u8-prev.c +++ b/lib/unistr/u8-prev.c @@ -1,5 +1,5 @@ /* Iterate over previous character in UTF-8 string. - Copyright (C) 2002, 2006-2007, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it @@ -23,7 +23,7 @@ const uint8_t * u8_prev (ucs4_t *puc, const uint8_t *s, const uint8_t *start) { - /* Keep in sync with unistr.h and utf8-ucs4.c. */ + /* Keep in sync with unistr.h and u8-mbtouc-aux.c. */ if (s != start) { uint8_t c_1 = s[-1]; diff --git a/lib/unistr/u8-set.c b/lib/unistr/u8-set.c index 3cca23b..5f9e5e6 100644 --- a/lib/unistr/u8-set.c +++ b/lib/unistr/u8-set.c @@ -1,5 +1,5 @@ /* Fill UTF-8 string. - Copyright (C) 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u8-startswith.c b/lib/unistr/u8-startswith.c index 4cc436a..b22fed4 100644 --- a/lib/unistr/u8-startswith.c +++ b/lib/unistr/u8-startswith.c @@ -1,5 +1,5 @@ /* Substring test for UTF-8 strings. - Copyright (C) 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u8-stpcpy.c b/lib/unistr/u8-stpcpy.c index 0faba41..8fb3ca5 100644 --- a/lib/unistr/u8-stpcpy.c +++ b/lib/unistr/u8-stpcpy.c @@ -1,5 +1,5 @@ /* Copy UTF-8 string. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u8-stpncpy.c b/lib/unistr/u8-stpncpy.c index a456bea..62d4972 100644 --- a/lib/unistr/u8-stpncpy.c +++ b/lib/unistr/u8-stpncpy.c @@ -1,5 +1,5 @@ /* Copy UTF-8 string. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it @@ -25,7 +25,7 @@ /* Specification. */ #include "unistr.h" -#if __GLIBC__ >= 2 +#if __GLIBC__ >= 2 && !defined __UCLIBC__ # include <string.h> diff --git a/lib/unistr/u8-strcat.c b/lib/unistr/u8-strcat.c index 2b54bad..b689201 100644 --- a/lib/unistr/u8-strcat.c +++ b/lib/unistr/u8-strcat.c @@ -1,5 +1,5 @@ /* Concatenate UTF-8 strings. - Copyright (C) 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u8-strchr.c b/lib/unistr/u8-strchr.c index 3be14c7..dd1cb84 100644 --- a/lib/unistr/u8-strchr.c +++ b/lib/unistr/u8-strchr.c @@ -1,5 +1,5 @@ /* Search character in UTF-8 string. - Copyright (C) 1999, 2002, 2006-2007, 2009-2010 Free Software Foundation, + Copyright (C) 1999, 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. @@ -21,6 +21,8 @@ /* Specification. */ #include "unistr.h" +#include <string.h> + uint8_t * u8_strchr (const uint8_t *s, ucs4_t uc) { @@ -30,72 +32,209 @@ u8_strchr (const uint8_t *s, ucs4_t uc) { uint8_t c0 = uc; - for (;; s++) + if (false) + { + /* Unoptimized code. */ + for (;;) + { + uint8_t s0 = *s; + if (s0 == c0) + return (uint8_t *) s; + s++; + if (s0 == 0) + break; + } + } + else { - if (*s == c0) - break; - if (*s == 0) - goto notfound; + /* Optimized code. + strchr() is often so well optimized, that it's worth the + added function call. */ + return (uint8_t *) strchr ((const char *) s, c0); } - return (uint8_t *) s; } else + /* Loops equivalent to strstr, optimized for a specific length (2, 3, 4) + of the needle. We use an algorithm similar to Boyer-Moore which + is documented in lib/unistr/u8-chr.c. There is additional + complication because we need to check after every byte for + a NUL byte, but the idea is the same. */ switch (u8_uctomb_aux (c, uc, 6)) { case 2: - if (*s == 0) - goto notfound; + if (*s == 0 || s[1] == 0) + break; { uint8_t c0 = c[0]; uint8_t c1 = c[1]; + /* Search for { c0, c1 }. */ + uint8_t s1 = s[1]; - for (;; s++) + for (;;) { + /* Here s[0] != 0, s[1] != 0. + Test whether s[0..1] == { c0, c1 }. */ + if (s1 == c1) + { + if (*s == c0) + return (uint8_t *) s; + else + /* Skip the search at s + 1, because s[1] = c1 < c0. */ + goto case2_skip2; + } + else + { + if (s1 == c0) + goto case2_skip1; + else + /* Skip the search at s + 1, because s[1] != c0. */ + goto case2_skip2; + } + case2_skip2: + s++; + s1 = s[1]; + if (s[1] == 0) + break; + case2_skip1: + s++; + s1 = s[1]; if (s[1] == 0) - goto notfound; - if (*s == c0 && s[1] == c1) break; } - return (uint8_t *) s; } + break; case 3: - if (*s == 0 || s[1] == 0) - goto notfound; + if (*s == 0 || s[1] == 0 || s[2] == 0) + break; { uint8_t c0 = c[0]; uint8_t c1 = c[1]; uint8_t c2 = c[2]; + /* Search for { c0, c1, c2 }. */ + uint8_t s2 = s[2]; - for (;; s++) + for (;;) { + /* Here s[0] != 0, s[1] != 0, s[2] != 0. + Test whether s[0..2] == { c0, c1, c2 }. */ + if (s2 == c2) + { + if (s[1] == c1 && *s == c0) + return (uint8_t *) s; + else + /* If c2 != c1: + Skip the search at s + 1, because s[2] == c2 != c1. + Skip the search at s + 2, because s[2] == c2 < c0. */ + if (c2 == c1) + goto case3_skip1; + else + goto case3_skip3; + } + else + { + if (s2 == c1) + goto case3_skip1; + else if (s2 == c0) + /* Skip the search at s + 1, because s[2] != c1. */ + goto case3_skip2; + else + /* Skip the search at s + 1, because s[2] != c1. + Skip the search at s + 2, because s[2] != c0. */ + goto case3_skip3; + } + case3_skip3: + s++; + s2 = s[2]; + if (s[2] == 0) + break; + case3_skip2: + s++; + s2 = s[2]; + if (s[2] == 0) + break; + case3_skip1: + s++; + s2 = s[2]; if (s[2] == 0) - goto notfound; - if (*s == c0 && s[1] == c1 && s[2] == c2) break; } - return (uint8_t *) s; } + break; case 4: - if (*s == 0 || s[1] == 0 || s[2] == 0) - goto notfound; + if (*s == 0 || s[1] == 0 || s[2] == 0 || s[3] == 0) + break; { uint8_t c0 = c[0]; uint8_t c1 = c[1]; uint8_t c2 = c[2]; uint8_t c3 = c[3]; + /* Search for { c0, c1, c2, c3 }. */ + uint8_t s3 = s[3]; - for (;; s++) + for (;;) { + /* Here s[0] != 0, s[1] != 0, s[2] != 0, s[3] != 0. + Test whether s[0..3] == { c0, c1, c2, c3 }. */ + if (s3 == c3) + { + if (s[2] == c2 && s[1] == c1 && *s == c0) + return (uint8_t *) s; + else + /* If c3 != c2: + Skip the search at s + 1, because s[3] == c3 != c2. + If c3 != c1: + Skip the search at s + 2, because s[3] == c3 != c1. + Skip the search at s + 3, because s[3] == c3 < c0. */ + if (c3 == c2) + goto case4_skip1; + else if (c3 == c1) + goto case4_skip2; + else + goto case4_skip4; + } + else + { + if (s3 == c2) + goto case4_skip1; + else if (s3 == c1) + /* Skip the search at s + 1, because s[3] != c2. */ + goto case4_skip2; + else if (s3 == c0) + /* Skip the search at s + 1, because s[3] != c2. + Skip the search at s + 2, because s[3] != c1. */ + goto case4_skip3; + else + /* Skip the search at s + 1, because s[3] != c2. + Skip the search at s + 2, because s[3] != c1. + Skip the search at s + 3, because s[3] != c0. */ + goto case4_skip4; + } + case4_skip4: + s++; + s3 = s[3]; + if (s[3] == 0) + break; + case4_skip3: + s++; + s3 = s[3]; + if (s[3] == 0) + break; + case4_skip2: + s++; + s3 = s[3]; + if (s[3] == 0) + break; + case4_skip1: + s++; + s3 = s[3]; if (s[3] == 0) - goto notfound; - if (*s == c0 && s[1] == c1 && s[2] == c2 && s[3] == c3) break; } - return (uint8_t *) s; } + break; } -notfound: + return NULL; } diff --git a/lib/unistr/u8-strcmp.c b/lib/unistr/u8-strcmp.c index 82a4414..93f06ad 100644 --- a/lib/unistr/u8-strcmp.c +++ b/lib/unistr/u8-strcmp.c @@ -1,5 +1,5 @@ /* Compare UTF-8 strings. - Copyright (C) 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u8-strcoll.c b/lib/unistr/u8-strcoll.c index 9ffa135..8a2a307 100644 --- a/lib/unistr/u8-strcoll.c +++ b/lib/unistr/u8-strcoll.c @@ -1,5 +1,5 @@ /* Compare UTF-8 strings using the collation rules of the current locale. - Copyright (C) 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2009. This program is free software: you can redistribute it and/or modify it @@ -29,5 +29,5 @@ #define FUNC u8_strcoll #define UNIT uint8_t #define U_STRCMP u8_strcmp -#define U_STRCONV_TO_LOCALE u8_strconv_to_locale +#define U_STRCONV_TO_ENCODING u8_strconv_to_encoding #include "u-strcoll.h" diff --git a/lib/unistr/u8-strcpy.c b/lib/unistr/u8-strcpy.c index 9662de5..40d544e 100644 --- a/lib/unistr/u8-strcpy.c +++ b/lib/unistr/u8-strcpy.c @@ -1,5 +1,5 @@ /* Copy UTF-8 string. - Copyright (C) 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u8-strcspn.c b/lib/unistr/u8-strcspn.c index 4b5b8e0..357f480 100644 --- a/lib/unistr/u8-strcspn.c +++ b/lib/unistr/u8-strcspn.c @@ -1,5 +1,5 @@ /* Search for some characters in UTF-8 string. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u8-strdup.c b/lib/unistr/u8-strdup.c index 58a3077..1ac590f 100644 --- a/lib/unistr/u8-strdup.c +++ b/lib/unistr/u8-strdup.c @@ -1,5 +1,5 @@ /* Copy UTF-8 string. - Copyright (C) 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u8-strlen.c b/lib/unistr/u8-strlen.c index b8bebf0..08f011e 100644 --- a/lib/unistr/u8-strlen.c +++ b/lib/unistr/u8-strlen.c @@ -1,5 +1,5 @@ /* Determine length of UTF-8 string. - Copyright (C) 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u8-strmblen.c b/lib/unistr/u8-strmblen.c index 52242c5..09b876b 100644 --- a/lib/unistr/u8-strmblen.c +++ b/lib/unistr/u8-strmblen.c @@ -1,5 +1,5 @@ /* Look at first character in UTF-8 string. - Copyright (C) 1999-2000, 2002, 2006-2007, 2009-2010 Free Software + Copyright (C) 1999-2000, 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. @@ -24,7 +24,7 @@ int u8_strmblen (const uint8_t *s) { - /* Keep in sync with unistr.h and utf8-ucs4.c. */ + /* Keep in sync with unistr.h and u8-mbtouc-aux.c. */ uint8_t c = *s; if (c < 0x80) diff --git a/lib/unistr/u8-strmbtouc.c b/lib/unistr/u8-strmbtouc.c index 67016c6..abfb179 100644 --- a/lib/unistr/u8-strmbtouc.c +++ b/lib/unistr/u8-strmbtouc.c @@ -1,5 +1,5 @@ /* Look at first character in UTF-8 string. - Copyright (C) 1999-2000, 2002, 2006-2007, 2009-2010 Free Software + Copyright (C) 1999-2000, 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. @@ -24,7 +24,7 @@ int u8_strmbtouc (ucs4_t *puc, const uint8_t *s) { - /* Keep in sync with unistr.h and utf8-ucs4.c. */ + /* Keep in sync with unistr.h and u8-mbtouc-aux.c. */ uint8_t c = *s; if (c < 0x80) diff --git a/lib/unistr/u8-strncat.c b/lib/unistr/u8-strncat.c index 4780350..533355b 100644 --- a/lib/unistr/u8-strncat.c +++ b/lib/unistr/u8-strncat.c @@ -1,5 +1,5 @@ /* Concatenate UTF-8 strings. - Copyright (C) 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u8-strncmp.c b/lib/unistr/u8-strncmp.c index 286450b..1b0d0f4 100644 --- a/lib/unistr/u8-strncmp.c +++ b/lib/unistr/u8-strncmp.c @@ -1,5 +1,5 @@ /* Compare UTF-8 strings. - Copyright (C) 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u8-strncpy.c b/lib/unistr/u8-strncpy.c index 5ef757b..306b02f 100644 --- a/lib/unistr/u8-strncpy.c +++ b/lib/unistr/u8-strncpy.c @@ -1,5 +1,5 @@ /* Copy UTF-8 string. - Copyright (C) 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u8-strnlen.c b/lib/unistr/u8-strnlen.c index e732ea2..5ae8b71 100644 --- a/lib/unistr/u8-strnlen.c +++ b/lib/unistr/u8-strnlen.c @@ -1,5 +1,5 @@ /* Determine bounded length of UTF-8 string. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it @@ -25,7 +25,7 @@ /* Specification. */ #include "unistr.h" -#if __GLIBC__ >= 2 +#if __GLIBC__ >= 2 || defined __UCLIBC__ # include <string.h> diff --git a/lib/unistr/u8-strpbrk.c b/lib/unistr/u8-strpbrk.c index ec6dc27..e7a8ad2 100644 --- a/lib/unistr/u8-strpbrk.c +++ b/lib/unistr/u8-strpbrk.c @@ -1,5 +1,5 @@ /* Search for some characters in UTF-8 string. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u8-strrchr.c b/lib/unistr/u8-strrchr.c index 6d8e297..4efd42d 100644 --- a/lib/unistr/u8-strrchr.c +++ b/lib/unistr/u8-strrchr.c @@ -1,5 +1,5 @@ /* Search character in UTF-8 string. - Copyright (C) 1999, 2002, 2006-2007, 2009-2010 Free Software Foundation, + Copyright (C) 1999, 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. diff --git a/lib/unistr/u8-strspn.c b/lib/unistr/u8-strspn.c index 7747815..23ab7b4 100644 --- a/lib/unistr/u8-strspn.c +++ b/lib/unistr/u8-strspn.c @@ -1,5 +1,5 @@ /* Search for some characters in UTF-8 string. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u8-strstr.c b/lib/unistr/u8-strstr.c index cce37ad..59d35e7 100644 --- a/lib/unistr/u8-strstr.c +++ b/lib/unistr/u8-strstr.c @@ -1,5 +1,5 @@ /* Substring test for UTF-8 strings. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2010-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it @@ -20,9 +20,13 @@ /* Specification. */ #include "unistr.h" +#include <string.h> + /* FIXME: Maybe walking the string via u8_mblen is a win? */ #define FUNC u8_strstr #define UNIT uint8_t #define U_STRCHR u8_strchr +#define U_STRMBTOUC u8_strmbtouc +#define UNIT_IS_UINT8_T 1 #include "u-strstr.h" diff --git a/lib/unistr/u8-strtok.c b/lib/unistr/u8-strtok.c index e5c7203..1e4e6ef 100644 --- a/lib/unistr/u8-strtok.c +++ b/lib/unistr/u8-strtok.c @@ -1,5 +1,5 @@ /* Tokenize UTF-8 string. - Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it @@ -24,4 +24,5 @@ #define UNIT uint8_t #define U_STRSPN u8_strspn #define U_STRPBRK u8_strpbrk +#define U_STRMBLEN u8_strmblen #include "u-strtok.h" diff --git a/lib/unistr/u8-to-u16.c b/lib/unistr/u8-to-u16.c index 3745c2b..cb0f298 100644 --- a/lib/unistr/u8-to-u16.c +++ b/lib/unistr/u8-to-u16.c @@ -1,5 +1,5 @@ /* Convert UTF-8 string to UTF-16 string. - Copyright (C) 2002, 2006-2007, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u8-to-u32.c b/lib/unistr/u8-to-u32.c index de2a35e..1b3cbc4 100644 --- a/lib/unistr/u8-to-u32.c +++ b/lib/unistr/u8-to-u32.c @@ -1,5 +1,5 @@ /* Convert UTF-8 string to UTF-32 string. - Copyright (C) 2002, 2006-2007, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u8-uctomb-aux.c b/lib/unistr/u8-uctomb-aux.c index 695921d..cc9c544 100644 --- a/lib/unistr/u8-uctomb-aux.c +++ b/lib/unistr/u8-uctomb-aux.c @@ -1,5 +1,5 @@ /* Conversion UCS-4 to UTF-8. - Copyright (C) 2002, 2006-2007, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it diff --git a/lib/unistr/u8-uctomb.c b/lib/unistr/u8-uctomb.c index fd33c05..43ef23d 100644 --- a/lib/unistr/u8-uctomb.c +++ b/lib/unistr/u8-uctomb.c @@ -1,5 +1,5 @@ /* Store a character in UTF-8 string. - Copyright (C) 2002, 2005-2006, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2002, 2005-2006, 2009-2015 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it |