summaryrefslogtreecommitdiff
path: root/lib/unicase/u-suffix-context.h
blob: 01ee18ef9d38ea1667efa8d3638ba85fbd6af57c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
/* Case-mapping context of suffix UTF-8/UTF-16/UTF-32 string.
   Copyright (C) 2009-2018 Free Software Foundation, Inc.
   Written by Bruno Haible <bruno@clisp.org>, 2009.

   This program is free software: you can redistribute it and/or
   modify it under the terms of either:

     * the GNU Lesser General Public License as published by the Free
       Software Foundation; either version 3 of the License, or (at your
       option) any later version.

   or

     * the GNU General Public License as published by the Free
       Software Foundation; either version 2 of the License, or (at your
       option) any later version.

   or both in parallel, as here.
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */

casing_suffix_context_t
FUNC1 (const UNIT *s, size_t n)
{
  return FUNC2 (s, n, unicase_empty_suffix_context);
}

casing_suffix_context_t
FUNC2 (const UNIT *s, size_t n, casing_suffix_context_t a_context)
{
  casing_suffix_context_t context;
  /* Evaluate all three conditions in a single pass through the string S.
     The three variables are -1 as long as the value of the condition has
     not been determined.  */
  ucs4_t first_char_except_ignorable = (ucs4_t)(-1);
  int scc_MORE_ABOVE = -1;
  int scc_BEFORE_DOT = -1;
  const UNIT *s_end = s + n;

  while (s < s_end)
    {
      ucs4_t uc;
      int count = U_MBTOUC_UNSAFE (&uc, s, s_end - s);

      if (first_char_except_ignorable == (ucs4_t)(-1))
        {
          if (!uc_is_case_ignorable (uc))
            first_char_except_ignorable = uc;
        }

      if (scc_MORE_ABOVE < 0)
        {
          int ccc = uc_combining_class (uc);
          if (ccc == UC_CCC_A)
            scc_MORE_ABOVE = SCC_MORE_ABOVE_MASK;
          else if (ccc == UC_CCC_NR)
            scc_MORE_ABOVE = 0;
        }

      if (scc_BEFORE_DOT < 0)
        {
          if (uc == 0x0307) /* COMBINING DOT ABOVE */
            scc_BEFORE_DOT = SCC_BEFORE_DOT_MASK;
          else
            {
              int ccc = uc_combining_class (uc);
              if (ccc == UC_CCC_A || ccc == UC_CCC_NR)
                scc_BEFORE_DOT = 0;
            }
        }

      if (first_char_except_ignorable != (ucs4_t)(-1)
          && (scc_MORE_ABOVE | scc_BEFORE_DOT) >= 0)
        /* All conditions have been determined.  */
        break;

      s += count;
    }

  /* For those conditions that have not been determined so far, use the
     value from the argument context.  */
  context.first_char_except_ignorable =
    (first_char_except_ignorable != (ucs4_t)(-1)
     ? first_char_except_ignorable
     : a_context.first_char_except_ignorable);
  context.bits =
    (scc_MORE_ABOVE >= 0
     ? scc_MORE_ABOVE
     : a_context.bits & SCC_MORE_ABOVE_MASK)
    | (scc_BEFORE_DOT >= 0
       ? scc_BEFORE_DOT
       : a_context.bits & SCC_BEFORE_DOT_MASK);
  return context;
}