summaryrefslogtreecommitdiff
path: root/lib/unicase/u-prefix-context.h
blob: b1e4f706a1692c1bd89fb1450ce9f92c5caf5ba9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
/* Case-mapping context of prefix UTF-8/UTF-16/UTF-32 string.
   Copyright (C) 2009-2022 Free Software Foundation, Inc.
   Written by Bruno Haible <bruno@clisp.org>, 2009.

   This file is free software.
   It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+".
   You can redistribute it and/or modify it under either
     - the terms of the GNU Lesser General Public License as published
       by the Free Software Foundation; either version 3, or (at your
       option) any later version, or
     - the terms of the GNU General Public License as published by the
       Free Software Foundation; either version 2, or (at your option)
       any later version, or
     - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+".

   This file is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License and the GNU General Public License
   for more details.

   You should have received a copy of the GNU Lesser General Public
   License and of the GNU General Public License along with this
   program.  If not, see <https://www.gnu.org/licenses/>.  */

casing_prefix_context_t
FUNC1 (const UNIT *s, size_t n)
{
  return FUNC2 (s, n, unicase_empty_prefix_context);
}

casing_prefix_context_t
FUNC2 (const UNIT *s, size_t n, casing_prefix_context_t a_context)
{
#if 0
  /* Forward iteration.  Slow for long strings.  */
  casing_prefix_context_t context = a_context;
  const UNIT *s_end = s + n;

  while (s < s_end)
    {
      ucs4_t uc;
      int count = U_MBTOUC_UNSAFE (&uc, s, s_end - s);

      if (!uc_is_case_ignorable (uc))
        context.last_char_except_ignorable = uc;

      {
        int ccc = uc_combining_class (uc);
        if (ccc == UC_CCC_A || ccc == UC_CCC_NR)
          context.last_char_normal_or_above = uc;
      }

      s += count;
    }

  return context;
#else
  /* Iterate backwards, only as far as needed.  */
  casing_prefix_context_t context;
  ucs4_t last_char_except_ignorable = (ucs4_t)(-1);
  ucs4_t last_char_normal_or_above = (ucs4_t)(-1);
  const UNIT *p = s + n;

  for (;;)
    {
      ucs4_t uc;
      p = U_PREV (&uc, p, s);
      if (p == NULL)
        break;

      if (last_char_except_ignorable == (ucs4_t)(-1))
        {
          if (!uc_is_case_ignorable (uc))
            last_char_except_ignorable = uc;
        }

      if (last_char_normal_or_above == (ucs4_t)(-1))
        {
          int ccc = uc_combining_class (uc);
          if (ccc == UC_CCC_A || ccc == UC_CCC_NR)
            last_char_normal_or_above = uc;
        }

      if (last_char_except_ignorable != (ucs4_t)(-1)
          && last_char_normal_or_above != (ucs4_t)(-1))
        break;
    }
  context.last_char_except_ignorable =
    (last_char_except_ignorable != (ucs4_t)(-1)
     ? last_char_except_ignorable
     : a_context.last_char_except_ignorable);
  context.last_char_normal_or_above =
    (last_char_normal_or_above != (ucs4_t)(-1)
     ? last_char_normal_or_above
     : a_context.last_char_normal_or_above);

  return context;
#endif
}