summaryrefslogtreecommitdiff
path: root/lib/uninorm/decomposition-table.h
blob: b0c981e464e4f21540c7d9534d6e7bd305b3bcc3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
/* Decomposition of Unicode characters.
   Copyright (C) 2001-2003, 2009-2015 Free Software Foundation, Inc.
   Written by Bruno Haible <bruno@clisp.org>, 2009.

   This program is free software: you can redistribute it and/or modify it
   under the terms of the GNU Lesser General Public License as published
   by the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */


#include "unitypes.h"

/* The decomposition table is made of two parts:
     - A table containing the actual arrays of decomposed equivalents.
       (This table is separate because the maximum length of a decomposition
       is 18, much larger than than the average length 1.497 of a decomposition).
     - A 3-level table of indices into this array.  */

#include "decomposition-table1.h"

static inline unsigned short
decomp_index (ucs4_t uc)
{
  unsigned int index1 = uc >> decomp_header_0;
  if (index1 < decomp_header_1)
    {
      int lookup1 = gl_uninorm_decomp_index_table.level1[index1];
      if (lookup1 >= 0)
        {
          unsigned int index2 = (uc >> decomp_header_2) & decomp_header_3;
          int lookup2 = gl_uninorm_decomp_index_table.level2[lookup1 + index2];
          if (lookup2 >= 0)
            {
              unsigned int index3 = uc & decomp_header_4;
              return gl_uninorm_decomp_index_table.level3[lookup2 + index3];
            }
        }
    }
  return (unsigned short)(-1);
}