summaryrefslogtreecommitdiff
path: root/lib/uninorm
diff options
context:
space:
mode:
Diffstat (limited to 'lib/uninorm')
-rw-r--r--lib/uninorm/canonical-decomposition.c104
-rw-r--r--lib/uninorm/compat-decomposition.c2
-rw-r--r--lib/uninorm/composition-table.gperf2
-rw-r--r--lib/uninorm/composition.c84
-rw-r--r--lib/uninorm/decompose-internal.c2
-rw-r--r--lib/uninorm/decompose-internal.h6
-rw-r--r--lib/uninorm/decomposing-form.c2
-rw-r--r--lib/uninorm/decomposition-table.c2
-rw-r--r--lib/uninorm/decomposition-table.h20
-rw-r--r--lib/uninorm/decomposition.c96
-rw-r--r--lib/uninorm/nfc.c2
-rw-r--r--lib/uninorm/nfd.c2
-rw-r--r--lib/uninorm/nfkc.c2
-rw-r--r--lib/uninorm/nfkd.c2
-rw-r--r--lib/uninorm/normalize-internal.h2
-rw-r--r--lib/uninorm/u-normalize-internal.h566
-rw-r--r--lib/uninorm/u-normcmp.h12
-rw-r--r--lib/uninorm/u-normcoll.h14
-rw-r--r--lib/uninorm/u-normxfrm.h40
-rw-r--r--lib/uninorm/u16-normalize.c2
-rw-r--r--lib/uninorm/u16-normcmp.c2
-rw-r--r--lib/uninorm/u16-normcoll.c2
-rw-r--r--lib/uninorm/u16-normxfrm.c2
-rw-r--r--lib/uninorm/u32-normalize.c2
-rw-r--r--lib/uninorm/u32-normcmp.c2
-rw-r--r--lib/uninorm/u32-normcoll.c2
-rw-r--r--lib/uninorm/u32-normxfrm.c2
-rw-r--r--lib/uninorm/u8-normalize.c2
-rw-r--r--lib/uninorm/u8-normcmp.c2
-rw-r--r--lib/uninorm/u8-normcoll.c2
-rw-r--r--lib/uninorm/u8-normxfrm.c2
-rw-r--r--lib/uninorm/uninorm-filter.c394
32 files changed, 690 insertions, 690 deletions
diff --git a/lib/uninorm/canonical-decomposition.c b/lib/uninorm/canonical-decomposition.c
index 210b74b..e6e39e0 100644
--- a/lib/uninorm/canonical-decomposition.c
+++ b/lib/uninorm/canonical-decomposition.c
@@ -1,5 +1,5 @@
/* Canonical decomposition of Unicode characters.
- Copyright (C) 2009 Free Software Foundation, Inc.
+ Copyright (C) 2009-2010 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This program is free software: you can redistribute it and/or modify it
@@ -31,77 +31,77 @@ uc_canonical_decomposition (ucs4_t uc, ucs4_t *decomposition)
{
/* Hangul syllable. See Unicode standard, chapter 3, section
"Hangul Syllable Decomposition", See also the clarification at
- <http://www.unicode.org/versions/Unicode5.1.0/>, section
- "Clarification of Hangul Jamo Handling". */
+ <http://www.unicode.org/versions/Unicode5.1.0/>, section
+ "Clarification of Hangul Jamo Handling". */
unsigned int t;
uc -= 0xAC00;
t = uc % 28;
if (t == 0)
- {
- unsigned int v, l;
+ {
+ unsigned int v, l;
- uc = uc / 28;
- v = uc % 21;
- l = uc / 21;
+ uc = uc / 28;
+ v = uc % 21;
+ l = uc / 21;
- decomposition[0] = 0x1100 + l;
- decomposition[1] = 0x1161 + v;
- return 2;
- }
+ decomposition[0] = 0x1100 + l;
+ decomposition[1] = 0x1161 + v;
+ return 2;
+ }
else
- {
+ {
#if 1 /* Return the pairwise decomposition, not the full decomposition. */
- decomposition[0] = 0xAC00 + uc - t; /* = 0xAC00 + (l * 21 + v) * 28; */
- decomposition[1] = 0x11A7 + t;
- return 2;
+ decomposition[0] = 0xAC00 + uc - t; /* = 0xAC00 + (l * 21 + v) * 28; */
+ decomposition[1] = 0x11A7 + t;
+ return 2;
#else
- unsigned int v, l;
+ unsigned int v, l;
- uc = uc / 28;
- v = uc % 21;
- l = uc / 21;
+ uc = uc / 28;
+ v = uc % 21;
+ l = uc / 21;
- decomposition[0] = 0x1100 + l;
- decomposition[1] = 0x1161 + v;
- decomposition[2] = 0x11A7 + t;
- return 3;
+ decomposition[0] = 0x1100 + l;
+ decomposition[1] = 0x1161 + v;
+ decomposition[2] = 0x11A7 + t;
+ return 3;
#endif
- }
+ }
}
else if (uc < 0x110000)
{
unsigned short entry = decomp_index (uc);
/* An entry of (unsigned short)(-1) denotes an absent entry.
- Otherwise, bit 15 of the entry tells whether the decomposition
- is a canonical one. */
+ Otherwise, bit 15 of the entry tells whether the decomposition
+ is a canonical one. */
if (entry < 0x8000)
- {
- const unsigned char *p;
- unsigned int element;
- unsigned int length;
-
- p = &gl_uninorm_decomp_chars_table[3 * entry];
- element = (p[0] << 16) | (p[1] << 8) | p[2];
- /* The first element has 5 bits for the decomposition type. */
- if (((element >> 18) & 0x1f) != UC_DECOMP_CANONICAL)
- abort ();
- length = 1;
- for (;;)
- {
- /* Every element has an 18 bits wide Unicode code point. */
- *decomposition = element & 0x3ffff;
- /* Bit 23 tells whether there are more elements, */
- if ((element & (1 << 23)) == 0)
- break;
- p += 3;
- element = (p[0] << 16) | (p[1] << 8) | p[2];
- decomposition++;
- length++;
- }
- return length;
- }
+ {
+ const unsigned char *p;
+ unsigned int element;
+ unsigned int length;
+
+ p = &gl_uninorm_decomp_chars_table[3 * entry];
+ element = (p[0] << 16) | (p[1] << 8) | p[2];
+ /* The first element has 5 bits for the decomposition type. */
+ if (((element >> 18) & 0x1f) != UC_DECOMP_CANONICAL)
+ abort ();
+ length = 1;
+ for (;;)
+ {
+ /* Every element has an 18 bits wide Unicode code point. */
+ *decomposition = element & 0x3ffff;
+ /* Bit 23 tells whether there are more elements, */
+ if ((element & (1 << 23)) == 0)
+ break;
+ p += 3;
+ element = (p[0] << 16) | (p[1] << 8) | p[2];
+ decomposition++;
+ length++;
+ }
+ return length;
+ }
}
return -1;
}
diff --git a/lib/uninorm/compat-decomposition.c b/lib/uninorm/compat-decomposition.c
index 19ec87e..740cc5c 100644
--- a/lib/uninorm/compat-decomposition.c
+++ b/lib/uninorm/compat-decomposition.c
@@ -1,5 +1,5 @@
/* Compatibility decomposition of Unicode characters.
- Copyright (C) 2009 Free Software Foundation, Inc.
+ Copyright (C) 2009-2010 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/uninorm/composition-table.gperf b/lib/uninorm/composition-table.gperf
index f7f0feb..4651725 100644
--- a/lib/uninorm/composition-table.gperf
+++ b/lib/uninorm/composition-table.gperf
@@ -2,7 +2,7 @@
/* Canonical composition of Unicode characters. */
/* Generated automatically by gen-uni-tables for Unicode 5.1.0. */
-/* Copyright (C) 2009 Free Software Foundation, Inc.
+/* Copyright (C) 2009-2010 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
diff --git a/lib/uninorm/composition.c b/lib/uninorm/composition.c
index 4caf9a0..c9cddbe 100644
--- a/lib/uninorm/composition.c
+++ b/lib/uninorm/composition.c
@@ -1,5 +1,5 @@
/* Canonical composition of Unicode characters.
- Copyright (C) 2002, 2006, 2009 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2006, 2009-2010 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This program is free software: you can redistribute it and/or modify it
@@ -32,54 +32,54 @@ uc_composition (ucs4_t uc1, ucs4_t uc2)
if (uc1 < 0x10000 && uc2 < 0x10000)
{
if (uc2 >= 0x1161 && uc2 < 0x1161 + 21
- && uc1 >= 0x1100 && uc1 < 0x1100 + 19)
- {
- /* Hangul: Combine single letter L and single letter V to form
- two-letter syllable LV. */
- return 0xAC00 + ((uc1 - 0x1100) * 21 + (uc2 - 0x1161)) * 28;
- }
+ && uc1 >= 0x1100 && uc1 < 0x1100 + 19)
+ {
+ /* Hangul: Combine single letter L and single letter V to form
+ two-letter syllable LV. */
+ return 0xAC00 + ((uc1 - 0x1100) * 21 + (uc2 - 0x1161)) * 28;
+ }
else if (uc2 > 0x11A7 && uc2 < 0x11A7 + 28
- && uc1 >= 0xAC00 && uc1 < 0xD7A4 && ((uc1 - 0xAC00) % 28) == 0)
- {
- /* Hangul: Combine two-letter syllable LV with single-letter T
- to form three-letter syllable LVT. */
- return uc1 + (uc2 - 0x11A7);
- }
+ && uc1 >= 0xAC00 && uc1 < 0xD7A4 && ((uc1 - 0xAC00) % 28) == 0)
+ {
+ /* Hangul: Combine two-letter syllable LV with single-letter T
+ to form three-letter syllable LVT. */
+ return uc1 + (uc2 - 0x11A7);
+ }
else
- {
+ {
#if 0
- unsigned int uc = MUL1 * uc1 * MUL2 * uc2;
- unsigned int index1 = uc >> composition_header_0;
- if (index1 < composition_header_1)
- {
- int lookup1 = u_composition.level1[index1];
- if (lookup1 >= 0)
- {
- unsigned int index2 = (uc >> composition_header_2) & composition_header_3;
- int lookup2 = u_composition.level2[lookup1 + index2];
- if (lookup2 >= 0)
- {
- unsigned int index3 = (uc & composition_header_4);
- unsigned int lookup3 = u_composition.level3[lookup2 + index3];
- if ((lookup3 >> 16) == uc2)
- return lookup3 & ((1U << 16) - 1);
- }
- }
- }
+ unsigned int uc = MUL1 * uc1 * MUL2 * uc2;
+ unsigned int index1 = uc >> composition_header_0;
+ if (index1 < composition_header_1)
+ {
+ int lookup1 = u_composition.level1[index1];
+ if (lookup1 >= 0)
+ {
+ unsigned int index2 = (uc >> composition_header_2) & composition_header_3;
+ int lookup2 = u_composition.level2[lookup1 + index2];
+ if (lookup2 >= 0)
+ {
+ unsigned int index3 = (uc & composition_header_4);
+ unsigned int lookup3 = u_composition.level3[lookup2 + index3];
+ if ((lookup3 >> 16) == uc2)
+ return lookup3 & ((1U << 16) - 1);
+ }
+ }
+ }
#else
- char codes[4];
- const struct composition_rule *rule;
+ char codes[4];
+ const struct composition_rule *rule;
- codes[0] = (uc1 >> 8) & 0xff;
- codes[1] = uc1 & 0xff;
- codes[2] = (uc2 >> 8) & 0xff;
- codes[3] = uc2 & 0xff;
+ codes[0] = (uc1 >> 8) & 0xff;
+ codes[1] = uc1 & 0xff;
+ codes[2] = (uc2 >> 8) & 0xff;
+ codes[3] = uc2 & 0xff;
- rule = gl_uninorm_compose_lookup (codes, 4);
- if (rule != NULL)
- return rule->combined;
+ rule = gl_uninorm_compose_lookup (codes, 4);
+ if (rule != NULL)
+ return rule->combined;
#endif
- }
+ }
}
return 0;
}
diff --git a/lib/uninorm/decompose-internal.c b/lib/uninorm/decompose-internal.c
index 53b745d..f9c2277 100644
--- a/lib/uninorm/decompose-internal.c
+++ b/lib/uninorm/decompose-internal.c
@@ -1,5 +1,5 @@
/* Decomposition of Unicode strings.
- Copyright (C) 2009 Free Software Foundation, Inc.
+ Copyright (C) 2009-2010 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/uninorm/decompose-internal.h b/lib/uninorm/decompose-internal.h
index c1bf125..d952e2f 100644
--- a/lib/uninorm/decompose-internal.h
+++ b/lib/uninorm/decompose-internal.h
@@ -1,5 +1,5 @@
/* Decomposition of Unicode strings.
- Copyright (C) 2009 Free Software Foundation, Inc.
+ Copyright (C) 2009-2010 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This program is free software: you can redistribute it and/or modify it
@@ -27,10 +27,10 @@ extern int
struct ucs4_with_ccc
{
ucs4_t code;
- int ccc; /* range 0..255 */
+ int ccc; /* range 0..255 */
};
/* Stable-sort an array of 'struct ucs4_with_ccc'. */
extern void
gl_uninorm_decompose_merge_sort_inplace (struct ucs4_with_ccc *src, size_t n,
- struct ucs4_with_ccc *tmp);
+ struct ucs4_with_ccc *tmp);
diff --git a/lib/uninorm/decomposing-form.c b/lib/uninorm/decomposing-form.c
index 3b49cb8..a19f6d3 100644
--- a/lib/uninorm/decomposing-form.c
+++ b/lib/uninorm/decomposing-form.c
@@ -1,5 +1,5 @@
/* Decomposing variant of a normalization form.
- Copyright (C) 2009 Free Software Foundation, Inc.
+ Copyright (C) 2009-2010 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/uninorm/decomposition-table.c b/lib/uninorm/decomposition-table.c
index 63f6442..b19b47a 100644
--- a/lib/uninorm/decomposition-table.c
+++ b/lib/uninorm/decomposition-table.c
@@ -1,5 +1,5 @@
/* Decomposition of Unicode characters.
- Copyright (C) 2009 Free Software Foundation, Inc.
+ Copyright (C) 2009-2010 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/uninorm/decomposition-table.h b/lib/uninorm/decomposition-table.h
index ad0e1a3..9238ff9 100644
--- a/lib/uninorm/decomposition-table.h
+++ b/lib/uninorm/decomposition-table.h
@@ -1,5 +1,5 @@
/* Decomposition of Unicode characters.
- Copyright (C) 2001-2003, 2009 Free Software Foundation, Inc.
+ Copyright (C) 2001-2003, 2009-2010 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This program is free software: you can redistribute it and/or modify it
@@ -34,15 +34,15 @@ decomp_index (ucs4_t uc)
{
int lookup1 = gl_uninorm_decomp_index_table.level1[index1];
if (lookup1 >= 0)
- {
- unsigned int index2 = (uc >> decomp_header_2) & decomp_header_3;
- int lookup2 = gl_uninorm_decomp_index_table.level2[lookup1 + index2];
- if (lookup2 >= 0)
- {
- unsigned int index3 = uc & decomp_header_4;
- return gl_uninorm_decomp_index_table.level3[lookup2 + index3];
- }
- }
+ {
+ unsigned int index2 = (uc >> decomp_header_2) & decomp_header_3;
+ int lookup2 = gl_uninorm_decomp_index_table.level2[lookup1 + index2];
+ if (lookup2 >= 0)
+ {
+ unsigned int index3 = uc & decomp_header_4;
+ return gl_uninorm_decomp_index_table.level3[lookup2 + index3];
+ }
+ }
}
return (unsigned short)(-1);
}
diff --git a/lib/uninorm/decomposition.c b/lib/uninorm/decomposition.c
index af03018..a29e105 100644
--- a/lib/uninorm/decomposition.c
+++ b/lib/uninorm/decomposition.c
@@ -1,5 +1,5 @@
/* Decomposition of Unicode characters.
- Copyright (C) 2009 Free Software Foundation, Inc.
+ Copyright (C) 2009-2010 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This program is free software: you can redistribute it and/or modify it
@@ -29,8 +29,8 @@ uc_decomposition (ucs4_t uc, int *decomp_tag, ucs4_t *decomposition)
{
/* Hangul syllable. See Unicode standard, chapter 3, section
"Hangul Syllable Decomposition", See also the clarification at
- <http://www.unicode.org/versions/Unicode5.1.0/>, section
- "Clarification of Hangul Jamo Handling". */
+ <http://www.unicode.org/versions/Unicode5.1.0/>, section
+ "Clarification of Hangul Jamo Handling". */
unsigned int t;
uc -= 0xAC00;
@@ -38,65 +38,65 @@ uc_decomposition (ucs4_t uc, int *decomp_tag, ucs4_t *decomposition)
*decomp_tag = UC_DECOMP_CANONICAL;
if (t == 0)
- {
- unsigned int v, l;
+ {
+ unsigned int v, l;
- uc = uc / 28;
- v = uc % 21;
- l = uc / 21;
+ uc = uc / 28;
+ v = uc % 21;
+ l = uc / 21;
- decomposition[0] = 0x1100 + l;
- decomposition[1] = 0x1161 + v;
- return 2;
- }
+ decomposition[0] = 0x1100 + l;
+ decomposition[1] = 0x1161 + v;
+ return 2;
+ }
else
- {
+ {
#if 1 /* Return the pairwise decomposition, not the full decomposition. */
- decomposition[0] = 0xAC00 + uc - t; /* = 0xAC00 + (l * 21 + v) * 28; */
- decomposition[1] = 0x11A7 + t;
- return 2;
+ decomposition[0] = 0xAC00 + uc - t; /* = 0xAC00 + (l * 21 + v) * 28; */
+ decomposition[1] = 0x11A7 + t;
+ return 2;
#else
- unsigned int v, l;
+ unsigned int v, l;
- uc = uc / 28;
- v = uc % 21;
- l = uc / 21;
+ uc = uc / 28;
+ v = uc % 21;
+ l = uc / 21;
- decomposition[0] = 0x1100 + l;
- decomposition[1] = 0x1161 + v;
- decomposition[2] = 0x11A7 + t;
- return 3;
+ decomposition[0] = 0x1100 + l;
+ decomposition[1] = 0x1161 + v;
+ decomposition[2] = 0x11A7 + t;
+ return 3;
#endif
- }
+ }
}
else if (uc < 0x110000)
{
unsigned short entry = decomp_index (uc);
if (entry != (unsigned short)(-1))
- {
- const unsigned char *p;
- unsigned int element;
- unsigned int length;
+ {
+ const unsigned char *p;
+ unsigned int element;
+ unsigned int length;
- p = &gl_uninorm_decomp_chars_table[3 * (entry & 0x7FFF)];
- element = (p[0] << 16) | (p[1] << 8) | p[2];
- /* The first element has 5 bits for the decomposition type. */
- *decomp_tag = (element >> 18) & 0x1f;
- length = 1;
- for (;;)
- {
- /* Every element has an 18 bits wide Unicode code point. */
- *decomposition = element & 0x3ffff;
- /* Bit 23 tells whether there are more elements, */
- if ((element & (1 << 23)) == 0)
- break;
- p += 3;
- element = (p[0] << 16) | (p[1] << 8) | p[2];
- decomposition++;
- length++;
- }
- return length;
- }
+ p = &gl_uninorm_decomp_chars_table[3 * (entry & 0x7FFF)];
+ element = (p[0] << 16) | (p[1] << 8) | p[2];
+ /* The first element has 5 bits for the decomposition type. */
+ *decomp_tag = (element >> 18) & 0x1f;
+ length = 1;
+ for (;;)
+ {
+ /* Every element has an 18 bits wide Unicode code point. */
+ *decomposition = element & 0x3ffff;
+ /* Bit 23 tells whether there are more elements, */
+ if ((element & (1 << 23)) == 0)
+ break;
+ p += 3;
+ element = (p[0] << 16) | (p[1] << 8) | p[2];
+ decomposition++;
+ length++;
+ }
+ return length;
+ }
}
return -1;
}
diff --git a/lib/uninorm/nfc.c b/lib/uninorm/nfc.c
index c205b16..ecc1aba 100644
--- a/lib/uninorm/nfc.c
+++ b/lib/uninorm/nfc.c
@@ -1,5 +1,5 @@
/* Unicode Normalization Form C.
- Copyright (C) 2009 Free Software Foundation, Inc.
+ Copyright (C) 2009-2010 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/uninorm/nfd.c b/lib/uninorm/nfd.c
index 54c98bc..1e7194d 100644
--- a/lib/uninorm/nfd.c
+++ b/lib/uninorm/nfd.c
@@ -1,5 +1,5 @@
/* Unicode Normalization Form D.
- Copyright (C) 2009 Free Software Foundation, Inc.
+ Copyright (C) 2009-2010 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/uninorm/nfkc.c b/lib/uninorm/nfkc.c
index 1fb52f1..fdd05ac 100644
--- a/lib/uninorm/nfkc.c
+++ b/lib/uninorm/nfkc.c
@@ -1,5 +1,5 @@
/* Unicode Normalization Form KC.
- Copyright (C) 2009 Free Software Foundation, Inc.
+ Copyright (C) 2009-2010 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/uninorm/nfkd.c b/lib/uninorm/nfkd.c
index 871dd64..cff0df2 100644
--- a/lib/uninorm/nfkd.c
+++ b/lib/uninorm/nfkd.c
@@ -1,5 +1,5 @@
/* Unicode Normalization Form KD.
- Copyright (C) 2009 Free Software Foundation, Inc.
+ Copyright (C) 2009-2010 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/uninorm/normalize-internal.h b/lib/uninorm/normalize-internal.h
index 0b346c6..f4c8c8e 100644
--- a/lib/uninorm/normalize-internal.h
+++ b/lib/uninorm/normalize-internal.h
@@ -1,5 +1,5 @@
/* Normalization of Unicode strings.
- Copyright (C) 2009 Free Software Foundation, Inc.
+ Copyright (C) 2009-2010 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/uninorm/u-normalize-internal.h b/lib/uninorm/u-normalize-internal.h
index 70c3255..43b7ec3 100644
--- a/lib/uninorm/u-normalize-internal.h
+++ b/lib/uninorm/u-normalize-internal.h
@@ -1,5 +1,5 @@
/* Decomposition and composition of Unicode strings.
- Copyright (C) 2009 Free Software Foundation, Inc.
+ Copyright (C) 2009-2010 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This program is free software: you can redistribute it and/or modify it
@@ -56,293 +56,293 @@ FUNC (uninorm_t nf, const UNIT *s, size_t n,
for (;;)
{
- int count;
- ucs4_t decomposed[UC_DECOMPOSITION_MAX_LENGTH];
- int decomposed_count;
- int i;
-
- if (s < s_end)
- {
- /* Fetch the next character. */
- count = U_MBTOUC_UNSAFE (&decomposed[0], s, s_end - s);
- decomposed_count = 1;
-
- /* Decompose it, recursively.
- It would be possible to precompute the recursive decomposition
- and store it in a table. But this would significantly increase
- the size of the decomposition tables, because for example for
- U+1FC1 the recursive canonical decomposition and the recursive
- compatibility decomposition are different. */
- {
- int curr;
-
- for (curr = 0; curr < decomposed_count; )
- {
- /* Invariant: decomposed[0..curr-1] is fully decomposed, i.e.
- all elements are atomic. */
- ucs4_t curr_decomposed[UC_DECOMPOSITION_MAX_LENGTH];
- int curr_decomposed_count;
-
- curr_decomposed_count = decomposer (decomposed[curr], curr_decomposed);
- if (curr_decomposed_count >= 0)
- {
- /* Move curr_decomposed[0..curr_decomposed_count-1] over
- decomposed[curr], making room. It's not worth using
- memcpy() here, since the counts are so small. */
- int shift = curr_decomposed_count - 1;
-
- if (shift < 0)
- abort ();
- if (shift > 0)
- {
- int j;
-
- decomposed_count += shift;
- if (decomposed_count > UC_DECOMPOSITION_MAX_LENGTH)
- abort ();
- for (j = decomposed_count - 1 - shift; j > curr; j--)
- decomposed[j + shift] = decomposed[j];
- }
- for (; shift >= 0; shift--)
- decomposed[curr + shift] = curr_decomposed[shift];
- }
- else
- {
- /* decomposed[curr] is atomic. */
- curr++;
- }
- }
- }
- }
- else
- {
- count = 0;
- decomposed_count = 0;
- }
-
- i = 0;
- for (;;)
- {
- ucs4_t uc;
- int ccc;
-
- if (s < s_end)
- {
- /* Fetch the next character from the decomposition. */
- if (i == decomposed_count)
- break;
- uc = decomposed[i];
- ccc = uc_combining_class (uc);
- }
- else
- {
- /* End of string reached. */
- uc = 0;
- ccc = 0;
- }
-
- if (ccc == 0)
- {
- size_t j;
-
- /* Apply the canonical ordering algorithm to the accumulated
- sequence of characters. */
- if (sortbuf_count > 1)
- gl_uninorm_decompose_merge_sort_inplace (sortbuf, sortbuf_count,
- sortbuf + sortbuf_count);
-
- if (composer != NULL)
- {
- /* Attempt to combine decomposed characters, as specified
- in the Unicode Standard Annex #15 "Unicode Normalization
- Forms". We need to check
- 1. whether the first accumulated character is a
- "starter" (i.e. has ccc = 0). This is usually the
- case. But when the string starts with a
- non-starter, the sortbuf also starts with a
- non-starter. Btw, this check could also be
- omitted, because the composition table has only
- entries (code1, code2) for which code1 is a
- starter; if the first accumulated character is not
- a starter, no lookup will succeed.
- 2. If the sortbuf has more than one character, check
- for each of these characters that are not "blocked"
- from the starter (i.e. have a ccc that is higher
- than the ccc of the previous character) whether it
- can be combined with the first character.
- 3. If only one character is left in sortbuf, check
- whether it can be combined with the next character
- (also a starter). */
- if (sortbuf_count > 0 && sortbuf[0].ccc == 0)
- {
- for (j = 1; j < sortbuf_count; )
- {
- if (sortbuf[j].ccc > sortbuf[j - 1].ccc)
- {
- ucs4_t combined =
- composer (sortbuf[0].code, sortbuf[j].code);
- if (combined)
- {
- size_t k;
-
- sortbuf[0].code = combined;
- /* sortbuf[0].ccc = 0, still valid. */
- for (k = j + 1; k < sortbuf_count; k++)
- sortbuf[k - 1] = sortbuf[k];
- sortbuf_count--;
- continue;
- }
- }
- j++;
- }
- if (s < s_end && sortbuf_count == 1)
- {
- ucs4_t combined =
- composer (sortbuf[0].code, uc);
- if (combined)
- {
- uc = combined;
- ccc = 0;
- /* uc could be further combined with subsequent
- characters. So don't put it into sortbuf[0] in
- this round, only in the next round. */
- sortbuf_count = 0;
- }
- }
- }
- }
-
- for (j = 0; j < sortbuf_count; j++)
- {
- ucs4_t muc = sortbuf[j].code;
-
- /* Append muc to the result accumulator. */
- if (length < allocated)
- {
- int ret =
- U_UCTOMB (result + length, muc, allocated - length);
- if (ret == -1)
- {
- errno = EINVAL;
- goto fail;
- }
- if (ret >= 0)
- {
- length += ret;
- goto done_appending;
- }
- }
- {
- size_t old_allocated = allocated;
- size_t new_allocated = 2 * old_allocated;
- if (new_allocated < 64)
- new_allocated = 64;
- if (new_allocated < old_allocated) /* integer overflow? */
- abort ();
- {
- UNIT *larger_result;
- if (result == NULL)
- {
- larger_result =
- (UNIT *) malloc (new_allocated * sizeof (UNIT));
- if (larger_result == NULL)
- {
- errno = ENOMEM;
- goto fail;
- }
- }
- else if (result == resultbuf)
- {
- larger_result =
- (UNIT *) malloc (new_allocated * sizeof (UNIT));
- if (larger_result == NULL)
- {
- errno = ENOMEM;
- goto fail;
- }
- U_CPY (larger_result, resultbuf, length);
- }
- else
- {
- larger_result =
- (UNIT *) realloc (result, new_allocated * sizeof (UNIT));
- if (larger_result == NULL)
- {
- errno = ENOMEM;
- goto fail;
- }
- }
- result = larger_result;
- allocated = new_allocated;
- {
- int ret =
- U_UCTOMB (result + length, muc, allocated - length);
- if (ret == -1)
- {
- errno = EINVAL;
- goto fail;
- }
- if (ret < 0)
- abort ();
- length += ret;
- goto done_appending;
- }
- }
- }
- done_appending: ;
- }
-
- /* sortbuf is now empty. */
- sortbuf_count = 0;
- }
-
- if (!(s < s_end))
- /* End of string reached. */
- break;
-
- /* Append (uc, ccc) to sortbuf. */
- if (sortbuf_count == sortbuf_allocated)
- {
- struct ucs4_with_ccc *new_sortbuf;
-
- sortbuf_allocated = 2 * sortbuf_allocated;
- if (sortbuf_allocated < sortbuf_count) /* integer overflow? */
- abort ();
- new_sortbuf =
- (struct ucs4_with_ccc *) malloc (2 * sortbuf_allocated * sizeof (struct ucs4_with_ccc));
- memcpy (new_sortbuf, sortbuf,
- sortbuf_count * sizeof (struct ucs4_with_ccc));
- if (sortbuf != sortbuf_preallocated)
- free (sortbuf);
- sortbuf = new_sortbuf;
- }
- sortbuf[sortbuf_count].code = uc;
- sortbuf[sortbuf_count].ccc = ccc;
- sortbuf_count++;
-
- i++;
- }
-
- if (!(s < s_end))
- /* End of string reached. */
- break;
-
- s += count;
+ int count;
+ ucs4_t decomposed[UC_DECOMPOSITION_MAX_LENGTH];
+ int decomposed_count;
+ int i;
+
+ if (s < s_end)
+ {
+ /* Fetch the next character. */
+ count = U_MBTOUC_UNSAFE (&decomposed[0], s, s_end - s);
+ decomposed_count = 1;
+
+ /* Decompose it, recursively.
+ It would be possible to precompute the recursive decomposition
+ and store it in a table. But this would significantly increase
+ the size of the decomposition tables, because for example for
+ U+1FC1 the recursive canonical decomposition and the recursive
+ compatibility decomposition are different. */
+ {
+ int curr;
+
+ for (curr = 0; curr < decomposed_count; )
+ {
+ /* Invariant: decomposed[0..curr-1] is fully decomposed, i.e.
+ all elements are atomic. */
+ ucs4_t curr_decomposed[UC_DECOMPOSITION_MAX_LENGTH];
+ int curr_decomposed_count;
+
+ curr_decomposed_count = decomposer (decomposed[curr], curr_decomposed);
+ if (curr_decomposed_count >= 0)
+ {
+ /* Move curr_decomposed[0..curr_decomposed_count-1] over
+ decomposed[curr], making room. It's not worth using
+ memcpy() here, since the counts are so small. */
+ int shift = curr_decomposed_count - 1;
+
+ if (shift < 0)
+ abort ();
+ if (shift > 0)
+ {
+ int j;
+
+ decomposed_count += shift;
+ if (decomposed_count > UC_DECOMPOSITION_MAX_LENGTH)
+ abort ();
+ for (j = decomposed_count - 1 - shift; j > curr; j--)
+ decomposed[j + shift] = decomposed[j];
+ }
+ for (; shift >= 0; shift--)
+ decomposed[curr + shift] = curr_decomposed[shift];
+ }
+ else
+ {
+ /* decomposed[curr] is atomic. */
+ curr++;
+ }
+ }
+ }
+ }
+ else
+ {
+ count = 0;
+ decomposed_count = 0;
+ }
+
+ i = 0;
+ for (;;)
+ {
+ ucs4_t uc;
+ int ccc;
+
+ if (s < s_end)
+ {
+ /* Fetch the next character from the decomposition. */
+ if (i == decomposed_count)
+ break;
+ uc = decomposed[i];
+ ccc = uc_combining_class (uc);
+ }
+ else
+ {
+ /* End of string reached. */
+ uc = 0;
+ ccc = 0;
+ }
+
+ if (ccc == 0)
+ {
+ size_t j;
+
+ /* Apply the canonical ordering algorithm to the accumulated
+ sequence of characters. */
+ if (sortbuf_count > 1)
+ gl_uninorm_decompose_merge_sort_inplace (sortbuf, sortbuf_count,
+ sortbuf + sortbuf_count);
+
+ if (composer != NULL)
+ {
+ /* Attempt to combine decomposed characters, as specified
+ in the Unicode Standard Annex #15 "Unicode Normalization
+ Forms". We need to check
+ 1. whether the first accumulated character is a
+ "starter" (i.e. has ccc = 0). This is usually the
+ case. But when the string starts with a
+ non-starter, the sortbuf also starts with a
+ non-starter. Btw, this check could also be
+ omitted, because the composition table has only
+ entries (code1, code2) for which code1 is a
+ starter; if the first accumulated character is not
+ a starter, no lookup will succeed.
+ 2. If the sortbuf has more than one character, check
+ for each of these characters that are not "blocked"
+ from the starter (i.e. have a ccc that is higher
+ than the ccc of the previous character) whether it
+ can be combined with the first character.
+ 3. If only one character is left in sortbuf, check
+ whether it can be combined with the next character
+ (also a starter). */
+ if (sortbuf_count > 0 && sortbuf[0].ccc == 0)
+ {
+ for (j = 1; j < sortbuf_count; )
+ {
+ if (sortbuf[j].ccc > sortbuf[j - 1].ccc)
+ {
+ ucs4_t combined =
+ composer (sortbuf[0].code, sortbuf[j].code);
+ if (combined)
+ {
+ size_t k;
+
+ sortbuf[0].code = combined;
+ /* sortbuf[0].ccc = 0, still valid. */
+ for (k = j + 1; k < sortbuf_count; k++)
+ sortbuf[k - 1] = sortbuf[k];
+ sortbuf_count--;
+ continue;
+ }
+ }
+ j++;
+ }
+ if (s < s_end && sortbuf_count == 1)
+ {
+ ucs4_t combined =
+ composer (sortbuf[0].code, uc);
+ if (combined)
+ {
+ uc = combined;
+ ccc = 0;
+ /* uc could be further combined with subsequent
+ characters. So don't put it into sortbuf[0] in
+ this round, only in the next round. */
+ sortbuf_count = 0;
+ }
+ }
+ }
+ }
+
+ for (j = 0; j < sortbuf_count; j++)
+ {
+ ucs4_t muc = sortbuf[j].code;
+
+ /* Append muc to the result accumulator. */
+ if (length < allocated)
+ {
+ int ret =
+ U_UCTOMB (result + length, muc, allocated - length);
+ if (ret == -1)
+ {
+ errno = EINVAL;
+ goto fail;
+ }
+ if (ret >= 0)
+ {
+ length += ret;
+ goto done_appending;
+ }
+ }
+ {
+ size_t old_allocated = allocated;
+ size_t new_allocated = 2 * old_allocated;
+ if (new_allocated < 64)
+ new_allocated = 64;
+ if (new_allocated < old_allocated) /* integer overflow? */
+ abort ();
+ {
+ UNIT *larger_result;
+ if (result == NULL)
+ {
+ larger_result =
+ (UNIT *) malloc (new_allocated * sizeof (UNIT));
+ if (larger_result == NULL)
+ {
+ errno = ENOMEM;
+ goto fail;
+ }
+ }
+ else if (result == resultbuf)
+ {
+ larger_result =
+ (UNIT *) malloc (new_allocated * sizeof (UNIT));
+ if (larger_result == NULL)
+ {
+ errno = ENOMEM;
+ goto fail;
+ }
+ U_CPY (larger_result, resultbuf, length);
+ }
+ else
+ {
+ larger_result =
+ (UNIT *) realloc (result, new_allocated * sizeof (UNIT));
+ if (larger_result == NULL)
+ {
+ errno = ENOMEM;
+ goto fail;
+ }
+ }
+ result = larger_result;
+ allocated = new_allocated;
+ {
+ int ret =
+ U_UCTOMB (result + length, muc, allocated - length);
+ if (ret == -1)
+ {
+ errno = EINVAL;
+ goto fail;
+ }
+ if (ret < 0)
+ abort ();
+ length += ret;
+ goto done_appending;
+ }
+ }
+ }
+ done_appending: ;
+ }
+
+ /* sortbuf is now empty. */
+ sortbuf_count = 0;
+ }
+
+ if (!(s < s_end))
+ /* End of string reached. */
+ break;
+
+ /* Append (uc, ccc) to sortbuf. */
+ if (sortbuf_count == sortbuf_allocated)
+ {
+ struct ucs4_with_ccc *new_sortbuf;
+
+ sortbuf_allocated = 2 * sortbuf_allocated;
+ if (sortbuf_allocated < sortbuf_count) /* integer overflow? */
+ abort ();
+ new_sortbuf =
+ (struct ucs4_with_ccc *) malloc (2 * sortbuf_allocated * sizeof (struct ucs4_with_ccc));
+ memcpy (new_sortbuf, sortbuf,
+ sortbuf_count * sizeof (struct ucs4_with_ccc));
+ if (sortbuf != sortbuf_preallocated)
+ free (sortbuf);
+ sortbuf = new_sortbuf;
+ }
+ sortbuf[sortbuf_count].code = uc;
+ sortbuf[sortbuf_count].ccc = ccc;
+ sortbuf_count++;
+
+ i++;
+ }
+
+ if (!(s < s_end))
+ /* End of string reached. */
+ break;
+
+ s += count;
}
}
if (length == 0)
{
if (result == NULL)
- {
- /* Return a non-NULL value. NULL means error. */
- result = (UNIT *) malloc (1);
- if (result == NULL)
- {
- errno = ENOMEM;
- goto fail;
- }
- }
+ {
+ /* Return a non-NULL value. NULL means error. */
+ result = (UNIT *) malloc (1);
+ if (result == NULL)
+ {
+ errno = ENOMEM;
+ goto fail;
+ }
+ }
}
else if (result != resultbuf && length < allocated)
{
@@ -351,7 +351,7 @@ FUNC (uninorm_t nf, const UNIT *s, size_t n,
memory = (UNIT *) realloc (result, length * sizeof (UNIT));
if (memory != NULL)
- result = memory;
+ result = memory;
}
if (sortbuf_count > 0)
diff --git a/lib/uninorm/u-normcmp.h b/lib/uninorm/u-normcmp.h
index 6616440..5cda5db 100644
--- a/lib/uninorm/u-normcmp.h
+++ b/lib/uninorm/u-normcmp.h
@@ -1,5 +1,5 @@
/* Normalization insensitive comparison of Unicode strings.
- Copyright (C) 2009 Free Software Foundation, Inc.
+ Copyright (C) 2009-2010 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This program is free software: you can redistribute it and/or modify it
@@ -40,11 +40,11 @@ FUNC (const UNIT *s1, size_t n1, const UNIT *s2, size_t n2,
if (norms2 == NULL)
{
if (norms1 != buf1)
- {
- int saved_errno = errno;
- free (norms1);
- errno = saved_errno;
- }
+ {
+ int saved_errno = errno;
+ free (norms1);
+ errno = saved_errno;
+ }
return -1;
}
diff --git a/lib/uninorm/u-normcoll.h b/lib/uninorm/u-normcoll.h
index e30880b..d76e725 100644
--- a/lib/uninorm/u-normcoll.h
+++ b/lib/uninorm/u-normcoll.h
@@ -1,5 +1,5 @@
/* Locale dependent, normalization insensitive comparison of Unicode strings.
- Copyright (C) 2009 Free Software Foundation, Inc.
+ Copyright (C) 2009-2010 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This program is free software: you can redistribute it and/or modify it
@@ -40,17 +40,17 @@ FUNC (const UNIT *s1, size_t n1, const UNIT *s2, size_t n2,
if (transformed2 == NULL)
{
if (transformed1 != buf1)
- {
- int saved_errno = errno;
- free (transformed1);
- errno = saved_errno;
- }
+ {
+ int saved_errno = errno;
+ free (transformed1);
+ errno = saved_errno;
+ }
return -1;
}
/* Compare the transformed strings. */
cmp = memcmp2 (transformed1, transformed1_length,
- transformed2, transformed2_length);
+ transformed2, transformed2_length);
if (cmp < 0)
cmp = -1;
else if (cmp > 0)
diff --git a/lib/uninorm/u-normxfrm.h b/lib/uninorm/u-normxfrm.h
index 6ed1e3c..ce84ff6 100644
--- a/lib/uninorm/u-normxfrm.h
+++ b/lib/uninorm/u-normxfrm.h
@@ -1,5 +1,5 @@
/* Locale dependent transformation for comparison of Unicode strings.
- Copyright (C) 2009 Free Software Foundation, Inc.
+ Copyright (C) 2009-2010 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This program is free software: you can redistribute it and/or modify it
@@ -37,18 +37,18 @@ FUNC (const UNIT *s, size_t n, uninorm_t nf,
/* Convert it to locale encoding. */
convs_length = sizeof (convsbuf) - 1;
convs = U_CONV_TO_ENCODING (locale_charset (),
- iconveh_error,
- norms, norms_length,
- NULL,
- convsbuf, &convs_length);
+ iconveh_error,
+ norms, norms_length,
+ NULL,
+ convsbuf, &convs_length);
if (convs == NULL)
{
if (norms != normsbuf)
- {
- int saved_errno = errno;
- free (norms);
- errno = saved_errno;
- }
+ {
+ int saved_errno = errno;
+ free (norms);
+ errno = saved_errno;
+ }
return NULL;
}
@@ -60,11 +60,11 @@ FUNC (const UNIT *s, size_t n, uninorm_t nf,
{
char *memory = (char *) realloc (convs, convs_length + 1);
if (memory == NULL)
- {
- free (convs);
- errno = ENOMEM;
- return NULL;
- }
+ {
+ free (convs);
+ errno = ENOMEM;
+ return NULL;
+ }
convs = memory;
}
@@ -73,11 +73,11 @@ FUNC (const UNIT *s, size_t n, uninorm_t nf,
if (result == NULL)
{
if (convs != convsbuf)
- {
- int saved_errno = errno;
- free (convs);
- errno = saved_errno;
- }
+ {
+ int saved_errno = errno;
+ free (convs);
+ errno = saved_errno;
+ }
return NULL;
}
diff --git a/lib/uninorm/u16-normalize.c b/lib/uninorm/u16-normalize.c
index 86334d5..df50e8d 100644
--- a/lib/uninorm/u16-normalize.c
+++ b/lib/uninorm/u16-normalize.c
@@ -1,5 +1,5 @@
/* Normalization of UTF-16 strings.
- Copyright (C) 2009 Free Software Foundation, Inc.
+ Copyright (C) 2009-2010 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/uninorm/u16-normcmp.c b/lib/uninorm/u16-normcmp.c
index c66cc1b..e246a26 100644
--- a/lib/uninorm/u16-normcmp.c
+++ b/lib/uninorm/u16-normcmp.c
@@ -1,5 +1,5 @@
/* Normalization insensitive comparison of UTF-16 strings.
- Copyright (C) 2009 Free Software Foundation, Inc.
+ Copyright (C) 2009-2010 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/uninorm/u16-normcoll.c b/lib/uninorm/u16-normcoll.c
index a4f76c1..b851472 100644
--- a/lib/uninorm/u16-normcoll.c
+++ b/lib/uninorm/u16-normcoll.c
@@ -1,5 +1,5 @@
/* Locale dependent, normalization insensitive comparison of UTF-16 strings.
- Copyright (C) 2009 Free Software Foundation, Inc.
+ Copyright (C) 2009-2010 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/uninorm/u16-normxfrm.c b/lib/uninorm/u16-normxfrm.c
index cc22d8e..41d274a 100644
--- a/lib/uninorm/u16-normxfrm.c
+++ b/lib/uninorm/u16-normxfrm.c
@@ -1,5 +1,5 @@
/* Locale dependent transformation for comparison of UTF-16 strings.
- Copyright (C) 2009 Free Software Foundation, Inc.
+ Copyright (C) 2009-2010 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/uninorm/u32-normalize.c b/lib/uninorm/u32-normalize.c
index 6549423..9ab8e5d 100644
--- a/lib/uninorm/u32-normalize.c
+++ b/lib/uninorm/u32-normalize.c
@@ -1,5 +1,5 @@
/* Normalization of UTF-32 strings.
- Copyright (C) 2009 Free Software Foundation, Inc.
+ Copyright (C) 2009-2010 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/uninorm/u32-normcmp.c b/lib/uninorm/u32-normcmp.c
index 58f890d..705c345 100644
--- a/lib/uninorm/u32-normcmp.c
+++ b/lib/uninorm/u32-normcmp.c
@@ -1,5 +1,5 @@
/* Normalization insensitive comparison of UTF-32 strings.
- Copyright (C) 2009 Free Software Foundation, Inc.
+ Copyright (C) 2009-2010 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/uninorm/u32-normcoll.c b/lib/uninorm/u32-normcoll.c
index 0343f76..3770803 100644
--- a/lib/uninorm/u32-normcoll.c
+++ b/lib/uninorm/u32-normcoll.c
@@ -1,5 +1,5 @@
/* Locale dependent, normalization insensitive comparison of UTF-32 strings.
- Copyright (C) 2009 Free Software Foundation, Inc.
+ Copyright (C) 2009-2010 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/uninorm/u32-normxfrm.c b/lib/uninorm/u32-normxfrm.c
index 4b4dbc0..7e953f9 100644
--- a/lib/uninorm/u32-normxfrm.c
+++ b/lib/uninorm/u32-normxfrm.c
@@ -1,5 +1,5 @@
/* Locale dependent transformation for comparison of UTF-32 strings.
- Copyright (C) 2009 Free Software Foundation, Inc.
+ Copyright (C) 2009-2010 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/uninorm/u8-normalize.c b/lib/uninorm/u8-normalize.c
index 7e003ec..6367994 100644
--- a/lib/uninorm/u8-normalize.c
+++ b/lib/uninorm/u8-normalize.c
@@ -1,5 +1,5 @@
/* Normalization of UTF-8 strings.
- Copyright (C) 2009 Free Software Foundation, Inc.
+ Copyright (C) 2009-2010 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/uninorm/u8-normcmp.c b/lib/uninorm/u8-normcmp.c
index 6994f6c..02a4d5e 100644
--- a/lib/uninorm/u8-normcmp.c
+++ b/lib/uninorm/u8-normcmp.c
@@ -1,5 +1,5 @@
/* Normalization insensitive comparison of UTF-8 strings.
- Copyright (C) 2009 Free Software Foundation, Inc.
+ Copyright (C) 2009-2010 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/uninorm/u8-normcoll.c b/lib/uninorm/u8-normcoll.c
index 4d163d4..5c392d9 100644
--- a/lib/uninorm/u8-normcoll.c
+++ b/lib/uninorm/u8-normcoll.c
@@ -1,5 +1,5 @@
/* Locale dependent, normalization insensitive comparison of UTF-8 strings.
- Copyright (C) 2009 Free Software Foundation, Inc.
+ Copyright (C) 2009-2010 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/uninorm/u8-normxfrm.c b/lib/uninorm/u8-normxfrm.c
index 31da05d..47d4fdc 100644
--- a/lib/uninorm/u8-normxfrm.c
+++ b/lib/uninorm/u8-normxfrm.c
@@ -1,5 +1,5 @@
/* Locale dependent transformation for comparison of UTF-8 strings.
- Copyright (C) 2009 Free Software Foundation, Inc.
+ Copyright (C) 2009-2010 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/uninorm/uninorm-filter.c b/lib/uninorm/uninorm-filter.c
index 1d03cfa..dbc5e10 100644
--- a/lib/uninorm/uninorm-filter.c
+++ b/lib/uninorm/uninorm-filter.c
@@ -1,5 +1,5 @@
/* Stream-based normalization of Unicode strings.
- Copyright (C) 2009 Free Software Foundation, Inc.
+ Copyright (C) 2009-2010 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This program is free software: you can redistribute it and/or modify it
@@ -50,8 +50,8 @@ struct uninorm_filter
struct uninorm_filter *
uninorm_filter_create (uninorm_t nf,
- int (*stream_func) (void *stream_data, ucs4_t uc),
- void *stream_data)
+ int (*stream_func) (void *stream_data, ucs4_t uc),
+ void *stream_data)
{
struct uninorm_filter *filter =
(struct uninorm_filter *) malloc (sizeof (struct uninorm_filter));
@@ -92,40 +92,40 @@ uninorm_filter_write (struct uninorm_filter *filter, ucs4_t uc_arg)
for (curr = 0; curr < decomposed_count; )
{
- /* Invariant: decomposed[0..curr-1] is fully decomposed, i.e.
- all elements are atomic. */
- ucs4_t curr_decomposed[UC_DECOMPOSITION_MAX_LENGTH];
- int curr_decomposed_count;
-
- curr_decomposed_count =
- filter->decomposer (decomposed[curr], curr_decomposed);
- if (curr_decomposed_count >= 0)
- {
- /* Move curr_decomposed[0..curr_decomposed_count-1] over
- decomposed[curr], making room. It's not worth using
- memcpy() here, since the counts are so small. */
- int shift = curr_decomposed_count - 1;
-
- if (shift < 0)
- abort ();
- if (shift > 0)
- {
- int j;
-
- decomposed_count += shift;
- if (decomposed_count > UC_DECOMPOSITION_MAX_LENGTH)
- abort ();
- for (j = decomposed_count - 1 - shift; j > curr; j--)
- decomposed[j + shift] = decomposed[j];
- }
- for (; shift >= 0; shift--)
- decomposed[curr + shift] = curr_decomposed[shift];
- }
- else
- {
- /* decomposed[curr] is atomic. */
- curr++;
- }
+ /* Invariant: decomposed[0..curr-1] is fully decomposed, i.e.
+ all elements are atomic. */
+ ucs4_t curr_decomposed[UC_DECOMPOSITION_MAX_LENGTH];
+ int curr_decomposed_count;
+
+ curr_decomposed_count =
+ filter->decomposer (decomposed[curr], curr_decomposed);
+ if (curr_decomposed_count >= 0)
+ {
+ /* Move curr_decomposed[0..curr_decomposed_count-1] over
+ decomposed[curr], making room. It's not worth using
+ memcpy() here, since the counts are so small. */
+ int shift = curr_decomposed_count - 1;
+
+ if (shift < 0)
+ abort ();
+ if (shift > 0)
+ {
+ int j;
+
+ decomposed_count += shift;
+ if (decomposed_count > UC_DECOMPOSITION_MAX_LENGTH)
+ abort ();
+ for (j = decomposed_count - 1 - shift; j > curr; j--)
+ decomposed[j + shift] = decomposed[j];
+ }
+ for (; shift >= 0; shift--)
+ decomposed[curr + shift] = curr_decomposed[shift];
+ }
+ else
+ {
+ /* decomposed[curr] is atomic. */
+ curr++;
+ }
}
}
@@ -137,119 +137,119 @@ uninorm_filter_write (struct uninorm_filter *filter, ucs4_t uc_arg)
for (i = 0; i < decomposed_count; i++)
{
- /* Fetch the next character from the decomposition. */
- ucs4_t uc = decomposed[i];
- int ccc = uc_combining_class (uc);
-
- if (ccc == 0)
- {
- size_t j;
-
- /* Apply the canonical ordering algorithm to the accumulated
- sequence of characters. */
- if (sortbuf_count > 1)
- gl_uninorm_decompose_merge_sort_inplace (sortbuf, sortbuf_count,
- sortbuf + sortbuf_count);
-
- if (filter->composer != NULL)
- {
- /* Attempt to combine decomposed characters, as specified
- in the Unicode Standard Annex #15 "Unicode Normalization
- Forms". We need to check
- 1. whether the first accumulated character is a
- "starter" (i.e. has ccc = 0). This is usually the
- case. But when the string starts with a
- non-starter, the sortbuf also starts with a
- non-starter. Btw, this check could also be
- omitted, because the composition table has only
- entries (code1, code2) for which code1 is a
- starter; if the first accumulated character is not
- a starter, no lookup will succeed.
- 2. If the sortbuf has more than one character, check
- for each of these characters that are not "blocked"
- from the starter (i.e. have a ccc that is higher
- than the ccc of the previous character) whether it
- can be combined with the first character.
- 3. If only one character is left in sortbuf, check
- whether it can be combined with the next character
- (also a starter). */
- if (sortbuf_count > 0 && sortbuf[0].ccc == 0)
- {
- for (j = 1; j < sortbuf_count; )
- {
- if (sortbuf[j].ccc > sortbuf[j - 1].ccc)
- {
- ucs4_t combined =
- filter->composer (sortbuf[0].code, sortbuf[j].code);
- if (combined)
- {
- size_t k;
-
- sortbuf[0].code = combined;
- /* sortbuf[0].ccc = 0, still valid. */
- for (k = j + 1; k < sortbuf_count; k++)
- sortbuf[k - 1] = sortbuf[k];
- sortbuf_count--;
- continue;
- }
- }
- j++;
- }
- if (sortbuf_count == 1)
- {
- ucs4_t combined =
- filter->composer (sortbuf[0].code, uc);
- if (combined)
- {
- uc = combined;
- ccc = 0;
- /* uc could be further combined with subsequent
- characters. So don't put it into sortbuf[0] in
- this round, only in the next round. */
- sortbuf_count = 0;
- }
- }
- }
- }
-
- for (j = 0; j < sortbuf_count; j++)
- {
- ucs4_t muc = sortbuf[j].code;
-
- /* Output muc to the encapsulated stream. */
- int ret = filter->stream_func (filter->stream_data, muc);
- if (ret < 0)
- {
- /* errno is set here. */
- filter->sortbuf_count = 0;
- return -1;
- }
- }
-
- /* sortbuf is now empty. */
- sortbuf_count = 0;
- }
-
- /* Append (uc, ccc) to sortbuf. */
- if (sortbuf_count == filter->sortbuf_allocated)
- {
- struct ucs4_with_ccc *new_sortbuf;
-
- filter->sortbuf_allocated = 2 * filter->sortbuf_allocated;
- if (filter->sortbuf_allocated < sortbuf_count) /* integer overflow? */
- abort ();
- new_sortbuf =
- (struct ucs4_with_ccc *)
- malloc (2 * filter->sortbuf_allocated * sizeof (struct ucs4_with_ccc));
- memcpy (new_sortbuf, filter->sortbuf,
- sortbuf_count * sizeof (struct ucs4_with_ccc));
- if (filter->sortbuf != filter->sortbuf_preallocated)
- free (filter->sortbuf);
- filter->sortbuf = new_sortbuf;
- }
- filter->sortbuf[sortbuf_count].code = uc;
- filter->sortbuf[sortbuf_count].ccc = ccc;
- sortbuf_count++;
+ /* Fetch the next character from the decomposition. */
+ ucs4_t uc = decomposed[i];
+ int ccc = uc_combining_class (uc);
+
+ if (ccc == 0)
+ {
+ size_t j;
+
+ /* Apply the canonical ordering algorithm to the accumulated
+ sequence of characters. */
+ if (sortbuf_count > 1)
+ gl_uninorm_decompose_merge_sort_inplace (sortbuf, sortbuf_count,
+ sortbuf + sortbuf_count);
+
+ if (filter->composer != NULL)
+ {
+ /* Attempt to combine decomposed characters, as specified
+ in the Unicode Standard Annex #15 "Unicode Normalization
+ Forms". We need to check
+ 1. whether the first accumulated character is a
+ "starter" (i.e. has ccc = 0). This is usually the
+ case. But when the string starts with a
+ non-starter, the sortbuf also starts with a
+ non-starter. Btw, this check could also be
+ omitted, because the composition table has only
+ entries (code1, code2) for which code1 is a
+ starter; if the first accumulated character is not
+ a starter, no lookup will succeed.
+ 2. If the sortbuf has more than one character, check
+ for each of these characters that are not "blocked"
+ from the starter (i.e. have a ccc that is higher
+ than the ccc of the previous character) whether it
+ can be combined with the first character.
+ 3. If only one character is left in sortbuf, check
+ whether it can be combined with the next character
+ (also a starter). */
+ if (sortbuf_count > 0 && sortbuf[0].ccc == 0)
+ {
+ for (j = 1; j < sortbuf_count; )
+ {
+ if (sortbuf[j].ccc > sortbuf[j - 1].ccc)
+ {
+ ucs4_t combined =
+ filter->composer (sortbuf[0].code, sortbuf[j].code);
+ if (combined)
+ {
+ size_t k;
+
+ sortbuf[0].code = combined;
+ /* sortbuf[0].ccc = 0, still valid. */
+ for (k = j + 1; k < sortbuf_count; k++)
+ sortbuf[k - 1] = sortbuf[k];
+ sortbuf_count--;
+ continue;
+ }
+ }
+ j++;
+ }
+ if (sortbuf_count == 1)
+ {
+ ucs4_t combined =
+ filter->composer (sortbuf[0].code, uc);
+ if (combined)
+ {
+ uc = combined;
+ ccc = 0;
+ /* uc could be further combined with subsequent
+ characters. So don't put it into sortbuf[0] in
+ this round, only in the next round. */
+ sortbuf_count = 0;
+ }
+ }
+ }
+ }
+
+ for (j = 0; j < sortbuf_count; j++)
+ {
+ ucs4_t muc = sortbuf[j].code;
+
+ /* Output muc to the encapsulated stream. */
+ int ret = filter->stream_func (filter->stream_data, muc);
+ if (ret < 0)
+ {
+ /* errno is set here. */
+ filter->sortbuf_count = 0;
+ return -1;
+ }
+ }
+
+ /* sortbuf is now empty. */
+ sortbuf_count = 0;
+ }
+
+ /* Append (uc, ccc) to sortbuf. */
+ if (sortbuf_count == filter->sortbuf_allocated)
+ {
+ struct ucs4_with_ccc *new_sortbuf;
+
+ filter->sortbuf_allocated = 2 * filter->sortbuf_allocated;
+ if (filter->sortbuf_allocated < sortbuf_count) /* integer overflow? */
+ abort ();
+ new_sortbuf =
+ (struct ucs4_with_ccc *)
+ malloc (2 * filter->sortbuf_allocated * sizeof (struct ucs4_with_ccc));
+ memcpy (new_sortbuf, filter->sortbuf,
+ sortbuf_count * sizeof (struct ucs4_with_ccc));
+ if (filter->sortbuf != filter->sortbuf_preallocated)
+ free (filter->sortbuf);
+ filter->sortbuf = new_sortbuf;
+ }
+ filter->sortbuf[sortbuf_count].code = uc;
+ filter->sortbuf[sortbuf_count].ccc = ccc;
+ sortbuf_count++;
}
filter->sortbuf_count = sortbuf_count;
@@ -276,53 +276,53 @@ uninorm_filter_flush (struct uninorm_filter *filter)
sequence of characters. */
if (sortbuf_count > 1)
gl_uninorm_decompose_merge_sort_inplace (sortbuf, sortbuf_count,
- sortbuf + sortbuf_count);
+ sortbuf + sortbuf_count);
if (filter->composer != NULL)
{
/* Attempt to combine decomposed characters, as specified
- in the Unicode Standard Annex #15 "Unicode Normalization
- Forms". We need to check
- 1. whether the first accumulated character is a
- "starter" (i.e. has ccc = 0). This is usually the
- case. But when the string starts with a
- non-starter, the sortbuf also starts with a
- non-starter. Btw, this check could also be
- omitted, because the composition table has only
- entries (code1, code2) for which code1 is a
- starter; if the first accumulated character is not
- a starter, no lookup will succeed.
- 2. If the sortbuf has more than one character, check
- for each of these characters that are not "blocked"
- from the starter (i.e. have a ccc that is higher
- than the ccc of the previous character) whether it
- can be combined with the first character.
- 3. If only one character is left in sortbuf, check
- whether it can be combined with the next character
- (also a starter). */
+ in the Unicode Standard Annex #15 "Unicode Normalization
+ Forms". We need to check
+ 1. whether the first accumulated character is a
+ "starter" (i.e. has ccc = 0). This is usually the
+ case. But when the string starts with a
+ non-starter, the sortbuf also starts with a
+ non-starter. Btw, this check could also be
+ omitted, because the composition table has only
+ entries (code1, code2) for which code1 is a
+ starter; if the first accumulated character is not
+ a starter, no lookup will succeed.
+ 2. If the sortbuf has more than one character, check
+ for each of these characters that are not "blocked"
+ from the starter (i.e. have a ccc that is higher
+ than the ccc of the previous character) whether it
+ can be combined with the first character.
+ 3. If only one character is left in sortbuf, check
+ whether it can be combined with the next character
+ (also a starter). */
if (sortbuf_count > 0 && sortbuf[0].ccc == 0)
- {
- for (j = 1; j < sortbuf_count; )
- {
- if (sortbuf[j].ccc > sortbuf[j - 1].ccc)
- {
- ucs4_t combined =
- filter->composer (sortbuf[0].code, sortbuf[j].code);
- if (combined)
- {
- size_t k;
-
- sortbuf[0].code = combined;
- /* sortbuf[0].ccc = 0, still valid. */
- for (k = j + 1; k < sortbuf_count; k++)
- sortbuf[k - 1] = sortbuf[k];
- sortbuf_count--;
- continue;
- }
- }
- j++;
- }
- }
+ {
+ for (j = 1; j < sortbuf_count; )
+ {
+ if (sortbuf[j].ccc > sortbuf[j - 1].ccc)
+ {
+ ucs4_t combined =
+ filter->composer (sortbuf[0].code, sortbuf[j].code);
+ if (combined)
+ {
+ size_t k;
+
+ sortbuf[0].code = combined;
+ /* sortbuf[0].ccc = 0, still valid. */
+ for (k = j + 1; k < sortbuf_count; k++)
+ sortbuf[k - 1] = sortbuf[k];
+ sortbuf_count--;
+ continue;
+ }
+ }
+ j++;
+ }
+ }
}
for (j = 0; j < sortbuf_count; j++)
@@ -332,11 +332,11 @@ uninorm_filter_flush (struct uninorm_filter *filter)
/* Output muc to the encapsulated stream. */
int ret = filter->stream_func (filter->stream_data, muc);
if (ret < 0)
- {
- /* errno is set here. */
- filter->sortbuf_count = 0;
- return -1;
- }
+ {
+ /* errno is set here. */
+ filter->sortbuf_count = 0;
+ return -1;
+ }
}
/* sortbuf is now empty. */