summaryrefslogtreecommitdiff
path: root/app/tools/halibut/charset/cstable.c
blob: 509923a08f784bf5abe50c4a660d775997cfdbc6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
/*
 * cstable.c - libcharset supporting utility which draws up a map
 * of Unicode planes 0-2 (U+0000 to U+2FFFF) and annotates it with
 * details of which other character sets each character appears in.
 * 
 * Note this is not a libcharset _client_; it is part of the
 * libcharset _package_, using libcharset internals.
 */

#include <stdio.h>
#include <string.h>

#include "charset.h"
#include "internal.h"
#include "sbcsdat.h"

/*
 * enum.c is included three times below, each time with a different
 * definition of ENUM_CHARSET, so that every ENUM_CHARSET(x) entry in
 * it is expanded in three different ways.
 *
 * First pass: an extern declaration of the charset_spec object
 * charset_<x> for each entry.
 */
#define ENUM_CHARSET(x) extern charset_spec const charset_##x;
#include "enum.c"
#undef ENUM_CHARSET
/* Second pass: a table of pointers to those charset_spec objects. */
static charset_spec const *const cs_table[] = {
#define ENUM_CHARSET(x) &charset_##x,
#include "enum.c"
#undef ENUM_CHARSET
};
/*
 * Third pass: each identifier x turned into a string literal. The
 * entries are generated in the same order as cs_table, so cs_names[i]
 * is the internal name of cs_table[i].
 */
static const char *const cs_names[] = {
#define ENUM_CHARSET(x) #x,
#include "enum.c"
#undef ENUM_CHARSET
};

/*
 * Entry point. Prints one line to standard output for every code
 * point from U+0000 up to (but not including) U+30000, of the form
 *
 *   U+xxxx: name1; name2; ...
 *
 * naming each charset in which the code point was found, or
 * `unicode-only' if it was found in none of them.
 *
 * Command-line options:
 *   -i   label the single-byte charsets with their identifiers from
 *        enum.c instead of the name given by charset_to_localenc()
 *   -v   after each charset name, print in square brackets the
 *        position that the lookup function reported
 *
 * Any other argument is reported on standard error and otherwise
 * ignored. Always returns 0.
 */
int main(int argc, char **argv)
{
    long int c;
    int internal_names = FALSE;
    int verbose = FALSE;

    while (--argc) {
        char *p = *++argv;
        if (!strcmp(p, "-i"))
            internal_names = TRUE;
        else if (!strcmp(p, "-v"))
            verbose = TRUE;
        else
            fprintf(stderr, "cstable: ignoring unrecognised argument `%s'\n",
                    p);
    }

    for (c = 0; c < 0x30000; c++) {
        size_t i;
        int plane, row, col, chr;
        /*
         * Printed before each charset name: empty for the first match
         * on a line and ";" afterwards. Still being empty at the end
         * therefore means that nothing matched.
         */
        char const *sep = "";

        /*
         * c is a long, so it must be printed with an `l' length
         * modifier; a plain %x would read it as an unsigned int.
         */
        printf("U+%04lx:", (unsigned long)c);

        /*
         * Look up in SBCSes. Only the entries of cs_table whose read
         * function is read_sbcs are consulted here.
         */
        for (i = 0; i < lenof(cs_table); i++)
            if (cs_table[i]->read == read_sbcs &&
                (chr = sbcs_from_unicode(cs_table[i]->data, c)) != ERROR) {
                printf("%s %s", sep,
                       (internal_names ? cs_names[i] :
                        charset_to_localenc(cs_table[i]->charset)));
                if (verbose)
                    printf("[%d]", chr);
                sep = ";";
            }

        /*
         * Look up individually in MBCS base charsets. The
         * `internal_names' flag does not affect these, because
         * MBCS base charsets aren't directly encoded by CS_*
         * constants.
         */
        if (unicode_to_big5(c, &row, &col)) {
            printf("%s Big5", sep);
            if (verbose)
                printf("[%d,%d]", row, col);
            sep = ";";
        }

        if (unicode_to_gb2312(c, &row, &col)) {
            printf("%s GB2312", sep);
            if (verbose)
                printf("[%d,%d]", row, col);
            sep = ";";
        }

        if (unicode_to_jisx0208(c, &row, &col)) {
            printf("%s JIS X 0208", sep);
            if (verbose)
                printf("[%d,%d]", row, col);
            sep = ";";
        }

        if (unicode_to_ksx1001(c, &row, &col)) {
            printf("%s KS X 1001", sep);
            if (verbose)
                printf("[%d,%d]", row, col);
            sep = ";";
        }

        if (unicode_to_cp949(c, &row, &col)) {
            printf("%s CP949", sep);
            if (verbose)
                printf("[%d,%d]", row, col);
            sep = ";";
        }

        if (unicode_to_cns11643(c, &plane, &row, &col)) {
            printf("%s CNS11643", sep);
            if (verbose)
                printf("[%d,%d,%d]", plane, row, col);
            sep = ";";
        }

        if (!*sep)
            printf(" unicode-only");

        printf("\n");
    }

    return 0;
}