summaryrefslogtreecommitdiff
path: root/app/tools/halibut/charset/cstable.c
diff options
context:
space:
mode:
Diffstat (limited to 'app/tools/halibut/charset/cstable.c')
-rw-r--r--app/tools/halibut/charset/cstable.c120
1 files changed, 120 insertions, 0 deletions
diff --git a/app/tools/halibut/charset/cstable.c b/app/tools/halibut/charset/cstable.c
new file mode 100644
index 0000000..509923a
--- /dev/null
+++ b/app/tools/halibut/charset/cstable.c
@@ -0,0 +1,120 @@
+/*
+ * cstable.c - libcharset supporting utility which draws up a map
+ * of the whole Unicode BMP and annotates it with details of which
+ * other character sets each character appears in.
+ *
+ * Note this is not a libcharset _client_; it is part of the
+ * libcharset _package_, using libcharset internals.
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include "charset.h"
+#include "internal.h"
+#include "sbcsdat.h"
+
+#define ENUM_CHARSET(x) extern charset_spec const charset_##x;
+#include "enum.c"
+#undef ENUM_CHARSET
+static charset_spec const *const cs_table[] = {
+#define ENUM_CHARSET(x) &charset_##x,
+#include "enum.c"
+#undef ENUM_CHARSET
+};
+static const char *const cs_names[] = {
+#define ENUM_CHARSET(x) #x,
+#include "enum.c"
+#undef ENUM_CHARSET
+};
+
+int main(int argc, char **argv)
+{
+ long int c;
+ int internal_names = FALSE;
+ int verbose = FALSE;
+
+ while (--argc) {
+ char *p = *++argv;
+ if (!strcmp(p, "-i"))
+ internal_names = TRUE;
+ else if (!strcmp(p, "-v"))
+ verbose = TRUE;
+ }
+
+ for (c = 0; c < 0x30000; c++) {
+ int i, plane, row, col, chr;
+ char const *sep = "";
+
+ printf("U+%04x:", c);
+
+ /*
+ * Look up in SBCSes.
+ */
+ for (i = 0; i < lenof(cs_table); i++)
+ if (cs_table[i]->read == read_sbcs &&
+ (chr = sbcs_from_unicode(cs_table[i]->data, c)) != ERROR) {
+ printf("%s %s", sep,
+ (internal_names ? cs_names[i] :
+ charset_to_localenc(cs_table[i]->charset)));
+ if (verbose)
+ printf("[%d]", chr);
+ sep = ";";
+ }
+
+ /*
+ * Look up individually in MBCS base charsets. The
+ * `internal_names' flag does not affect these, because
+ * MBCS base charsets aren't directly encoded by CS_*
+ * constants.
+ */
+ if (unicode_to_big5(c, &row, &col)) {
+ printf("%s Big5", sep);
+ if (verbose)
+ printf("[%d,%d]", row, col);
+ sep = ";";
+ }
+
+ if (unicode_to_gb2312(c, &row, &col)) {
+ printf("%s GB2312", sep);
+ if (verbose)
+ printf("[%d,%d]", row, col);
+ sep = ";";
+ }
+
+ if (unicode_to_jisx0208(c, &row, &col)) {
+ printf("%s JIS X 0208", sep);
+ if (verbose)
+ printf("[%d,%d]", row, col);
+ sep = ";";
+ }
+
+ if (unicode_to_ksx1001(c, &row, &col)) {
+ printf("%s KS X 1001", sep);
+ if (verbose)
+ printf("[%d,%d]", row, col);
+ sep = ";";
+ }
+
+ if (unicode_to_cp949(c, &row, &col)) {
+ printf("%s CP949", sep);
+ if (verbose)
+ printf("[%d,%d]", row, col);
+ sep = ";";
+ }
+
+ if (unicode_to_cns11643(c, &plane, &row, &col)) {
+ printf("%s CNS11643", sep);
+ if (verbose)
+ printf("[%d,%d,%d]", plane, row, col);
+ sep = ";";
+ }
+
+ if (!*sep)
+ printf(" unicode-only");
+
+ printf("\n");
+ }
+
+ return 0;
+}