summary | refs | log | tree | commit | diff
path: root/app/tools/halibut/main.c
diff options
context:
space:
mode:
Diffstat (limited to 'app/tools/halibut/main.c')
-rw-r--r-- app/tools/halibut/main.c 473
1 files changed, 473 insertions, 0 deletions
diff --git a/app/tools/halibut/main.c b/app/tools/halibut/main.c
new file mode 100644
index 0000000..ffc70cc
--- /dev/null
+++ b/app/tools/halibut/main.c
@@ -0,0 +1,473 @@
+/*
+ * main.c: command line parsing and top level
+ */
+
+#include <assert.h>
+#include <locale.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "halibut.h"
+
+static void dbg_prtsource(paragraph *sourceform); /* debug dump: print each paragraph's type, keyword(s) and word list */
+static void dbg_prtwordlist(int level, word *w); /* debug dump: print a word list indented by level, recursing into alternatives */
+static void dbg_prtkws(keywordlist *kws); /* debug dump: print each keyword and its associated word list */
+
+/*
+ * A pre-backend: a function run once over the document before the
+ * backends, whose result pointer is handed to each backend that asks
+ * for it. A backend asks for a pre-backend by setting that
+ * pre-backend's `bitfield' value in its own `prebackend_bitfield'.
+ */
+struct pre_backend {
+    void *(*func)(paragraph *, keywordlist *, indexdata *);
+    int bitfield;
+};
+
+/*
+ * Table of available pre-backends. main() indexes its array of
+ * pre-backend results by position in this table.
+ */
+static const struct pre_backend pre_backends[] = {
+    { paper_pre_backend, 0x0001 },
+};
+
+/*
+ * An output backend.
+ *
+ *  - `name' is matched against long options (`--name' or
+ *    `--name=value') on the command line.
+ *  - `func' produces the output; its last argument is the result of
+ *    the pre-backend it requested, or NULL.
+ *  - `filename' turns the value given as `--name=value' into a
+ *    list of configuration paragraphs.
+ *  - `bitfield' identifies the backend; several names may share one
+ *    value, in which case they are aliases for the same backend.
+ *  - `prebackend_bitfield' is the set of pre-backends required.
+ */
+struct backend {
+    char *name;
+    void (*func)(paragraph *, keywordlist *, indexdata *, void *);
+    paragraph *(*filename)(char *filename);
+    int bitfield, prebackend_bitfield;
+};
+
+/*
+ * Table of available backends. Entries sharing a `bitfield' value
+ * must be adjacent: main() runs a backend only when the bitfield
+ * differs from that of the preceding entry, which is what stops an
+ * aliased backend from being run once per name.
+ */
+static const struct backend backends[] = {
+    { "xhtml", html_backend, html_config_filename, 0x0002, 0 },
+    { "html",  html_backend, html_config_filename, 0x0002, 0 },
+/*
+ * Only the two HTML entries above are compiled in. The complete
+ * table is retained here for reference:
+ *
+    {"text", text_backend, text_config_filename, 0x0001, 0},
+    {"xhtml", html_backend, html_config_filename, 0x0002, 0},
+    {"html", html_backend, html_config_filename, 0x0002, 0},
+    {"hlp", whlp_backend, whlp_config_filename, 0x0004, 0},
+    {"whlp", whlp_backend, whlp_config_filename, 0x0004, 0},
+    {"winhelp", whlp_backend, whlp_config_filename, 0x0004, 0},
+    {"man", man_backend, man_config_filename, 0x0008, 0},
+    {"info", info_backend, info_config_filename, 0x0010, 0},
+    {"ps", ps_backend, ps_config_filename, 0x0020, 0x0001},
+    {"pdf", pdf_backend, pdf_config_filename, 0x0040, 0x0001},
+ */
+};
+
+/*
+ * Program entry point: parse the command line, read the input
+ * files, and run every selected backend over the document.
+ *
+ * argc/argv are the usual C command-line arguments. An argument
+ * beginning with '-' is treated as one or more options; every other
+ * argument is recorded as an input file name.
+ *
+ * Returns 0 after the backends have run. Exits with EXIT_SUCCESS
+ * when invoked with no arguments (after printing usage) or when an
+ * informational option such as --help was given, and with
+ * EXIT_FAILURE on a command-line error, when no input files were
+ * named, or when read_input() or get_keywords() returns NULL.
+ * Command-line errors take precedence over informational options.
+ */
+int main(int argc, char **argv) {
+    char **infiles;                    /* non-option arguments, in order */
+    int nfiles;                        /* number of entries in infiles */
+    int nogo;                          /* set by options that only print information */
+    int errs;                          /* set when a command-line error was reported */
+    int reportcols;
+    int input_charset;
+    int debug;
+    int backendbits, prebackbits;      /* selected backends / required pre-backends */
+    int k, b;
+    paragraph *cfg, *cfg_tail;         /* config paragraphs built from the command line */
+    void *pre_backend_data[16];        /* results of pre-backends, indexed as pre_backends[] */
+
+    /*
+     * Use the specified locale everywhere. It'll be used for
+     * output of error messages, and as the default character set
+     * for input files if one is not explicitly specified.
+     *
+     * However, we need to use standard numeric formatting for
+     * output of things like PDF.
+     */
+    setlocale(LC_ALL, "");
+    setlocale(LC_NUMERIC, "C");
+
+    /*
+     * Set up initial (default) parameters. There can never be more
+     * input files than arguments, so argc entries is always enough.
+     */
+    infiles = snewn(argc, char *);
+    nfiles = 0;
+    nogo = errs = FALSE;
+    reportcols = 0;
+    input_charset = CS_ASCII;
+    debug = 0;
+    backendbits = 0;
+    cfg = cfg_tail = NULL;
+
+    if (argc == 1) {
+        usage();
+        exit(EXIT_SUCCESS);
+    }
+
+    /*
+     * Parse command line arguments.
+     */
+    while (--argc) {
+        char *p = *++argv;
+        if (*p == '-') {
+            /*
+             * An option. Single-letter options may be bundled, so
+             * keep going until the end of the argument; handlers
+             * that consume the rest of the argument set p to NULL
+             * to stop the loop.
+             */
+            while (p && *++p) {
+                char c = *p;
+                switch (c) {
+                  case '-':
+                    /*
+                     * Long option.
+                     */
+                    {
+                        char *opt, *val;
+                        opt = p++;     /* opt will have _one_ leading - */
+                        while (*p && *p != '=')
+                            p++;       /* find end of option */
+                        if (*p == '=') {
+                            *p++ = '\0';
+                            val = p;
+                        } else
+                            val = NULL;
+
+                        assert(opt[0] == '-');
+                        for (k = 0; k < (int)lenof(backends); k++)
+                            if (!strcmp(opt+1, backends[k].name)) {
+                                backendbits |= backends[k].bitfield;
+                                if (val) {
+                                    /*
+                                     * Append the backend's config
+                                     * paragraph(s) to the command-line
+                                     * config list. (Named so as not to
+                                     * shadow the argument cursor p.)
+                                     */
+                                    paragraph *bcfg = backends[k].filename(val);
+                                    assert(bcfg);
+                                    if (cfg_tail)
+                                        cfg_tail->next = bcfg;
+                                    else
+                                        cfg = bcfg;
+                                    while (bcfg->next)
+                                        bcfg = bcfg->next;
+                                    cfg_tail = bcfg;
+                                }
+                                break;
+                            }
+                        if (k < (int)lenof(backends)) {
+                            /* do nothing */;
+                        } else if (!strcmp(opt, "-input-charset")) {
+                            if (!val) {
+                                errs = TRUE, error(err_optnoarg, opt);
+                            } else {
+                                int charset = charset_from_localenc(val);
+                                if (charset == CS_NONE) {
+                                    errs = TRUE, error(err_cmdcharset, val);
+                                } else {
+                                    input_charset = charset;
+                                }
+                            }
+                        } else if (!strcmp(opt, "-help")) {
+                            help();
+                            nogo = TRUE;
+                        } else if (!strcmp(opt, "-version")) {
+                            showversion();
+                            nogo = TRUE;
+                        } else if (!strcmp(opt, "-licence") ||
+                                   !strcmp(opt, "-license")) {
+                            licence();
+                            nogo = TRUE;
+                        } else if (!strcmp(opt, "-list-charsets")) {
+                            listcharsets();
+                            nogo = TRUE;
+                        } else if (!strcmp(opt, "-precise")) {
+                            reportcols = 1;
+                        } else {
+                            errs = TRUE, error(err_nosuchopt, opt);
+                        }
+                    }
+                    p = NULL;          /* long option uses the whole argument */
+                    break;
+                  case 'h':
+                  case 'V':
+                  case 'L':
+                  case 'P':
+                  case 'd':
+                    /*
+                     * Option requiring no parameter.
+                     */
+                    switch (c) {
+                      case 'h':
+                        help();
+                        nogo = TRUE;
+                        break;
+                      case 'V':
+                        showversion();
+                        nogo = TRUE;
+                        break;
+                      case 'L':
+                        licence();
+                        nogo = TRUE;
+                        break;
+                      case 'P':
+                        reportcols = 1;
+                        break;
+                      case 'd':
+                        debug = TRUE;
+                        break;
+                    }
+                    break;
+                  case 'C':
+                    /*
+                     * Option requiring parameter. The parameter is
+                     * either the rest of this argument or, if that
+                     * is empty, the whole of the next argument.
+                     */
+                    p++;
+                    if (!*p && argc > 1)
+                        --argc, p = *++argv;
+                    else if (!*p) {
+                        char opt[2];
+                        opt[0] = c;
+                        opt[1] = '\0';
+                        errs = TRUE, error(err_optnoarg, opt);
+                    }
+                    /*
+                     * Now c is the option and p is the parameter.
+                     */
+                    switch (c) {
+                      case 'C':
+                        /*
+                         * -C means we split our argument up into
+                         * colon-separated chunks and assemble them
+                         * into a config paragraph. A backslash
+                         * makes the following character literal.
+                         */
+                        {
+                            char *s = dupstr(p), *q, *r;
+                            paragraph *para;
+
+                            para = cmdline_cfg_new();
+
+                            /*
+                             * q reads through the copy; r writes the
+                             * current unescaped chunk back into the
+                             * start of the same buffer.
+                             */
+                            q = r = s;
+                            while (*q) {
+                                if (*q == ':') {
+                                    *r = '\0';
+                                    /* XXX ad-hoc diagnostic */
+                                    if (!strcmp(s, "input-charset"))
+                                        error(err_futileopt, "Cinput-charset",
+                                              "; use --input-charset");
+                                    cmdline_cfg_add(para, s);
+                                    r = s;
+                                } else {
+                                    if (*q == '\\' && q[1])
+                                        q++;
+                                    *r++ = *q;
+                                }
+                                q++;
+                            }
+                            *r = '\0';
+                            cmdline_cfg_add(para, s);
+
+                            /*
+                             * The scratch buffer is overwritten right
+                             * after every cmdline_cfg_add() call above,
+                             * so that function cannot be relying on it
+                             * staying intact; release it rather than
+                             * leaking one copy per -C option.
+                             */
+                            sfree(s);
+
+                            if (cfg_tail)
+                                cfg_tail->next = para;
+                            else
+                                cfg = para;
+                            cfg_tail = para;
+                        }
+                        break;
+                    }
+                    p = NULL;          /* prevent continued processing */
+                    break;
+                  default:
+                    /*
+                     * Unrecognised option.
+                     */
+                    {
+                        char opt[2];
+                        opt[0] = c;
+                        opt[1] = '\0';
+                        errs = TRUE, error(err_nosuchopt, opt);
+                    }
+                }
+            }
+        } else {
+            /*
+             * A non-option argument.
+             */
+            infiles[nfiles++] = p;
+        }
+    }
+
+    if (errs)
+        exit(EXIT_FAILURE);
+    if (nogo)
+        exit(EXIT_SUCCESS);
+
+    /*
+     * Do the work.
+     */
+    if (nfiles == 0) {
+        error(err_noinput);
+        usage();
+        exit(EXIT_FAILURE);
+    }
+
+    {
+        input in;
+        paragraph *sourceform, *p;
+        indexdata *idx;
+        keywordlist *keywords;
+
+        in.filenames = infiles;
+        in.nfiles = nfiles;
+        in.currfp = NULL;
+        in.currindex = 0;
+        in.npushback = in.pushbacksize = 0;
+        in.pushback = NULL;
+        in.reportcols = reportcols;
+        in.stack = NULL;
+        in.defcharset = input_charset;
+
+        idx = make_index();
+
+        sourceform = read_input(&in, idx);
+        if (!sourceform)
+            exit(EXIT_FAILURE);
+
+        /*
+         * Append the config directives acquired from the command
+         * line.
+         */
+        {
+            paragraph *end;
+
+            end = sourceform;
+            while (end && end->next)
+                end = end->next;
+            assert(end);
+
+            end->next = cfg;
+        }
+
+        sfree(in.pushback);
+
+        sfree(infiles);
+
+        keywords = get_keywords(sourceform);
+        if (!keywords)
+            exit(EXIT_FAILURE);
+        gen_citations(sourceform, keywords);
+        subst_keywords(sourceform, keywords);
+
+        for (p = sourceform; p; p = p->next)
+            if (p->type == para_IM)
+                index_merge(idx, TRUE, p->keyword, p->words, &p->fpos);
+
+        build_index(idx);
+
+        /*
+         * Set up attr_First / attr_Last / attr_Always, in the main
+         * document and in the index entries.
+         */
+        for (p = sourceform; p; p = p->next)
+            mark_attr_ends(p->words);
+        {
+            int i;
+            indexentry *entry;
+
+            for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++)
+                mark_attr_ends(entry->text);
+        }
+
+        if (debug) {
+            index_debug(idx);
+            dbg_prtkws(keywords);
+            dbg_prtsource(sourceform);
+        }
+
+        /*
+         * Select and run the pre-backends. If no backend was named
+         * on the command line (backendbits == 0), every backend is
+         * treated as selected.
+         */
+        prebackbits = 0;
+        for (k = 0; k < (int)lenof(backends); k++)
+            if (backendbits == 0 || (backendbits & backends[k].bitfield))
+                prebackbits |= backends[k].prebackend_bitfield;
+        for (k = 0; k < (int)lenof(pre_backends); k++)
+            if (prebackbits & pre_backends[k].bitfield) {
+                assert(k < (int)lenof(pre_backend_data));
+                pre_backend_data[k] =
+                    pre_backends[k].func(sourceform, keywords, idx);
+            }
+
+        /*
+         * Run the selected set of backends. b remembers the bitfield
+         * of the previous table entry, so that consecutive entries
+         * with the same bitfield (aliases) run only once.
+         */
+        for (k = b = 0; k < (int)lenof(backends); k++)
+            if (b != backends[k].bitfield) {
+                b = backends[k].bitfield;
+                if (backendbits == 0 || (backendbits & b)) {
+                    void *pbd = NULL;
+                    int pbb = backends[k].prebackend_bitfield;
+                    int m;
+
+                    /*
+                     * Hand the backend the result of the first
+                     * pre-backend it asked for, if any.
+                     */
+                    for (m = 0; m < (int)lenof(pre_backends); m++)
+                        if (pbb & pre_backends[m].bitfield) {
+                            assert(m < (int)lenof(pre_backend_data));
+                            pbd = pre_backend_data[m];
+                            break;
+                        }
+
+                    backends[k].func(sourceform, keywords, idx, pbd);
+                }
+            }
+
+        free_para_list(sourceform);
+        free_keywords(keywords);
+        cleanup_index(idx);
+    }
+
+    return 0;
+}
+
+/*
+ * Debugging aid: write the whole source form to standard output.
+ *
+ * For each paragraph in the list this prints its numeric type, its
+ * keyword(s) as a comma-separated list of quoted strings (or
+ * "(no keyword)"), and then its word list inside braces.
+ */
+static void dbg_prtsource(paragraph *sourceform) {
+    paragraph *para = sourceform;
+
+    while (para != NULL) {
+        printf("para %d ", para->type);
+
+        if (para->keyword == NULL) {
+            printf("(no keyword)");
+        } else {
+            /*
+             * The keyword field is walked as a run of
+             * NUL-terminated strings, ended by an empty string.
+             */
+            wchar_t *ch = para->keyword;
+
+            while (*ch != 0) {
+                putchar('\"');
+                while (*ch != 0) {
+                    putchar(*ch);
+                    ch++;
+                }
+                putchar('\"');
+                ch++;                  /* step over this string's terminator */
+                if (*ch != 0)
+                    printf(", ");
+            }
+        }
+
+        printf(" {\n");
+        dbg_prtwordlist(1, para->words);
+        printf("}\n");
+
+        para = para->next;
+    }
+}
+
+/*
+ * Debugging aid: write the keyword list to standard output.
+ *
+ * Entries are fetched from kws->keys by ascending position until
+ * index234() returns NULL. For each one this prints its key(s) as a
+ * comma-separated list of quoted strings, followed by its word list
+ * inside braces.
+ */
+static void dbg_prtkws(keywordlist *kws) {
+    int pos = 0;
+    keyword *entry;
+
+    while ((entry = index234(kws->keys, pos)) != NULL) {
+        /*
+         * The key field is walked as a run of NUL-terminated
+         * strings, ended by an empty string.
+         */
+        wchar_t *ch = entry->key;
+
+        printf("keyword ");
+        while (*ch != 0) {
+            putchar('\"');
+            while (*ch != 0) {
+                putchar(*ch);
+                ch++;
+            }
+            putchar('\"');
+            ch++;                      /* step over this string's terminator */
+            if (*ch != 0)
+                printf(", ");
+        }
+
+        printf(" {\n");
+        dbg_prtwordlist(1, entry->text);
+        printf("}\n");
+
+        pos++;
+    }
+}
+
+/*
+ * Debugging aid: write a word list to standard output, one word
+ * per line.
+ *
+ * `level' is the nesting depth; each line is indented by four
+ * columns per level. For every word this prints its numeric type,
+ * its text in quotes (or "(no text)"), a " [breaks]" marker when
+ * the breaks field is set, and, when the word has an alternative
+ * list, that list in braces one level deeper.
+ */
+static void dbg_prtwordlist(int level, word *w) {
+    const int indent = level * 4;
+
+    while (w != NULL) {
+        printf("%*sword %d ", indent, "", w->type);
+
+        if (w->text == NULL) {
+            printf("(no text)");
+        } else {
+            wchar_t *ch = w->text;
+
+            printf("\"");
+            while (*ch != 0) {
+                putchar(*ch);
+                ch++;
+            }
+            printf("\"");
+        }
+
+        if (w->breaks)
+            printf(" [breaks]");
+
+        if (w->alt) {
+            printf(" alt = {\n");
+            dbg_prtwordlist(level + 1, w->alt);
+            printf("%*s}", indent, "");
+        }
+
+        printf("\n");
+        w = w->next;
+    }
+}