diff options
Diffstat (limited to 'app/tools/halibut/main.c')
-rw-r--r-- | app/tools/halibut/main.c | 473 |
1 files changed, 473 insertions, 0 deletions
diff --git a/app/tools/halibut/main.c b/app/tools/halibut/main.c new file mode 100644 index 0000000..ffc70cc --- /dev/null +++ b/app/tools/halibut/main.c @@ -0,0 +1,473 @@ +/* + * main.c: command line parsing and top level + */ + +#include <assert.h> +#include <locale.h> +#include <stdio.h> +#include <stdlib.h> +#include "halibut.h" + +static void dbg_prtsource(paragraph *sourceform); +static void dbg_prtwordlist(int level, word *w); +static void dbg_prtkws(keywordlist *kws); + +static const struct pre_backend { + void *(*func)(paragraph *, keywordlist *, indexdata *); + int bitfield; +} pre_backends[] = { + {paper_pre_backend, 0x0001} +}; + +static const struct backend { + char *name; + void (*func)(paragraph *, keywordlist *, indexdata *, void *); + paragraph *(*filename)(char *filename); + int bitfield, prebackend_bitfield; +} backends[] = { + {"xhtml", html_backend, html_config_filename, 0x0002, 0}, + {"html", html_backend, html_config_filename, 0x0002, 0} +/* {"text", text_backend, text_config_filename, 0x0001, 0}, + {"xhtml", html_backend, html_config_filename, 0x0002, 0}, + {"html", html_backend, html_config_filename, 0x0002, 0}, + {"hlp", whlp_backend, whlp_config_filename, 0x0004, 0}, + {"whlp", whlp_backend, whlp_config_filename, 0x0004, 0}, + {"winhelp", whlp_backend, whlp_config_filename, 0x0004, 0}, + {"man", man_backend, man_config_filename, 0x0008, 0}, + {"info", info_backend, info_config_filename, 0x0010, 0}, + {"ps", ps_backend, ps_config_filename, 0x0020, 0x0001}, + {"pdf", pdf_backend, pdf_config_filename, 0x0040, 0x0001}, */ +}; + +int main(int argc, char **argv) { + char **infiles; + int nfiles; + int nogo; + int errs; + int reportcols; + int input_charset; + int debug; + int backendbits, prebackbits; + int k, b; + paragraph *cfg, *cfg_tail; + void *pre_backend_data[16]; + + /* + * Use the specified locale everywhere. It'll be used for + * output of error messages, and as the default character set + * for input files if one is not explicitly specified. + * + * However, we need to use standard numeric formatting for + * output of things like PDF. + */ + setlocale(LC_ALL, ""); + setlocale(LC_NUMERIC, "C"); + + /* + * Set up initial (default) parameters. + */ + infiles = snewn(argc, char *); + nfiles = 0; + nogo = errs = FALSE; + reportcols = 0; + input_charset = CS_ASCII; + debug = 0; + backendbits = 0; + cfg = cfg_tail = NULL; + + if (argc == 1) { + usage(); + exit(EXIT_SUCCESS); + } + + /* + * Parse command line arguments. + */ + while (--argc) { + char *p = *++argv; + if (*p == '-') { + /* + * An option. + */ + while (p && *++p) { + char c = *p; + switch (c) { + case '-': + /* + * Long option. + */ + { + char *opt, *val; + opt = p++; /* opt will have _one_ leading - */ + while (*p && *p != '=') + p++; /* find end of option */ + if (*p == '=') { + *p++ = '\0'; + val = p; + } else + val = NULL; + + assert(opt[0] == '-'); + for (k = 0; k < (int)lenof(backends); k++) + if (!strcmp(opt+1, backends[k].name)) { + backendbits |= backends[k].bitfield; + if (val) { + paragraph *p = backends[k].filename(val); + assert(p); + if (cfg_tail) + cfg_tail->next = p; + else + cfg = p; + while (p->next) + p = p->next; + cfg_tail = p; + } + break; + } + if (k < (int)lenof(backends)) { + /* do nothing */; + } else if (!strcmp(opt, "-input-charset")) { + if (!val) { + errs = TRUE, error(err_optnoarg, opt); + } else { + int charset = charset_from_localenc(val); + if (charset == CS_NONE) { + errs = TRUE, error(err_cmdcharset, val); + } else { + input_charset = charset; + } + } + } else if (!strcmp(opt, "-help")) { + help(); + nogo = TRUE; + } else if (!strcmp(opt, "-version")) { + showversion(); + nogo = TRUE; + } else if (!strcmp(opt, "-licence") || + !strcmp(opt, "-license")) { + licence(); + nogo = TRUE; + } else if (!strcmp(opt, "-list-charsets")) { + listcharsets(); + nogo = TRUE; + } else if (!strcmp(opt, "-precise")) { + reportcols = 1; + } else { + errs = TRUE, error(err_nosuchopt, opt); + } + } + p = NULL; + break; + case 'h': + case 'V': + case 'L': + case 'P': + case 'd': + /* + * Option requiring no parameter. + */ + switch (c) { + case 'h': + help(); + nogo = TRUE; + break; + case 'V': + showversion(); + nogo = TRUE; + break; + case 'L': + licence(); + nogo = TRUE; + break; + case 'P': + reportcols = 1; + break; + case 'd': + debug = TRUE; + break; + } + break; + case 'C': + /* + * Option requiring parameter. + */ + p++; + if (!*p && argc > 1) + --argc, p = *++argv; + else if (!*p) { + char opt[2]; + opt[0] = c; + opt[1] = '\0'; + errs = TRUE, error(err_optnoarg, opt); + } + /* + * Now c is the option and p is the parameter. + */ + switch (c) { + case 'C': + /* + * -C means we split our argument up into + * colon-separated chunks and assemble them + * into a config paragraph. + */ + { + char *s = dupstr(p), *q, *r; + paragraph *para; + + para = cmdline_cfg_new(); + + q = r = s; + while (*q) { + if (*q == ':') { + *r = '\0'; + /* XXX ad-hoc diagnostic */ + if (!strcmp(s, "input-charset")) + error(err_futileopt, "Cinput-charset", + "; use --input-charset"); + cmdline_cfg_add(para, s); + r = s; + } else { + if (*q == '\\' && q[1]) + q++; + *r++ = *q; + } + q++; + } + *r = '\0'; + cmdline_cfg_add(para, s); + + if (cfg_tail) + cfg_tail->next = para; + else + cfg = para; + cfg_tail = para; + } + break; + } + p = NULL; /* prevent continued processing */ + break; + default: + /* + * Unrecognised option. + */ + { + char opt[2]; + opt[0] = c; + opt[1] = '\0'; + errs = TRUE, error(err_nosuchopt, opt); + } + } + } + } else { + /* + * A non-option argument. + */ + infiles[nfiles++] = p; + } + } + + if (errs) + exit(EXIT_FAILURE); + if (nogo) + exit(EXIT_SUCCESS); + + /* + * Do the work. + */ + if (nfiles == 0) { + error(err_noinput); + usage(); + exit(EXIT_FAILURE); + } + + { + input in; + paragraph *sourceform, *p; + indexdata *idx; + keywordlist *keywords; + + in.filenames = infiles; + in.nfiles = nfiles; + in.currfp = NULL; + in.currindex = 0; + in.npushback = in.pushbacksize = 0; + in.pushback = NULL; + in.reportcols = reportcols; + in.stack = NULL; + in.defcharset = input_charset; + + idx = make_index(); + + sourceform = read_input(&in, idx); + if (!sourceform) + exit(EXIT_FAILURE); + + /* + * Append the config directives acquired from the command + * line. + */ + { + paragraph *end; + + end = sourceform; + while (end && end->next) + end = end->next; + assert(end); + + end->next = cfg; + } + + sfree(in.pushback); + + sfree(infiles); + + keywords = get_keywords(sourceform); + if (!keywords) + exit(EXIT_FAILURE); + gen_citations(sourceform, keywords); + subst_keywords(sourceform, keywords); + + for (p = sourceform; p; p = p->next) + if (p->type == para_IM) + index_merge(idx, TRUE, p->keyword, p->words, &p->fpos); + + build_index(idx); + + /* + * Set up attr_First / attr_Last / attr_Always, in the main + * document and in the index entries. + */ + for (p = sourceform; p; p = p->next) + mark_attr_ends(p->words); + { + int i; + indexentry *entry; + + for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) + mark_attr_ends(entry->text); + } + + if (debug) { + index_debug(idx); + dbg_prtkws(keywords); + dbg_prtsource(sourceform); + } + + /* + * Select and run the pre-backends. + */ + prebackbits = 0; + for (k = 0; k < (int)lenof(backends); k++) + if (backendbits == 0 || (backendbits & backends[k].bitfield)) + prebackbits |= backends[k].prebackend_bitfield; + for (k = 0; k < (int)lenof(pre_backends); k++) + if (prebackbits & pre_backends[k].bitfield) { + assert(k < (int)lenof(pre_backend_data)); + pre_backend_data[k] = + pre_backends[k].func(sourceform, keywords, idx); + } + + /* + * Run the selected set of backends. + */ + for (k = b = 0; k < (int)lenof(backends); k++) + if (b != backends[k].bitfield) { + b = backends[k].bitfield; + if (backendbits == 0 || (backendbits & b)) { + void *pbd = NULL; + int pbb = backends[k].prebackend_bitfield; + int m; + + for (m = 0; m < (int)lenof(pre_backends); m++) + if (pbb & pre_backends[m].bitfield) { + assert(m < (int)lenof(pre_backend_data)); + pbd = pre_backend_data[m]; + break; + } + + backends[k].func(sourceform, keywords, idx, pbd); + } + } + + free_para_list(sourceform); + free_keywords(keywords); + cleanup_index(idx); + } + + return 0; +} + +static void dbg_prtsource(paragraph *sourceform) { + /* + * Output source form in debugging format. + */ + + paragraph *p; + for (p = sourceform; p; p = p->next) { + wchar_t *wp; + printf("para %d ", p->type); + if (p->keyword) { + wp = p->keyword; + while (*wp) { + putchar('\"'); + for (; *wp; wp++) + putchar(*wp); + putchar('\"'); + if (*++wp) + printf(", "); + } + } else + printf("(no keyword)"); + printf(" {\n"); + dbg_prtwordlist(1, p->words); + printf("}\n"); + } +} + +static void dbg_prtkws(keywordlist *kws) { + /* + * Output keywords in debugging format. + */ + + int i; + keyword *kw; + + for (i = 0; (kw = index234(kws->keys, i)) != NULL; i++) { + wchar_t *wp; + printf("keyword "); + wp = kw->key; + while (*wp) { + putchar('\"'); + for (; *wp; wp++) + putchar(*wp); + putchar('\"'); + if (*++wp) + printf(", "); + } + printf(" {\n"); + dbg_prtwordlist(1, kw->text); + printf("}\n"); + } +} + +static void dbg_prtwordlist(int level, word *w) { + for (; w; w = w->next) { + wchar_t *wp; + printf("%*sword %d ", level*4, "", w->type); + if (w->text) { + printf("\""); + for (wp = w->text; *wp; wp++) + putchar(*wp); + printf("\""); + } else + printf("(no text)"); + if (w->breaks) + printf(" [breaks]"); + if (w->alt) { + printf(" alt = {\n"); + dbg_prtwordlist(level+1, w->alt); + printf("%*s}", level*4, ""); + } + printf("\n"); + } +} |