/* $Revision: 1.21 $
 */

/* The troff parser.  Most of the troff-specific code is in this file.
 */

#include "unroff.h"

extern int compatible;

char escape = '\\';
char eqn_delim1, eqn_delim2;

static char control = '.';
static char control_nobreak = '\'';
static unsigned char argspec[256];
static char *sentence_end = ".?!";

/* Styles of escape char arguments:
 */
#define ARG_QUOTED   2      /* \h'xxx', \h|xxx|, ...      */
#define ARG_SYMBOL   4      /* \fx  \f(xx  \f[xxx]        */
#define ARG_SIZE     6      /* \sx  \s(xx  \s[xxx]  \sdd  */
#define ARG_CHAR     8      /* \zx  \z\x  \z\(xx  \z\[xxx] */
#define ARG_LINE    16      /* \"                         */
#define ARG_SIGN     1      /* optional sign: \s          */

/* parse_char() return values:
 */
#define TOK_CHAR     1
#define TOK_ESC      2

/* The following statement macros operate on the local variables `p' and
 * `ep' of the function they are used in and return 0 from that function
 * after issuing a warning.  They are wrapped in do/while(0) so that each
 * use behaves like a single statement.
 */

/* Advance `p' to the next occurrence of `c'; fail if `ep' is reached first.
 */
#define skip(c) do {\
    for ( ; p < ep && *p != (c); p++)\
        ;\
    if (p == ep) {\
        warn("missing closing `%s' delimiter", printable_char(c));\
        return 0;\
    }\
} while (0)

#define check_name(c) do {\
    if (c) {\
        warn("missing escape name");\
        return 0;\
    }\
} while (0)

#define check_arg(c) do {\
    if (c) {\
        warn("missing escape sequence argument");\
        return 0;\
    }\
} while (0)

#define check_empty(c) do {\
    if (c) {\
        warn("empty `[xxx]' sequence");\
        return 0;\
    }\
} while (0)

/* True if the (non-empty) buffer starts with a control character.
 */
#define is_request(p) \
    ((p)->size > 0 && ((p)->data[0] == control ||\
                       (p)->data[0] == control_nobreak ||\
                       (p)->data[0] == '.'))

/* Deals with \" \* \n.  Returns 0 on error, 1 otherwise.
 *
 * Copies `ip' to `op' (which is cleared first).  Comments and
 * string/number register references are replaced by the result of the
 * corresponding event; if the event yields no result, the original text
 * is copied (for a comment, only a trailing newline is kept).  Any other
 * escape sequence is copied unchanged.  White space immediately preceding
 * a comment is dropped.
 */
int parse_expand(Buffer *ip, Buffer *op) {
    char sign, *p, *ep, *q, *s, *ret;
    int len, size_ret, nl, ev, fallback;
    Object str;

    buffer_clear(op);
    for (p = ip->data, ep = p + ip->size; p < ep; ) {
        if (*p == escape && p+1 == ep) {
            /* A lone escape character at the very end of the input has
             * nothing to introduce; copy it literally instead of looking
             * at the byte behind the buffer.
             */
            buffer_putc(op, *p++);
        } else if (*p == escape) {
            p++;
            switch (*p) {
            case '"':
                /* The comment extends to the end of the buffer. */
                len = ep-p-1;
                nl = p[len] == '\n';
                args_clear();
                args_add(Make_Char('"'));
                args_add(Make_String(p+1, len));
                ret = event_exec(EV_ESCAPE, "\"", 1, &size_ret, 1);
                if (ret)
                    buffer_puts(op, ret, size_ret);
                else if (nl)
                    buffer_putc(op, '\n');
                p = ep;
                break;
            case 'n':
            case '*':
                s = p++;                /* s: the `n' or `*' */
                check_name(p == ep);
                if (*s == 'n' && (*p == '+' || *p == '-')) {
                    sign = *p++;
                } else {
                    sign = 0;
                }
                check_name(p == ep);
                /* Determine the name (q, len): \(xx, \[xxx], or x. */
                switch (*p) {
                case '(':
                    if ((q = ++p) > ep-2) {
                        warn("escape name truncated");
                        return 0;
                    }
                    len = 2;
                    p += 2;
                    break;
                case '[':
                    if (!compatible) {
                        q = ++p;
                        skip(']');
                        check_empty(p == q);
                        len = p++ - q;
                        break;
                    }
                    /* FALLTHROUGH: in compatible mode `[' is an ordinary
                     * one-character name.
                     */
                default:
                    check_name((q = p++) == ep);
                    len = 1;
                    break;
                }
                str = Make_String(q, len);
                ev = *s == 'n' ? EV_NUMREG : EV_STRING;
                /* Use the generic escape event only if one exists and
                 * there is no event for this specific name.
                 */
                fallback = event_lookup(EV_ESCAPE, s, 1) &&
                           !event_lookup(ev, q, len);
                args_clear();
                if (fallback)
                    args_add(Make_Char(*s));
                args_add(str);
                if (sign)
                    args_add(Make_Char(sign));
                if (fallback)
                    ret = event_exec(EV_ESCAPE, s, 1, &size_ret, 1);
                else
                    ret = event_exec(ev, q, len, &size_ret, 1);
                if (ret)
                    buffer_puts(op, ret, size_ret);
                else
                    buffer_puts(op, s-1, p-s+1);    /* copy unchanged */
                break;
            default:
                buffer_putc(op, escape);
                buffer_putc(op, *p);
                p++;
                break;
            }
        } else if (isspace(UCHAR(*p))) {    /* kill space before comment */
            for (s = p+1; s < ep-1 && isspace(UCHAR(*s)); s++)
                ;
            if (s < ep-1 && *s == escape && s[1] == '"')
                p = s;
            else
                buffer_putc(op, *p++);
        } else {
            buffer_putc(op, *p++);
        }
    }
    return 1;
}

/* Parses one character or escape sequence starting at *pp (which must
 * point below `ep') and advances *pp behind it.
 *
 * Returns 0 on error (a warning has been issued and *pp is unchanged),
 * TOK_CHAR if a single (possibly special) character was parsed, or
 * TOK_ESC if an escape sequence or inline equation was parsed.
 *
 * If `doescape' is zero, the escape character has no special meaning.
 * If `doexec' is zero, the input is only scanned: no events are executed
 * and nothing is written to `op'.  In `copymode' every escape sequence
 * except \$ is passed through without invoking an event; the escape
 * character itself is dropped in front of `.' and `\'.
 */
static int parse_char(char **pp, char *ep, Buffer *op, int doescape,
        int doexec, int copymode) {
    char sign, *p, *s, *ret;
    char *q = 0;                            /* make gcc -Wuninitialized happy */
    int size_ret, tok = 0, len = 0, nl = 0; /* ditto */
    unsigned char spec;

    p = *pp;
    if (*p == escape && doescape && p < ep-1) {
        if (copymode) {
            if (p[1] != '$') {
                if (doexec && p[1] != '.' && p[1] != '\\')
                    buffer_putc(op, *p);
                if (doexec)
                    buffer_putc(op, p[1]);
                *pp = p+2;
                return TOK_CHAR;
            }
        }
        switch (*++p) {
        case '(':                           /* special character \(xx */
            if (++p > ep-2) {
                warn("special character truncated");
                return 0;
            }
            if (doexec) {
                args_clear();
                args_add(Make_String(p, 2));
                if ((ret = event_exec_fallback(EV_SPECIAL, p, 2,
                        &size_ret)) == 0)
                    buffer_puts(op, p-2, 4);
                else
                    buffer_puts(op, ret, size_ret);
            }
            *pp = p+2;
            return TOK_CHAR;
        case '[':                           /* special character \[xxx] */
            if (!compatible) {
                q = ++p;
                skip(']');
                check_empty(p == q);
                if (doexec) {
                    args_clear();
                    args_add(Make_String(q, p-q));
                    if ((ret = event_exec_fallback(EV_SPECIAL, q, p-q,
                            &size_ret)) == 0)
                        buffer_puts(op, q-2, p-q+3);
                    else
                        buffer_puts(op, ret, size_ret);
                }
                *pp = p+1;
                return TOK_CHAR;
            }
            /* FALLTHROUGH: in compatible mode \[ is an ordinary escape. */
        default:
            spec = argspec[(unsigned char)*p];
            s = p++;                        /* s: the escape name */
            if (spec & ARG_SIGN && p < ep && (*p == '+' || *p == '-')) {
                sign = *p++;
            } else {
                sign = 0;
            }
            /* Locate the argument (q, len) according to its style.  An
             * escape without argument (spec == 0) matches no case.
             */
            switch (spec &= ~ARG_SIGN) {
            case ARG_SYMBOL:
            case ARG_SIZE:
                check_arg(p == ep);
                switch (*p) {
                case '(':
                    if ((q = ++p) > ep-2) {
                        warn("escape sequence argument truncated");
                        return 0;
                    }
                    len = 2;
                    p += 2;
                    break;
                case '[':
                    if (!compatible) {
                        q = ++p;
                        skip(']');
                        check_empty(p == q);
                        len = p++ - q;
                        break;
                    }
                    /* FALLTHROUGH: `[' is a one-character argument in
                     * compatible mode.
                     */
                default:
                    check_arg((q = p++) == ep);
                    len = 1;
                    /* \sdd: a size may have two digits if the first one
                     * is 1, 2, or 3.
                     */
                    if (spec == ARG_SIZE && q < ep-1 && *q > '0' && *q < '4'
                            && isdigit(UCHAR(*p))) {
                        len++, p++;
                    }
                    break;
                }
                break;
            case ARG_CHAR:
                check_arg((q = p) == ep);
                if (parse_char(&p, ep, op, doescape, 0, copymode) == 0)
                    return 0;
                len = p - q;
                break;
            case ARG_LINE:
                q = p;
                p = ep;
                len = ep - q;
                nl = ep[-1] == '\n';
                break;
            case ARG_QUOTED:
                q = p;                      /* q: the opening delimiter */
                if (q >= ep) {
                    warn("missing opening delimiter");
                    return 0;
                }
                p++;
                if (p == ep) {
undelim:
                    warn("missing closing `%s' delimiter",
                         printable_char(*q));
                    return 0;
                }
                /* Scan (without executing) up to the closing delimiter. */
                while ((tok = parse_char(&p, ep, op, doescape, 0,
                        copymode)) != 0) {
                    if (tok == TOK_CHAR && p[-1] == *q)
                        break;
                    if (p == ep)
                        goto undelim;
                }
                if (tok == 0)
                    return 0;
                len = p - ++q - 1;          /* text between the delimiters */
                break;
            }
            if (doexec) {
                args_clear();
                args_add(Make_Char(*s));
                if (spec) {
                    args_add(Make_String(q, len));
                    if (sign)
                        args_add(Make_Char(sign));
                }
                ret = event_exec_fallback(EV_ESCAPE, s, 1, &size_ret);
                if (ret) {
                    buffer_puts(op, ret, size_ret);
                } else if (spec & ARG_LINE) {
                    if (nl)
                        buffer_putc(op, '\n');
                } else {
                    buffer_puts(op, s-1, p-s+1);    /* copy unchanged */
                }
            }
            *pp = p;
            return TOK_ESC;
        }
    } else if (doexec && doescape && !copymode && eqn_delim1 &&
            *p == eqn_delim1) {
        /* Inline equation enclosed in the eqn delimiters. */
        s = ++p;
        for ( ; p < ep && *p != eqn_delim2; p++)
            ;
        if (p == ep) {
            warn("non-terminated inline equation");
            return 0;
        }
        args_clear();
        args_add(Make_String(s, p-s));
        ret = event_exec(EV_EQUATION, 0, 0, &size_ret, 0);
        if (ret)
            buffer_puts(op, ret, size_ret);
        else
            buffer_puts(op, s-1, p-s+2);
        *pp = p+1;
        return TOK_ESC;
    } else if (doexec && !copymode && event_lookup(EV_CHAR, p, 1)) {
        /* An event is associated with this character. */
        args_clear();
        args_add(Make_Char(*p));
        ret = event_exec(EV_CHAR, p, 1, &size_ret, 0);
        if (ret)
            buffer_puts(op, ret, size_ret);
        else
            buffer_putc(op, *p);
        *pp = p+1;
        return TOK_CHAR;
    } else {
        if (doexec)
            buffer_putc(op, *p);
        *pp = p+1;
        return TOK_CHAR;
    }
}

/* Deals with \( \[ \x and chardefs.  Returns 0 on error, 1 otherwise.
 *
 * Clears `op' and feeds all of `ip' through parse_char().  When an error
 * occurs, `op' holds the output produced up to that point.
 */
int parse_escape(Buffer *ip, Buffer *op, int doescape, int copymode) {
    char *p, *ep;

    buffer_clear(op);
    p = ip->data;
    ep = p + ip->size;
    while (p < ep) {
        /* parse_char() leaves p untouched on failure, so the error has to
         * be detected from its return value, not from p.
         */
        if (parse_char(&p, ep, op, doescape, 1, copymode) == 0)
            return 0;
    }
    return 1;
}

/* Parses a request or macro invocation.  `ip' must not be empty; its
 * first character is taken to be the control character.  The name and
 * the arguments are passed to the macro or request event; if the event
 * returns a result, `op' is cleared and set to it.  The arguments are
 * unescaped in place, i.e. the contents of `ip' are modified.
 *
 * Returns 1 if the event returned a result, 0 otherwise (also when the
 * line holds no name or no event exists for the name).
 */
static int parse_request(Buffer *ip, Buffer *op) {
    char *p, *q, *t, *ep, *ret, *start;
    int reqlen, quote, len, size_ret;
    int num, macro, minargs = 0, maxargs = 0;
    Elem *event;
    Object obj;

    assert(ip->size > 0);
    p = ip->data+1;
    ep = ip->data+ip->size;
    for ( ; p < ep && (*p == ' ' || *p == '\t'); p++)
        ;
    start = p;
    /* In compatible mode a name is at most two characters long. */
    for ( ; p < ep && !isspace(UCHAR(*p)); p++)
        if (compatible && p == start+2)
            break;
    if ((reqlen = p - start) == 0)      /* just a period */
        return 0;
    if ((event = event_lookup(EV_MACRO, start, reqlen)) != 0) {
        macro = 1;
    } else if ((event = event_lookup(EV_REQUEST, start, reqlen)) != 0) {
        macro = 0;
        obj = get_object(event->obj);
        /*
         * The following three lines prevent what I believe is an optimizer
         * bug under OSF/1.  `obj' is invalid unless it is passed to an
         * (arbitrary) function before entering the if-statement.
         */
#ifdef __osf__
        (void)P_Not(obj);               /* any function... */
#endif
        if (TYPE(obj) == T_Compound) {
            if ((maxargs = COMPOUND(obj)->max_args - 1) < 0)
                maxargs = INT_MAX;
            minargs = COMPOUND(obj)->min_args;
        }
    } else {
        warn("no event value for request or macro `%s'",
             printable_string(start, reqlen));
        return 0;
    }
    args_clear();
    args_add(Make_String(start, reqlen));
    for (num = 1; ; num++) {
        for ( ; p < ep && isspace(UCHAR(*p)); p++)
            ;
        if (p >= ep)
            break;
        if (macro && *p == '"') {
            quote = 1;
            p++;
        } else {
            quote = 0;
        }
        /* The argument is compacted in place: p reads, t writes. */
        q = t = p;
        while (1) {
            if (p == ep || *p == '\n') {
                /* If quote==1, a closing delimiter is missing.  This is
                 * not regarded as an error in troff.
                 */
                break;
            }
            /* For a request, the last argument extends to the end of
             * the line.
             */
            if (!quote && isspace(UCHAR(*p)) && (macro || num < maxargs))
                break;
            if (quote && *p == '"') {
                if (p < ep-1 && p[1] == '"')    /* turn "" into " */
                    p++;
                else
                    break;
            }
            if (*p == escape && p < ep-1) {
                if (p[1] != '\\')
                    *t++ = *p;
                p++;
            }
            *t++ = *p++;
        }
        /* Skip the terminating character, but never step beyond the end
         * of the buffer.
         */
        if (p < ep)
            p++;
        len = t-q;
        args_add(Make_String(q, len));
    }
    if (!macro)
        while (num++ < minargs)         /* supply missing arguments */
            args_add(Make_String("", 0));
    ret = event_exec(macro ? EV_MACRO : EV_REQUEST, start, reqlen,
                     &size_ret, 1);
    if (ret) {
        buffer_clear(op);
        buffer_puts(op, ret, size_ret);
    }
    return ret != 0;
}

/* Returns non-zero if the buffer ends with one of the characters in
 * `sentence_end' followed by a newline.
 */
static int is_sentence_end(Buffer *bp) {
    int len = bp->size;
    char c;

    if (len <= 1 || bp->data[len-1] != '\n')
        return 0;
    c = bp->data[len-2];
    /* strchr() would also match the terminator of `sentence_end'. */
    return c != '\0' && strchr(sentence_end, c) != 0;
}

/* Executes the sentence event with the punctuation character as its
 * argument.  If the event returns a result, it replaces the final two
 * characters of the buffer.  The buffer must satisfy is_sentence_end().
 */
static void exec_sentence_end(Buffer *bp) {
    int size_ret;
    char *ret;

    args_clear();
    args_add(Make_Char(bp->data[bp->size-2]));
    if ((ret = event_exec(EV_SENTENCE, 0, 0, &size_ret, 0)) != 0) {
        bp->size -= 2;
        buffer_puts(bp, ret, size_ret);
    }
}

/* Executes the line events; the argument is the last character of the
 * buffer, or False if the buffer is empty.
 */
static void exec_line(Buffer *bp) {
    args_clear();
    if (bp->size > 0)
        args_add(Make_Char(bp->data[bp->size-1]));
    else
        args_add(False);
    events_vec_exec(EV_LINE);
}

/* Processes one input line: `ip' is parsed as a request/macro call or as
 * a text line, the result is placed in `op', sentence-end processing is
 * applied, and the result is written out, followed by the line events.
 * Nothing is written if parsing fails or if a request produces no output.
 */
void parse_line(Buffer *ip, Buffer *op) {
    int do_sentence_end;

    if (is_request(ip)) {
        if (!parse_request(ip, op))
            return;
        if (op->size == 0)      /* it's not considered an input line */
            return;
        do_sentence_end = 1;
    } else {
        /* Must be determined before the line gets translated. */
        do_sentence_end = is_sentence_end(ip);
        if (!parse_escape(ip, op, 1, 0))
            return;
    }
    if (do_sentence_end && is_sentence_end(op))
        exec_sentence_end(op);
    safe_write(op->data, op->size);
    exec_line(op);
}

/* Main loop: reads lines until end of input and passes each one through
 * parse_expand() and, if that succeeds, parse_line().
 */
void parse_input(void) {
    int eof_seen;
    Buffer *b1, *b2;

    b1 = buffer_new(0);
    b2 = buffer_new(0);
    do {
        buffer_clear(b1);
        if ((eof_seen = safe_readline(b1)) && b1->size == 0)
            break;
        if (parse_expand(b1, b2))
            parse_line(b2, b1);     /* b1 is reused as output buffer */
    } while (!eof_seen);
    buffer_delete(b1);
    buffer_delete(b2);
}

/* Fills in the table of argument styles of the escape sequences.  The
 * upper-case entries are only installed if `compatible' is not set.
 */
void init_parse(void) {
    argspec['b'] = ARG_QUOTED;
    argspec['c'] = ARG_CHAR;
    argspec['f'] = ARG_SYMBOL;
    argspec['h'] = ARG_QUOTED;
    argspec['k'] = ARG_SYMBOL;
    argspec['l'] = ARG_QUOTED;
    argspec['n'] = ARG_SYMBOL|ARG_SIGN;
    argspec['o'] = ARG_QUOTED;
    argspec['s'] = ARG_SIZE|ARG_SIGN;
    argspec['v'] = ARG_QUOTED;
    argspec['w'] = ARG_QUOTED;
    argspec['x'] = ARG_QUOTED;
    argspec['z'] = ARG_CHAR;
    argspec['*'] = ARG_SYMBOL;
    argspec['$'] = ARG_SYMBOL;
    argspec['"'] = ARG_LINE;
    if (!compatible) {
        argspec['A'] = ARG_QUOTED;
        argspec['C'] = ARG_QUOTED;
        argspec['L'] = ARG_QUOTED;
        argspec['N'] = ARG_QUOTED;
        argspec['R'] = ARG_QUOTED;
        argspec['V'] = ARG_SYMBOL;
        argspec['Y'] = ARG_SYMBOL;
        argspec['Z'] = ARG_QUOTED;
    }
}