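/*
 * Test driver for the regex routines: matches a single RE given on the
 * command line, or runs the regression tests read from standard input.
 */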
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>
#include "regex.h"
#include <assert.h>

#include "main.ih"

char *progname;			/* argv[0]; printed in the usage message */
int debug = 0;			/* bumped once per -x on the command line */
int line = 0;			/* number of the input line being tested */
int status = 0;			/* exit status; set to 1 when a test fails */

int copts = REG_EXTENDED;	/* base regcomp() flags (see options()) */
int eopts = 0;			/* base regexec() flags (see options()) */
regoff_t startoff = 0;		/* -S: REG_STARTEND start offset */
regoff_t endoff = 0;		/* -E: REG_STARTEND distance from end of string */

extern int split();
extern void regprint();

/*
|
||
|
- main - do the simple case, hand off to regress() for regression
|
||
|
*/
|
||
|
main(argc, argv)
|
||
|
int argc;
|
||
|
char *argv[];
|
||
|
{
|
||
|
regex_t re;
|
||
|
# define NS 10
|
||
|
regmatch_t subs[NS];
|
||
|
char erbuf[100];
|
||
|
int err;
|
||
|
size_t len;
|
||
|
int c;
|
||
|
int errflg = 0;
|
||
|
register int i;
|
||
|
extern int optind;
|
||
|
extern char *optarg;
|
||
|
|
||
|
progname = argv[0];
|
||
|
|
||
|
while ((c = getopt(argc, argv, "c:e:S:E:x")) != EOF)
|
||
|
switch (c) {
|
||
|
case 'c': /* compile options */
|
||
|
copts = options('c', optarg);
|
||
|
break;
|
||
|
case 'e': /* execute options */
|
||
|
eopts = options('e', optarg);
|
||
|
break;
|
||
|
case 'S': /* start offset */
|
||
|
startoff = (regoff_t)atoi(optarg);
|
||
|
break;
|
||
|
case 'E': /* end offset */
|
||
|
endoff = (regoff_t)atoi(optarg);
|
||
|
break;
|
||
|
case 'x': /* Debugging. */
|
||
|
debug++;
|
||
|
break;
|
||
|
case '?':
|
||
|
default:
|
||
|
errflg++;
|
||
|
break;
|
||
|
}
|
||
|
if (errflg) {
|
||
|
fprintf(stderr, "usage: %s ", progname);
|
||
|
fprintf(stderr, "[-c copt][-C][-d] [re]\n");
|
||
|
exit(2);
|
||
|
}
|
||
|
|
||
|
if (optind >= argc) {
|
||
|
regress(stdin);
|
||
|
exit(status);
|
||
|
}
|
||
|
|
||
|
err = regcomp(&re, argv[optind++], copts);
|
||
|
if (err) {
|
||
|
len = regerror(err, &re, erbuf, sizeof(erbuf));
|
||
|
fprintf(stderr, "error %s, %d/%d `%s'\n",
|
||
|
eprint(err), len, sizeof(erbuf), erbuf);
|
||
|
exit(status);
|
||
|
}
|
||
|
regprint(&re, stdout);
|
||
|
|
||
|
if (optind >= argc) {
|
||
|
regfree(&re);
|
||
|
exit(status);
|
||
|
}
|
||
|
|
||
|
if (eopts®_STARTEND) {
|
||
|
subs[0].rm_so = startoff;
|
||
|
subs[0].rm_eo = strlen(argv[optind]) - endoff;
|
||
|
}
|
||
|
err = regexec(&re, argv[optind], (size_t)NS, subs, eopts);
|
||
|
if (err) {
|
||
|
len = regerror(err, &re, erbuf, sizeof(erbuf));
|
||
|
fprintf(stderr, "error %s, %d/%d `%s'\n",
|
||
|
eprint(err), len, sizeof(erbuf), erbuf);
|
||
|
exit(status);
|
||
|
}
|
||
|
if (!(copts®_NOSUB)) {
|
||
|
len = (int)(subs[0].rm_eo - subs[0].rm_so);
|
||
|
if (subs[0].rm_so != -1) {
|
||
|
if (len != 0)
|
||
|
printf("match `%.*s'\n", len,
|
||
|
argv[optind] + subs[0].rm_so);
|
||
|
else
|
||
|
printf("match `'@%.1s\n",
|
||
|
argv[optind] + subs[0].rm_so);
|
||
|
}
|
||
|
for (i = 1; i < NS; i++)
|
||
|
if (subs[i].rm_so != -1)
|
||
|
printf("(%d) `%.*s'\n", i,
|
||
|
(int)(subs[i].rm_eo - subs[i].rm_so),
|
||
|
argv[optind] + subs[i].rm_so);
|
||
|
}
|
||
|
exit(status);
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
- regress - main loop of regression test
|
||
|
== void regress(FILE *in);
|
||
|
*/
|
||
|
void
|
||
|
regress(in)
|
||
|
FILE *in;
|
||
|
{
|
||
|
char inbuf[1000];
|
||
|
# define MAXF 10
|
||
|
char *f[MAXF];
|
||
|
int nf;
|
||
|
int i;
|
||
|
char erbuf[100];
|
||
|
size_t ne;
|
||
|
char *badpat = "invalid regular expression";
|
||
|
# define SHORT 10
|
||
|
char *bpname = "REG_BADPAT";
|
||
|
regex_t re;
|
||
|
|
||
|
while (fgets(inbuf, sizeof(inbuf), in) != NULL) {
|
||
|
line++;
|
||
|
if (inbuf[0] == '#' || inbuf[0] == '\n')
|
||
|
continue; /* NOTE CONTINUE */
|
||
|
inbuf[strlen(inbuf)-1] = '\0'; /* get rid of stupid \n */
|
||
|
if (debug)
|
||
|
fprintf(stdout, "%d:\n", line);
|
||
|
nf = split(inbuf, f, MAXF, "\t\t");
|
||
|
if (nf < 3) {
|
||
|
fprintf(stderr, "bad input, line %d\n", line);
|
||
|
exit(1);
|
||
|
}
|
||
|
for (i = 0; i < nf; i++)
|
||
|
if (strcmp(f[i], "\"\"") == 0)
|
||
|
f[i] = "";
|
||
|
if (nf <= 3)
|
||
|
f[3] = NULL;
|
||
|
if (nf <= 4)
|
||
|
f[4] = NULL;
|
||
|
try(f[0], f[1], f[2], f[3], f[4], options('c', f[1]));
|
||
|
if (opt('&', f[1])) /* try with either type of RE */
|
||
|
try(f[0], f[1], f[2], f[3], f[4],
|
||
|
options('c', f[1]) &~ REG_EXTENDED);
|
||
|
}
|
||
|
|
||
|
ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
|
||
|
if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat)+1) {
|
||
|
fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n",
|
||
|
erbuf, badpat);
|
||
|
status = 1;
|
||
|
}
|
||
|
ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, (size_t)SHORT);
|
||
|
if (strncmp(erbuf, badpat, SHORT-1) != 0 || erbuf[SHORT-1] != '\0' ||
|
||
|
ne != strlen(badpat)+1) {
|
||
|
fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n",
|
||
|
erbuf, SHORT-1, badpat);
|
||
|
status = 1;
|
||
|
}
|
||
|
ne = regerror(REG_ITOA|REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
|
||
|
if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname)+1) {
|
||
|
fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n",
|
||
|
erbuf, bpname);
|
||
|
status = 1;
|
||
|
}
|
||
|
re.re_endp = bpname;
|
||
|
ne = regerror(REG_ATOI, &re, erbuf, sizeof(erbuf));
|
||
|
if (atoi(erbuf) != (int)REG_BADPAT) {
|
||
|
fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n",
|
||
|
erbuf, (long)REG_BADPAT);
|
||
|
status = 1;
|
||
|
} else if (ne != strlen(erbuf)+1) {
|
||
|
fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n",
|
||
|
erbuf, (long)REG_BADPAT);
|
||
|
status = 1;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
- try - try it, and report on problems
|
||
|
== void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts);
|
||
|
*/
|
||
|
void
|
||
|
try(f0, f1, f2, f3, f4, opts)
|
||
|
char *f0;
|
||
|
char *f1;
|
||
|
char *f2;
|
||
|
char *f3;
|
||
|
char *f4;
|
||
|
int opts; /* may not match f1 */
|
||
|
{
|
||
|
regex_t re;
|
||
|
# define NSUBS 10
|
||
|
regmatch_t subs[NSUBS];
|
||
|
# define NSHOULD 15
|
||
|
char *should[NSHOULD];
|
||
|
int nshould;
|
||
|
char erbuf[100];
|
||
|
int err;
|
||
|
int len;
|
||
|
char *type = (opts & REG_EXTENDED) ? "ERE" : "BRE";
|
||
|
register int i;
|
||
|
char *grump;
|
||
|
char f0copy[1000];
|
||
|
char f2copy[1000];
|
||
|
|
||
|
strcpy(f0copy, f0);
|
||
|
re.re_endp = (opts®_PEND) ? f0copy + strlen(f0copy) : NULL;
|
||
|
fixstr(f0copy);
|
||
|
err = regcomp(&re, f0copy, opts);
|
||
|
if (err != 0 && (!opt('C', f1) || err != efind(f2))) {
|
||
|
/* unexpected error or wrong error */
|
||
|
len = regerror(err, &re, erbuf, sizeof(erbuf));
|
||
|
fprintf(stderr, "%d: %s error %s, %d/%d `%s'\n",
|
||
|
line, type, eprint(err), len,
|
||
|
sizeof(erbuf), erbuf);
|
||
|
status = 1;
|
||
|
} else if (err == 0 && opt('C', f1)) {
|
||
|
/* unexpected success */
|
||
|
fprintf(stderr, "%d: %s should have given REG_%s\n",
|
||
|
line, type, f2);
|
||
|
status = 1;
|
||
|
err = 1; /* so we won't try regexec */
|
||
|
}
|
||
|
|
||
|
if (err != 0) {
|
||
|
regfree(&re);
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
strcpy(f2copy, f2);
|
||
|
fixstr(f2copy);
|
||
|
|
||
|
if (options('e', f1)®_STARTEND) {
|
||
|
if (strchr(f2, '(') == NULL || strchr(f2, ')') == NULL)
|
||
|
fprintf(stderr, "%d: bad STARTEND syntax\n", line);
|
||
|
subs[0].rm_so = strchr(f2, '(') - f2 + 1;
|
||
|
subs[0].rm_eo = strchr(f2, ')') - f2;
|
||
|
}
|
||
|
err = regexec(&re, f2copy, NSUBS, subs, options('e', f1));
|
||
|
|
||
|
if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) {
|
||
|
/* unexpected error or wrong error */
|
||
|
len = regerror(err, &re, erbuf, sizeof(erbuf));
|
||
|
fprintf(stderr, "%d: %s exec error %s, %d/%d `%s'\n",
|
||
|
line, type, eprint(err), len,
|
||
|
sizeof(erbuf), erbuf);
|
||
|
status = 1;
|
||
|
} else if (err != 0) {
|
||
|
/* nothing more to check */
|
||
|
} else if (f3 == NULL) {
|
||
|
/* unexpected success */
|
||
|
fprintf(stderr, "%d: %s exec should have failed\n",
|
||
|
line, type);
|
||
|
status = 1;
|
||
|
err = 1; /* just on principle */
|
||
|
} else if (opts®_NOSUB) {
|
||
|
/* nothing more to check */
|
||
|
} else if ((grump = check(f2, subs[0], f3)) != NULL) {
|
||
|
fprintf(stderr, "%d: %s %s\n", line, type, grump);
|
||
|
status = 1;
|
||
|
err = 1;
|
||
|
}
|
||
|
|
||
|
if (err != 0 || f4 == NULL) {
|
||
|
regfree(&re);
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
for (i = 1; i < NSHOULD; i++)
|
||
|
should[i] = NULL;
|
||
|
nshould = split(f4, should+1, NSHOULD-1, ",");
|
||
|
if (nshould == 0) {
|
||
|
nshould = 1;
|
||
|
should[1] = "";
|
||
|
}
|
||
|
for (i = 1; i < NSUBS; i++) {
|
||
|
grump = check(f2, subs[i], should[i]);
|
||
|
if (grump != NULL) {
|
||
|
fprintf(stderr, "%d: %s $%d %s\n", line,
|
||
|
type, i, grump);
|
||
|
status = 1;
|
||
|
err = 1;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
regfree(&re);
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
- options - pick options out of a regression-test string
|
||
|
== int options(int type, char *s);
|
||
|
*/
|
||
|
int
|
||
|
options(type, s)
|
||
|
int type; /* 'c' compile, 'e' exec */
|
||
|
char *s;
|
||
|
{
|
||
|
register char *p;
|
||
|
register int o = (type == 'c') ? copts : eopts;
|
||
|
register char *legal = (type == 'c') ? "bisnmp" : "^$#tl";
|
||
|
|
||
|
for (p = s; *p != '\0'; p++)
|
||
|
if (strchr(legal, *p) != NULL)
|
||
|
switch (*p) {
|
||
|
case 'b':
|
||
|
o &= ~REG_EXTENDED;
|
||
|
break;
|
||
|
case 'i':
|
||
|
o |= REG_ICASE;
|
||
|
break;
|
||
|
case 's':
|
||
|
o |= REG_NOSUB;
|
||
|
break;
|
||
|
case 'n':
|
||
|
o |= REG_NEWLINE;
|
||
|
break;
|
||
|
case 'm':
|
||
|
o &= ~REG_EXTENDED;
|
||
|
o |= REG_NOSPEC;
|
||
|
break;
|
||
|
case 'p':
|
||
|
o |= REG_PEND;
|
||
|
break;
|
||
|
case '^':
|
||
|
o |= REG_NOTBOL;
|
||
|
break;
|
||
|
case '$':
|
||
|
o |= REG_NOTEOL;
|
||
|
break;
|
||
|
case '#':
|
||
|
o |= REG_STARTEND;
|
||
|
break;
|
||
|
case 't': /* trace */
|
||
|
o |= REG_TRACE;
|
||
|
break;
|
||
|
case 'l': /* force long representation */
|
||
|
o |= REG_LARGE;
|
||
|
break;
|
||
|
case 'r': /* force backref use */
|
||
|
o |= REG_BACKR;
|
||
|
break;
|
||
|
}
|
||
|
return(o);
|
||
|
}
|
||
|
|
||
|
/*
 - opt - is a particular option in a regression string?
 == int opt(int c, char *s);
 *
 * Returns 1 if the character c occurs in the option string s, else 0.
 */
int				/* predicate */
opt(c, s)
int c;
char *s;
{
	return(strchr(s, c) != NULL);
}

/*
 - fixstr - transform magic characters in strings
 == void fixstr(register char *p);
 *
 * Rewrites the string in place: 'N' becomes newline, 'T' tab, 'S' space
 * and 'Z' a NUL byte.  A NULL pointer is accepted and ignored.
 */
void
fixstr(p)
register char *p;
{
	if (p == NULL)
		return;

	for (; *p != '\0'; p++)
		switch (*p) {
		case 'N':
			*p = '\n';
			break;
		case 'T':
			*p = '\t';
			break;
		case 'S':
			*p = ' ';
			break;
		case 'Z':
			/*
			 * Embeds a NUL; the loop steps past it before testing
			 * again, so the rest of the string is still converted.
			 */
			*p = '\0';
			break;
		default:
			break;
		}
}

/*
 - check - check a substring match
 == char *check(char *str, regmatch_t sub, char *should);
 *
 * str is the string that was matched against, sub the offsets reported
 * for one (sub)match, and should the expected text.  should may be NULL
 * or "-" (no match expected), or "@text" (an empty match is expected at
 * a point where str continues with text; a bare "@" means end of string).
 * Returns NULL if sub agrees with the expectation.  Otherwise returns a
 * complaint, which is either a string literal or a static buffer that the
 * next call overwrites.
 */
char *				/* NULL or complaint */
check(str, sub, should)
char *str;
regmatch_t sub;
char *should;
{
	register int len;
	register int shlen;
	register char *p;
	static char grump[500];
	register char *at = NULL;

	if (should != NULL && strcmp(should, "-") == 0)
		should = NULL;
	if (should != NULL && should[0] == '@') {
		at = should + 1;
		should = "";
	}

	/* check rm_so and rm_eo for consistency */
	if (sub.rm_so > sub.rm_eo || (sub.rm_so == -1 && sub.rm_eo != -1) ||
				(sub.rm_so != -1 && sub.rm_eo == -1) ||
				(sub.rm_so != -1 && sub.rm_so < 0) ||
				(sub.rm_eo != -1 && sub.rm_eo < 0) ) {
		(void) snprintf(grump, sizeof(grump), "start %ld end %ld",
					(long)sub.rm_so, (long)sub.rm_eo);
		return(grump);
	}

	/* check for no match */
	if (sub.rm_so == -1 && should == NULL)
		return(NULL);
	if (sub.rm_so == -1)
		return("did not match");

	/* check for in range (rm_eo is known to be non-negative here) */
	if ((size_t)sub.rm_eo > strlen(str)) {
		(void) snprintf(grump, sizeof(grump),
					"start %ld end %ld, past end of string",
					(long)sub.rm_so, (long)sub.rm_eo);
		return(grump);
	}

	len = (int)(sub.rm_eo - sub.rm_so);
	p = str + sub.rm_so;

	/* check for not supposed to match; must precede strlen(should) */
	if (should == NULL) {
		(void) snprintf(grump, sizeof(grump), "matched `%.*s'", len, p);
		return(grump);
	}
	shlen = (int)strlen(should);

	/* check for wrong match */
	if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) {
		(void) snprintf(grump, sizeof(grump),
					"matched `%.*s' instead", len, p);
		return(grump);
	}
	if (shlen > 0)
		return(NULL);

	/* check null match in right place */
	if (at == NULL)
		return(NULL);
	shlen = (int)strlen(at);
	if (shlen == 0)
		shlen = 1;	/* force check for end-of-string */
	if (strncmp(p, at, (size_t)shlen) != 0) {
		(void) snprintf(grump, sizeof(grump),
					"matched null at `%.20s'", p);
		return(grump);
	}
	return(NULL);
}

/*
|
||
|
- eprint - convert error number to name
|
||
|
== static char *eprint(int err);
|
||
|
*/
|
||
|
static char *
|
||
|
eprint(err)
|
||
|
int err;
|
||
|
{
|
||
|
static char epbuf[100];
|
||
|
size_t len;
|
||
|
|
||
|
len = regerror(REG_ITOA|err, (regex_t *)NULL, epbuf, sizeof(epbuf));
|
||
|
assert(len <= sizeof(epbuf));
|
||
|
return(epbuf);
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
- efind - convert error name to number
|
||
|
== static int efind(char *name);
|
||
|
*/
|
||
|
static int
|
||
|
efind(name)
|
||
|
char *name;
|
||
|
{
|
||
|
static char efbuf[100];
|
||
|
size_t n;
|
||
|
regex_t re;
|
||
|
|
||
|
sprintf(efbuf, "REG_%s", name);
|
||
|
assert(strlen(efbuf) < sizeof(efbuf));
|
||
|
re.re_endp = efbuf;
|
||
|
(void) regerror(REG_ATOI, &re, efbuf, sizeof(efbuf));
|
||
|
return(atoi(efbuf));
|
||
|
}
|