Making a dir for the C regexp engine (Spencer's)
This commit is contained in:
		
							parent
							
								
									eb65bdec23
								
							
						
					
					
						commit
						a195317fc8
					
				|  | @ -0,0 +1,101 @@ | |||
| New in alpha3.6:  A couple more portability glitches fixed. | ||||
| 
 | ||||
| New in alpha3.5:  Active development of this code has been stopped -- | ||||
| I'm working on a complete reimplementation -- but folks have found some | ||||
| minor portability glitches and the like, hence this release to fix them. | ||||
| One penalty:  slightly reduced compatibility with old compilers, because | ||||
| the ANSI C `unsigned long' type and `ul' constant suffix are used in a | ||||
| few places (I could avoid this but it would be considerably more work). | ||||
| 
 | ||||
| New in alpha3.4:  The complex bug alluded to below has been fixed (in a | ||||
| slightly kludgey temporary way that may hurt efficiency a bit; this is | ||||
| another "get it out the door for 4.4" release).  The tests at the end of | ||||
| the tests file have accordingly been uncommented.  The primary sign of | ||||
| the bug was that something like a?b matching ab matched b rather than ab. | ||||
| (The bug was essentially specific to this exact situation, else it would | ||||
| have shown up earlier.) | ||||
| 
 | ||||
| New in alpha3.3:  The definition of word boundaries has been altered | ||||
| slightly, to more closely match the usual programming notion that "_" | ||||
| is an alphabetic.  Stuff used for pre-ANSI systems is now in a subdir, | ||||
| and the makefile no longer alludes to it in mysterious ways.  The | ||||
| makefile has generally been cleaned up some.  Fixes have been made | ||||
| (again!) so that the regression test will run without -DREDEBUG, at | ||||
| the cost of weaker checking.  A workaround for a bug in some folks' | ||||
| <assert.h> has been added.  And some more things have been added to | ||||
| tests, including a couple right at the end which are commented out | ||||
| because the code currently flunks them (complex bug; fix coming). | ||||
| Plus the usual minor cleanup. | ||||
| 
 | ||||
| New in alpha3.2:  Assorted bits of cleanup and portability improvement | ||||
| (the development base is now a BSDI system using GCC instead of an ancient | ||||
| Sun system, and the newer compiler exposed some glitches).  Fix for a | ||||
| serious bug that affected REs using many [] (including REG_ICASE REs | ||||
| because of the way they are implemented), *sometimes*, depending on | ||||
| memory-allocation patterns.  The header-file prototypes no longer name | ||||
| the parameters, avoiding possible name conflicts.  The possibility that | ||||
| some clot has defined CHAR_MIN as (say) `-128' instead of `(-128)' is | ||||
| now handled gracefully.  "uchar" is no longer used as an internal type | ||||
| name (too many people have the same idea).  Still the same old lousy | ||||
| performance, alas. | ||||
| 
 | ||||
| New in alpha3.1:  Basically nothing, this release is just a bookkeeping | ||||
| convenience.  Stay tuned. | ||||
| 
 | ||||
| New in alpha3.0:  Performance is no better, alas, but some fixes have been | ||||
| made and some functionality has been added.  (This is basically the "get | ||||
| it out the door in time for 4.4" release.)  One bug fix:  regfree() didn't | ||||
| free the main internal structure (how embarrassing).  It is now possible | ||||
| to put NULs in either the RE or the target string, using (resp.) a new | ||||
| REG_PEND flag and the old REG_STARTEND flag.  The REG_NOSPEC flag to | ||||
| regcomp() makes all characters ordinary, so you can match a literal | ||||
| string easily (this will become more useful when performance improves!). | ||||
| There are now primitives to match beginnings and ends of words, although | ||||
| the syntax is disgusting and so is the implementation.  The REG_ATOI | ||||
| debugging interface has changed a bit.  And there has been considerable | ||||
| internal cleanup of various kinds. | ||||
| 
 | ||||
| New in alpha2.3:  Split change list out of README, and moved flags notes | ||||
| into Makefile.  Macro-ized the name of regex(7) in regex(3), since it has | ||||
| to change for 4.4BSD.  Cleanup work in engine.c, and some new regression | ||||
| tests to catch tricky cases thereof. | ||||
| 
 | ||||
| New in alpha2.2:  Out-of-date manpages updated.  Regerror() acquires two | ||||
| small extensions -- REG_ITOA and REG_ATOI -- which avoid debugging kludges | ||||
| in my own test program and might be useful to others for similar purposes. | ||||
| The regression test will now compile (and run) without REDEBUG.  The | ||||
| BRE \$ bug is fixed.  Most uses of "uchar" are gone; it's all chars now. | ||||
| Char/uchar parameters are now written int/unsigned, to avoid possible | ||||
| portability problems with unpromoted parameters.  Some unsigned casts have | ||||
| been introduced to minimize portability problems with shifting into sign | ||||
| bits. | ||||
| 
 | ||||
| New in alpha2.1:  Lots of little stuff, cleanup and fixes.  The one big | ||||
| thing is that regex.h is now generated, using mkh, rather than being | ||||
| supplied in the distribution; due to circularities in dependencies, | ||||
| you have to build regex.h explicitly by "make h".  The two known bugs | ||||
| have been fixed (and the regression test now checks for them), as has a | ||||
| problem with assertions not being suppressed in the absence of REDEBUG. | ||||
| No performance work yet. | ||||
| 
 | ||||
| New in alpha2:  Backslash-anything is an ordinary character, not an | ||||
| error (except, of course, for the handful of backslashed metacharacters | ||||
| in BREs), which should reduce script breakage.  The regression test | ||||
| checks *where* null strings are supposed to match, and has generally | ||||
| been tightened up somewhat.  Small bug fixes in parameter passing (not | ||||
| harmful, but technically errors) and some other areas.  Debugging | ||||
| invoked by defining REDEBUG rather than not defining NDEBUG. | ||||
| 
 | ||||
| New in alpha+3:  full prototyping for internal routines, using a little | ||||
| helper program, mkh, which extracts prototypes given in stylized comments. | ||||
| More minor cleanup.  Buglet fix:  it's CHAR_BIT, not CHAR_BITS.  Simple | ||||
| pre-screening of input when a literal string is known to be part of the | ||||
| RE; this does wonders for performance. | ||||
| 
 | ||||
| New in alpha+2:  minor bits of cleanup.  Notably, the number "32" for the | ||||
| word width isn't hardwired into regexec.c any more, the public header | ||||
| file prototypes the functions if __STDC__ is defined, and some small typos | ||||
| in the manpages have been fixed. | ||||
| 
 | ||||
| New in alpha+1:  improvements to the manual pages, and an important | ||||
| extension, the REG_STARTEND option to regexec(). | ||||
|  | @ -0,0 +1,31 @@ | |||
/*
 * character-class table
 *
 * One entry per class name ("alnum", "alpha", ...).  `chars' lists every
 * single character belonging to the class; `multis' is the empty string
 * in every entry here.  The table ends with an entry whose name is NULL.
 */
static struct cclass {
	char *name;		/* class name */
	char *chars;		/* member characters, as a C string */
	char *multis;		/* always "" in this table */
} cclasses[] = {
	{ "alnum",	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
			"0123456789",
			"" },
	{ "alpha",	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
			"" },
	{ "blank",	" \t",
			"" },
	{ "cntrl",	"\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24"
			"\25\26\27\30\31\32\33\34\35\36\37\177",
			"" },
	{ "digit",	"0123456789",
			"" },
	{ "graph",	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
			"0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
			"" },
	{ "lower",	"abcdefghijklmnopqrstuvwxyz",
			"" },
	{ "print",	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
			"0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ",
			"" },
	{ "punct",	"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
			"" },
	{ "space",	"\t\n\v\f\r ",
			"" },
	{ "upper",	"ABCDEFGHIJKLMNOPQRSTUVWXYZ",
			"" },
	{ "xdigit",	"0123456789ABCDEFabcdef",
			"" },
	{ NULL,		0,
			"" }		/* end-of-table marker */
};
|  | @ -0,0 +1,102 @@ | |||
/*
 * character-name table
 *
 * Pairs a symbolic character name with the character it stands for.
 * Several characters have more than one name, so codes repeat.  The
 * table ends with an entry whose name is NULL.
 */
static struct cname {
	char *name;		/* symbolic name */
	char code;		/* the character itself */
} cnames[] = {
	/* control characters */
	{ "NUL",			'\0' },
	{ "SOH",			'\001' },
	{ "STX",			'\002' },
	{ "ETX",			'\003' },
	{ "EOT",			'\004' },
	{ "ENQ",			'\005' },
	{ "ACK",			'\006' },
	{ "BEL",			'\007' },
	{ "alert",			'\007' },
	{ "BS",				'\010' },
	{ "backspace",			'\b' },
	{ "HT",				'\011' },
	{ "tab",			'\t' },
	{ "LF",				'\012' },
	{ "newline",			'\n' },
	{ "VT",				'\013' },
	{ "vertical-tab",		'\v' },
	{ "FF",				'\014' },
	{ "form-feed",			'\f' },
	{ "CR",				'\015' },
	{ "carriage-return",		'\r' },
	{ "SO",				'\016' },
	{ "SI",				'\017' },
	{ "DLE",			'\020' },
	{ "DC1",			'\021' },
	{ "DC2",			'\022' },
	{ "DC3",			'\023' },
	{ "DC4",			'\024' },
	{ "NAK",			'\025' },
	{ "SYN",			'\026' },
	{ "ETB",			'\027' },
	{ "CAN",			'\030' },
	{ "EM",				'\031' },
	{ "SUB",			'\032' },
	{ "ESC",			'\033' },
	{ "IS4",			'\034' },
	{ "FS",				'\034' },
	{ "IS3",			'\035' },
	{ "GS",				'\035' },
	{ "IS2",			'\036' },
	{ "RS",				'\036' },
	{ "IS1",			'\037' },
	{ "US",				'\037' },

	/* space and punctuation up to the digits */
	{ "space",			' ' },
	{ "exclamation-mark",		'!' },
	{ "quotation-mark",		'"' },
	{ "number-sign",		'#' },
	{ "dollar-sign",		'$' },
	{ "percent-sign",		'%' },
	{ "ampersand",			'&' },
	{ "apostrophe",			'\'' },
	{ "left-parenthesis",		'(' },
	{ "right-parenthesis",		')' },
	{ "asterisk",			'*' },
	{ "plus-sign",			'+' },
	{ "comma",			',' },
	{ "hyphen",			'-' },
	{ "hyphen-minus",		'-' },
	{ "period",			'.' },
	{ "full-stop",			'.' },
	{ "slash",			'/' },
	{ "solidus",			'/' },

	/* digits */
	{ "zero",			'0' },
	{ "one",			'1' },
	{ "two",			'2' },
	{ "three",			'3' },
	{ "four",			'4' },
	{ "five",			'5' },
	{ "six",			'6' },
	{ "seven",			'7' },
	{ "eight",			'8' },
	{ "nine",			'9' },

	/* remaining punctuation */
	{ "colon",			':' },
	{ "semicolon",			';' },
	{ "less-than-sign",		'<' },
	{ "equals-sign",		'=' },
	{ "greater-than-sign",		'>' },
	{ "question-mark",		'?' },
	{ "commercial-at",		'@' },
	{ "left-square-bracket",	'[' },
	{ "backslash",			'\\' },
	{ "reverse-solidus",		'\\' },
	{ "right-square-bracket",	']' },
	{ "circumflex",			'^' },
	{ "circumflex-accent",		'^' },
	{ "underscore",			'_' },
	{ "low-line",			'_' },
	{ "grave-accent",		'`' },
	{ "left-brace",			'{' },
	{ "left-curly-bracket",		'{' },
	{ "vertical-line",		'|' },
	{ "right-brace",		'}' },
	{ "right-curly-bracket",	'}' },
	{ "tilde",			'~' },
	{ "DEL",			'\177' },

	{ NULL,				0 }	/* end-of-table marker */
};
|  | @ -0,0 +1,242 @@ | |||
| #include <stdio.h> | ||||
| #include <string.h> | ||||
| #include <ctype.h> | ||||
| #include <limits.h> | ||||
| #include <stdlib.h> | ||||
| #include <sys/types.h> | ||||
| #include <regex.h> | ||||
| 
 | ||||
| #include "utils.h" | ||||
| #include "regex2.h" | ||||
| #include "debug.ih" | ||||
| 
 | ||||
| /*
 | ||||
|  - regprint - print a regexp for debugging | ||||
|  == void regprint(regex_t *r, FILE *d); | ||||
|  */ | ||||
| void | ||||
| regprint(r, d) | ||||
| regex_t *r; | ||||
| FILE *d; | ||||
| { | ||||
| 	register struct re_guts *g = r->re_g; | ||||
| 	register int i; | ||||
| 	register int c; | ||||
| 	register int last; | ||||
| 	int nincat[NC]; | ||||
| 
 | ||||
| 	fprintf(d, "%ld states, %d categories", (long)g->nstates, | ||||
| 							g->ncategories); | ||||
| 	fprintf(d, ", first %ld last %ld", (long)g->firststate, | ||||
| 						(long)g->laststate); | ||||
| 	if (g->iflags&USEBOL) | ||||
| 		fprintf(d, ", USEBOL"); | ||||
| 	if (g->iflags&USEEOL) | ||||
| 		fprintf(d, ", USEEOL"); | ||||
| 	if (g->iflags&BAD) | ||||
| 		fprintf(d, ", BAD"); | ||||
| 	if (g->nsub > 0) | ||||
| 		fprintf(d, ", nsub=%ld", (long)g->nsub); | ||||
| 	if (g->must != NULL) | ||||
| 		fprintf(d, ", must(%ld) `%*s'", (long)g->mlen, (int)g->mlen, | ||||
| 								g->must); | ||||
| 	if (g->backrefs) | ||||
| 		fprintf(d, ", backrefs"); | ||||
| 	if (g->nplus > 0) | ||||
| 		fprintf(d, ", nplus %ld", (long)g->nplus); | ||||
| 	fprintf(d, "\n"); | ||||
| 	s_print(g, d); | ||||
| 	for (i = 0; i < g->ncategories; i++) { | ||||
| 		nincat[i] = 0; | ||||
| 		for (c = CHAR_MIN; c <= CHAR_MAX; c++) | ||||
| 			if (g->categories[c] == i) | ||||
| 				nincat[i]++; | ||||
| 	} | ||||
| 	fprintf(d, "cc0#%d", nincat[0]); | ||||
| 	for (i = 1; i < g->ncategories; i++) | ||||
| 		if (nincat[i] == 1) { | ||||
| 			for (c = CHAR_MIN; c <= CHAR_MAX; c++) | ||||
| 				if (g->categories[c] == i) | ||||
| 					break; | ||||
| 			fprintf(d, ", %d=%s", i, regchar(c)); | ||||
| 		} | ||||
| 	fprintf(d, "\n"); | ||||
| 	for (i = 1; i < g->ncategories; i++) | ||||
| 		if (nincat[i] != 1) { | ||||
| 			fprintf(d, "cc%d\t", i); | ||||
| 			last = -1; | ||||
| 			for (c = CHAR_MIN; c <= CHAR_MAX+1; c++)	/* +1 does flush */ | ||||
| 				if (c <= CHAR_MAX && g->categories[c] == i) { | ||||
| 					if (last < 0) { | ||||
| 						fprintf(d, "%s", regchar(c)); | ||||
| 						last = c; | ||||
| 					} | ||||
| 				} else { | ||||
| 					if (last >= 0) { | ||||
| 						if (last != c-1) | ||||
| 							fprintf(d, "-%s", | ||||
| 								regchar(c-1)); | ||||
| 						last = -1; | ||||
| 					} | ||||
| 				} | ||||
| 			fprintf(d, "\n"); | ||||
| 		} | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  - s_print - print the strip for debugging | ||||
|  == static void s_print(register struct re_guts *g, FILE *d); | ||||
|  */ | ||||
| static void | ||||
| s_print(g, d) | ||||
| register struct re_guts *g; | ||||
| FILE *d; | ||||
| { | ||||
| 	register sop *s; | ||||
| 	register cset *cs; | ||||
| 	register int i; | ||||
| 	register int done = 0; | ||||
| 	register sop opnd; | ||||
| 	register int col = 0; | ||||
| 	register int last; | ||||
| 	register sopno offset = 2; | ||||
| #	define	GAP()	{	if (offset % 5 == 0) { \ | ||||
| 					if (col > 40) { \ | ||||
| 						fprintf(d, "\n\t"); \ | ||||
| 						col = 0; \ | ||||
| 					} else { \ | ||||
| 						fprintf(d, " "); \ | ||||
| 						col++; \ | ||||
| 					} \ | ||||
| 				} else \ | ||||
| 					col++; \ | ||||
| 				offset++; \ | ||||
| 			} | ||||
| 
 | ||||
| 	if (OP(g->strip[0]) != OEND) | ||||
| 		fprintf(d, "missing initial OEND!\n"); | ||||
| 	for (s = &g->strip[1]; !done; s++) { | ||||
| 		opnd = OPND(*s); | ||||
| 		switch (OP(*s)) { | ||||
| 		case OEND: | ||||
| 			fprintf(d, "\n"); | ||||
| 			done = 1; | ||||
| 			break; | ||||
| 		case OCHAR: | ||||
| 			if (strchr("\\|()^$.[+*?{}!<> ", (char)opnd) != NULL) | ||||
| 				fprintf(d, "\\%c", (char)opnd); | ||||
| 			else | ||||
| 				fprintf(d, "%s", regchar((char)opnd)); | ||||
| 			break; | ||||
| 		case OBOL: | ||||
| 			fprintf(d, "^"); | ||||
| 			break; | ||||
| 		case OEOL: | ||||
| 			fprintf(d, "$"); | ||||
| 			break; | ||||
| 		case OBOW: | ||||
| 			fprintf(d, "\\{"); | ||||
| 			break; | ||||
| 		case OEOW: | ||||
| 			fprintf(d, "\\}"); | ||||
| 			break; | ||||
| 		case OANY: | ||||
| 			fprintf(d, "."); | ||||
| 			break; | ||||
| 		case OANYOF: | ||||
| 			fprintf(d, "[(%ld)", (long)opnd); | ||||
| 			cs = &g->sets[opnd]; | ||||
| 			last = -1; | ||||
| 			for (i = 0; i < g->csetsize+1; i++)	/* +1 flushes */ | ||||
| 				if (CHIN(cs, i) && i < g->csetsize) { | ||||
| 					if (last < 0) { | ||||
| 						fprintf(d, "%s", regchar(i)); | ||||
| 						last = i; | ||||
| 					} | ||||
| 				} else { | ||||
| 					if (last >= 0) { | ||||
| 						if (last != i-1) | ||||
| 							fprintf(d, "-%s", | ||||
| 								regchar(i-1)); | ||||
| 						last = -1; | ||||
| 					} | ||||
| 				} | ||||
| 			fprintf(d, "]"); | ||||
| 			break; | ||||
| 		case OBACK_: | ||||
| 			fprintf(d, "(\\<%ld>", (long)opnd); | ||||
| 			break; | ||||
| 		case O_BACK: | ||||
| 			fprintf(d, "<%ld>\\)", (long)opnd); | ||||
| 			break; | ||||
| 		case OPLUS_: | ||||
| 			fprintf(d, "(+"); | ||||
| 			if (OP(*(s+opnd)) != O_PLUS) | ||||
| 				fprintf(d, "<%ld>", (long)opnd); | ||||
| 			break; | ||||
| 		case O_PLUS: | ||||
| 			if (OP(*(s-opnd)) != OPLUS_) | ||||
| 				fprintf(d, "<%ld>", (long)opnd); | ||||
| 			fprintf(d, "+)"); | ||||
| 			break; | ||||
| 		case OQUEST_: | ||||
| 			fprintf(d, "(?"); | ||||
| 			if (OP(*(s+opnd)) != O_QUEST) | ||||
| 				fprintf(d, "<%ld>", (long)opnd); | ||||
| 			break; | ||||
| 		case O_QUEST: | ||||
| 			if (OP(*(s-opnd)) != OQUEST_) | ||||
| 				fprintf(d, "<%ld>", (long)opnd); | ||||
| 			fprintf(d, "?)"); | ||||
| 			break; | ||||
| 		case OLPAREN: | ||||
| 			fprintf(d, "((<%ld>", (long)opnd); | ||||
| 			break; | ||||
| 		case ORPAREN: | ||||
| 			fprintf(d, "<%ld>))", (long)opnd); | ||||
| 			break; | ||||
| 		case OCH_: | ||||
| 			fprintf(d, "<"); | ||||
| 			if (OP(*(s+opnd)) != OOR2) | ||||
| 				fprintf(d, "<%ld>", (long)opnd); | ||||
| 			break; | ||||
| 		case OOR1: | ||||
| 			if (OP(*(s-opnd)) != OOR1 && OP(*(s-opnd)) != OCH_) | ||||
| 				fprintf(d, "<%ld>", (long)opnd); | ||||
| 			fprintf(d, "|"); | ||||
| 			break; | ||||
| 		case OOR2: | ||||
| 			fprintf(d, "|"); | ||||
| 			if (OP(*(s+opnd)) != OOR2 && OP(*(s+opnd)) != O_CH) | ||||
| 				fprintf(d, "<%ld>", (long)opnd); | ||||
| 			break; | ||||
| 		case O_CH: | ||||
| 			if (OP(*(s-opnd)) != OOR1) | ||||
| 				fprintf(d, "<%ld>", (long)opnd); | ||||
| 			fprintf(d, ">"); | ||||
| 			break; | ||||
| 		default: | ||||
| 			fprintf(d, "!%d(%d)!", OP(*s), opnd); | ||||
| 			break; | ||||
| 		} | ||||
| 		if (!done) | ||||
| 			GAP(); | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
/*
 - regchar - make a character printable
 == static char *regchar(int ch);
 *
 * Returns a pointer to a static buffer holding either the character
 * itself (if printable, or a space) or a backslash followed by its octal
 * value.  The buffer is overwritten by the next call.
 *
 * ch is reduced to unsigned char first.  Callers pass plain-char values
 * that can be negative; handing those to isprint() is undefined, and
 * formatting a negative int with %o produces 11 digits, which does not
 * fit in buf.
 */
static char *			/* -> representation */
regchar(ch)
int ch;
{
	static char buf[10];
	unsigned char uc = (unsigned char)ch;

	if (isprint(uc) || uc == ' ')
		sprintf(buf, "%c", uc);
	else
		sprintf(buf, "\\%o", (unsigned)uc);	/* at most "\377" */
	return(buf);
}
|  | @ -0,0 +1,14 @@ | |||
| /* ========= begin header generated by ./mkh ========= */ | ||||
| #ifdef __cplusplus | ||||
| extern "C" { | ||||
| #endif | ||||
| 
 | ||||
| /* === debug.c === */ | ||||
| void regprint(regex_t *r, FILE *d); | ||||
| static void s_print(register struct re_guts *g, FILE *d); | ||||
| static char *regchar(int ch); | ||||
| 
 | ||||
| #ifdef __cplusplus | ||||
| } | ||||
| #endif | ||||
| /* ========= end header generated by ./mkh ========= */ | ||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							|  | @ -0,0 +1,35 @@ | |||
| /* ========= begin header generated by ./mkh ========= */ | ||||
| #ifdef __cplusplus | ||||
| extern "C" { | ||||
| #endif | ||||
| 
 | ||||
| /* === engine.c === */ | ||||
| static int matcher(register struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], int eflags); | ||||
| static char *dissect(register struct match *m, char *start, char *stop, sopno startst, sopno stopst); | ||||
| static char *backref(register struct match *m, char *start, char *stop, sopno startst, sopno stopst, sopno lev); | ||||
| static char *fast(register struct match *m, char *start, char *stop, sopno startst, sopno stopst); | ||||
| static char *slow(register struct match *m, char *start, char *stop, sopno startst, sopno stopst); | ||||
| static states step(register struct re_guts *g, sopno start, sopno stop, register states bef, int ch, register states aft); | ||||
| #define	BOL	(OUT+1) | ||||
| #define	EOL	(BOL+1) | ||||
| #define	BOLEOL	(BOL+2) | ||||
| #define	NOTHING	(BOL+3) | ||||
| #define	BOW	(BOL+4) | ||||
| #define	EOW	(BOL+5) | ||||
| #define	CODEMAX	(BOL+5)		/* highest code used */ | ||||
| #define	NONCHAR(c)	((c) > CHAR_MAX) | ||||
| #define	NNONCHAR	(CODEMAX-CHAR_MAX) | ||||
| #ifdef REDEBUG | ||||
| static void print(struct match *m, char *caption, states st, int ch, FILE *d); | ||||
| #endif | ||||
| #ifdef REDEBUG | ||||
| static void at(struct match *m, char *title, char *start, char *stop, sopno startst, sopno stopst); | ||||
| #endif | ||||
| #ifdef REDEBUG | ||||
| static char *pchar(int ch); | ||||
| #endif | ||||
| 
 | ||||
| #ifdef __cplusplus | ||||
| } | ||||
| #endif | ||||
| /* ========= end header generated by ./mkh ========= */ | ||||
|  | @ -0,0 +1,510 @@ | |||
| #include <stdio.h> | ||||
| #include <string.h> | ||||
| #include <sys/types.h> | ||||
| #include <regex.h> | ||||
| #include <assert.h> | ||||
| 
 | ||||
| #include "main.ih" | ||||
| 
 | ||||
| char *progname;		/* argv[0], set by main(); used in the usage message */ | ||||
| int debug = 0;		/* incremented per -x; when nonzero regress() echoes each line number */ | ||||
| int line = 0;		/* number of the regression-input line being processed */ | ||||
| int status = 0;		/* set to 1 when a test complains; passed to exit() */ | ||||
| 
 | ||||
| int copts = REG_EXTENDED;	/* regcomp() flags; starting value for options('c', ...), replaced by -c */ | ||||
| int eopts = 0;		/* regexec() flags; starting value for options('e', ...), replaced by -e */ | ||||
| regoff_t startoff = 0;	/* -S value: becomes subs[0].rm_so when REG_STARTEND is set */ | ||||
| regoff_t endoff = 0;	/* -E value: subtracted from the string length to give subs[0].rm_eo */ | ||||
| 
 | ||||
| 
 | ||||
| extern int split();	/* not defined in this file; splits a string into fields */ | ||||
| extern void regprint();	/* debugging dump of a compiled RE */ | ||||
| 
 | ||||
| /*
 | ||||
|  - main - do the simple case, hand off to regress() for regression | ||||
|  */ | ||||
| main(argc, argv) | ||||
| int argc; | ||||
| char *argv[]; | ||||
| { | ||||
| 	regex_t re; | ||||
| #	define	NS	10 | ||||
| 	regmatch_t subs[NS]; | ||||
| 	char erbuf[100]; | ||||
| 	int err; | ||||
| 	size_t len; | ||||
| 	int c; | ||||
| 	int errflg = 0; | ||||
| 	register int i; | ||||
| 	extern int optind; | ||||
| 	extern char *optarg; | ||||
| 
 | ||||
| 	progname = argv[0]; | ||||
| 
 | ||||
| 	while ((c = getopt(argc, argv, "c:e:S:E:x")) != EOF) | ||||
| 		switch (c) { | ||||
| 		case 'c':	/* compile options */ | ||||
| 			copts = options('c', optarg); | ||||
| 			break; | ||||
| 		case 'e':	/* execute options */ | ||||
| 			eopts = options('e', optarg); | ||||
| 			break; | ||||
| 		case 'S':	/* start offset */ | ||||
| 			startoff = (regoff_t)atoi(optarg); | ||||
| 			break; | ||||
| 		case 'E':	/* end offset */ | ||||
| 			endoff = (regoff_t)atoi(optarg); | ||||
| 			break; | ||||
| 		case 'x':	/* Debugging. */ | ||||
| 			debug++; | ||||
| 			break; | ||||
| 		case '?': | ||||
| 		default: | ||||
| 			errflg++; | ||||
| 			break; | ||||
| 		} | ||||
| 	if (errflg) { | ||||
| 		fprintf(stderr, "usage: %s ", progname); | ||||
| 		fprintf(stderr, "[-c copt][-C][-d] [re]\n"); | ||||
| 		exit(2); | ||||
| 	} | ||||
| 
 | ||||
| 	if (optind >= argc) { | ||||
| 		regress(stdin); | ||||
| 		exit(status); | ||||
| 	} | ||||
| 
 | ||||
| 	err = regcomp(&re, argv[optind++], copts); | ||||
| 	if (err) { | ||||
| 		len = regerror(err, &re, erbuf, sizeof(erbuf)); | ||||
| 		fprintf(stderr, "error %s, %d/%d `%s'\n", | ||||
| 			eprint(err), len, sizeof(erbuf), erbuf); | ||||
| 		exit(status); | ||||
| 	} | ||||
| 	regprint(&re, stdout);	 | ||||
| 
 | ||||
| 	if (optind >= argc) { | ||||
| 		regfree(&re); | ||||
| 		exit(status); | ||||
| 	} | ||||
| 
 | ||||
| 	if (eopts®_STARTEND) { | ||||
| 		subs[0].rm_so = startoff; | ||||
| 		subs[0].rm_eo = strlen(argv[optind]) - endoff; | ||||
| 	} | ||||
| 	err = regexec(&re, argv[optind], (size_t)NS, subs, eopts); | ||||
| 	if (err) { | ||||
| 		len = regerror(err, &re, erbuf, sizeof(erbuf)); | ||||
| 		fprintf(stderr, "error %s, %d/%d `%s'\n", | ||||
| 			eprint(err), len, sizeof(erbuf), erbuf); | ||||
| 		exit(status); | ||||
| 	} | ||||
| 	if (!(copts®_NOSUB)) { | ||||
| 		len = (int)(subs[0].rm_eo - subs[0].rm_so); | ||||
| 		if (subs[0].rm_so != -1) { | ||||
| 			if (len != 0) | ||||
| 				printf("match `%.*s'\n", len, | ||||
| 					argv[optind] + subs[0].rm_so); | ||||
| 			else | ||||
| 				printf("match `'@%.1s\n", | ||||
| 					argv[optind] + subs[0].rm_so); | ||||
| 		} | ||||
| 		for (i = 1; i < NS; i++) | ||||
| 			if (subs[i].rm_so != -1) | ||||
| 				printf("(%d) `%.*s'\n", i, | ||||
| 					(int)(subs[i].rm_eo - subs[i].rm_so), | ||||
| 					argv[optind] + subs[i].rm_so); | ||||
| 	} | ||||
| 	exit(status); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  - regress - main loop of regression test | ||||
|  == void regress(FILE *in); | ||||
|  */ | ||||
| void | ||||
| regress(in) | ||||
| FILE *in; | ||||
| { | ||||
| 	char inbuf[1000]; | ||||
| #	define	MAXF	10 | ||||
| 	char *f[MAXF]; | ||||
| 	int nf; | ||||
| 	int i; | ||||
| 	char erbuf[100]; | ||||
| 	size_t ne; | ||||
| 	char *badpat = "invalid regular expression"; | ||||
| #	define	SHORT	10 | ||||
| 	char *bpname = "REG_BADPAT"; | ||||
| 	regex_t re; | ||||
| 
 | ||||
| 	while (fgets(inbuf, sizeof(inbuf), in) != NULL) { | ||||
| 		line++; | ||||
| 		if (inbuf[0] == '#' || inbuf[0] == '\n') | ||||
| 			continue;			/* NOTE CONTINUE */ | ||||
| 		inbuf[strlen(inbuf)-1] = '\0';	/* get rid of stupid \n */ | ||||
| 		if (debug) | ||||
| 			fprintf(stdout, "%d:\n", line); | ||||
| 		nf = split(inbuf, f, MAXF, "\t\t"); | ||||
| 		if (nf < 3) { | ||||
| 			fprintf(stderr, "bad input, line %d\n", line); | ||||
| 			exit(1); | ||||
| 		} | ||||
| 		for (i = 0; i < nf; i++) | ||||
| 			if (strcmp(f[i], "\"\"") == 0) | ||||
| 				f[i] = ""; | ||||
| 		if (nf <= 3) | ||||
| 			f[3] = NULL; | ||||
| 		if (nf <= 4) | ||||
| 			f[4] = NULL; | ||||
| 		try(f[0], f[1], f[2], f[3], f[4], options('c', f[1])); | ||||
| 		if (opt('&', f[1]))	/* try with either type of RE */ | ||||
| 			try(f[0], f[1], f[2], f[3], f[4], | ||||
| 					options('c', f[1]) &~ REG_EXTENDED); | ||||
| 	} | ||||
| 
 | ||||
| 	ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf)); | ||||
| 	if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat)+1) { | ||||
| 		fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n", | ||||
| 							erbuf, badpat); | ||||
| 		status = 1; | ||||
| 	} | ||||
| 	ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, (size_t)SHORT); | ||||
| 	if (strncmp(erbuf, badpat, SHORT-1) != 0 || erbuf[SHORT-1] != '\0' || | ||||
| 						ne != strlen(badpat)+1) { | ||||
| 		fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n", | ||||
| 						erbuf, SHORT-1, badpat); | ||||
| 		status = 1; | ||||
| 	} | ||||
| 	ne = regerror(REG_ITOA|REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf)); | ||||
| 	if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname)+1) { | ||||
| 		fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n", | ||||
| 						erbuf, bpname); | ||||
| 		status = 1; | ||||
| 	} | ||||
| 	re.re_endp = bpname; | ||||
| 	ne = regerror(REG_ATOI, &re, erbuf, sizeof(erbuf)); | ||||
| 	if (atoi(erbuf) != (int)REG_BADPAT) { | ||||
| 		fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n", | ||||
| 						erbuf, (long)REG_BADPAT); | ||||
| 		status = 1; | ||||
| 	} else if (ne != strlen(erbuf)+1) { | ||||
| 		fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n", | ||||
| 						erbuf, (long)REG_BADPAT); | ||||
| 		status = 1; | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  - try - try it, and report on problems | ||||
|  == void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts); | ||||
|  */ | ||||
| void | ||||
| try(f0, f1, f2, f3, f4, opts) | ||||
| char *f0; | ||||
| char *f1; | ||||
| char *f2; | ||||
| char *f3; | ||||
| char *f4; | ||||
| int opts;			/* may not match f1 */ | ||||
| { | ||||
| 	regex_t re; | ||||
| #	define	NSUBS	10 | ||||
| 	regmatch_t subs[NSUBS]; | ||||
| #	define	NSHOULD	15 | ||||
| 	char *should[NSHOULD]; | ||||
| 	int nshould; | ||||
| 	char erbuf[100]; | ||||
| 	int err; | ||||
| 	int len; | ||||
| 	char *type = (opts & REG_EXTENDED) ? "ERE" : "BRE"; | ||||
| 	register int i; | ||||
| 	char *grump; | ||||
| 	char f0copy[1000]; | ||||
| 	char f2copy[1000]; | ||||
| 
 | ||||
| 	strcpy(f0copy, f0); | ||||
| 	re.re_endp = (opts®_PEND) ? f0copy + strlen(f0copy) : NULL; | ||||
| 	fixstr(f0copy); | ||||
| 	err = regcomp(&re, f0copy, opts); | ||||
| 	if (err != 0 && (!opt('C', f1) || err != efind(f2))) { | ||||
| 		/* unexpected error or wrong error */ | ||||
| 		len = regerror(err, &re, erbuf, sizeof(erbuf)); | ||||
| 		fprintf(stderr, "%d: %s error %s, %d/%d `%s'\n", | ||||
| 					line, type, eprint(err), len, | ||||
| 					sizeof(erbuf), erbuf); | ||||
| 		status = 1; | ||||
| 	} else if (err == 0 && opt('C', f1)) { | ||||
| 		/* unexpected success */ | ||||
| 		fprintf(stderr, "%d: %s should have given REG_%s\n", | ||||
| 						line, type, f2); | ||||
| 		status = 1; | ||||
| 		err = 1;	/* so we won't try regexec */ | ||||
| 	} | ||||
| 
 | ||||
| 	if (err != 0) { | ||||
| 		regfree(&re); | ||||
| 		return; | ||||
| 	} | ||||
| 
 | ||||
| 	strcpy(f2copy, f2); | ||||
| 	fixstr(f2copy); | ||||
| 
 | ||||
| 	if (options('e', f1)®_STARTEND) { | ||||
| 		if (strchr(f2, '(') == NULL || strchr(f2, ')') == NULL) | ||||
| 			fprintf(stderr, "%d: bad STARTEND syntax\n", line); | ||||
| 		subs[0].rm_so = strchr(f2, '(') - f2 + 1; | ||||
| 		subs[0].rm_eo = strchr(f2, ')') - f2; | ||||
| 	} | ||||
| 	err = regexec(&re, f2copy, NSUBS, subs, options('e', f1)); | ||||
| 
 | ||||
| 	if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) { | ||||
| 		/* unexpected error or wrong error */ | ||||
| 		len = regerror(err, &re, erbuf, sizeof(erbuf)); | ||||
| 		fprintf(stderr, "%d: %s exec error %s, %d/%d `%s'\n", | ||||
| 					line, type, eprint(err), len, | ||||
| 					sizeof(erbuf), erbuf); | ||||
| 		status = 1; | ||||
| 	} else if (err != 0) { | ||||
| 		/* nothing more to check */ | ||||
| 	} else if (f3 == NULL) { | ||||
| 		/* unexpected success */ | ||||
| 		fprintf(stderr, "%d: %s exec should have failed\n", | ||||
| 						line, type); | ||||
| 		status = 1; | ||||
| 		err = 1;		/* just on principle */ | ||||
| 	} else if (opts®_NOSUB) { | ||||
| 		/* nothing more to check */ | ||||
| 	} else if ((grump = check(f2, subs[0], f3)) != NULL) { | ||||
| 		fprintf(stderr, "%d: %s %s\n", line, type, grump); | ||||
| 		status = 1; | ||||
| 		err = 1; | ||||
| 	} | ||||
| 
 | ||||
| 	if (err != 0 || f4 == NULL) { | ||||
| 		regfree(&re); | ||||
| 		return; | ||||
| 	} | ||||
| 
 | ||||
| 	for (i = 1; i < NSHOULD; i++) | ||||
| 		should[i] = NULL; | ||||
| 	nshould = split(f4, should+1, NSHOULD-1, ","); | ||||
| 	if (nshould == 0) { | ||||
| 		nshould = 1; | ||||
| 		should[1] = ""; | ||||
| 	} | ||||
| 	for (i = 1; i < NSUBS; i++) { | ||||
| 		grump = check(f2, subs[i], should[i]); | ||||
| 		if (grump != NULL) { | ||||
| 			fprintf(stderr, "%d: %s $%d %s\n", line, | ||||
| 							type, i, grump); | ||||
| 			status = 1; | ||||
| 			err = 1; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	regfree(&re); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  - options - pick options out of a regression-test string | ||||
|  == int options(int type, char *s); | ||||
|  */ | ||||
| int | ||||
| options(type, s) | ||||
| int type;			/* 'c' compile, 'e' exec */ | ||||
| char *s; | ||||
| { | ||||
| 	register char *p; | ||||
| 	register int o = (type == 'c') ? copts : eopts; | ||||
| 	register char *legal = (type == 'c') ? "bisnmp" : "^$#tl"; | ||||
| 
 | ||||
| 	for (p = s; *p != '\0'; p++) | ||||
| 		if (strchr(legal, *p) != NULL) | ||||
| 			switch (*p) { | ||||
| 			case 'b': | ||||
| 				o &= ~REG_EXTENDED; | ||||
| 				break; | ||||
| 			case 'i': | ||||
| 				o |= REG_ICASE; | ||||
| 				break; | ||||
| 			case 's': | ||||
| 				o |= REG_NOSUB; | ||||
| 				break; | ||||
| 			case 'n': | ||||
| 				o |= REG_NEWLINE; | ||||
| 				break; | ||||
| 			case 'm': | ||||
| 				o &= ~REG_EXTENDED; | ||||
| 				o |= REG_NOSPEC; | ||||
| 				break; | ||||
| 			case 'p': | ||||
| 				o |= REG_PEND; | ||||
| 				break; | ||||
| 			case '^': | ||||
| 				o |= REG_NOTBOL; | ||||
| 				break; | ||||
| 			case '$': | ||||
| 				o |= REG_NOTEOL; | ||||
| 				break; | ||||
| 			case '#': | ||||
| 				o |= REG_STARTEND; | ||||
| 				break; | ||||
| 			case 't':	/* trace */ | ||||
| 				o |= REG_TRACE; | ||||
| 				break; | ||||
| 			case 'l':	/* force long representation */ | ||||
| 				o |= REG_LARGE; | ||||
| 				break; | ||||
| 			case 'r':	/* force backref use */ | ||||
| 				o |= REG_BACKR; | ||||
| 				break; | ||||
| 			} | ||||
| 	return(o); | ||||
| } | ||||
| 
 | ||||
/*
 - opt - is a particular option in a regression string?
 == int opt(int c, char *s);
 *
 * Yields 1 when strchr() can find c in s and 0 when it cannot.  Because
 * strchr() treats the terminating NUL as part of the string, c == '\0'
 * always yields 1.
 */
int				/* predicate */
opt(c, s)
int c;
char *s;
{
	char *hit = strchr(s, c);

	if (hit == NULL)
		return(0);
	return(1);
}
| 
 | ||||
/*
 - fixstr - transform magic characters in strings
 == void fixstr(register char *p);
 *
 * Rewrites the string in place: 'N' becomes newline, 'T' becomes tab,
 * 'S' becomes space and 'Z' becomes NUL.  A NULL pointer is accepted and
 * ignored.
 */
void
fixstr(p)
register char *p;
{
	register char *cur;

	if (p == NULL)
		return;

	for (cur = p; *cur != '\0'; cur++) {
		switch (*cur) {
		case 'N':
			*cur = '\n';
			break;
		case 'T':
			*cur = '\t';
			break;
		case 'S':
			*cur = ' ';
			break;
		case 'Z':
			/*
			 * The loop test only runs after cur has advanced, so
			 * scanning carries on past the NUL stored here.
			 */
			*cur = '\0';
			break;
		default:
			break;
		}
	}
}
| 
 | ||||
/*
 - check - check a substring match
 == char *check(char *str, regmatch_t sub, char *should);
 *
 * str is the subject string, sub the offsets reported for one
 * (sub)expression, and should the expected text.  A should of NULL or "-"
 * means no match is expected; a leading '@' means a null match is
 * expected immediately before the text that follows the '@'.
 *
 * Returns NULL when the report agrees with the expectation, otherwise a
 * complaint.  The complaint may live in a static buffer that the next
 * call overwrites.
 */
char *				/* NULL or complaint */
check(str, sub, should)
char *str;
regmatch_t sub;
char *should;
{
	register int len;
	register int shlen;
	register int shown;
	register char *p;
	static char grump[500];
	register char *at = NULL;

	if (should != NULL && strcmp(should, "-") == 0)
		should = NULL;
	if (should != NULL && should[0] == '@') {
		at = should + 1;
		should = "";
	}

	/* check rm_so and rm_eo for consistency */
	if (sub.rm_so > sub.rm_eo || (sub.rm_so == -1 && sub.rm_eo != -1) ||
				(sub.rm_so != -1 && sub.rm_eo == -1) ||
				(sub.rm_so != -1 && sub.rm_so < 0) ||
				(sub.rm_eo != -1 && sub.rm_eo < 0) ) {
		sprintf(grump, "start %ld end %ld", (long)sub.rm_so,
							(long)sub.rm_eo);
		return(grump);
	}

	/* check for no match */
	if (sub.rm_so == -1 && should == NULL)
		return(NULL);
	if (sub.rm_so == -1)
		return("did not match");

	/* check for in range; rm_eo is known to be non-negative by now */
	if ((size_t)sub.rm_eo > strlen(str)) {
		sprintf(grump, "start %ld end %ld, past end of string",
					(long)sub.rm_so, (long)sub.rm_eo);
		return(grump);
	}

	len = (int)(sub.rm_eo - sub.rm_so);
	p = str + sub.rm_so;

	/* cap the echoed text so that a long match cannot overrun grump */
	shown = len;
	if (shown > (int)sizeof(grump) - 32)
		shown = (int)sizeof(grump) - 32;

	/* check for not supposed to match */
	if (should == NULL) {
		sprintf(grump, "matched `%.*s'", shown, p);
		return(grump);
	}

	/*
	 * Only here is should known to be non-NULL; measuring it any
	 * earlier handed NULL to strlen() on an unexpected match.
	 */
	shlen = (int)strlen(should);

	/* check for wrong match */
	if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) {
		sprintf(grump, "matched `%.*s' instead", shown, p);
		return(grump);
	}
	if (shlen > 0)
		return(NULL);

	/* check null match in right place */
	if (at == NULL)
		return(NULL);
	shlen = (int)strlen(at);
	if (shlen == 0)
		shlen = 1;	/* force check for end-of-string */
	if (strncmp(p, at, (size_t)shlen) != 0) {
		sprintf(grump, "matched null at `%.20s'", p);
		return(grump);
	}
	return(NULL);
}
| 
 | ||||
| /*
 | ||||
|  - eprint - convert error number to name | ||||
|  == static char *eprint(int err); | ||||
|  */ | ||||
| static char * | ||||
| eprint(err) | ||||
| int err; | ||||
| { | ||||
| 	static char epbuf[100]; | ||||
| 	size_t len; | ||||
| 
 | ||||
| 	len = regerror(REG_ITOA|err, (regex_t *)NULL, epbuf, sizeof(epbuf)); | ||||
| 	assert(len <= sizeof(epbuf)); | ||||
| 	return(epbuf); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  - efind - convert error name to number | ||||
|  == static int efind(char *name); | ||||
|  */ | ||||
| static int | ||||
| efind(name) | ||||
| char *name; | ||||
| { | ||||
| 	static char efbuf[100]; | ||||
| 	size_t n; | ||||
| 	regex_t re; | ||||
| 
 | ||||
| 	sprintf(efbuf, "REG_%s", name); | ||||
| 	assert(strlen(efbuf) < sizeof(efbuf)); | ||||
| 	re.re_endp = efbuf; | ||||
| 	(void) regerror(REG_ATOI, &re, efbuf, sizeof(efbuf)); | ||||
| 	return(atoi(efbuf)); | ||||
| } | ||||
|  | @ -0,0 +1,19 @@ | |||
| /* ========= begin header generated by ./mkh ========= */ | ||||
| #ifdef __cplusplus | ||||
| extern "C" { | ||||
| #endif | ||||
| 
 | ||||
| /* === main.c === */ | ||||
| void regress(FILE *in); | ||||
| void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts); | ||||
| int options(int type, char *s); | ||||
| int opt(int c, char *s); | ||||
| void fixstr(register char *p); | ||||
| char *check(char *str, regmatch_t sub, char *should); | ||||
| static char *eprint(int err); | ||||
| static int efind(char *name); | ||||
| 
 | ||||
| #ifdef __cplusplus | ||||
| } | ||||
| #endif | ||||
| /* ========= end header generated by ./mkh ========= */ | ||||
|  | @ -0,0 +1,76 @@ | |||
#! /bin/sh
# mkh - pull headers out of C source
#
# usage: mkh [-o] [-b] [-s] [-p] [-i name] file ...
#
# Lines of each file that carry the marker (" =", or " ==" with -p) are
# stripped of the marker and written to standard output as a header.
PATH=/bin:/usr/bin ; export PATH

# egrep pattern to pick out marked lines
egrep='^ =([ 	]|$)'

# Name for the optional #ifndef wrapper.  Cleared explicitly so that a
# variable of the same name inherited from the environment cannot switch
# the wrapper on by accident.
ifndef=

# Sed program to process marked lines into lines for the header file.
# The markers have already been removed.  Two things are done here:  removal
# of backslashed newlines, and some fudging of comments.  The first is done
# because -o needs to have prototypes on one line to strip them down.
# Getting comments into the output is tricky; we turn C++-style // comments
# into /* */ comments, after altering any existing */'s to avoid trouble.
peel='	/\\$/N
	/\\\n[ 	]*/s///g
	/\/\//s;\*/;* /;g
	/\/\//s;//\(.*\);/*\1 */;'

# Walk the options by looking at "$1" and shifting.  (A "for a" loop walks
# the argument list as it was at entry, so the name given to -i was itself
# visited, taken for a non-option, and ended option processing early.)
while test $# -gt 0
do
	case "$1" in
	-o)	# old (pre-function-prototype) compiler
		# add code to comment out argument lists
		peel="$peel
			"'/^\([^#\/][^\/]*[a-zA-Z0-9_)]\)(\(.*\))/s;;\1(/*\2*/);'
		shift
		;;
	-b)	# funny Berkeley __P macro
		peel="$peel
			"'/^\([^#\/][^\/]*[a-zA-Z0-9_)]\)(\(.*\))/s;;\1 __P((\2));'
		shift
		;;
	-s)	# compiler doesn't like `static foo();'
		# add code to get rid of the `static'
		peel="$peel
			"'/^static[ 	][^\/]*[a-zA-Z0-9_)](.*)/s;static.;;'
		shift
		;;
	-p)	# private declarations
		egrep='^ ==([ 	]|$)'
		shift
		;;
	-i)	# wrap in #ifndef, argument is name
		if test $# -lt 2
		then
			echo "$0: -i needs a name argument" >&2
			exit 2
		fi
		ifndef="$2"
		shift ; shift
		;;
	*)	break
		;;
	esac
done

if test " $ifndef" != " "
then
	echo "#ifndef $ifndef"
	echo "#define	$ifndef	/* never again */"
fi
echo "/* ========= begin header generated by $0 ========= */"
echo '#ifdef __cplusplus'
echo 'extern "C" {'
echo '#endif'
for f
do
	echo
	echo "/* === $f === */"
	# quote the name so that it survives spaces and glob characters
	egrep "$egrep" "$f" | sed 's/^ ==*[ 	]//;s/^ ==*$//' | sed "$peel"
	echo
done
echo '#ifdef __cplusplus'
echo '}'
echo '#endif'
echo "/* ========= end header generated by $0 ========= */"
if test " $ifndef" != " "
then
	echo "#endif"
fi
exit 0
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							|  | @ -0,0 +1,51 @@ | |||
| /* ========= begin header generated by ./mkh ========= */ | ||||
| #ifdef __cplusplus | ||||
| extern "C" { | ||||
| #endif | ||||
| 
 | ||||
| /* === regcomp.c === */ | ||||
| static void p_ere(register struct parse *p, int stop); | ||||
| static void p_ere_exp(register struct parse *p); | ||||
| static void p_str(register struct parse *p); | ||||
| static void p_bre(register struct parse *p, register int end1, register int end2); | ||||
| static int p_simp_re(register struct parse *p, int starordinary); | ||||
| static int p_count(register struct parse *p); | ||||
| static void p_bracket(register struct parse *p); | ||||
| static void p_b_term(register struct parse *p, register cset *cs); | ||||
| static void p_b_cclass(register struct parse *p, register cset *cs); | ||||
| static void p_b_eclass(register struct parse *p, register cset *cs); | ||||
| static char p_b_symbol(register struct parse *p); | ||||
| static char p_b_coll_elem(register struct parse *p, int endc); | ||||
| static char othercase(int ch); | ||||
| static void bothcases(register struct parse *p, int ch); | ||||
| static void ordinary(register struct parse *p, register int ch); | ||||
| static void nonnewline(register struct parse *p); | ||||
| static void repeat(register struct parse *p, sopno start, int from, int to); | ||||
| static int seterr(register struct parse *p, int e); | ||||
| static cset *allocset(register struct parse *p); | ||||
| static void freeset(register struct parse *p, register cset *cs); | ||||
| static int freezeset(register struct parse *p, register cset *cs); | ||||
| static int firstch(register struct parse *p, register cset *cs); | ||||
| static int nch(register struct parse *p, register cset *cs); | ||||
| static void mcadd(register struct parse *p, register cset *cs, register char *cp); | ||||
| static void mcsub(register cset *cs, register char *cp); | ||||
| static int mcin(register cset *cs, register char *cp); | ||||
| static char *mcfind(register cset *cs, register char *cp); | ||||
| static void mcinvert(register struct parse *p, register cset *cs); | ||||
| static void mccase(register struct parse *p, register cset *cs); | ||||
| static int isinsets(register struct re_guts *g, int c); | ||||
| static int samesets(register struct re_guts *g, int c1, int c2); | ||||
| static void categorize(struct parse *p, register struct re_guts *g); | ||||
| static sopno dupl(register struct parse *p, sopno start, sopno finish); | ||||
| static void doemit(register struct parse *p, sop op, size_t opnd); | ||||
| static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos); | ||||
| static void dofwd(register struct parse *p, sopno pos, sop value); | ||||
| static void enlarge(register struct parse *p, sopno size); | ||||
| static void stripsnug(register struct parse *p, register struct re_guts *g); | ||||
| static void findmust(register struct parse *p, register struct re_guts *g); | ||||
| static sopno pluscount(register struct parse *p, register struct re_guts *g); | ||||
| 
 | ||||
| #ifdef __cplusplus | ||||
| } | ||||
| #endif | ||||
| /* ========= end header generated by ./mkh ========= */ | ||||
|  | @ -0,0 +1,12 @@ | |||
| /* ========= begin header generated by ./mkh ========= */ | ||||
| #ifdef __cplusplus | ||||
| extern "C" { | ||||
| #endif | ||||
| 
 | ||||
| /* === regerror.c === */ | ||||
| static char *regatoi(const regex_t *preg, char *localbuf); | ||||
| 
 | ||||
| #ifdef __cplusplus | ||||
| } | ||||
| #endif | ||||
| /* ========= end header generated by ./mkh ========= */ | ||||
|  | @ -0,0 +1,509 @@ | |||
| .TH REGEX 3 "25 Sept 1997" | ||||
| .BY "Henry Spencer" | ||||
| .de ZR | ||||
| .\" one other place knows this name:  the SEE ALSO section | ||||
| .IR regex (7) \\$1 | ||||
| .. | ||||
| .SH NAME | ||||
| regcomp, regexec, regerror, regfree \- regular-expression library | ||||
| .SH SYNOPSIS | ||||
| .ft B | ||||
| .\".na | ||||
| #include <sys/types.h> | ||||
| .br | ||||
| #include <regex.h> | ||||
| .HP 10 | ||||
| int regcomp(regex_t\ *preg, const\ char\ *pattern, int\ cflags); | ||||
| .HP | ||||
| int\ regexec(const\ regex_t\ *preg, const\ char\ *string, | ||||
| size_t\ nmatch, regmatch_t\ pmatch[], int\ eflags); | ||||
| .HP | ||||
| size_t\ regerror(int\ errcode, const\ regex_t\ *preg, | ||||
| char\ *errbuf, size_t\ errbuf_size); | ||||
| .HP | ||||
| void\ regfree(regex_t\ *preg); | ||||
| .\".ad | ||||
| .ft | ||||
| .SH DESCRIPTION | ||||
| These routines implement POSIX 1003.2 regular expressions (``RE''s); | ||||
| see | ||||
| .ZR . | ||||
| .I Regcomp | ||||
| compiles an RE written as a string into an internal form, | ||||
| .I regexec | ||||
| matches that internal form against a string and reports results, | ||||
| .I regerror | ||||
| transforms error codes from either into human-readable messages, | ||||
| and | ||||
| .I regfree | ||||
| frees any dynamically-allocated storage used by the internal form | ||||
| of an RE. | ||||
| .PP | ||||
| The header | ||||
| .I <regex.h> | ||||
| declares two structure types, | ||||
| .I regex_t | ||||
| and | ||||
| .IR regmatch_t , | ||||
| the former for compiled internal forms and the latter for match reporting. | ||||
| It also declares the four functions, | ||||
| a type | ||||
| .IR regoff_t , | ||||
| and a number of constants with names starting with ``REG_''. | ||||
| .PP | ||||
| .I Regcomp | ||||
| compiles the regular expression contained in the | ||||
| .I pattern | ||||
| string, | ||||
| subject to the flags in | ||||
| .IR cflags , | ||||
| and places the results in the | ||||
| .I regex_t | ||||
| structure pointed to by | ||||
| .IR preg . | ||||
| .I Cflags | ||||
| is the bitwise OR of zero or more of the following flags: | ||||
| .IP REG_EXTENDED \w'REG_EXTENDED'u+2n | ||||
| Compile modern (``extended'') REs, | ||||
| rather than the obsolete (``basic'') REs that | ||||
| are the default. | ||||
| .IP REG_BASIC | ||||
| This is a synonym for 0, | ||||
| provided as a counterpart to REG_EXTENDED to improve readability. | ||||
| This is an extension, | ||||
| compatible with but not specified by POSIX 1003.2, | ||||
| and should be used with | ||||
| caution in software intended to be portable to other systems. | ||||
| .IP REG_NOSPEC | ||||
| Compile with recognition of all special characters turned off. | ||||
| All characters are thus considered ordinary, | ||||
| so the ``RE'' is a literal string. | ||||
| This is an extension, | ||||
| compatible with but not specified by POSIX 1003.2, | ||||
| and should be used with | ||||
| caution in software intended to be portable to other systems. | ||||
| REG_EXTENDED and REG_NOSPEC may not be used | ||||
| in the same call to | ||||
| .IR regcomp . | ||||
| .IP REG_ICASE | ||||
| Compile for matching that ignores upper/lower case distinctions. | ||||
| See | ||||
| .ZR . | ||||
| .IP REG_NOSUB | ||||
| Compile for matching that need only report success or failure, | ||||
| not what was matched. | ||||
| .IP REG_NEWLINE | ||||
| Compile for newline-sensitive matching. | ||||
| By default, newline is a completely ordinary character with no special | ||||
| meaning in either REs or strings. | ||||
| With this flag, | ||||
| `[^' bracket expressions and `.' never match newline, | ||||
| a `^' anchor matches the null string after any newline in the string | ||||
| in addition to its normal function, | ||||
| and the `$' anchor matches the null string before any newline in the | ||||
| string in addition to its normal function. | ||||
| .IP REG_PEND | ||||
| The regular expression ends, | ||||
| not at the first NUL, | ||||
| but just before the character pointed to by the | ||||
| .I re_endp | ||||
| member of the structure pointed to by | ||||
| .IR preg . | ||||
| The | ||||
| .I re_endp | ||||
| member is of type | ||||
| .IR const\ char\ * . | ||||
| This flag permits inclusion of NULs in the RE; | ||||
| they are considered ordinary characters. | ||||
| This is an extension, | ||||
| compatible with but not specified by POSIX 1003.2, | ||||
| and should be used with | ||||
| caution in software intended to be portable to other systems. | ||||
| .PP | ||||
| When successful, | ||||
| .I regcomp | ||||
| returns 0 and fills in the structure pointed to by | ||||
| .IR preg . | ||||
| One member of that structure | ||||
| (other than | ||||
| .IR re_endp ) | ||||
| is publicized: | ||||
| .IR re_nsub , | ||||
| of type | ||||
| .IR size_t , | ||||
| contains the number of parenthesized subexpressions within the RE | ||||
| (except that the value of this member is undefined if the | ||||
| REG_NOSUB flag was used). | ||||
| If | ||||
| .I regcomp | ||||
| fails, it returns a non-zero error code; | ||||
| see DIAGNOSTICS. | ||||
| .PP | ||||
| .I Regexec | ||||
| matches the compiled RE pointed to by | ||||
| .I preg | ||||
| against the | ||||
| .IR string , | ||||
| subject to the flags in | ||||
| .IR eflags , | ||||
| and reports results using | ||||
| .IR nmatch , | ||||
| .IR pmatch , | ||||
| and the returned value. | ||||
| The RE must have been compiled by a previous invocation of | ||||
| .IR regcomp . | ||||
| The compiled form is not altered during execution of | ||||
| .IR regexec , | ||||
| so a single compiled RE can be used simultaneously by multiple threads. | ||||
| .PP | ||||
| By default, | ||||
| the NUL-terminated string pointed to by | ||||
| .I string | ||||
| is considered to be the text of an entire line, | ||||
| with the NUL indicating the end of the line. | ||||
| (That is, | ||||
| any other end-of-line marker is considered to have been removed | ||||
| and replaced by the NUL.) | ||||
| The | ||||
| .I eflags | ||||
| argument is the bitwise OR of zero or more of the following flags: | ||||
| .IP REG_NOTBOL \w'REG_STARTEND'u+2n | ||||
| The first character of | ||||
| the string | ||||
| is not the beginning of a line, so the `^' anchor should not match before it. | ||||
| This does not affect the behavior of newlines under REG_NEWLINE. | ||||
| .IP REG_NOTEOL | ||||
| The NUL terminating | ||||
| the string | ||||
| does not end a line, so the `$' anchor should not match before it. | ||||
| This does not affect the behavior of newlines under REG_NEWLINE. | ||||
| .IP REG_STARTEND | ||||
| The string is considered to start at | ||||
| \fIstring\fR\ + \fIpmatch\fR[0].\fIrm_so\fR | ||||
| and to have a terminating NUL located at | ||||
| \fIstring\fR\ + \fIpmatch\fR[0].\fIrm_eo\fR | ||||
| (there need not actually be a NUL at that location), | ||||
| regardless of the value of | ||||
| .IR nmatch . | ||||
| See below for the definition of | ||||
| .I pmatch | ||||
| and | ||||
| .IR nmatch . | ||||
| This is an extension, | ||||
| compatible with but not specified by POSIX 1003.2, | ||||
| and should be used with | ||||
| caution in software intended to be portable to other systems. | ||||
| Note that a non-zero \fIrm_so\fR does not imply REG_NOTBOL; | ||||
| REG_STARTEND affects only the location of the string, | ||||
| not how it is matched. | ||||
| .PP | ||||
| See | ||||
| .ZR | ||||
| for a discussion of what is matched in situations where an RE or a | ||||
| portion thereof could match any of several substrings of | ||||
| .IR string . | ||||
| .PP | ||||
| Normally, | ||||
| .I regexec | ||||
| returns 0 for success and the non-zero code REG_NOMATCH for failure. | ||||
| Other non-zero error codes may be returned in exceptional situations; | ||||
| see DIAGNOSTICS. | ||||
| .PP | ||||
| If REG_NOSUB was specified in the compilation of the RE, | ||||
| or if | ||||
| .I nmatch | ||||
| is 0, | ||||
| .I regexec | ||||
| ignores the | ||||
| .I pmatch | ||||
| argument (but see below for the case where REG_STARTEND is specified). | ||||
| Otherwise, | ||||
| .I pmatch | ||||
| points to an array of | ||||
| .I nmatch | ||||
| structures of type | ||||
| .IR regmatch_t . | ||||
| Such a structure has at least the members | ||||
| .I rm_so | ||||
| and | ||||
| .IR rm_eo , | ||||
| both of type | ||||
| .I regoff_t | ||||
| (a signed arithmetic type at least as large as an | ||||
| .I off_t | ||||
| and a | ||||
| .IR ssize_t ), | ||||
| containing respectively the offset of the first character of a substring | ||||
| and the offset of the first character after the end of the substring. | ||||
| Offsets are measured from the beginning of the | ||||
| .I string | ||||
| argument given to | ||||
| .IR regexec . | ||||
| An empty substring is denoted by equal offsets, | ||||
| both indicating the character following the empty substring. | ||||
| .PP | ||||
| The 0th member of the | ||||
| .I pmatch | ||||
| array is filled in to indicate what substring of | ||||
| .I string | ||||
| was matched by the entire RE. | ||||
| Remaining members report what substring was matched by parenthesized | ||||
| subexpressions within the RE; | ||||
| member | ||||
| .I i | ||||
| reports subexpression | ||||
| .IR i , | ||||
| with subexpressions counted (starting at 1) by the order of their opening | ||||
| parentheses in the RE, left to right. | ||||
| Unused entries in the array\(emcorresponding either to subexpressions that | ||||
| did not participate in the match at all, or to subexpressions that do not | ||||
| exist in the RE (that is, \fIi\fR\ > \fIpreg\fR\->\fIre_nsub\fR)\(emhave both | ||||
| .I rm_so | ||||
| and | ||||
| .I rm_eo | ||||
| set to \-1. | ||||
| If a subexpression participated in the match several times, | ||||
| the reported substring is the last one it matched. | ||||
| (Note in particular, as an example, that when the RE `(b*)+' matches `bbb', | ||||
| the parenthesized subexpression matches the three `b's and then | ||||
| an infinite number of empty strings following the last `b', | ||||
| so the reported substring is one of the empties.) | ||||
| .PP | ||||
| If REG_STARTEND is specified, | ||||
| .I pmatch | ||||
| must point to at least one | ||||
| .I regmatch_t | ||||
| (even if | ||||
| .I nmatch | ||||
| is 0 or REG_NOSUB was specified), | ||||
| to hold the input offsets for REG_STARTEND. | ||||
| Use for output is still entirely controlled by | ||||
| .IR nmatch ; | ||||
| if | ||||
| .I nmatch | ||||
| is 0 or REG_NOSUB was specified, | ||||
| the value of | ||||
| .IR pmatch [0] | ||||
| will not be changed by a successful | ||||
| .IR regexec . | ||||
| .PP | ||||
| .I Regerror | ||||
| maps a non-zero | ||||
| .I errcode | ||||
| from either | ||||
| .I regcomp | ||||
| or | ||||
| .I regexec | ||||
| to a human-readable, printable message. | ||||
| If | ||||
| .I preg | ||||
| is non-NULL, | ||||
| the error code should have arisen from use of | ||||
| the | ||||
| .I regex_t | ||||
| pointed to by | ||||
| .IR preg , | ||||
| and if the error code came from | ||||
| .IR regcomp , | ||||
| it should have been the result from the most recent | ||||
| .I regcomp | ||||
| using that | ||||
| .IR regex_t . | ||||
| .RI ( Regerror | ||||
| may be able to supply a more detailed message using information | ||||
| from the | ||||
| .IR regex_t .) | ||||
| .I Regerror | ||||
| places the NUL-terminated message into the buffer pointed to by | ||||
| .IR errbuf , | ||||
| limiting the length (including the NUL) to at most | ||||
| .I errbuf_size | ||||
| bytes. | ||||
| If the whole message won't fit, | ||||
| as much of it as will fit before the terminating NUL is supplied. | ||||
| In any case, | ||||
| the returned value is the size of buffer needed to hold the whole | ||||
| message (including terminating NUL). | ||||
| If | ||||
| .I errbuf_size | ||||
| is 0, | ||||
| .I errbuf | ||||
| is ignored but the return value is still correct. | ||||
| .PP | ||||
| If the | ||||
| .I errcode | ||||
| given to | ||||
| .I regerror | ||||
| is first ORed with REG_ITOA, | ||||
| the ``message'' that results is the printable name of the error code, | ||||
| e.g. ``REG_NOMATCH'', | ||||
| rather than an explanation thereof. | ||||
| If | ||||
| .I errcode | ||||
| is REG_ATOI, | ||||
| then | ||||
| .I preg | ||||
| shall be non-NULL and the | ||||
| .I re_endp | ||||
| member of the structure it points to | ||||
| must point to the printable name of an error code; | ||||
| in this case, the result in | ||||
| .I errbuf | ||||
| is the decimal digits of | ||||
| the numeric value of the error code | ||||
| (0 if the name is not recognized). | ||||
| REG_ITOA and REG_ATOI are intended primarily as debugging facilities; | ||||
| they are extensions, | ||||
| compatible with but not specified by POSIX 1003.2, | ||||
| and should be used with | ||||
| caution in software intended to be portable to other systems. | ||||
| Be warned also that they are considered experimental and changes are possible. | ||||
| .PP | ||||
| .I Regfree | ||||
| frees any dynamically-allocated storage associated with the compiled RE | ||||
| pointed to by | ||||
| .IR preg . | ||||
| The remaining | ||||
| .I regex_t | ||||
| is no longer a valid compiled RE | ||||
| and the effect of supplying it to | ||||
| .I regexec | ||||
| or | ||||
| .I regerror | ||||
| is undefined. | ||||
| .PP | ||||
| None of these functions references global variables except for tables | ||||
| of constants; | ||||
| all are safe for use from multiple threads if the arguments are safe. | ||||
| .SH IMPLEMENTATION CHOICES | ||||
| There are a number of decisions that 1003.2 leaves up to the implementor, | ||||
| either by explicitly saying ``undefined'' or by virtue of them being | ||||
| forbidden by the RE grammar. | ||||
| This implementation treats them as follows. | ||||
| .PP | ||||
| See | ||||
| .ZR | ||||
| for a discussion of the definition of case-independent matching. | ||||
| .PP | ||||
| There is no particular limit on the length of REs, | ||||
| except insofar as memory is limited. | ||||
| Memory usage is approximately linear in RE size, and largely insensitive | ||||
| to RE complexity, except for bounded repetitions. | ||||
| See BUGS for one short RE using them | ||||
| that will run almost any system out of memory. | ||||
| .PP | ||||
| A backslashed character other than one specifically given a magic meaning | ||||
| by 1003.2 (such magic meanings occur only in obsolete [``basic''] REs) | ||||
| is taken as an ordinary character. | ||||
| .PP | ||||
| Any unmatched [ is a REG_EBRACK error. | ||||
| .PP | ||||
| Equivalence classes cannot begin or end bracket-expression ranges. | ||||
| The endpoint of one range cannot begin another. | ||||
| .PP | ||||
| RE_DUP_MAX, the limit on repetition counts in bounded repetitions, is 255. | ||||
| .PP | ||||
| A repetition operator (?, *, +, or bounds) cannot follow another | ||||
| repetition operator. | ||||
| A repetition operator cannot begin an expression or subexpression | ||||
| or follow `^' or `|'. | ||||
| .PP | ||||
| `|' cannot appear first or last in a (sub)expression or after another `|', | ||||
| i.e. an operand of `|' cannot be an empty subexpression. | ||||
| An empty parenthesized subexpression, `()', is legal and matches an | ||||
| empty (sub)string. | ||||
| An empty string is not a legal RE. | ||||
| .PP | ||||
| A `{' followed by a digit is considered the beginning of bounds for a | ||||
| bounded repetition, which must then follow the syntax for bounds. | ||||
| A `{' \fInot\fR followed by a digit is considered an ordinary character. | ||||
| .PP | ||||
| `^' and `$' beginning and ending subexpressions in obsolete (``basic'') | ||||
| REs are anchors, not ordinary characters. | ||||
| .SH SEE ALSO | ||||
| grep(1), regex(7) | ||||
| .PP | ||||
| POSIX 1003.2, sections 2.8 (Regular Expression Notation) | ||||
| and | ||||
| B.5 (C Binding for Regular Expression Matching). | ||||
| .SH DIAGNOSTICS | ||||
| Non-zero error codes from | ||||
| .I regcomp | ||||
| and | ||||
| .I regexec | ||||
| include the following: | ||||
| .PP | ||||
| .nf | ||||
| .ta \w'REG_ECOLLATE'u+3n | ||||
| REG_NOMATCH	regexec() failed to match | ||||
| REG_BADPAT	invalid regular expression | ||||
| REG_ECOLLATE	invalid collating element | ||||
| REG_ECTYPE	invalid character class | ||||
| REG_EESCAPE	\e applied to unescapable character | ||||
| REG_ESUBREG	invalid backreference number | ||||
| REG_EBRACK	brackets [ ] not balanced | ||||
| REG_EPAREN	parentheses ( ) not balanced | ||||
| REG_EBRACE	braces { } not balanced | ||||
| REG_BADBR	invalid repetition count(s) in { } | ||||
| REG_ERANGE	invalid character range in [ ] | ||||
| REG_ESPACE	ran out of memory | ||||
| REG_BADRPT	?, *, or + operand invalid | ||||
| REG_EMPTY	empty (sub)expression | ||||
| REG_ASSERT	``can't happen''\(emyou found a bug | ||||
| REG_INVARG	invalid argument, e.g. negative-length string | ||||
| .fi | ||||
| .SH HISTORY | ||||
| Written by Henry Spencer, | ||||
| henry@zoo.toronto.edu. | ||||
| .SH BUGS | ||||
| This is an alpha release with known defects. | ||||
| Please report problems. | ||||
| .PP | ||||
| There is one known functionality bug. | ||||
| The implementation of internationalization is incomplete: | ||||
| the locale is always assumed to be the default one of 1003.2, | ||||
| and only the collating elements etc. of that locale are available. | ||||
| .PP | ||||
| The back-reference code is subtle and doubts linger about its correctness | ||||
| in complex cases. | ||||
| .PP | ||||
| .I Regexec | ||||
| performance is poor. | ||||
| This will improve with later releases. | ||||
| .I Nmatch | ||||
| exceeding 0 is expensive; | ||||
| .I nmatch | ||||
| exceeding 1 is worse. | ||||
| .I Regexec | ||||
| is largely insensitive to RE complexity \fIexcept\fR that back | ||||
| references are massively expensive. | ||||
| RE length does matter; in particular, there is a strong speed bonus | ||||
| for keeping RE length under about 30 characters, | ||||
| with most special characters counting roughly double. | ||||
| .PP | ||||
| .I Regcomp | ||||
| implements bounded repetitions by macro expansion, | ||||
| which is costly in time and space if counts are large | ||||
| or bounded repetitions are nested. | ||||
| An RE like, say, | ||||
| `((((a{1,100}){1,100}){1,100}){1,100}){1,100}' | ||||
| will (eventually) run almost any existing machine out of swap space. | ||||
| .PP | ||||
| There are suspected problems with response to obscure error conditions. | ||||
| Notably, | ||||
| certain kinds of internal overflow, | ||||
| produced only by truly enormous REs or by multiply nested bounded repetitions, | ||||
| are probably not handled well. | ||||
| .PP | ||||
| Due to a mistake in 1003.2, things like `a)b' are legal REs because `)' is | ||||
| a special character only in the presence of a previous unmatched `('. | ||||
| This can't be fixed until the spec is fixed. | ||||
| .PP | ||||
| The standard's definition of back references is vague. | ||||
| For example, does | ||||
| `a\e(\e(b\e)*\e2\e)*d' match `abbbd'? | ||||
| Until the standard is clarified, | ||||
| behavior in such cases should not be relied on. | ||||
| .PP | ||||
| The implementation of word-boundary matching is a bit of a kludge, | ||||
| and bugs may lurk in combinations of word-boundary matching and anchoring. | ||||
|  | @ -0,0 +1,235 @@ | |||
| .TH REGEX 7 "25 Oct 1995" | ||||
| .BY "Henry Spencer" | ||||
| .SH NAME | ||||
| regex \- POSIX 1003.2 regular expressions | ||||
| .SH DESCRIPTION | ||||
| Regular expressions (``RE''s), | ||||
| as defined in POSIX 1003.2, come in two forms: | ||||
| modern REs (roughly those of | ||||
| .IR egrep ; | ||||
| 1003.2 calls these ``extended'' REs) | ||||
| and obsolete REs (roughly those of | ||||
| .IR ed ; | ||||
| 1003.2 ``basic'' REs). | ||||
| Obsolete REs mostly exist for backward compatibility in some old programs; | ||||
| they will be discussed at the end. | ||||
| 1003.2 leaves some aspects of RE syntax and semantics open; | ||||
| `\(dg' marks decisions on these aspects that | ||||
| may not be fully portable to other 1003.2 implementations. | ||||
| .PP | ||||
| A (modern) RE is one\(dg or more non-empty\(dg \fIbranches\fR, | ||||
| separated by `|'. | ||||
| It matches anything that matches one of the branches. | ||||
| .PP | ||||
| A branch is one\(dg or more \fIpieces\fR, concatenated. | ||||
| It matches a match for the first, followed by a match for the second, etc. | ||||
| .PP | ||||
| A piece is an \fIatom\fR possibly followed | ||||
| by a single\(dg `*', `+', `?', or \fIbound\fR. | ||||
| An atom followed by `*' matches a sequence of 0 or more matches of the atom. | ||||
| An atom followed by `+' matches a sequence of 1 or more matches of the atom. | ||||
| An atom followed by `?' matches a sequence of 0 or 1 matches of the atom. | ||||
| .PP | ||||
| A \fIbound\fR is `{' followed by an unsigned decimal integer, | ||||
| possibly followed by `,' | ||||
| possibly followed by another unsigned decimal integer, | ||||
| always followed by `}'. | ||||
| The integers must lie between 0 and RE_DUP_MAX (255\(dg) inclusive, | ||||
| and if there are two of them, the first may not exceed the second. | ||||
| An atom followed by a bound containing one integer \fIi\fR | ||||
| and no comma matches | ||||
| a sequence of exactly \fIi\fR matches of the atom. | ||||
| An atom followed by a bound | ||||
| containing one integer \fIi\fR and a comma matches | ||||
| a sequence of \fIi\fR or more matches of the atom. | ||||
| An atom followed by a bound | ||||
| containing two integers \fIi\fR and \fIj\fR matches | ||||
| a sequence of \fIi\fR through \fIj\fR (inclusive) matches of the atom. | ||||
| .PP | ||||
| An atom is a regular expression enclosed in `()' (matching a match for the | ||||
| regular expression), | ||||
| an empty set of `()' (matching the null string)\(dg, | ||||
| a \fIbracket expression\fR (see below), `.' | ||||
| (matching any single character), `^' (matching the null string at the | ||||
| beginning of a line), `$' (matching the null string at the | ||||
| end of a line), a `\e' followed by one of the characters | ||||
| `^.[$()|*+?{\e' | ||||
| (matching that character taken as an ordinary character), | ||||
| a `\e' followed by any other character\(dg | ||||
| (matching that character taken as an ordinary character, | ||||
| as if the `\e' had not been present\(dg), | ||||
| or a single character with no other significance (matching that character). | ||||
| A `{' followed by a character other than a digit is an ordinary | ||||
| character, not the beginning of a bound\(dg. | ||||
| It is illegal to end an RE with `\e'. | ||||
| .PP | ||||
| A \fIbracket expression\fR is a list of characters enclosed in `[]'. | ||||
| It normally matches any single character from the list (but see below). | ||||
| If the list begins with `^', | ||||
| it matches any single character | ||||
| (but see below) \fInot\fR from the rest of the list. | ||||
| If two characters in the list are separated by `\-', this is shorthand | ||||
| for the full \fIrange\fR of characters between those two (inclusive) in the | ||||
| collating sequence, | ||||
| e.g. `[0\-9]' in ASCII matches any decimal digit. | ||||
| It is illegal\(dg for two ranges to share an | ||||
| endpoint, e.g. `a\-c\-e'. | ||||
| Ranges are very collating-sequence-dependent, | ||||
| and portable programs should avoid relying on them. | ||||
| .PP | ||||
| To include a literal `]' in the list, make it the first character | ||||
| (following a possible `^'). | ||||
| To include a literal `\-', make it the first or last character, | ||||
| or the second endpoint of a range. | ||||
| To use a literal `\-' as the first endpoint of a range, | ||||
| enclose it in `[.' and `.]' to make it a collating element (see below). | ||||
| With the exception of these and some combinations using `[' (see next | ||||
| paragraphs), all other special characters, including `\e', lose their | ||||
| special significance within a bracket expression. | ||||
| .PP | ||||
| Within a bracket expression, a collating element (a character, | ||||
| a multi-character sequence that collates as if it were a single character, | ||||
| or a collating-sequence name for either) | ||||
| enclosed in `[.' and `.]' stands for the | ||||
| sequence of characters of that collating element. | ||||
| The sequence is a single element of the bracket expression's list. | ||||
| A bracket expression containing a multi-character collating element  | ||||
| can thus match more than one character, | ||||
| e.g. if the collating sequence includes a `ch' collating element, | ||||
| then the RE `[[.ch.]]*c' matches the first five characters | ||||
| of `chchcc'. | ||||
| .PP | ||||
| Within a bracket expression, a collating element enclosed in `[=' and | ||||
| `=]' is an equivalence class, standing for the sequences of characters | ||||
| of all collating elements equivalent to that one, including itself. | ||||
| (If there are no other equivalent collating elements, | ||||
| the treatment is as if the enclosing delimiters were `[.' and `.]'.) | ||||
| For example, if o and \o'o^' are the members of an equivalence class, | ||||
| then `[[=o=]]', `[[=\o'o^'=]]', and `[o\o'o^']' are all synonymous. | ||||
| An equivalence class may not\(dg be an endpoint | ||||
| of a range. | ||||
| .PP | ||||
| Within a bracket expression, the name of a \fIcharacter class\fR enclosed | ||||
| in `[:' and `:]' stands for the list of all characters belonging to that | ||||
| class. | ||||
| Standard character class names are: | ||||
| .PP | ||||
| .RS | ||||
| .nf | ||||
| .ta 3c 6c 9c | ||||
| alnum	digit	punct | ||||
| alpha	graph	space | ||||
| blank	lower	upper | ||||
| cntrl	print	xdigit | ||||
| .fi | ||||
| .RE | ||||
| .PP | ||||
| These stand for the character classes defined in | ||||
| .IR ctype (3). | ||||
| A locale may provide others. | ||||
| A character class may not be used as an endpoint of a range. | ||||
| .PP | ||||
| There are two special cases\(dg of bracket expressions: | ||||
| the bracket expressions `[[:<:]]' and `[[:>:]]' match the null string at | ||||
| the beginning and end of a word respectively. | ||||
| A word is defined as a sequence of | ||||
| word characters | ||||
| which is neither preceded nor followed by | ||||
| word characters. | ||||
| A word character is an | ||||
| .I alnum | ||||
| character (as defined by | ||||
| .IR ctype (3)) | ||||
| or an underscore. | ||||
| This is an extension, | ||||
| compatible with but not specified by POSIX 1003.2, | ||||
| and should be used with | ||||
| caution in software intended to be portable to other systems. | ||||
| .PP | ||||
| In the event that an RE could match more than one substring of a given | ||||
| string, | ||||
| the RE matches the one starting earliest in the string. | ||||
| If the RE could match more than one substring starting at that point, | ||||
| it matches the longest. | ||||
| Subexpressions also match the longest possible substrings, subject to | ||||
| the constraint that the whole match be as long as possible, | ||||
| with subexpressions starting earlier in the RE taking priority over | ||||
| ones starting later. | ||||
| Note that higher-level subexpressions thus take priority over | ||||
| their lower-level component subexpressions. | ||||
| .PP | ||||
| Match lengths are measured in characters, not collating elements. | ||||
| A null string is considered longer than no match at all. | ||||
| For example, | ||||
| `bb*' matches the three middle characters of `abbbc', | ||||
| `(wee|week)(knights|nights)' matches all ten characters of `weeknights', | ||||
| when `(.*).*' is matched against `abc' the parenthesized subexpression | ||||
| matches all three characters, and | ||||
| when `(a*)*' is matched against `bc' both the whole RE and the parenthesized | ||||
| subexpression match the null string. | ||||
| .PP | ||||
| If case-independent matching is specified, | ||||
| the effect is much as if all case distinctions had vanished from the | ||||
| alphabet. | ||||
| When an alphabetic that exists in multiple cases appears as an | ||||
| ordinary character outside a bracket expression, it is effectively | ||||
| transformed into a bracket expression containing both cases, | ||||
| e.g. `x' becomes `[xX]'. | ||||
| When it appears inside a bracket expression, all case counterparts | ||||
| of it are added to the bracket expression, so that (e.g.) `[x]' | ||||
| becomes `[xX]' and `[^x]' becomes `[^xX]'. | ||||
| .PP | ||||
| No particular limit is imposed on the length of REs\(dg. | ||||
| Programs intended to be portable should not employ REs longer | ||||
| than 256 bytes, | ||||
| as an implementation can refuse to accept such REs and remain | ||||
| POSIX-compliant. | ||||
| .PP | ||||
| Obsolete (``basic'') regular expressions differ in several respects. | ||||
| `|', `+', and `?' are ordinary characters and there is no equivalent | ||||
| for their functionality. | ||||
| The delimiters for bounds are `\e{' and `\e}', | ||||
| with `{' and `}' by themselves ordinary characters. | ||||
| The parentheses for nested subexpressions are `\e(' and `\e)', | ||||
| with `(' and `)' by themselves ordinary characters. | ||||
| `^' is an ordinary character except at the beginning of the | ||||
| RE or\(dg the beginning of a parenthesized subexpression, | ||||
| `$' is an ordinary character except at the end of the | ||||
| RE or\(dg the end of a parenthesized subexpression, | ||||
| and `*' is an ordinary character if it appears at the beginning of the | ||||
| RE or the beginning of a parenthesized subexpression | ||||
| (after a possible leading `^'). | ||||
| Finally, there is one new type of atom, a \fIback reference\fR: | ||||
| `\e' followed by a non-zero decimal digit \fId\fR | ||||
| matches the same sequence of characters | ||||
| matched by the \fId\fRth parenthesized subexpression | ||||
| (numbering subexpressions by the positions of their opening parentheses, | ||||
| left to right), | ||||
| so that (e.g.) `\e([bc]\e)\e1' matches `bb' or `cc' but not `bc'. | ||||
| .SH SEE ALSO | ||||
| regex(3) | ||||
| .PP | ||||
| POSIX 1003.2, section 2.8 (Regular Expression Notation). | ||||
| .SH HISTORY | ||||
| Written by Henry Spencer, based on the 1003.2 spec. | ||||
| .SH BUGS | ||||
| Having two kinds of REs is a botch. | ||||
| .PP | ||||
| The current 1003.2 spec says that `)' is an ordinary character in | ||||
| the absence of an unmatched `('; | ||||
| this was an unintentional result of a wording error, | ||||
| and change is likely. | ||||
| Avoid relying on it. | ||||
| .PP | ||||
| Back references are a dreadful botch, | ||||
| posing major problems for efficient implementations. | ||||
| They are also somewhat vaguely defined | ||||
| (does | ||||
| `a\e(\e(b\e)*\e2\e)*d' match `abbbd'?). | ||||
| Avoid using them. | ||||
| .PP | ||||
| 1003.2's specification of case-independent matching is vague. | ||||
| The ``one case implies all cases'' definition given above | ||||
| is current consensus among implementors as to the right interpretation. | ||||
| .PP | ||||
| The syntax for word boundaries is incredibly ugly. | ||||
|  | @ -0,0 +1,74 @@ | |||
| #ifndef _REGEX_H_ | ||||
| #define	_REGEX_H_	/* never again */ | ||||
| /* ========= begin header generated by ./mkh ========= */ | ||||
| #ifdef __cplusplus | ||||
| extern "C" { | ||||
| #endif | ||||
| 
 | ||||
| /* === regex2.h === */ | ||||
| typedef off_t regoff_t;	/* type of the rm_so/rm_eo offsets in regmatch_t */ | ||||
| typedef struct { | ||||
| 	int re_magic;		/* validity stamp: regexec() and regfree() refuse anything but MAGIC1; regfree() zeroes it */ | ||||
| 	size_t re_nsub;		/* number of parenthesized subexpressions */ | ||||
| 	const char *re_endp;	/* end pointer for REG_PEND */ | ||||
| 	struct re_guts *re_g;	/* none of your business :-) -- the compiled form, released by regfree() */ | ||||
| } regex_t; | ||||
| typedef struct { | ||||
| 	regoff_t rm_so;		/* start of match */ | ||||
| 	regoff_t rm_eo;		/* end of match */ | ||||
| } regmatch_t; | ||||
| 
 | ||||
| 
 | ||||
| /* === regcomp.c === */ | ||||
| extern int regcomp(regex_t *, const char *, int); | ||||
| #define	REG_BASIC	0000 | ||||
| #define	REG_EXTENDED	0001 | ||||
| #define	REG_ICASE	0002 | ||||
| #define	REG_NOSUB	0004 | ||||
| #define	REG_NEWLINE	0010 | ||||
| #define	REG_NOSPEC	0020 | ||||
| #define	REG_PEND	0040 | ||||
| #define	REG_DUMP	0200 | ||||
| 
 | ||||
| 
 | ||||
| /* === regerror.c === */ | ||||
| #define	REG_OKAY	 0 | ||||
| #define	REG_NOMATCH	 1 | ||||
| #define	REG_BADPAT	 2 | ||||
| #define	REG_ECOLLATE	 3 | ||||
| #define	REG_ECTYPE	 4 | ||||
| #define	REG_EESCAPE	 5 | ||||
| #define	REG_ESUBREG	 6 | ||||
| #define	REG_EBRACK	 7 | ||||
| #define	REG_EPAREN	 8 | ||||
| #define	REG_EBRACE	 9 | ||||
| #define	REG_BADBR	10 | ||||
| #define	REG_ERANGE	11 | ||||
| #define	REG_ESPACE	12 | ||||
| #define	REG_BADRPT	13 | ||||
| #define	REG_EMPTY	14 | ||||
| #define	REG_ASSERT	15 | ||||
| #define	REG_INVARG	16 | ||||
| #define	REG_ATOI	255	/* convert name to number (!) */ | ||||
| #define	REG_ITOA	0400	/* convert number to name (!) */ | ||||
| extern size_t regerror(int, const regex_t *, char *, size_t); | ||||
| 
 | ||||
| 
 | ||||
| /* === regexec.c === */ | ||||
| extern int regexec(const regex_t *, const char *, size_t, regmatch_t [], int); | ||||
| #define	REG_NOTBOL	00001 | ||||
| #define	REG_NOTEOL	00002 | ||||
| #define	REG_STARTEND	00004 | ||||
| #define	REG_TRACE	00400	/* tracing of execution */ | ||||
| #define	REG_LARGE	01000	/* force large representation */ | ||||
| #define	REG_BACKR	02000	/* force use of backref code */ | ||||
| 
 | ||||
| 
 | ||||
| /* === regfree.c === */ | ||||
| extern void regfree(regex_t *); | ||||
| 
 | ||||
| #ifdef __cplusplus | ||||
| } | ||||
| #endif | ||||
| /* ========= end header generated by ./mkh ========= */ | ||||
| #endif | ||||
|  | @ -0,0 +1,134 @@ | |||
| /*
 | ||||
|  * First, the stuff that ends up in the outside-world include file | ||||
|  = typedef off_t regoff_t; | ||||
|  = typedef struct { | ||||
|  = 	int re_magic; | ||||
|  = 	size_t re_nsub;		// number of parenthesized subexpressions
 | ||||
|  = 	const char *re_endp;	// end pointer for REG_PEND
 | ||||
|  = 	struct re_guts *re_g;	// none of your business :-)
 | ||||
|  = } regex_t; | ||||
|  = typedef struct { | ||||
|  = 	regoff_t rm_so;		// start of match
 | ||||
|  = 	regoff_t rm_eo;		// end of match
 | ||||
|  = } regmatch_t; | ||||
|  */ | ||||
| /*
 | ||||
|  * internals of regex_t | ||||
|  */ | ||||
| #define	MAGIC1	((('r'^0200)<<8) | 'e') | ||||
| 
 | ||||
| /*
 | ||||
|  * The internal representation is a *strip*, a sequence of | ||||
|  * operators ending with an endmarker.  (Some terminology etc. is a | ||||
|  * historical relic of earlier versions which used multiple strips.) | ||||
|  * Certain oddities in the representation are there to permit running | ||||
|  * the machinery backwards; in particular, any deviation from sequential | ||||
|  * flow must be marked at both its source and its destination.  Some | ||||
|  * fine points: | ||||
|  * | ||||
|  * - OPLUS_ and O_PLUS are *inside* the loop they create. | ||||
|  * - OQUEST_ and O_QUEST are *outside* the bypass they create. | ||||
|  * - OCH_ and O_CH are *outside* the multi-way branch they create, while | ||||
|  *   OOR1 and OOR2 are respectively the end and the beginning of one of | ||||
|  *   the branches.  Note that there is an implicit OOR2 following OCH_ | ||||
|  *   and an implicit OOR1 preceding O_CH. | ||||
|  * | ||||
|  * In state representations, an operator's bit is on to signify a state | ||||
|  * immediately *preceding* "execution" of that operator. | ||||
|  */ | ||||
| typedef unsigned long sop;	/* strip operator: operator code and operand packed in one word */ | ||||
| typedef long sopno;	/* counts/positions of sops (see nstates, firststate, laststate in struct re_guts) */ | ||||
| #define	OPRMASK	0xf8000000	/* operator field: the bits at and above OPSHIFT (top 5 of 32) */ | ||||
| #define	OPDMASK	0x07ffffff	/* operand field: the low 27 bits */ | ||||
| #define	OPSHIFT	((unsigned)27)	/* bit position where the operator field starts */ | ||||
| #define	OP(n)	((n)&OPRMASK)	/* operator of a sop, still in shifted form (compare against OEND etc.) */ | ||||
| #define	OPND(n)	((n)&OPDMASK)	/* operand of a sop */ | ||||
| #define	SOP(op, opnd)	((op)|(opnd))	/* build a sop; op must already be shifted, opnd is not masked here */ | ||||
| /* operators			   meaning	operand			*/ | ||||
| /*						(back, fwd are offsets)	*/ | ||||
| #define	OEND	(1ul<<OPSHIFT)	/* endmarker	-			*/ | ||||
| #define	OCHAR	(2ul<<OPSHIFT)	/* character	unsigned char		*/ | ||||
| #define	OBOL	(3ul<<OPSHIFT)	/* left anchor	-			*/ | ||||
| #define	OEOL	(4ul<<OPSHIFT)	/* right anchor	-			*/ | ||||
| #define	OANY	(5ul<<OPSHIFT)	/* .		-			*/ | ||||
| #define	OANYOF	(6ul<<OPSHIFT)	/* [...]	set number		*/ | ||||
| #define	OBACK_	(7ul<<OPSHIFT)	/* begin \d	paren number		*/ | ||||
| #define	O_BACK	(8ul<<OPSHIFT)	/* end \d	paren number		*/ | ||||
| #define	OPLUS_	(9ul<<OPSHIFT)	/* + prefix	fwd to suffix		*/ | ||||
| #define	O_PLUS	(10ul<<OPSHIFT)	/* + suffix	back to prefix		*/ | ||||
| #define	OQUEST_	(11ul<<OPSHIFT)	/* ? prefix	fwd to suffix		*/ | ||||
| #define	O_QUEST	(12ul<<OPSHIFT)	/* ? suffix	back to prefix		*/ | ||||
| #define	OLPAREN	(13ul<<OPSHIFT)	/* (		fwd to )		*/ | ||||
| #define	ORPAREN	(14ul<<OPSHIFT)	/* )		back to (		*/ | ||||
| #define	OCH_	(15ul<<OPSHIFT)	/* begin choice	fwd to OOR2		*/ | ||||
| #define	OOR1	(16ul<<OPSHIFT)	/* | pt. 1	back to OOR1 or OCH_	*/ | ||||
| #define	OOR2	(17ul<<OPSHIFT)	/* | pt. 2	fwd to OOR2 or O_CH	*/ | ||||
| #define	O_CH	(18ul<<OPSHIFT)	/* end choice	back to OOR1		*/ | ||||
| #define	OBOW	(19ul<<OPSHIFT)	/* begin word	-			*/ | ||||
| #define	OEOW	(20ul<<OPSHIFT)	/* end word	-			*/ | ||||
| 
 | ||||
| /*
 | ||||
|  * Structure for [] character-set representation.  Character sets are | ||||
|  * done as bit vectors, grouped 8 to a byte vector for compactness. | ||||
|  * The individual set therefore has both a pointer to the byte vector | ||||
|  * and a mask to pick out the relevant bit of each byte.  A hash code | ||||
|  * simplifies testing whether two sets could be identical. | ||||
|  * | ||||
|  * This will get trickier for multicharacter collating elements.  As | ||||
|  * preliminary hooks for dealing with such things, we also carry along | ||||
|  * a string of multi-character elements, and decide the size of the | ||||
|  * vectors at run time. | ||||
|  */ | ||||
| typedef struct { | ||||
| 	uch *ptr;		/* -> uch [csetsize] */ | ||||
| 	uch mask;		/* bit within array: the one bit of each ptr[] byte that belongs to this set */ | ||||
| 	uch hash;		/* hash code: running sum of the characters added (see CHadd/CHsub) */ | ||||
| 	size_t smultis;		/* size of the multis area -- presumably in bytes; confirm in regcomp.c */ | ||||
| 	char *multis;		/* -> char[smultis]  ab\0cd\0ef\0\0 */ | ||||
| } cset; | ||||
| /* note that CHadd and CHsub are unsafe (cs and c are each evaluated twice), and CHIN doesn't yield 0/1 (it yields 0 or the mask bit) */ | ||||
| #define	CHadd(cs, c)	((cs)->ptr[(uch)(c)] |= (cs)->mask, (cs)->hash += (c)) | ||||
| #define	CHsub(cs, c)	((cs)->ptr[(uch)(c)] &= ~(cs)->mask, (cs)->hash -= (c)) | ||||
| #define	CHIN(cs, c)	((cs)->ptr[(uch)(c)] & (cs)->mask) | ||||
| #define	MCadd(p, cs, cp)	mcadd(p, cs, cp)	/* regcomp() internal fns */ | ||||
| #define	MCsub(p, cs, cp)	mcsub(p, cs, cp) | ||||
| #define	MCin(p, cs, cp)	mcin(p, cs, cp) | ||||
| 
 | ||||
| /* stuff for character categories */ | ||||
| typedef unsigned char cat_t; | ||||
| 
 | ||||
| /*
 | ||||
|  * main compiled-expression structure | ||||
|  */ | ||||
| struct re_guts { | ||||
| 	int magic;		/* validity stamp: regexec() and regfree() require MAGIC2; regfree() zeroes it */ | ||||
| #		define	MAGIC2	((('R'^0200)<<8)|'E') | ||||
| 	sop *strip;		/* malloced area for strip; freed by regfree() */ | ||||
| 	int csetsize;		/* number of bits in a cset vector */ | ||||
| 	int ncsets;		/* number of csets in use */ | ||||
| 	cset *sets;		/* -> cset [ncsets]; freed by regfree() */ | ||||
| 	uch *setbits;		/* -> uch[csetsize][ncsets/CHAR_BIT]; freed by regfree() */ | ||||
| 	int cflags;		/* copy of regcomp() cflags argument */ | ||||
| 	sopno nstates;		/* = number of sops; regexec() uses it to choose the small or large matcher */ | ||||
| 	sopno firststate;	/* the initial OEND (normally 0) */ | ||||
| 	sopno laststate;	/* the final OEND */ | ||||
| 	int iflags;		/* internal flags */ | ||||
| #		define	USEBOL	01	/* used ^ */ | ||||
| #		define	USEEOL	02	/* used $ */ | ||||
| #		define	BAD	04	/* something wrong; regexec() answers REG_BADPAT when set */ | ||||
| 	int nbol;		/* number of ^ used */ | ||||
| 	int neol;		/* number of $ used */ | ||||
| 	int ncategories;	/* how many character categories */ | ||||
| 	cat_t *categories;	/* ->catspace[-CHAR_MIN] */ | ||||
| 	char *must;		/* match must contain this string; freed by regfree() */ | ||||
| 	int mlen;		/* length of must */ | ||||
| 	size_t nsub;		/* copy of re_nsub */ | ||||
| 	int backrefs;		/* does it use back references? */ | ||||
| 	sopno nplus;		/* how deep does it nest +s? */ | ||||
| 	/* catspace must be last: it is declared [1] but the structure is allocated with room for more */ | ||||
| 	cat_t catspace[1];	/* actually [NC] */ | ||||
| }; | ||||
| 
 | ||||
| /* misc utilities */ | ||||
#define	OUT	(CHAR_MAX+1)	/* a non-character value */
/*
 * ISWORD - is c a word character (alphanumeric or underscore)?
 * The argument is converted to unsigned char before it reaches isalnum():
 * callers hand over plain char values, and a negative value other than EOF
 * is undefined behaviour for the <ctype.h> functions.  Note that c is
 * evaluated twice.
 */
#define	ISWORD(c)	(isalnum((unsigned char)(c)) || (c) == '_')
|  | @ -0,0 +1,138 @@ | |||
| /*
 | ||||
|  * the outer shell of regexec() | ||||
|  * | ||||
|  * This file includes engine.c *twice*, after muchos fiddling with the | ||||
|  * macros that code uses.  This lets the same code operate on two different | ||||
|  * representations for state sets. | ||||
|  */ | ||||
| #include <sys/types.h> | ||||
| #include <stdio.h> | ||||
| #include <stdlib.h> | ||||
| #include <string.h> | ||||
| #include <limits.h> | ||||
| #include <ctype.h> | ||||
| #include <regex.h> | ||||
| 
 | ||||
| #include "utils.h" | ||||
| #include "regex2.h" | ||||
| 
 | ||||
| static int nope = 0;		/* for use in asserts; shuts lint up */ | ||||
| 
 | ||||
| /* macros for manipulating states, small version */ | ||||
| #define	states	long	/* small version: a whole state set is the bits of one long, state n being bit n */ | ||||
| #define	states1	states		/* for later use in regexec() decision; NOTE: expands lazily, so once `states' is redefined further down this names the large-version type, not long */ | ||||
| #define	CLEAR(v)	((v) = 0) | ||||
| #define	SET0(v, n)	((v) &= ~(1ul << (n))) | ||||
| #define	SET1(v, n)	((v) |= 1ul << (n)) | ||||
| #define	ISSET(v, n)	((v) & (1ul << (n)))	/* nonzero rather than 1 when set */ | ||||
| #define	ASSIGN(d, s)	((d) = (s)) | ||||
| #define	EQ(a, b)	((a) == (b)) | ||||
| #define	STATEVARS	int dummy	/* dummy version */ | ||||
| #define	STATESETUP(m, n)	/* nothing */ | ||||
| #define	STATETEARDOWN(m)	/* nothing */ | ||||
| #define	SETUP(v)	((v) = 0) | ||||
| #define	onestate	long	/* a single state, kept as a one-bit mask (see INIT) */ | ||||
| #define	INIT(o, n)	((o) = (unsigned long)1 << (n)) | ||||
| #define	INC(o)	((o) <<= 1)	/* step to the next state's bit */ | ||||
| #define	ISSTATEIN(v, o)	((v) & (o)) | ||||
| /* some abbreviations; note that some of these know variable names! (they use `here' from the including code) */ | ||||
| /* do "if I'm here, I can also be there" etc without branches */ | ||||
| #define	FWD(dst, src, n)	((dst) |= ((unsigned long)(src)&(here)) << (n)) | ||||
| #define	BACK(dst, src, n)	((dst) |= ((unsigned long)(src)&(here)) >> (n)) | ||||
| #define	ISSETBACK(v, n)	((v) & ((unsigned long)here >> (n))) | ||||
| /* function names */ | ||||
| #define SNAMES			/* engine.c looks after details */ | ||||
| 
 | ||||
| #include "engine.c" | ||||
| 
 | ||||
| /* now undo things */ | ||||
| #undef	states | ||||
| #undef	CLEAR | ||||
| #undef	SET0 | ||||
| #undef	SET1 | ||||
| #undef	ISSET | ||||
| #undef	ASSIGN | ||||
| #undef	EQ | ||||
| #undef	STATEVARS | ||||
| #undef	STATESETUP | ||||
| #undef	STATETEARDOWN | ||||
| #undef	SETUP | ||||
| #undef	onestate | ||||
| #undef	INIT | ||||
| #undef	INC | ||||
| #undef	ISSTATEIN | ||||
| #undef	FWD | ||||
| #undef	BACK | ||||
| #undef	ISSETBACK | ||||
| #undef	SNAMES | ||||
| 
 | ||||
| /* macros for manipulating states, large version */ | ||||
| #define	states	char *	/* large version: a state set is an array of m->g->nstates chars, one per state */ | ||||
| #define	CLEAR(v)	memset(v, 0, m->g->nstates) | ||||
| #define	SET0(v, n)	((v)[n] = 0) | ||||
| #define	SET1(v, n)	((v)[n] = 1) | ||||
| #define	ISSET(v, n)	((v)[n]) | ||||
| #define	ASSIGN(d, s)	memcpy(d, s, m->g->nstates) | ||||
| #define	EQ(a, b)	(memcmp(a, b, m->g->nstates) == 0) | ||||
| #define	STATEVARS	int vn; char *space	/* space: one malloced area for all vectors; vn: how many SETUP has handed out */ | ||||
| #define	STATESETUP(m, nv)	{ (m)->space = malloc((nv)*(m)->g->nstates); \ | ||||
| 				if ((m)->space == NULL) return(REG_ESPACE); \ | ||||
| 				(m)->vn = 0; } | ||||
| #define	STATETEARDOWN(m)	{ free((m)->space); } | ||||
| #define	SETUP(v)	((v) = &m->space[m->vn++ * m->g->nstates])	/* next nstates-sized slice of space; no check against nv */ | ||||
| #define	onestate	int	/* a single state, kept as its index */ | ||||
| #define	INIT(o, n)	((o) = (n)) | ||||
| #define	INC(o)	((o)++) | ||||
| #define	ISSTATEIN(v, o)	((v)[o]) | ||||
| /* some abbreviations; note that some of these know variable names! (they use `here' from the including code) */ | ||||
| /* do "if I'm here, I can also be there" etc without branches */ | ||||
| #define	FWD(dst, src, n)	((dst)[here+(n)] |= (src)[here]) | ||||
| #define	BACK(dst, src, n)	((dst)[here-(n)] |= (src)[here]) | ||||
| #define	ISSETBACK(v, n)	((v)[here - (n)]) | ||||
| /* function names */ | ||||
| #define	LNAMES			/* flag */ | ||||
| 
 | ||||
| #include "engine.c" | ||||
| 
 | ||||
| /*
 | ||||
|  - regexec - interface for matching | ||||
|  = extern int regexec(const regex_t *, const char *, size_t, \ | ||||
|  =					regmatch_t [], int); | ||||
|  = #define	REG_NOTBOL	00001 | ||||
|  = #define	REG_NOTEOL	00002 | ||||
|  = #define	REG_STARTEND	00004 | ||||
|  = #define	REG_TRACE	00400	// tracing of execution
 | ||||
|  = #define	REG_LARGE	01000	// force large representation
 | ||||
|  = #define	REG_BACKR	02000	// force use of backref code
 | ||||
|  * | ||||
|  * We put this here so we can exploit knowledge of the state representation | ||||
|  * when choosing which matcher to call.  Also, by this point the matchers | ||||
|  * have been prototyped. | ||||
|  */ | ||||
| int				/* 0 success, REG_NOMATCH failure */ | ||||
| regexec(preg, string, nmatch, pmatch, eflags) | ||||
| const regex_t *preg; | ||||
| const char *string; | ||||
| size_t nmatch; | ||||
| regmatch_t pmatch[]; | ||||
| int eflags; | ||||
| { | ||||
| 	register struct re_guts *g = preg->re_g; | ||||
| #ifdef REDEBUG | ||||
| #	define	GOODFLAGS(f)	(f) | ||||
| #else | ||||
| #	define	GOODFLAGS(f)	((f)&(REG_NOTBOL|REG_NOTEOL|REG_STARTEND)) | ||||
| #endif | ||||
| 
 | ||||
| 	if (preg->re_magic != MAGIC1 || g->magic != MAGIC2) | ||||
| 		return(REG_BADPAT); | ||||
| 	assert(!(g->iflags&BAD)); | ||||
| 	if (g->iflags&BAD)		/* backstop for no-debug case */ | ||||
| 		return(REG_BADPAT); | ||||
| 	eflags = GOODFLAGS(eflags); | ||||
| 
 | ||||
| 	if (g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags®_LARGE)) | ||||
| 		return(smatcher(g, (char *)string, nmatch, pmatch, eflags)); | ||||
| 	else | ||||
| 		return(lmatcher(g, (char *)string, nmatch, pmatch, eflags)); | ||||
| } | ||||
|  | @ -0,0 +1,37 @@ | |||
| #include <sys/types.h> | ||||
| #include <stdio.h> | ||||
| #include <stdlib.h> | ||||
| #include <regex.h> | ||||
| 
 | ||||
| #include "utils.h" | ||||
| #include "regex2.h" | ||||
| 
 | ||||
| /*
 | ||||
|  - regfree - free everything | ||||
|  = extern void regfree(regex_t *); | ||||
|  */ | ||||
| void | ||||
| regfree(preg) | ||||
| regex_t *preg; | ||||
| { | ||||
| 	register struct re_guts *g; | ||||
| 
 | ||||
| 	if (preg->re_magic != MAGIC1)	/* oops */ | ||||
| 		return;			/* nice to complain, but hard */ | ||||
| 
 | ||||
| 	g = preg->re_g; | ||||
| 	if (g == NULL || g->magic != MAGIC2)	/* oops again */ | ||||
| 		return; | ||||
| 	preg->re_magic = 0;		/* mark it invalid */ | ||||
| 	g->magic = 0;			/* mark it invalid */ | ||||
| 
 | ||||
| 	if (g->strip != NULL) | ||||
| 		free((char *)g->strip); | ||||
| 	if (g->sets != NULL) | ||||
| 		free((char *)g->sets); | ||||
| 	if (g->setbits != NULL) | ||||
| 		free((char *)g->setbits); | ||||
| 	if (g->must != NULL) | ||||
| 		free(g->must); | ||||
| 	free((char *)g); | ||||
| } | ||||
|  | @ -0,0 +1,316 @@ | |||
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
| 
 | ||||
| /*
 | ||||
|  - split - divide a string into fields, like awk split() | ||||
|  = int split(char *string, char *fields[], int nfields, char *sep); | ||||
|  */ | ||||
/*
 * split - divide a string into fields, like awk split()
 *
 * string   text to split; separators that end a stored field are
 *          overwritten with '\0' in place.
 * fields   receives pointers into string; the list is not NULL-terminated.
 * nfields  number of entries available in fields[].  When it is zero or
 *          negative nothing is stored and string is left untouched: the
 *          fields are only counted (fields may then be NULL).
 * sep      ""   white space: runs of blanks/tabs separate, leading and
 *               trailing white space is ignored;
 *          "c"  every occurrence of c separates (empty fields possible);
 *          "ab" any run of the listed characters separates.
 *
 * Returns the number of fields found, including those that did not fit.
 * When there are more fields than nfields, the last stored field holds
 * the unsplit remainder of the string.
 */
int				/* number of fields, including overflow */
split(char *string, char *fields[], int nfields, char *sep)
{
	char *p = string;
	char c;				/* latest character */
	const char *seps = sep;		/* separator set actually in use */
	char sepc = seps[0];
	char sepc2;
	int fn;
	char **fp = fields;
	const char *sepp;
	int trimtrail;

	/* white space */
	if (sepc == '\0') {
		while ((c = *p++) == ' ' || c == '\t')
			continue;
		p--;
		trimtrail = 1;
		seps = " \t";	/* note, code below knows this is 2 long */
		sepc = ' ';
	} else
		trimtrail = 0;
	sepc2 = seps[1];	/* now we can safely pick this up */

	/* catch empties */
	if (*p == '\0')
		return(0);

	/* single separator */
	if (sepc2 == '\0') {
		if (nfields <= 0) {
			/* nowhere to store: just count, starting with the
			 * field p is standing in */
			fn = 1;
		} else {
			fn = nfields;
			for (;;) {
				*fp++ = p;
				fn--;
				if (fn == 0)
					break;
				while ((c = *p++) != sepc)
					if (c == '\0')
						return(nfields - fn);
				*(p-1) = '\0';
			}
			/* we have overflowed the fields vector -- just count them */
			fn = nfields;
		}
		for (;;) {
			while ((c = *p++) != sepc)
				if (c == '\0')
					return(fn);
			fn++;
		}
		/* not reached */
	}

	/* two separators */
	if (seps[2] == '\0') {
		if (nfields <= 0) {
			/* nowhere to store: count without touching string */
			fn = 0;
			for (;;) {
				const char *start = p;	/* where this field begins */

				fn++;
				while ((c = *p++) != sepc && c != sepc2)
					if (c == '\0') {
						/* white-space mode does not count an
						 * empty field after trailing blanks */
						if (trimtrail && *start == '\0')
							fn--;
						return(fn);
					}
				while ((c = *p++) == sepc || c == sepc2)
					continue;
				p--;
			}
			/* not reached */
		}
		fn = nfields;
		for (;;) {
			*fp++ = p;
			fn--;
			while ((c = *p++) != sepc && c != sepc2)
				if (c == '\0') {
					if (trimtrail && **(fp-1) == '\0')
						fn++;
					return(nfields - fn);
				}
			if (fn == 0)
				break;
			*(p-1) = '\0';
			while ((c = *p++) == sepc || c == sepc2)
				continue;
			p--;
		}
		/* we have overflowed the fields vector -- just count them */
		fn = nfields;
		while (c != '\0') {
			while ((c = *p++) == sepc || c == sepc2)
				continue;
			p--;
			fn++;
			while ((c = *p++) != '\0' && c != sepc && c != sepc2)
				continue;
		}
		/* might have to trim trailing white space */
		if (trimtrail) {
			p--;
			while ((c = *--p) == sepc || c == sepc2)
				continue;
			p++;
			if (*p != '\0') {
				if (fn == nfields+1)
					*p = '\0';
				fn--;
			}
		}
		return(fn);
	}

	/* n separators */
	fn = 0;
	for (;;) {
		if (fn < nfields)
			*fp++ = p;
		fn++;
		/* run to the end of this field */
		for (;;) {
			c = *p++;
			if (c == '\0')
				return(fn);
			sepp = seps;
			while ((sepc = *sepp++) != '\0' && sepc != c)
				continue;
			if (sepc != '\0')	/* it was a separator */
				break;
		}
		if (fn < nfields)
			*(p-1) = '\0';
		/* swallow the rest of the separator run */
		for (;;) {
			c = *p++;
			sepp = seps;
			while ((sepc = *sepp++) != '\0' && sepc != c)
				continue;
			if (sepc == '\0')	/* it wasn't a separator */
				break;
		}
		p--;
	}

	/* not reached */
}
| 
 | ||||
| #ifdef TEST_SPLIT | ||||
| 
 | ||||
| 
 | ||||
| /*
 | ||||
|  * test program | ||||
|  * pgm		runs regression | ||||
|  * pgm sep	splits stdin lines by sep | ||||
|  * pgm str sep	splits str by sep | ||||
|  * pgm str sep n	splits str by sep n times | ||||
|  */ | ||||
/*
 * Test driver for split(); the mode is chosen by the number of arguments
 * (see the usage comment above).  Always exits with status 0 unless the
 * string argument cannot fit the work buffer.
 */
int
main(int argc, char *argv[])
{
	char buf[512];
	int n;
#	define	MNF	10
	char *fields[MNF];
	int dosplit(char *, char *);	/* defined later in this file */
	/* NOTE(review): regress() is defined outside the visible part of the
	 * file, so no declaration is supplied for it here. */

	/* the repeat modes copy argv[1] into buf; refuse what cannot fit */
	if (argc > 3 && strlen(argv[1]) >= sizeof(buf)) {
		fprintf(stderr, "%s: string too long (limit %d characters)\n",
					argv[0], (int)sizeof(buf) - 1);
		exit(EXIT_FAILURE);
	}

	if (argc > 4)
		/* copies only, no split() -- presumably a timing baseline
		 * for the branch below; confirm before relying on it */
		for (n = atoi(argv[3]); n > 0; n--) {
			(void) strcpy(buf, argv[1]);
		}
	else if (argc > 3)
		for (n = atoi(argv[3]); n > 0; n--) {
			(void) strcpy(buf, argv[1]);
			(void) split(buf, fields, MNF, argv[2]);
		}
	else if (argc > 2)
		dosplit(argv[1], argv[2]);
	else if (argc > 1)
		while (fgets(buf, sizeof(buf), stdin) != NULL) {
			/* cut at the newline if there is one; a line without
			 * one (overlong, or last line of input) stays whole */
			buf[strcspn(buf, "\n")] = '\0';
			dosplit(buf, argv[1]);
		}
	else
		regress();

	exit(0);
}
| 
 | ||||
/*
 * dosplit - split `string' on `seps' into at most NF stored fields and
 * print the outcome.  string is modified in place by split().
 * Returns the field count reported by split().
 */
int
dosplit(char *string, char *seps)
{
#	define	NF	5
	char *fields[NF];
	int nf;
	int print(int, int, char *[]);	/* defined later in this file */

	nf = split(string, fields, NF, seps);
	print(nf, NF, fields);
	return(nf);
}
| 
 | ||||
/*
 * print - write one split() result to stdout as a single line
 *
 * nf is the field count reported by split(), nfp the capacity of fields[].
 * Output is the count, a tab, then the first min(nf, nfp) fields in double
 * quotes separated by ", ".  When nf exceeds nfp the list ends in a dangling
 * ", " to show that more fields existed than were stored.
 * Always returns 0; callers in this file ignore the value.
 */
int
print(int nf, int nfp, char *fields[])
{
	int shown = (nf > nfp) ? nfp : nf;	/* fields actually stored */
	int fn;

	printf("%d:\t", nf);
	for (fn = 0; fn < shown; fn++)
		printf("\"%s\"%s", fields[fn], (fn+1 < nf) ? ", " : "\n");

	/*
	 * The loop ends the line only when it reaches field nf-1.  With no
	 * fields at all, or with fewer stored than counted, nothing has
	 * written the newline yet, so finish the line here.
	 */
	if (shown < 1 || shown < nf)
		printf("\n");
	return 0;
}
| 
 | ||||
#define	RNF	5		/* some table entries know this */

/*
 * Regression table walked by regress(): each row is one split() call and
 * the result it must produce.  A row whose str is NULL ends the table.
 * Rows with nf above RNF list only RNF fields; the last one listed holds
 * the unsplit remainder of the string.
 */
struct {
	char *str;		/* string to split */
	char *seps;		/* separator set given to split() */
	int nf;			/* field count split() must return */
	char *fi[RNF];		/* expected fields, at most RNF of them */
} tests[] = {
	/* one separator character */
	{ "",			" ",	0,	{ "" } },
	{ " ",			" ",	2,	{ "", "" } },
	{ "x",			" ",	1,	{ "x" } },
	{ "xy",			" ",	1,	{ "xy" } },
	{ "x y",		" ",	2,	{ "x", "y" } },
	{ "abc def  g ",	" ",	5,	{ "abc", "def", "", "g", "" } },
	{ "  a bcd",		" ",	4,	{ "", "", "a", "bcd" } },
	{ "a b c d e f",	" ",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",		" ",	6,	{ "", "a", "b", "c", "d " } },

	/* two separator characters */
	{ "",			" _",	0,	{ "" } },
	{ " ",			" _",	2,	{ "", "" } },
	{ "x",			" _",	1,	{ "x" } },
	{ "x y",		" _",	2,	{ "x", "y" } },
	{ "ab _ cd",		" _",	2,	{ "ab", "cd" } },
	{ " a_b  c ",		" _",	5,	{ "", "a", "b", "c", "" } },
	{ "a b c_d e f",	" _",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",		" _",	6,	{ "", "a", "b", "c", "d " } },

	/* three separator characters */
	{ "",			" _~",	0,	{ "" } },
	{ " ",			" _~",	2,	{ "", "" } },
	{ "x",			" _~",	1,	{ "x" } },
	{ "x y",		" _~",	2,	{ "x", "y" } },
	{ "ab _~ cd",		" _~",	2,	{ "ab", "cd" } },
	{ " a_b  c~",		" _~",	5,	{ "", "a", "b", "c", "" } },
	{ "a b_c d~e f",	" _~",	6,	{ "a", "b", "c", "d", "e f" } },
	{ "~a b c d ",		" _~",	6,	{ "", "a", "b", "c", "d " } },

	/* four separator characters */
	{ "",			" _~-",	0,	{ "" } },
	{ " ",			" _~-",	2,	{ "", "" } },
	{ "x",			" _~-",	1,	{ "x" } },
	{ "x y",		" _~-",	2,	{ "x", "y" } },
	{ "ab _~- cd",		" _~-",	2,	{ "ab", "cd" } },
	{ " a_b  c~",		" _~-",	5,	{ "", "a", "b", "c", "" } },
	{ "a b_c-d~e f",	" _~-",	6,	{ "a", "b", "c", "d", "e f" } },
	{ "~a-b c d ",		" _~-",	6,	{ "", "a", "b", "c", "d " } },

	/* separator string made of the same character twice */
	{ "",			"  ",	0,	{ "" } },
	{ " ",			"  ",	2,	{ "", "" } },
	{ "x",			"  ",	1,	{ "x" } },
	{ "xy",			"  ",	1,	{ "xy" } },
	{ "x y",		"  ",	2,	{ "x", "y" } },
	{ "abc def  g ",	"  ",	4,	{ "abc", "def", "g", "" } },
	{ "  a bcd",		"  ",	3,	{ "", "a", "bcd" } },
	{ "a b c d e f",	"  ",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",		"  ",	6,	{ "", "a", "b", "c", "d " } },

	/* empty separator string */
	{ "",			"",	0,	{ "" } },
	{ " ",			"",	0,	{ "" } },
	{ "x",			"",	1,	{ "x" } },
	{ "xy",			"",	1,	{ "xy" } },
	{ "x y",		"",	2,	{ "x", "y" } },
	{ "abc def  g ",	"",	3,	{ "abc", "def", "g" } },
	{ "\t a bcd",		"",	2,	{ "a", "bcd" } },
	{ "  a \tb\t c ",	"",	3,	{ "a", "b", "c" } },
	{ "a b c d e ",		"",	5,	{ "a", "b", "c", "d", "e" } },
	{ "a b\tc d e f",	"",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d e f ",	"",	6,	{ "a", "b", "c", "d", "e f " } },

	/* end marker */
	{ NULL,			NULL,	0,	{ NULL } },
};
| 
 | ||||
| regress() | ||||
| { | ||||
| 	char buf[512]; | ||||
| 	register int n; | ||||
| 	char *fields[RNF+1]; | ||||
| 	register int nf; | ||||
| 	register int i; | ||||
| 	register int printit; | ||||
| 	register char *f; | ||||
| 
 | ||||
| 	for (n = 0; tests[n].str != NULL; n++) { | ||||
| 		(void) strcpy(buf, tests[n].str); | ||||
| 		fields[RNF] = NULL; | ||||
| 		nf = split(buf, fields, RNF, tests[n].seps); | ||||
| 		printit = 0; | ||||
| 		if (nf != tests[n].nf) { | ||||
| 			printf("split `%s' by `%s' gave %d fields, not %d\n", | ||||
| 				tests[n].str, tests[n].seps, nf, tests[n].nf); | ||||
| 			printit = 1; | ||||
| 		} else if (fields[RNF] != NULL) { | ||||
| 			printf("split() went beyond array end\n"); | ||||
| 			printit = 1; | ||||
| 		} else { | ||||
| 			for (i = 0; i < nf && i < RNF; i++) { | ||||
| 				f = fields[i]; | ||||
| 				if (f == NULL) | ||||
| 					f = "(NULL)"; | ||||
| 				if (strcmp(f, tests[n].fi[i]) != 0) { | ||||
| 					printf("split `%s' by `%s', field %d is `%s', not `%s'\n", | ||||
| 						tests[n].str, tests[n].seps, | ||||
| 						i, fields[i], tests[n].fi[i]); | ||||
| 					printit = 1; | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
| 		if (printit) | ||||
| 			print(nf, RNF, fields); | ||||
| 	} | ||||
| } | ||||
| #endif | ||||
|  | @ -0,0 +1,22 @@ | |||
/* utility definitions */

/*
 * DUPMAX follows the system's _POSIX2_RE_DUP_MAX when that is defined and
 * falls back to 255 otherwise.  INFINITY is the first value above DUPMAX.
 * NC is the number of distinct values a char can take; CHAR_MAX and
 * CHAR_MIN are not defined here, so the including file must supply
 * <limits.h>.
 * NOTE(review): C99 <math.h> also defines a macro named INFINITY.
 */
#if defined(_POSIX2_RE_DUP_MAX)
#define	DUPMAX		_POSIX2_RE_DUP_MAX
#else
#define	DUPMAX		255
#endif
#define	INFINITY	(DUPMAX + 1)
#define	NC		(CHAR_MAX - CHAR_MIN + 1)
typedef unsigned char uch;

/* without REDEBUG, make sure NDEBUG is set so assert() compiles to nothing */
#if !defined(REDEBUG) && !defined(NDEBUG)
#define	NDEBUG	/* no assertions please */
#endif
#include <assert.h>

/*
 * for old systems with bcopy() but no memmove(); bcopy() takes the source
 * first, so the first two arguments are swapped
 */
#if defined(USEBCOPY)
#define	memmove(d, s, c)	bcopy(s, d, c)
#endif
		Loading…
	
		Reference in New Issue
	
	 shivers
						shivers