stk/Tcl/tclUtil.c

2200 lines
56 KiB
C
Raw Normal View History

1996-09-27 06:29:02 -04:00
/*
* tclUtil.c --
*
* This file contains utility procedures that are used by many Tcl
* commands.
*
* Copyright (c) 1987-1993 The Regents of the University of California.
* Copyright (c) 1994-1995 Sun Microsystems, Inc.
*
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
* SCCS: @(#) tclUtil.c 1.112 96/02/15 11:42:52
*/
#include "tclInt.h"
#include "tclPort.h"
/*
* The following values are used in the flags returned by Tcl_ScanElement
* and used by Tcl_ConvertElement. The value TCL_DONT_USE_BRACES is also
* defined in tcl.h; make sure its value doesn't overlap with any of the
* values below.
*
* TCL_DONT_USE_BRACES - 1 means the string mustn't be enclosed in
* braces (e.g. it contains unmatched braces,
* or ends in a backslash character, or user
* just doesn't want braces); handle all
* special characters by adding backslashes.
* USE_BRACES - 1 means the string contains a special
* character that can be handled simply by
* enclosing the entire argument in braces.
* BRACES_UNMATCHED - 1 means that braces aren't properly matched
* in the argument.
*/
/*
* USE_BRACES: flag bit meaning the element contains a special character
* that can be handled simply by enclosing the whole element in braces.
*/
#define USE_BRACES 2
/*
* BRACES_UNMATCHED: flag bit meaning the braces inside the element are
* not properly matched.
*/
#define BRACES_UNMATCHED 4
/*
* Function prototypes for local procedures in this file:
*/
/*
* SetupAppendBuffer is file-local (static); its definition is not visible
* in this part of the file.
*/
static void SetupAppendBuffer _ANSI_ARGS_((Interp *iPtr,
int newSpace));
/*
*----------------------------------------------------------------------
*
* TclFindElement --
*
* Given a pointer into a Tcl list, locate the first (or next)
* element in the list.
*
* Results:
* The return value is normally TCL_OK, which means that the
* element was successfully located. If TCL_ERROR is returned
* it means that list didn't have proper list structure;
* interp->result contains a more detailed error message.
*
* If TCL_OK is returned, then *elementPtr will be set to point
* to the first element of list, and *nextPtr will be set to point
* to the character just after any white space following the last
* character that's part of the element. If this is the last argument
* in the list, then *nextPtr will point to the null character at the
* end of list. If sizePtr is non-NULL, *sizePtr is filled in with
* the number of characters in the element. If the element is in
* braces, then *elementPtr will point to the character after the
* opening brace and *sizePtr will not include either of the braces.
* If there isn't an element in the list, *sizePtr will be zero, and
* both *elementPtr and *nextPtr will refer to the null character at
* the end of list. Note: this procedure does NOT collapse backslash
* sequences.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------
*/
int
TclFindElement(interp, list, elementPtr, nextPtr, sizePtr, bracePtr)
Tcl_Interp *interp; /* Interpreter to use for error reporting.
* If NULL, then no error message is left
* after errors. */
register char *list; /* String containing Tcl list with zero
* or more elements (possibly in braces). */
char **elementPtr; /* Fill in with location of first significant
* character in first element of list. */
char **nextPtr; /* Fill in with location of character just
* after all white space following end of
* argument (i.e. next argument or end of
* list). */
int *sizePtr; /* If non-zero, fill in with size of
* element. */
int *bracePtr; /* If non-zero fill in with non-zero/zero
* to indicate that arg was/wasn't
* in braces. */
{
register char *p; /* Scan position within the element. */
int openBraces = 0; /* Current brace nesting depth; 0 means the
* element is not brace-enclosed. */
int inQuotes = 0; /* Non-zero if the element started with a
* double-quote. */
int size; /* Element length; assigned on every path
* that jumps to "done". */
/*
* Skim off leading white space and check for an opening brace or
* quote. Every character is passed through UCHAR before being handed
* to isspace (UCHAR is defined outside this file; presumably it casts
* to unsigned char -- confirm). When STk_CODE is defined, parentheses
* take the place of braces throughout this procedure.
*/
while (isspace(UCHAR(*list))) {
list++;
}
#ifdef STk_CODE
if (*list == '(') {
#else
if (*list == '{') {
#endif
openBraces = 1;
list++;
} else if (*list == '"') {
inQuotes = 1;
list++;
}
if (bracePtr != 0) {
*bracePtr = openBraces;
}
/*
* From here on "list" marks the first character of the element proper
* (just past any opening brace or quote).
*/
p = list;
/*
* Find the end of the element (either a space or a close brace or
* the end of the string).
*/
while (1) {
switch (*p) {
/*
* Open brace: don't treat specially unless the element is
* in braces. In this case, keep a nesting count.
*/
#ifdef STk_CODE
case '(':
#else
case '{':
#endif
if (openBraces != 0) {
openBraces++;
}
break;
/*
* Close brace: if element is in braces, keep nesting
* count and quit when the last close brace is seen.
* The closing brace must be followed by white space or the
* end of the string; otherwise an error is reported that
* quotes at most 20 of the offending characters.
*/
#ifdef STk_CODE
case ')':
#else
case '}':
#endif
if (openBraces == 1) {
char *p2;
/* Size excludes the closing brace; p then steps past it. */
size = p - list;
p++;
if (isspace(UCHAR(*p)) || (*p == 0)) {
goto done;
}
for (p2 = p; (*p2 != 0) && (!isspace(UCHAR(*p2)))
&& (p2 < p+20); p2++) {
/* null body */
}
if (interp != NULL) {
/*
* The message is formatted directly into interp->result
* after the result has been reset.
*/
Tcl_ResetResult(interp);
sprintf(interp->result,
"list element in braces followed by \"%.*s\" instead of space",
(int) (p2-p), p);
}
return TCL_ERROR;
} else if (openBraces != 0) {
openBraces--;
}
break;
/*
* Backslash: skip over everything up to the end of the
* backslash sequence. Note that the "size" declared here
* shadows the outer "size"; it receives the value stored by
* Tcl_Backslash (presumably the length of the sequence) and
* does not affect the element size.
*/
case '\\': {
int size;
(void) Tcl_Backslash(p, &size);
/* Advance by size-1; the p++ after the switch supplies the rest. */
p += size - 1;
break;
}
/*
* Space: ignore if element is in braces or quotes; otherwise
* terminate element.
*/
case ' ':
case '\f':
case '\n':
case '\r':
case '\t':
case '\v':
if ((openBraces == 0) && !inQuotes) {
size = p - list;
goto done;
}
break;
/*
* Double-quote: if element is in quotes then terminate it.
* As with a closing brace, the quote must be followed by white
* space or the end of the string.
*/
case '"':
if (inQuotes) {
char *p2;
/* Size excludes the closing quote; p then steps past it. */
size = p-list;
p++;
if (isspace(UCHAR(*p)) || (*p == 0)) {
goto done;
}
for (p2 = p; (*p2 != 0) && (!isspace(UCHAR(*p2)))
&& (p2 < p+20); p2++) {
/* null body */
}
if (interp != NULL) {
Tcl_ResetResult(interp);
sprintf(interp->result,
"list element in quotes followed by \"%.*s\" %s", (int) (p2-p), p,
"instead of space");
}
return TCL_ERROR;
}
break;
/*
* End of list: terminate element. Reaching the end while still
* inside braces or quotes is an error.
*/
case 0:
if (openBraces != 0) {
if (interp != NULL) {
Tcl_SetResult(interp, "unmatched open brace in list",
TCL_STATIC);
}
return TCL_ERROR;
} else if (inQuotes) {
if (interp != NULL) {
Tcl_SetResult(interp, "unmatched open quote in list",
TCL_STATIC);
}
return TCL_ERROR;
}
size = p - list;
goto done;
}
p++;
}
/*
* Common exit: skip white space after the element so that *nextPtr
* refers to the next element or to the terminating null character.
*/
done:
while (isspace(UCHAR(*p))) {
p++;
}
*elementPtr = list;
*nextPtr = p;
if (sizePtr != 0) {
*sizePtr = size;
}
return TCL_OK;
}
/*
*----------------------------------------------------------------------
*
* TclCopyAndCollapse --
*
* Copy a string and eliminate any backslashes that aren't in braces.
*
* Results:
* There is no return value. Count chars. get copied from src
* to dst. Along the way, if backslash sequences are found outside
* braces, the backslashes are eliminated in the copy.
* After scanning count chars. from source, a null character is
* placed at the end of dst.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------
*/
void
TclCopyAndCollapse(count, src, dst)
    int count;                  /* Total number of characters to copy
                                 * from src. */
    register char *src;         /* Copy from here... */
    register char *dst;         /* ... to here. */
{
    int seqLen;                 /* Source characters consumed by one
                                 * backslash sequence. */

    while (count > 0) {
        if (*src != '\\') {
            /*
             * Ordinary character: copy it straight across.
             */

            *dst++ = *src++;
            count--;
            continue;
        }

        /*
         * Backslash sequence: store the single character that
         * Tcl_Backslash returns and step over the whole sequence
         * in the source.
         */

        *dst++ = Tcl_Backslash(src, &seqLen);
        src += seqLen;
        count -= seqLen;
    }
    *dst = 0;
}
/*
*----------------------------------------------------------------------
*
* Tcl_SplitList --
*
* Splits a list up into its constituent fields.
*
* Results:
* The return value is normally TCL_OK, which means that
* the list was successfully split up. If TCL_ERROR is
* returned, it means that "list" didn't have proper list
* structure; interp->result will contain a more detailed
* error message.
*
* *argvPtr will be filled in with the address of an array
* whose elements point to the elements of list, in order.
* *argcPtr will get filled in with the number of valid elements
* in the array. A single block of memory is dynamically allocated
* to hold both the argv array and a copy of the list (with
* backslashes and braces removed in the standard way).
* The caller must eventually free this memory by calling free()
* on *argvPtr. Note: *argvPtr and *argcPtr are only modified
* if the procedure returns normally.
*
* Side effects:
* Memory is allocated.
*
*----------------------------------------------------------------------
*/
int
Tcl_SplitList(interp, list, argcPtr, argvPtr)
    Tcl_Interp *interp;         /* Interpreter to use for error reporting.
                                 * If NULL, then no error message is left. */
    char *list;                 /* Pointer to string with list structure. */
    int *argcPtr;               /* Pointer to location to fill in with
                                 * the number of elements in the list. */
    char ***argvPtr;            /* Pointer to place to store pointer to array
                                 * of pointers to list elements. */
{
    char **argv;                /* Result block: pointer array followed by
                                 * the copied element strings. */
    char *scan;                 /* Walks the list while sizing the block. */
    char *store;                /* Where the next element copy goes. */
    char *element;
    int slots, count, code, elSize, brace;

    /*
     * Sizing pass. The block must hold the pointer array plus a copy of
     * the list text. The number of white-space characters plus one is
     * used as an upper bound on the number of elements.
     */

    slots = 1;
#ifdef STk_CODE
    {
        char *lastParen = list;

        for (scan = list; *scan != 0; scan++) {
            if (isspace(UCHAR(*scan))) {
                slots++;
            }
            if (*scan == ')') {
                lastParen = scan;
            }
        }

        /*
         * Tcl would treat a string of the form "( ... )" as one quoted
         * string rather than as a list, so when the string starts with
         * an open parenthesis the opening parenthesis and the last
         * closing parenthesis are overwritten (in place) with spaces.
         */

        if ((*list == '(') && (lastParen > list) && (*lastParen == ')')) {
            *list = ' ';
            *lastParen = ' ';
        }
    }
#else
    for (scan = list; *scan != 0; scan++) {
        if (isspace(UCHAR(*scan))) {
            slots++;
        }
    }
#endif
    slots++;                    /* Room for the terminating NULL pointer. */

    argv = (char **) ckalloc((unsigned)
            ((slots * sizeof(char *)) + (scan - list) + 1));
    store = ((char *) argv) + slots*sizeof(char *);

    /*
     * Extraction pass: pull elements off the front of the list one at a
     * time and copy each into the string area behind the pointer array.
     */

    count = 0;
    while (*list != 0) {
        code = TclFindElement(interp, list, &element, &list, &elSize, &brace);
        if (code != TCL_OK) {
            ckfree((char *) argv);
            return code;
        }
        if (*element == 0) {
            break;
        }
        if (count >= slots) {
            ckfree((char *) argv);
            if (interp != NULL) {
                Tcl_SetResult(interp, "internal error in Tcl_SplitList",
                        TCL_STATIC);
            }
            return TCL_ERROR;
        }
        argv[count] = store;
        if (!brace) {
            /*
             * Unbraced element: backslash sequences are collapsed while
             * copying; the copy is null-terminated by the callee.
             */

            TclCopyAndCollapse(elSize, element, store);
        } else {
            /*
             * Braced element: the contents are copied verbatim.
             */

            strncpy(store, element, (size_t) elSize);
            store[elSize] = 0;
        }
        store += elSize + 1;
        count++;
    }

    argv[count] = NULL;
    *argvPtr = argv;
    *argcPtr = count;
    return TCL_OK;
}
/*
*----------------------------------------------------------------------
*
* Tcl_ScanElement --
*
* This procedure is a companion procedure to Tcl_ConvertElement.
* It scans a string to see what needs to be done to it (e.g.
* add backslashes or enclosing braces) to make the string into
* a valid Tcl list element.
*
* Results:
* The return value is an overestimate of the number of characters
* that will be needed by Tcl_ConvertElement to produce a valid
* list element from string. The word at *flagPtr is filled in
* with a value needed by Tcl_ConvertElement when doing the actual
* conversion.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------
*/
int
Tcl_ScanElement(string, flagPtr)
    char *string;               /* String to convert to Tcl list element.
                                 * NULL is treated as an empty string. */
    int *flagPtr;               /* Where to store information to guide
                                 * Tcl_ConvertElement. */
{
    int flags, nestingLevel;
    register char *p;

    /*
     * This procedure and Tcl_ConvertElement together do two things:
     *
     * 1. They produce a proper list, one that will yield back the
     * argument strings when evaluated or when disassembled with
     * Tcl_SplitList. This is the most important thing.
     *
     * 2. They try to produce legible output, which means minimizing the
     * use of backslashes (using braces instead). However, there are
     * some situations where backslashes must be used (e.g. an element
     * like "{abc": the leading brace will have to be backslashed). For
     * each element, one of three things must be done:
     *
     * (a) Use the element as-is (it doesn't contain any special
     * characters). This is the most desirable option.
     *
     * (b) Enclose the element in braces, but leave the contents alone.
     * This happens if the element contains embedded space, or if it
     * contains characters with special interpretation ($, [, ;, or \),
     * or if it starts with a brace or double-quote, or if there are
     * no characters in the element.
     *
     * (c) Don't enclose the element in braces, but add backslashes to
     * prevent special interpretation of special characters. This is a
     * last resort used when the argument would normally fall under case
     * (b) but contains unmatched braces. It also occurs if the last
     * character of the argument is a backslash or if the element contains
     * a backslash followed by newline.
     *
     * The procedure figures out how many bytes will be needed to store
     * the result (actually, it overestimates). It also collects information
     * about the element in the form of a flags word.
     *
     * When STk_CODE is defined, braces are never requested (the flags
     * start out as TCL_DONT_USE_BRACES) and the characters that would
     * otherwise ask for bracing are not examined.
     */

    nestingLevel = 0;
#ifdef STk_CODE
    flags = TCL_DONT_USE_BRACES;
#else
    flags = 0;
#endif
    if (string == NULL) {
        string = "";
    }
    p = string;
    if ((*p == '{') || (*p == '"') || (*p == 0)) {
        flags |= USE_BRACES;
    }
    for ( ; *p != 0; p++) {
        switch (*p) {
            case '{':
                nestingLevel++;
                break;
            case '}':
                nestingLevel--;
                if (nestingLevel < 0) {
                    /*
                     * A close brace with no matching open brace before it.
                     */

                    flags |= TCL_DONT_USE_BRACES|BRACES_UNMATCHED;
                }
                break;
#ifndef STk_CODE
            case '[':
            case '$':
            case ';':
            case ' ':
            case '\f':
            case '\n':
            case '\r':
            case '\t':
            case '\v':
                flags |= USE_BRACES;
                break;
#endif
            case '\\':
                if ((p[1] == 0) || (p[1] == '\n')) {
                    /*
                     * Trailing backslash or backslash-newline: braces
                     * can't be used. The bit is OR-ed in rather than
                     * assigned so that a BRACES_UNMATCHED already recorded
                     * for an earlier stray close brace is not lost. If it
                     * were lost, an element such as "}{\" (whose nesting
                     * level ends at zero) would have its braces emitted
                     * without backslashes by Tcl_ConvertElement.
                     */

                    flags |= TCL_DONT_USE_BRACES;
                } else {
                    int size;

                    /*
                     * Step over the whole backslash sequence so that its
                     * characters are not examined individually.
                     */

                    (void) Tcl_Backslash(p, &size);
                    p += size-1;
                    flags |= USE_BRACES;
                }
                break;
        }
    }
    if (nestingLevel != 0) {
        flags = TCL_DONT_USE_BRACES | BRACES_UNMATCHED;
    }
    *flagPtr = flags;

    /*
     * Allow enough space to backslash every character plus leave
     * two spaces for braces.
     */

    return 2*(p-string) + 2;
}
/*
*----------------------------------------------------------------------
*
* Tcl_ConvertElement --
*
* This is a companion procedure to Tcl_ScanElement. Given the
* information produced by Tcl_ScanElement, this procedure converts
* a string to a list element equal to that string.
*
* Results:
* Information is copied to *dst in the form of a list element
* identical to src (i.e. if Tcl_SplitList is applied to dst it
* will produce a string identical to src). The return value is
* a count of the number of characters copied (not including the
* terminating NULL character).
*
* Side effects:
* None.
*
*----------------------------------------------------------------------
*/
int
Tcl_ConvertElement(src, dst, flags)
    register char *src;         /* Source information for list element. */
    char *dst;                  /* Place to put list-ified element. */
    int flags;                  /* Flags produced by Tcl_ScanElement. */
{
    register char *out = dst;   /* Next position to write in dst. */

    /*
     * The comment block at the start of the Tcl_ScanElement code
     * explains the overall strategy.
     */

#ifdef STk_CODE
    /*
     * STk variant: an empty element is written as backslash-zero and
     * anything else is copied unchanged; flags is not consulted.
     */

    if ((src == NULL) || (*src == 0)) {
        out[0] = '\\';
        out[1] = '0';
        out[2] = 0;
        return 2;
    }
    while (*src != 0) {
        *out++ = *src++;
    }
    *out = 0;
    return out - dst;
#else
    /*
     * An empty element is represented by an empty pair of braces.
     */

    if ((src == NULL) || (*src == 0)) {
        out[0] = '{';
        out[1] = '}';
        out[2] = 0;
        return 2;
    }

    /*
     * Simple case: wrap the untouched contents in braces.
     */

    if ((flags & USE_BRACES) && !(flags & TCL_DONT_USE_BRACES)) {
        *out++ = '{';
        while (*src != 0) {
            *out++ = *src++;
        }
        *out++ = '}';
        *out = '\0';
        return out - dst;
    }

    /*
     * Backslash case. A leading open brace is only legal when the
     * whole element is brace-enclosed, so it gets a backslash. That may
     * upset the balance of the remaining braces, so from here on the
     * braces are treated as unmatched.
     */

    if (*src == '{') {
        *out++ = '\\';
        *out++ = '{';
        src++;
        flags |= BRACES_UNMATCHED;
    }
    for ( ; *src != 0; src++) {
        char letter = 0;        /* Non-zero: emit backslash + letter
                                 * in place of the source character. */
        int quote = 0;          /* Non-zero: emit a backslash before
                                 * the source character. */

        switch (*src) {
            case '\f':
                letter = 'f';
                break;
            case '\n':
                letter = 'n';
                break;
            case '\r':
                letter = 'r';
                break;
            case '\t':
                letter = 't';
                break;
            case '\v':
                letter = 'v';
                break;
            case ']':
            case '[':
            case '$':
            case ';':
            case ' ':
            case '\\':
            case '"':
                quote = 1;
                break;
            case '{':
            case '}':
                /*
                 * Braces are backslashed when unmatched because the
                 * result may end up inside a brace-enclosed sub-list
                 * (e.g. if Tcl_DStringStartSublist has been invoked),
                 * where an unquoted stray brace would cause a mismatch.
                 */

                quote = (flags & BRACES_UNMATCHED) != 0;
                break;
        }
        if (letter != 0) {
            *out++ = '\\';
            *out++ = letter;
        } else {
            if (quote) {
                *out++ = '\\';
            }
            *out++ = *src;
        }
    }
    *out = '\0';
    return out - dst;
#endif
}
/*
*----------------------------------------------------------------------
*
* Tcl_Merge --
*
* Given a collection of strings, merge them together into a
* single string that has proper Tcl list structure (i.e.
* Tcl_SplitList may be used to retrieve strings equal to the
* original elements, and Tcl_Eval will parse the string back
* into its original elements).
*
* Results:
* The return value is the address of a dynamically-allocated
* string containing the merged list.
*
* Side effects:
* Memory is allocated for the result; the caller is responsible
* for freeing the memory.
*
*----------------------------------------------------------------------
*/
char *
Tcl_Merge(argc, argv)
    int argc;                   /* How many strings to merge. */
    char **argv;                /* Array of string values. */
{
#   define LOCAL_SIZE 20
    int stackFlags[LOCAL_SIZE]; /* Flag storage used for short lists. */
    int *elemFlags;             /* Per-element flags from Tcl_ScanElement. */
    int needed, written, n;
    char *merged;
    register char *out;

    /*
     * First pass: collect the flags for every element and add up an
     * upper bound on the space required. The flags live on the stack
     * unless there are more than LOCAL_SIZE elements.
     */

    elemFlags = stackFlags;
    if (argc > LOCAL_SIZE) {
        elemFlags = (int *) ckalloc((unsigned) argc*sizeof(int));
    }
#ifdef STk_CODE
    needed = 3;                 /* Terminator plus the enclosing "()". */
#else
    needed = 1;                 /* Terminator. */
#endif
    for (n = 0; n < argc; n++) {
        needed += Tcl_ScanElement(argv[n], &elemFlags[n]) + 1;
    }

    /*
     * Second pass: convert each element into the result buffer, writing
     * a space after every one.
     */

    merged = (char *) ckalloc((unsigned) needed);
    out = merged;
#ifdef STk_CODE
    *out++ = '(';
#endif
    for (n = 0; n < argc; n++) {
        written = Tcl_ConvertElement(argv[n], out, elemFlags[n]);
        out += written;
        *out++ = ' ';
    }

    /*
     * Finish the string: the space written after the last element (if
     * there was one) is overwritten by the closing parenthesis (STk)
     * or by the terminator.
     */

#ifdef STk_CODE
    if (out != merged+1) {
        out--;
    }
    out[0] = ')';
    out[1] = '\0';
#else
    if (out != merged) {
        out--;
    }
    *out = 0;
#endif

    if (elemFlags != stackFlags) {
        ckfree((char *) elemFlags);
    }
    return merged;
}
/*
*----------------------------------------------------------------------
*
* Tcl_Concat --
*
* Concatenate a set of strings into a single large string.
*
* Results:
* The return value is dynamically-allocated string containing
* a concatenation of all the strings in argv, with spaces between
* the original argv elements.
*
* Side effects:
* Memory is allocated for the result; the caller is responsible
* for freeing the memory.
*
*----------------------------------------------------------------------
*/
char *
Tcl_Concat(argc, argv)
    int argc;                   /* Number of strings to concatenate. */
    char **argv;                /* Array of strings to concatenate. */
{
    int bytes, n;
    register char *out;         /* Next position to write in the result. */
    char *joined;               /* The allocated result string. */

    /*
     * Every string contributes its length plus one separator; one more
     * byte holds the terminator.
     */

    bytes = 1;
    for (n = 0; n < argc; n++) {
        bytes += strlen(argv[n]) + 1;
    }
    joined = (char *) ckalloc((unsigned) bytes);

    out = joined;
    for (n = 0; n < argc; n++) {
        char *first = argv[n];  /* First character to keep. */
        char *limit;            /* One past the last character to keep. */

        /*
         * Trim white space from both ends of the string so the result
         * looks neater; strings that end up empty are dropped.
         */

        while (isspace(UCHAR(*first))) {
            first++;
        }
        limit = first + strlen(first);
        while ((limit > first) && (isspace(UCHAR(limit[-1])))) {
            limit--;
        }
        if (limit == first) {
            continue;
        }
        (void) strncpy(out, first, (size_t) (limit - first));
        out += limit - first;
        *out++ = ' ';
    }

    /*
     * If anything was written, the terminator replaces the final
     * separator; otherwise the result is the empty string.
     */

    if (out != joined) {
        out--;
    }
    *out = 0;
    return joined;
}
/*
*----------------------------------------------------------------------
*
* Tcl_StringMatch --
*
* See if a particular string matches a particular pattern.
*
* Results:
* The return value is 1 if string matches pattern, and
* 0 otherwise. The matching operation permits the following
* special characters in the pattern: *?\[] (see the manual
* entry for details on what these mean).
*
* Side effects:
* None.
*
*----------------------------------------------------------------------
*/
int
Tcl_StringMatch(string, pattern)
    register char *string;      /* String. */
    register char *pattern;     /* Pattern, which may contain
                                 * special characters. */
{
    /*
     * Each trip round the loop consumes one character of the string and
     * the pattern item that matches it; the loop header advances both.
     */

    for ( ; ; pattern++, string++) {
        /*
         * Pattern exhausted: success only if the string is exhausted
         * as well.
         */

        if (*pattern == 0) {
            return (*string == 0);
        }

        /*
         * String exhausted: only "*" can still match.
         */

        if ((*pattern != '*') && (*string == 0)) {
            return 0;
        }

        switch (*pattern) {
            case '*':
                /*
                 * "*" matches any substring. A trailing "*" matches
                 * everything that is left; otherwise try the rest of
                 * the pattern against every suffix of the string.
                 */

                pattern++;
                if (*pattern == 0) {
                    return 1;
                }
                for ( ; ; string++) {
                    if (Tcl_StringMatch(string, pattern)) {
                        return 1;
                    }
                    if (*string == 0) {
                        return 0;
                    }
                }

            case '?':
                /*
                 * "?" matches any single character.
                 */

                break;

            case '[': {
                /*
                 * "[" introduces a set of acceptable characters, each
                 * item being a single character or a range written as
                 * two characters separated by "-". A range matches
                 * whichever way round its end points are given.
                 */

                int found = 0;
                char hi;

                pattern++;
                while (!found) {
                    if ((*pattern == ']') || (*pattern == 0)) {
                        return 0;
                    }
                    if (*pattern == *string) {
                        found = 1;
                    } else if (pattern[1] == '-') {
                        hi = pattern[2];
                        if (hi == 0) {
                            return 0;
                        }
                        if (((*pattern <= *string) && (hi >= *string))
                                || ((*pattern >= *string)
                                && (hi <= *string))) {
                            found = 1;
                        } else {
                            pattern += 3;
                        }
                    } else {
                        pattern++;
                    }
                }

                /*
                 * Skip the rest of the set. If the closing "]" is
                 * missing, back up one character so that the loop
                 * header leaves the pattern at its terminating null.
                 */

                while ((*pattern != ']') && (*pattern != 0)) {
                    pattern++;
                }
                if (*pattern == 0) {
                    pattern--;
                }
                break;
            }

            case '\\':
                /*
                 * A backslash is stripped off so that the character
                 * after it is matched exactly; a pattern ending in a
                 * backslash fails.
                 */

                pattern++;
                if (*pattern == 0) {
                    return 0;
                }
                /* FALLTHROUGH */

            default:
                /*
                 * Ordinary character: it must match exactly.
                 */

                if (*pattern != *string) {
                    return 0;
                }
                break;
        }
    }
}
/*
*----------------------------------------------------------------------
*
* Tcl_SetResult --
*
* Arrange for "string" to be the Tcl return value.
*
* Results:
* None.
*
* Side effects:
* interp->result is left pointing either to "string" itself or, if
* freeProc is TCL_VOLATILE, to a copy of string. If string is NULL,
* interp->result is set to an empty string.
*
*----------------------------------------------------------------------
*/
void
Tcl_SetResult(interp, string, freeProc)
Tcl_Interp *interp; /* Interpreter with which to associate the
* return value. */
char *string; /* Value to be returned. If NULL,
* the result is set to an empty string. */
Tcl_FreeProc *freeProc; /* Gives information about the string:
* TCL_STATIC, TCL_VOLATILE, or the address
* of a Tcl_FreeProc such as free. */
{
register Interp *iPtr = (Interp *) interp;
int length;
Tcl_FreeProc *oldFreeProc = iPtr->freeProc;
char *oldResult = iPtr->result;
if (string == NULL) {
iPtr->resultSpace[0] = 0;
iPtr->result = iPtr->resultSpace;
iPtr->freeProc = 0;
} else if (freeProc == TCL_DYNAMIC) {
iPtr->result = string;
iPtr->freeProc = TCL_DYNAMIC;
} else if (freeProc == TCL_VOLATILE) {
length = strlen(string);
if (length > TCL_RESULT_SIZE) {
iPtr->result = (char *) ckalloc((unsigned) length+1);
iPtr->freeProc = TCL_DYNAMIC;
} else {
iPtr->result = iPtr->resultSpace;
iPtr->freeProc = 0;
}
strcpy(iPtr->result, string);
} else {
iPtr->result = string;
iPtr->freeProc = freeProc;
}
/*
* If the old result was dynamically-allocated, free it up. Do it
* here, rather than at the beginning, in case the new result value
* was part of the old result value.
*/