/*
* tclParse.c --
*
* This file contains a collection of procedures that are used
* to parse Tcl commands or parts of commands (like quoted
* strings or nested sub-commands).
*
* Copyright (c) 1987-1993 The Regents of the University of California.
* Copyright (c) 1994-1996 Sun Microsystems, Inc.
*
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
* SCCS: @(#) tclParse.c 1.51 96/09/06 09:47:29
*/
#include "tclInt.h"
#include "tclPort.h"
/*
* The following table assigns a type to each character. Only types
* meaningful to Tcl parsing are represented here. The table is
* designed to be referenced with either signed or unsigned characters,
* so it has 384 entries. The first 128 entries correspond to negative
* character values, the next 256 correspond to positive character
* values. The last 128 entries are identical to the first 128. The
* table is always indexed with a 128-byte offset (the 128th entry
* corresponds to a 0 character value).
*/
char tclTypeTable[] = {
/*
* Negative character values, from -128 to -1:
*/
/*
* Positive character values, from 0-127:
*/
/*
* Large unsigned character values, from 128-255:
*/
};
/*
* Function prototypes for procedures local to this file:
*/
/*
*----------------------------------------------------------------------
*
* Tcl_Backslash --
*
* Figure out how to handle a backslash sequence.
*
* Results:
* The return value is the character that should be substituted
* in place of the backslash sequence that starts at src. If
* readPtr isn't NULL then it is filled in with a count of the
* number of characters in the backslash sequence.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------
*/
char
char *src; /* Points to the backslash character of
* a backslash sequence. */
int *readPtr; /* Fill in with number of characters read
* from src, unless NULL. */
{
register char *p = src+1;
char result;
int count;
count = 2;
switch (*p) {
/*
* Note: in the conversions below, use absolute values (e.g.,
* 0xa) rather than symbolic values (e.g. \n) that get converted
* by the compiler. It's possible that compilers on some
* platforms will do the symbolic conversions differently, which
* could result in non-portable Tcl scripts.
*/
case 'a':
result = 0x7;
break;
case 'b':
result = 0x8;
break;
case 'f':
result = 0xc;
break;
case 'n':
result = 0xa;
break;
case 'r':
result = 0xd;
break;
case 't':
result = 0x9;
break;
case 'v':
result = 0xb;
break;
case 'x':
char *end;
} else {
count = 2;
result = 'x';
}
break;
case '\n':
do {
p++;
} while ((*p == ' ') || (*p == '\t'));
result = ' ';
break;
case 0:
result = '\\';
count = 1;
break;
default:
result = (char)(*p - '0');
p++;
break;
}
count = 3;
p++;
break;
}
count = 4;
break;
}
result = *p;
count = 2;
break;
}
}
return result;
}
/*
*--------------------------------------------------------------
*
* TclParseQuotes --
*
* This procedure parses a double-quoted string such as a
* quoted Tcl command argument or a quoted value in a Tcl
* expression. This procedure is also used to parse array
* element names within parentheses, or anything else that
* needs all the substitutions that happen in quotes.
*
* Results:
* The return value is a standard Tcl result, which is
* TCL_OK unless there was an error while parsing the
* quoted string. If an error occurs then interp->result
* contains a standard error message. *TermPtr is filled
* in with the address of the character just after the
* last one successfully processed; this is usually the
* character just after the matching close-quote. The
* fully-substituted contents of the quotes are stored in
* standard fashion in *pvPtr, null-terminated with
* pvPtr->next pointing to the terminating null character.
*
* Side effects:
* The buffer space in pvPtr may be enlarged by calling its
* expandProc.
*
*--------------------------------------------------------------
*/
int
* evaluations and error messages. */
char *string; /* Character just after opening double-
* quote. */
int termChar; /* Character that terminates "quoted" string
* (usually double-quote, but sometimes
* right-paren or something else). */
int flags; /* Flags to pass to nested Tcl_Eval calls. */
char **termPtr; /* Store address of terminating character
* here. */
* fully-substituted result of parse. */
{
while (1) {
/*
* Target buffer space is about to run out. Make more space.
*/
}
c = *src;
src++;
if (c == termChar) {
*dst = '\0';
return TCL_OK;
} else if (CHAR_TYPE(c) == TCL_NORMAL) {
copy:
*dst = c;
dst++;
continue;
} else if (c == '$') {
int length;
char *value;
return TCL_ERROR;
}
}
continue;
} else if (c == '[') {
int result;
return result;
}
continue;
} else if (c == '\\') {
int numRead;
src--;
dst++;
continue;
} else if (c == '\0') {
return TCL_ERROR;
} else {
goto copy;
}
}
}
/*
*--------------------------------------------------------------
*
* TclParseNestedCmd --
*
* This procedure parses a nested Tcl command between
* brackets, returning the result of the command.
*
* Results:
* The return value is a standard Tcl result, which is
* TCL_OK unless there was an error while executing the
* nested command. If an error occurs then interp->result
* contains a standard error message. *TermPtr is filled
* in with the address of the character just after the
* last one processed; this is usually the character just
* after the matching close-bracket, or the null character
* at the end of the string if the close-bracket was missing
* (a missing close bracket is an error). The result returned
* by the command is stored in standard fashion in *pvPtr,
* null-terminated, with pvPtr->next pointing to the null
* character.
*
* Side effects:
* The storage space at *pvPtr may be expanded.
*
*--------------------------------------------------------------
*/
int
* evaluations and error messages. */
char *string; /* Character just after opening bracket. */
int flags; /* Flags to pass to nested Tcl_Eval. */
char **termPtr; /* Store address of terminating character
* here. */
* result of command. */
{
/*
* The increment below results in slightly cleaner message in
* the errorInfo variable (the close-bracket will appear).
*/
if (**termPtr == ']') {
*termPtr += 1;
}
return result;
}
(*termPtr) += 1;
if (shortfall > 0) {
}
return TCL_OK;
}
/*
*--------------------------------------------------------------
*
* TclParseBraces --
*
* This procedure scans the information between matching
* curly braces.
*
* Results:
* The return value is a standard Tcl result, which is
* TCL_OK unless there was an error while parsing string.
* If an error occurs then interp->result contains a
* standard error message. *TermPtr is filled
* in with the address of the character just after the
* last one successfully processed; this is usually the
* character just after the matching close-brace. The
* information between curly braces is stored in standard
* fashion in *pvPtr, null-terminated with pvPtr->next
* pointing to the terminating null character.
*
* Side effects:
* The storage space at *pvPtr may be expanded.
*
*--------------------------------------------------------------
*/
int
* evaluations and error messages. */
char *string; /* Character just after opening bracket. */
char **termPtr; /* Store address of terminating character
* here. */
* result of command. */
{
int level;
register char c;
level = 1;
/*
* Copy the characters one at a time to the result area, stopping
* when the matching close-brace is found.
*/
while (1) {
c = *src;
src++;
}
*dst = c;
dst++;
if (CHAR_TYPE(c) == TCL_NORMAL) {
continue;
} else if (c == '{') {
level++;
} else if (c == '}') {
level--;
if (level == 0) {
dst--; /* Don't copy the last close brace. */
break;
}
} else if (c == '\\') {
int count;
/*
* Must always squish out backslash-newlines, even when in
* braces. This is needed so that this sequence can appear
* anywhere in a command, such as the middle of an expression.
*/
if (*src == '\n') {
} else {
while (count > 1) {
}
dst++;
src++;
count--;
}
}
} else if (c == '\0') {
return TCL_ERROR;
}
}
*dst = '\0';
return TCL_OK;
}
/*
*--------------------------------------------------------------
*
* TclParseWords --
*
* This procedure parses one or more words from a command
* string and creates argv-style pointers to fully-substituted
* copies of those words.
*
* Results:
* The return value is a standard Tcl result.
*
* *argcPtr is modified to hold a count of the number of words
* successfully parsed, which may be 0. At most maxWords words
* will be parsed. If 0 <= *argcPtr < maxWords then it
* means that a command separator was seen. If *argcPtr
* is maxWords then it means that a command separator was
* not seen yet.
*
* *TermPtr is filled in with the address of the character
* just after the last one successfully processed in the
* last word. This is either the command terminator (if
* *argcPtr < maxWords), the character just after the last
* one in a word (if *argcPtr is maxWords), or the vicinity
* of an error (if the result is not TCL_OK).
*
* The pointers at *argv are filled in with pointers to the
* fully-substituted words, and the actual contents of the
* words are copied to the buffer at pvPtr.
*
* If an error occurrs then an error message is left in
* interp->result and the information at *argv, *argcPtr,
* and *pvPtr may be incomplete.
*
* Side effects:
* The buffer space in pvPtr may be enlarged by calling its
* expandProc.
*
*--------------------------------------------------------------
*/
int
* evaluations and error messages. */
char *string; /* First character of word. */
int flags; /* Flags to control parsing (same values as
* passed to Tcl_Eval). */
int maxWords; /* Maximum number of words to parse. */
char **termPtr; /* Store address of terminating character
* here. */
int *argcPtr; /* Filled in with actual number of words
* parsed. */
char **argv; /* Store addresses of individual words here. */
* fully-substituted word. */
{
register char c;
* reallocated, so we can adjust all of the
* argv pointers. */
/*
* Skip leading space.
*/
c = *src;
src++;
c = *src;
}
/*
* Handle the normal case (i.e. no leading double-quote or brace).
*/
if (type == TCL_NORMAL) {
while (1) {
/*
* Target buffer space is about to run out. Make
* more space.
*/
}
if (type == TCL_NORMAL) {
copy:
*dst = c;
dst++;
src++;
goto wordEnd;
} else if (type == TCL_DOLLAR) {
int length;
char *value;
return TCL_ERROR;
}
}
} else if (type == TCL_COMMAND_END) {
goto copy;
}
/*
* End of command; simulate a word-end first, so
* that the end-of-command can be processed as the
* first thing in a new word.
*/
goto wordEnd;
} else if (type == TCL_OPEN_BRACKET) {
pvPtr);
return result;
}
} else if (type == TCL_BACKSLASH) {
int numRead;
/*
* The following special check allows a backslash-newline
* to be treated as a word-separator, as if the backslash
* and newline had been collapsed before command parsing
* began.
*/
goto wordEnd;
}
dst++;
} else {
goto copy;
}
c = *src;
}
} else {
/*
* Check for the end of the command.
*/
if (type == TCL_COMMAND_END) {
if (flags & TCL_BRACKET_TERM) {
if (c == '\0') {
return TCL_ERROR;
}
} else {
if (c == ']') {
goto normalArg;
}
}
goto done;
}
/*
* Now handle the special cases: open braces, double-quotes,
* and backslash-newline.
*/
} else if (type == TCL_OPEN_BRACE) {
/*
* This code is needed so that a backslash-newline at the
* very beginning of a word is treated as part of the white
* space between words and not as a space within the word.
*/
src += 2;
goto skipSpace;
} else {
goto normalArg;
}
return result;
}
/*
* Back from quotes or braces; make sure that the terminating
* character was the end of the word.
*/
c = **termPtr;
/*
* Line is continued on next line; the backslash-newline
* sequence turns into space, which is OK. No need to do
* anything here.
*/
} else {
if (*src == '"') {
"extra characters after close-quote",
} else {
"extra characters after close-brace",
}
return TCL_ERROR;
}
}
}
/*
* We're at the end of a word, so add a null terminator. Then
* see if the buffer was re-allocated during this word. If so,
* update all of the argv pointers.
*/
*dst = '\0';
dst++;
int i;
for (i = 0; i <= argc; i++) {
}
}
}
done:
return TCL_OK;
}
/*
*--------------------------------------------------------------
*
* TclExpandParseValue --
*
* This procedure is commonly used as the value of the
* expandProc in a ParseValue. It uses malloc to allocate
* more space for the result of a parse.
*
* Results:
* The buffer space in *pvPtr is reallocated to something
* larger, and if pvPtr->clientData is non-zero the old
* buffer is freed. Information is copied from the old
* buffer to the new one.
*
* Side effects:
* None.
*
*--------------------------------------------------------------
*/
void
* must be expanded. If the clientData
* in the structure is non-zero, it
* means that the current buffer is
* dynamically allocated. */
int needed; /* Minimum amount of additional space
* to allocate. */
{
int newSpace;
char *new;
/*
* Either double the size of the buffer or add enough new space
* to meet the demand, whichever produces a larger new buffer.
*/
} else {
}
/*
* Copy from old buffer to new, free old buffer if needed, and
* mark new buffer as malloc-ed.
*/
if (pvPtr->clientData != 0) {
}
}
/*
*----------------------------------------------------------------------
*
* TclWordEnd --
*
* Given a pointer into a Tcl command, find the end of the next
* word of the command.
*
* Results:
* The return value is a pointer to the last character that's part
* of the word pointed to by "start". If the word doesn't end
* properly within the string then the return value is the address
* of the null character at the end of the string.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------
*/
char *
char *start; /* Beginning of a word of a Tcl command. */
int nested; /* Zero means this is a top-level command.
* One means this is a nested command (close
* bracket is a word terminator). */
int *semiPtr; /* Set to 1 if word ends with a command-
* terminating semi-colon, zero otherwise.
* If NULL then ignored. */
{
register char *p;
int count;
*semiPtr = 0;
}
/*
* Skip leading white space (backslash-newline must be treated like
* white-space, except that it better not be the last thing in the
* command).
*/
for (p = start; ; p++) {
continue;
}
if ((p[0] == '\\') && (p[1] == '\n')) {
if (p[2] == 0) {
return p+2;
}
continue;
}
break;
}
/*
* Handle words beginning with a double-quote or a brace.
*/
if (*p == '"') {
if (*p == 0) {
return p;
}
p++;
} else if (*p == '{') {
while (braces != 0) {
p++;
while (*p == '\\') {
(void) Tcl_Backslash(p, &count);
p += count;
}
if (*p == '}') {
braces--;
} else if (*p == '{') {
braces++;
} else if (*p == 0) {
return p;
}
}
p++;
}
/*
* Handle words that don't start with a brace or double-quote.
* This code is also invoked if the word starts with a brace or
* double-quote and there is garbage after the closing brace or
* quote. This is an error as far as Tcl_Eval is concerned, but
* for here the garbage is treated as part of the word.
*/
while (1) {
if (*p == '[') {
if (*p == 0) {
return p;
}
p++;
} else if (*p == '\\') {
if (p[1] == '\n') {
/*
* Backslash-newline: it maps to a space character
* that is a word separator, so the word ends just before
* the backslash.
*/
return p-1;
}
(void) Tcl_Backslash(p, &count);
p += count;
} else if (*p == '$') {
p = VarNameEnd(p);
if (*p == 0) {
return p;
}
p++;
} else if (*p == ';') {
/*
* Include the semi-colon in the word that is returned.
*/
*semiPtr = 1;
}
return p;
return p-1;
} else if ((*p == ']') && nested) {
return p-1;
} else if (*p == 0) {
if (nested) {
/*
* Nested commands can't end because of the end of the
* string.
*/
return p;
}
return p-1;
} else {
p++;
}
}
}
/*
*----------------------------------------------------------------------
*
* QuoteEnd --
*
* Given a pointer to a string that obeys the parsing conventions
* for quoted things in Tcl, find the end of that quoted thing.
* The actual thing may be a quoted argument or a parenthesized
* index name.
*
* Results:
* The return value is a pointer to the last character that is
* part of the quoted string (i.e the character that's equal to
* term). If the quoted string doesn't terminate properly then
* the return value is a pointer to the null character at the
* end of the string.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------
*/
static char *
char *string; /* Pointer to character just after opening
* "quote". */
int term; /* This character will terminate the
* quoted string (e.g. '"' or ')'). */
{
register char *p = string;
int count;
while (*p != term) {
if (*p == '\\') {
(void) Tcl_Backslash(p, &count);
p += count;
} else if (*p == '[') {
for (p++; *p != ']'; p++) {
if (*p == 0) {
return p;
}
}
p++;
} else if (*p == '$') {
p = VarNameEnd(p);
if (*p == 0) {
return p;
}
p++;
} else if (*p == 0) {
return p;
} else {
p++;
}
}
return p-1;
}
/*
*----------------------------------------------------------------------
*
* VarNameEnd --
*
* Given a pointer to a variable reference using $-notation, find
* the end of the variable name spec.
*
* Results:
* The return value is a pointer to the last character that
* is part of the variable name. If the variable name doesn't
* terminate properly then the return value is a pointer to the
* null character at the end of the string.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------
*/
static char *
char *string; /* Pointer to dollar-sign character. */
{
register char *p = string+1;
if (*p == '{') {
for (p++; (*p != '}') && (*p != 0); p++) {
/* Empty loop body. */
}
return p;
}
p++;
}
}
return p-1;
}
/*
*----------------------------------------------------------------------
*
* ScriptEnd --
*
* Given a pointer to the beginning of a Tcl script, find the end of
* the script.
*
* Results:
* The return value is a pointer to the last character that's part
* of the script pointed to by "p". If the command doesn't end
* properly within the string then the return value is the address
* of the null character at the end of the string.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------
*/
static char *
char *p; /* Script to check. */
int nested; /* Zero means this is a top-level command.
* One means this is a nested command (the
* last character of the script must be
* an unquoted ]). */
{
int length;
while (1) {
if (*p == '\n') {
commentOK = 1;
}
p++;
}
if ((*p == '#') && commentOK) {
do {
if (*p == '\\') {
/*
* If the script ends with backslash-newline, then
* this command isn't complete.
*/
if ((p[1] == '\n') && (p[2] == 0)) {
return p+2;
}
Tcl_Backslash(p, &length);
p += length;
} else {
p++;
}
} while ((*p != 0) && (*p != '\n'));
continue;
}
if (*p == 0) {
return p;
}
p++;
if (nested) {
if (*p == ']') {
return p;
}
} else {
if (*p == 0) {
return p-1;
}
}
}
}
/*
*----------------------------------------------------------------------
*
* Tcl_ParseVar --
*
* Given a string starting with a $ sign, parse off a variable
* name and return its value.
*
* Results:
* The return value is the contents of the variable given by
* the leading characters of string. If termPtr isn't NULL,
* *termPtr gets filled in with the address of the character
* just after the last one in the variable specifier. If the
* variable doesn't exist, then the return value is NULL and
* an error message will be left in interp->result.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------
*/
char *
register char *string; /* String containing variable name.
* First character must be "$". */
char **termPtr; /* If non-NULL, points to word to fill
* in with character just after last
* one in the variable specifier. */
{
register char *name2;
/*
* There are three cases:
* 1. The $ sign is followed by an open curly brace. Then the variable
* name is everything up to the next close curly brace, and the
* variable is a scalar variable.
* 2. The $ sign is not followed by an open curly brace. Then the
* variable name is everything up to the next character that isn't
* a letter, digit, or underscore. If the following character is an
* open parenthesis, then the information between parentheses is
* the array element name, which can include any of the substitutions
* permissible between quotes.
* 3. The $ sign is followed by something that isn't a letter, digit,
* or underscore: in this case, there is no variable name, and "$"
* is returned.
*/
string++;
if (*string == '{') {
string++;
while (*string != '}') {
if (*string == 0) {
if (termPtr != 0) {
}
return NULL;
}
string++;
}
string++;
} else {
string++;
}
if (termPtr != 0) {
}
return "$";
}
if (*string == '(') {
char *end;
/*
* Perform substitutions on the array element name, just as
* is done for quotes.
*/
!= TCL_OK) {
int length;
if (length > 100) {
length = 100;
}
if (termPtr != 0) {
}
goto done;
}
}
}
if (termPtr != 0) {
}
return "";
}
c = *name1End;
*name1End = 0;
*name1End = c;
done:
}
return result;
}
/*
*----------------------------------------------------------------------
*
* Tcl_CommandComplete --
*
* Given a partial or complete Tcl command, this procedure
* determines whether the command is complete in the sense
* of having matched braces and quotes and brackets.
*
* Results:
* 1 is returned if the command is complete, 0 otherwise.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------
*/
int
char *cmd; /* Command to check. */
{
char *p;
if (*cmd == 0) {
return 1;
}
return (*p != 0);
}