// parseAttribute.cxx revision 7c478bd95313f5f23a4c958a745db2134aa03244
// Copyright (c) 1994, 1995 James Clark
// See the file COPYING for copying permission.
#pragma ident "%Z%%M% %I% %E% SMI"
#include "splib.h"
#include "Parser.h"
#include "MessageArg.h"
#include "token.h"
#include "macros.h"
#include "ParserMessages.h"
#ifdef SP_NAMESPACE
namespace SP_NAMESPACE {
#endif
// Parse an attribute specification list and record the result in atts.
//
// inDecl      - forwarded unchanged to parseAttributeParameter and
//               parseAttributeValueSpec, where it selects the token mode.
// atts        - receives the specified attributes; atts.finish() is called
//               once the end of the list has been reached.
// netEnabling - forwarded to parseAttributeParameter, which assigns it for
//               some of the tokens that terminate the list.
// newAttDef   - forwarded to parseAttributeValueSpec, which may install an
//               extended definition list in it.
//
// Returns 0 as soon as any helper reports failure.  Otherwise reports
// ParserMessages::attsplen if the accumulated length exceeds
// syntax().attsplen() and returns 1.
Boolean Parser::parseAttributeSpec(Boolean inDecl,
                                   AttributeList &atts,
                                   Boolean &netEnabling,
                                   Ptr<AttributeDefinitionList> &newAttDef)
{
  // Accumulated length of the specification; updated here and by the helpers.
  unsigned specLength = 0;
  AttributeParameter::Type curParm;
  if (!parseAttributeParameter(inDecl, 0, curParm, netEnabling))
    return 0;
  while (curParm != AttributeParameter::end) {
    switch (curParm) {
    case AttributeParameter::name:
      {
        Text text;
        text.addChars(currentInput()->currentTokenStart(),
                      currentInput()->currentTokenLength(),
                      currentLocation());
        // Index of the last markup item at this point, so that it can be
        // re-tagged as an attribute value below.  Initialized so that it
        // never holds an indeterminate value when no markup is recorded.
        size_t nameMarkupIndex = 0;
        if (currentMarkup())
          nameMarkupIndex = currentMarkup()->size() - 1;
        text.subst(*syntax().generalSubstTable(), syntax().space());
        // Directly after a name a value indicator is allowed (allowVi == 1).
        if (!parseAttributeParameter(inDecl, 1, curParm, netEnabling))
          return 0;
        if (curParm == AttributeParameter::vi) {
          specLength += text.size() + syntax().normsep();
          if (!parseAttributeValueSpec(inDecl, text.string(), atts,
                                       specLength, newAttDef))
            return 0;
          // setup for next attribute
          if (!parseAttributeParameter(inDecl, 0, curParm, netEnabling))
            return 0;
        }
        else {
          // No value indicator followed: handle the name as a name token.
          // curParm already holds the parameter after it, so the loop
          // continues without fetching another one.
          if (currentMarkup())
            currentMarkup()->changeToAttributeValue(nameMarkupIndex);
          if (!handleAttributeNameToken(text, atts, specLength))
            return 0;
        }
      }
      break;
    case AttributeParameter::nameToken:
      {
        Text text;
        text.addChars(currentInput()->currentTokenStart(),
                      currentInput()->currentTokenLength(),
                      currentLocation());
        text.subst(*syntax().generalSubstTable(), syntax().space());
        if (!handleAttributeNameToken(text, atts, specLength))
          return 0;
        if (!parseAttributeParameter(inDecl, 0, curParm, netEnabling))
          return 0;
      }
      break;
    case AttributeParameter::recoverUnquoted:
      {
        if (!atts.recoverUnquoted(currentToken(), currentLocation(), *this)) {
          // Don't treat it as an unquoted attribute value.
          currentInput()->endToken(1);
          if (!atts.handleAsUnterminated(*this))
            message(ParserMessages::attributeSpecCharacter,
                    StringMessageArg(currentToken()));
          return 0;
        }
        if (!parseAttributeParameter(inDecl, 0, curParm, netEnabling))
          return 0;
      }
      break;
    default:
      CANNOT_HAPPEN();
    }
  }
  atts.finish(*this);
  if (specLength > syntax().attsplen())
    message(ParserMessages::attsplen,
            NumberMessageArg(syntax().attsplen()),
            NumberMessageArg(specLength));
  return 1;
}
// Handle a name token that appeared without an attribute name.
//
// Looks the token up with atts.tokenIndex().  Returns 0 only when the token
// is unknown and atts.handleAsUnterminated() returns true; every other path
// returns 1, including the paths that only report an error message.
Boolean Parser::handleAttributeNameToken(Text &text,
                                         AttributeList &atts,
                                         unsigned &specLength)
{
  unsigned tokenIdx;

  // Unknown token.
  if (!atts.tokenIndex(text.string(), tokenIdx)) {
    if (atts.handleAsUnterminated(*this))
      return 0;
    atts.noteInvalidSpec();
    message(ParserMessages::noSuchAttributeToken,
            StringMessageArg(text.string()));
    return 1;
  }

  // Known token, but not unique; only checked when sd().www() is set.
  if (sd().www() && !atts.tokenIndexUnique(text.string(), tokenIdx)) {
    atts.noteInvalidSpec();
    message(ParserMessages::attributeTokenNotUnique,
            StringMessageArg(text.string()));
    return 1;
  }

  // Usable token: report the omitted name if applicable, then store it.
  if (!sd().attributeOmitName())
    message(ParserMessages::attributeNameShorttag);
  else if (options().warnMissingAttributeName)
    message(ParserMessages::missingAttributeName);
  atts.setSpec(tokenIdx, *this);
  atts.setValueToken(tokenIdx, text, *this, specLength);
  return 1;
}
// Parse the value that follows "name VI" in an attribute specification.
//
// inDecl selects asMode (true) or tagMode (false) for scanning.  If name is
// not found by atts.attributeIndex(), an ImpliedAttributeDefinition with a
// CdataDeclaredValue is appended to newAttDef (created from atts.def() if it
// is still null) and installed with atts.changeDef(); noSuchAttribute is
// reported unless implydefAttlist() is true.
//
// Returns 0 if no usable value could be scanned; otherwise returns the
// result of atts.setValue().
Boolean Parser::parseAttributeValueSpec(Boolean inDecl,
                                        const StringC &name,
                                        AttributeList &atts,
                                        unsigned &specLength,
                                        Ptr<AttributeDefinitionList> &newAttDef)
{
  const Mode scanMode = inDecl ? asMode : tagMode;
  Markup *recorder = currentMarkup();
  Token tok = getToken(scanMode);

  // Skip separators, recording them when markup is being kept.
  while (tok == tokenS) {
    if (recorder)
      recorder->addS(currentChar());
    tok = getToken(scanMode);
  }

  unsigned attIndex;
  if (!atts.attributeIndex(name, attIndex)) {
    if (!implydefAttlist())
      message(ParserMessages::noSuchAttribute, StringMessageArg(name));
    if (newAttDef.isNull())
      newAttDef = new AttributeDefinitionList(atts.def());
    newAttDef->append(new ImpliedAttributeDefinition(name,
                                                     new CdataDeclaredValue));
    atts.changeDef(newAttDef);
    // The definition just appended is the last one in the list.
    attIndex = atts.size() - 1;
  }
  atts.setSpec(attIndex, *this);

  Text value;
  switch (tok) {
  case tokenEe:
    message(ParserMessages::attributeSpecEntityEnd);
    return 0;

  case tokenTagc:
  case tokenDsc:
  case tokenVi:
    message(ParserMessages::attributeValueExpected);
    return 0;

  case tokenUnrecognized:
    if (reportNonSgmlCharacter())
      return 0;
    // fall through
  case tokenEtago:
  case tokenStago:
  case tokenNestc:
    // Recover by treating the input as an unquoted value.
    message(ParserMessages::unquotedAttributeValue);
    extendUnquotedAttributeValue();
    if (recorder)
      recorder->addAttributeValue(currentInput());
    value.addChars(currentInput()->currentTokenStart(),
                   currentInput()->currentTokenLength(),
                   currentLocation());
    break;

  case tokenNameStart:
  case tokenDigit:
  case tokenLcUcNmchar:
    // Value given as a bare token rather than a literal.
    if (!sd().attributeValueNotLiteral())
      message(ParserMessages::attributeValueShorttag);
    else if (options().warnAttributeValueNotLiteral)
      message(ParserMessages::attributeValueNotLiteral);
    extendNameToken(syntax().litlen() >= syntax().normsep()
                    ? syntax().litlen() - syntax().normsep()
                    : 0,
                    ParserMessages::attributeValueLength);
    if (recorder)
      recorder->addAttributeValue(currentInput());
    value.addChars(currentInput()->currentTokenStart(),
                   currentInput()->currentTokenLength(),
                   currentLocation());
    break;

  case tokenLit:
  case tokenLita:
    {
      const Boolean isLita = (tok == tokenLita);
      Boolean parsed;
      if (atts.tokenized(attIndex))
        parsed = parseTokenizedAttributeValueLiteral(isLita, value);
      else
        parsed = parseAttributeValueLiteral(isLita, value);
      if (!parsed)
        return 0;
      if (recorder)
        recorder->addLiteral(value);
    }
    break;

  default:
    CANNOT_HAPPEN();
  }
  return atts.setValue(attIndex, value, *this, specLength);
}
// Scan the next parameter of an attribute specification list.
//
// inDecl      - true selects asMode, false selects tagMode.
// allowVi     - whether a value indicator is acceptable at this point; also
//               selects which message is reported for a literal.
// result      - set to the kind of parameter found.
// netEnabling - set to 0 for ETAGO, STAGO and TAGC, set to 1 for NESTC;
//               left untouched for every other token.
//
// Returns 1 when result has been set, 0 after an error.
Boolean Parser::parseAttributeParameter(Boolean inDecl,
                                        Boolean allowVi,
                                        AttributeParameter::Type &result,
                                        Boolean &netEnabling)
{
  const Mode scanMode = inDecl ? asMode : tagMode;
  Token tok = getToken(scanMode);
  Markup *recorder = currentMarkup();

  // Skip separators, recording them when markup is being kept.
  while (tok == tokenS) {
    if (recorder)
      recorder->addS(currentChar());
    tok = getToken(scanMode);
  }

  switch (tok) {
  case tokenUnrecognized:
    if (reportNonSgmlCharacter())
      return 0;
    extendUnquotedAttributeValue();
    result = AttributeParameter::recoverUnquoted;
    return 1;

  case tokenEe:
    message(ParserMessages::attributeSpecEntityEnd);
    return 0;

  case tokenEtago:
  case tokenStago:
    // Another tag starts before this one was closed; push the token back.
    if (!sd().startTagUnclosed())
      message(ParserMessages::unclosedStartTagShorttag);
    result = AttributeParameter::end;
    currentInput()->ungetToken();
    netEnabling = 0;
    return 1;

  case tokenNestc:
    if (recorder)
      recorder->addDelim(Syntax::dNESTC);
    switch (sd().startTagNetEnable()) {
    case Sd::netEnableNo:
      message(ParserMessages::netEnablingStartTagShorttag);
      break;
    case Sd::netEnableImmednet:
      // Peek at the following token, then push it back.
      if (getToken(econnetMode) != tokenNet)
        message(ParserMessages::nestcWithoutNet);
      currentInput()->ungetToken();
      break;
    case Sd::netEnableAll:
      break;
    }
    netEnabling = 1;
    result = AttributeParameter::end;
    return 1;

  case tokenTagc:
    if (recorder)
      recorder->addDelim(Syntax::dTAGC);
    netEnabling = 0;
    result = AttributeParameter::end;
    return 1;

  case tokenDsc:
    if (recorder)
      recorder->addDelim(Syntax::dDSC);
    result = AttributeParameter::end;
    return 1;

  case tokenNameStart:
  case tokenDigit:
  case tokenLcUcNmchar:
    extendNameToken(syntax().namelen(), ParserMessages::nameTokenLength);
    if (recorder)
      recorder->addName(currentInput());
    // Only a token beginning with a name start character counts as a name.
    result = (tok == tokenNameStart
              ? AttributeParameter::name
              : AttributeParameter::nameToken);
    return 1;

  case tokenLit:
  case tokenLita:
    if (allowVi)
      message(ParserMessages::attributeSpecLiteral);
    else
      message(ParserMessages::attributeSpecNameTokenExpected);
    return 0;

  case tokenVi:
    if (!allowVi) {
      message(ParserMessages::attributeSpecNameTokenExpected);
      return 0;
    }
    if (recorder)
      recorder->addDelim(Syntax::dVI);
    result = AttributeParameter::vi;
    return 1;

  default:
    CANNOT_HAPPEN();
  }
  return 1;
}
void Parser::extendUnquotedAttributeValue()
{
InputSource *in = currentInput();
size_t length = in->currentTokenLength();
const Syntax &syn = syntax();
for (;;) {
Xchar c = in->tokenChar(messenger());
if (syn.isS(c)
|| !syn.isSgmlChar(c)
|| c == InputSource::eE
|| c == syn.delimGeneral(Syntax::dTAGC)[0])
break;
length++;
}
in->endToken(length);
}
// Parse an attribute value literal into text.
//
// lita selects alitaMode instead of alitMode.  The length limit is
// LITLEN - NORMSEP, clamped to 0.  Returns 0 if parseLiteral() fails,
// otherwise 1.  An empty value is additionally reported with
// attributeValueLengthNeg when NORMSEP exceeds LITLEN.
Boolean Parser::parseAttributeValueLiteral(Boolean lita, Text &text)
{
  size_t maxLength = 0;
  if (syntax().litlen() > syntax().normsep())
    maxLength = syntax().litlen() - syntax().normsep();

  unsigned flags = literalNonSgml;
  if (wantMarkup())
    flags |= unsigned(literalDelimInfo);

  if (!parseLiteral(lita ? alitaMode : alitMode, aliteMode,
                    maxLength,
                    ParserMessages::attributeValueLength,
                    flags,
                    text))
    return 0;

  if (text.size() == 0
      && syntax().normsep() > syntax().litlen())
    message(ParserMessages::attributeValueLengthNeg,
            NumberMessageArg(syntax().normsep() - syntax().litlen()));
  return 1;
}
// Parse the literal value of a tokenized attribute into text.
//
// lita selects talitaMode instead of talitMode.  The length limit is
// LITLEN - NORMSEP, clamped to 0, and the literal is parsed with the
// literalSingleSpace flag.  Returns 0 if parseLiteral() fails, otherwise 1.
// An empty value is additionally reported with
// tokenizedAttributeValueLengthNeg when NORMSEP exceeds LITLEN.
Boolean Parser::parseTokenizedAttributeValueLiteral(Boolean lita, Text &text)
{
  size_t maxLength = 0;
  if (syntax().litlen() > syntax().normsep())
    maxLength = syntax().litlen() - syntax().normsep();

  unsigned flags = literalSingleSpace;
  if (wantMarkup())
    flags |= unsigned(literalDelimInfo);

  if (!parseLiteral(lita ? talitaMode : talitMode, taliteMode,
                    maxLength,
                    ParserMessages::tokenizedAttributeValueLength,
                    flags,
                    text))
    return 0;

  if (text.size() == 0
      && syntax().normsep() > syntax().litlen())
    message(ParserMessages::tokenizedAttributeValueLengthNeg,
            NumberMessageArg(syntax().normsep() - syntax().litlen()));
  return 1;
}
// Scan over an attribute specification list without storing any attribute
// values.  Parameters are always scanned with inDecl == 0 (tagMode).
//
// Returns 0 as soon as an error makes further scanning impossible.  On
// reaching the end of the list, reports ParserMessages::startTagGroupNet if
// the list ended in a way that set netEnabling, and returns 1.
Boolean Parser::skipAttributeSpec()
{
  AttributeParameter::Type parm;
  // parseAttributeParameter() does not assign netEnabling on every path that
  // ends the list (the DSC case leaves it alone), so give it a defined value
  // before it is tested at the end of this function.
  Boolean netEnabling = 0;
  if (!parseAttributeParameter(0, 0, parm, netEnabling))
    return 0;
  while (parm != AttributeParameter::end) {
    if (parm == AttributeParameter::name) {
      // Index of the last markup item at this point, so that it can be
      // re-tagged as an attribute value if no value indicator follows.
      size_t nameMarkupIndex = 0;
      if (currentMarkup())
        nameMarkupIndex = currentMarkup()->size() - 1;
      if (!parseAttributeParameter(0, 1, parm, netEnabling))
        return 0;
      if (parm == AttributeParameter::vi) {
        // Skip separators between the value indicator and the value.
        Token token = getToken(tagMode);
        while (token == tokenS) {
          if (currentMarkup())
            currentMarkup()->addS(currentChar());
          token = getToken(tagMode);
        }
        switch (token) {
        case tokenUnrecognized:
          if (!reportNonSgmlCharacter())
            message(ParserMessages::attributeSpecCharacter,
                    StringMessageArg(currentToken()));
          return 0;
        case tokenEe:
          message(ParserMessages::attributeSpecEntityEnd);
          return 0;
        case tokenEtago:
        case tokenStago:
        case tokenNestc:
        case tokenTagc:
        case tokenDsc:
        case tokenVi:
          message(ParserMessages::attributeValueExpected);
          return 0;
        case tokenNameStart:
        case tokenDigit:
        case tokenLcUcNmchar:
          // Value given as a bare token rather than a literal.
          if (!sd().attributeValueNotLiteral())
            message(ParserMessages::attributeValueShorttag);
          extendNameToken(syntax().litlen() >= syntax().normsep()
                          ? syntax().litlen() - syntax().normsep()
                          : 0,
                          ParserMessages::attributeValueLength);
          if (currentMarkup())
            currentMarkup()->addAttributeValue(currentInput());
          break;
        case tokenLit:
        case tokenLita:
          {
            // The literal is parsed only to get past it (and to record it
            // in the markup); the text is discarded afterwards.
            Text text;
            if (!parseLiteral(token == tokenLita ? talitaMode : talitMode,
                              taliteMode,
                              syntax().litlen(),
                              ParserMessages::tokenizedAttributeValueLength,
                              (currentMarkup() ? literalDelimInfo : 0)
                              | literalNoProcess,
                              text))
              return 0;
            if (currentMarkup())
              currentMarkup()->addLiteral(text);
          }
          break;
        default:
          CANNOT_HAPPEN();
        }
        if (!parseAttributeParameter(0, 0, parm, netEnabling))
          return 0;
      }
      else {
        // No value indicator followed: the name was a value with its
        // attribute name omitted.  parm already holds the next parameter.
        if (currentMarkup())
          currentMarkup()->changeToAttributeValue(nameMarkupIndex);
        if (!sd().attributeOmitName())
          message(ParserMessages::attributeNameShorttag);
      }
    }
    else {
      // It's a name token.
      if (!parseAttributeParameter(0, 0, parm, netEnabling))
        return 0;
      if (!sd().attributeOmitName())
        message(ParserMessages::attributeNameShorttag);
    }
  }
  if (netEnabling)
    message(ParserMessages::startTagGroupNet);
  return 1;
}
#ifdef SP_NAMESPACE
}
#endif