parseInstance.cxx revision 7c478bd95313f5f23a4c958a745db2134aa03244
// Copyright (c) 1994 James Clark
// See the file COPYING for copying permission.
#pragma ident "%Z%%M% %I% %E% SMI"
#include "splib.h"
#include "Parser.h"
#include "ParserMessages.h"
#include "MessageArg.h"
#include "TokenMessageArg.h"
#include "StringVectorMessageArg.h"
#include "token.h"
#include "macros.h"
#ifdef SP_NAMESPACE
namespace SP_NAMESPACE {
#endif
void Parser::doInstanceStart()
{
if (cancelled()) {
allDone();
return;
}
// FIXME check here that we have a valid dtd
compileInstanceModes();
setPhase(contentPhase);
Token token = getToken(currentMode());
switch (token) {
case tokenEe:
case tokenStagoNameStart:
case tokenStagoTagc:
case tokenStagoGrpo:
case tokenEtagoNameStart:
case tokenEtagoTagc:
case tokenEtagoGrpo:
break;
default:
if (sd().omittag()) {
unsigned startImpliedCount = 0;
unsigned attributeListIndex = 0;
IList<Undo> undoList;
IList<Event> eventList;
if (!tryImplyTag(currentLocation(),
startImpliedCount,
attributeListIndex,
undoList,
eventList))
CANNOT_HAPPEN();
queueElementEvents(eventList);
}
else
message(ParserMessages::instanceStartOmittag);
}
currentInput()->ungetToken();
}
// Finish the document instance: end every open element, report and close
// every marked section that is still open, check outstanding ID
// references, pop the input stack and signal completion.
void Parser::endInstance()
{
  // All checking is done before the entity stack is popped so that
  // error messages still have a current location.
  endAllElements();
  for (; markedSectionLevel() > 0; endMarkedSection())
    message(ParserMessages::unclosedMarkedSection,
            currentMarkedSectionStartLocation());
  checkIdrefs();
  popInputStack();
  allDone();
}
void Parser::checkIdrefs()
{
IdTableIter iter(idTableIter());
Id *id;
while ((id = iter.next()) != 0) {
for (size_t i = 0; i < id->pendingRefs().size(); i++) {
Messenger::setNextLocation(id->pendingRefs()[i]);
message(ParserMessages::missingId, StringMessageArg(id->name()));
}
}
}
// Main content-parsing loop.
// Repeatedly fetches a token in the current mode and dispatches on it.
// The loop runs for as long as the event queue is empty; it also returns
// early when the parse has been cancelled or when the end of the
// outermost input (input level 1) finishes the instance.
void Parser::doContent()
{
do {
if (cancelled()) {
allDone();
return;
}
Token token = getToken(currentMode());
switch (token) {
// Entity end.
case tokenEe:
// End of the outermost input ends the whole instance.
if (inputLevel() == 1) {
endInstance();
return;
}
if (inputLevel() == specialParseInputLevel()) {
// FIXME have separate messages for each type of special parse
// perhaps force end of marked section or element
message(ParserMessages::specialParseEntityEnd);
}
if (eventsWanted().wantInstanceMarkup())
eventHandler().entityEnd(new (eventAllocator())
EntityEndEvent(currentLocation()));
if (afterDocumentElement())
message(ParserMessages::afterDocumentElementEntityEnd);
// With integrally stored entities, the entity must end in the same
// element in which the reference to it occurred.
if (sd().integrallyStored()
&& tagLevel()
&& currentElement().index() != currentInputElementIndex())
message(ParserMessages::contentAsyncEntityRef);
popInputStack();
break;
// Numeric character reference (decimal or hexadecimal).
case tokenCroDigit:
case tokenHcroHexDigit:
{
if (afterDocumentElement())
message(ParserMessages::characterReferenceAfterDocumentElement);
Char ch;
Location loc;
if (parseNumericCharRef(token == tokenHcroHexDigit, ch, loc)) {
acceptPcdata(loc);
noteData();
Boolean isSgmlChar;
// Nothing is reported when the referenced character cannot be
// translated.
if (!translateNumericCharRef(ch, isSgmlChar))
break;
if (!isSgmlChar) {
eventHandler().nonSgmlChar(new (eventAllocator())
NonSgmlCharEvent(ch, loc));
break;
}
eventHandler().data(new (eventAllocator())
ImmediateDataEvent(Event::characterData,
&ch, 1, loc, 1));
break;
}
}
break;
// Named character reference.
case tokenCroNameStart:
if (afterDocumentElement())
message(ParserMessages::characterReferenceAfterDocumentElement);
parseNamedCharRef();
break;
// General entity reference, optionally with a name group.
case tokenEroGrpo:
case tokenEroNameStart:
{
if (afterDocumentElement())
message(ParserMessages::entityReferenceAfterDocumentElement);
ConstPtr<Entity> entity;
Ptr<EntityOrigin> origin;
if (parseEntityReference(0, token == tokenEroGrpo, entity, origin)) {
if (!entity.isNull()) {
if (entity->isCharacterData())
acceptPcdata(Location(origin.pointer(), 0));
// At the input level of a special parse the reference is handled
// as an rcdata reference, otherwise as a content reference.
if (inputLevel() == specialParseInputLevel())
entity->rcdataReference(*this, origin);
else
entity->contentReference(*this, origin);
}
}
}
break;
// End-tags.
case tokenEtagoNameStart:
acceptEndTag(parseEndTag());
break;
case tokenEtagoTagc:
parseEmptyEndTag();
break;
case tokenEtagoGrpo:
parseGroupEndTag();
break;
// Markup declaration in the instance.  Only USEMAP and USELINK are
// parsed here; every other declaration name is reported and the
// declaration is skipped.
case tokenMdoNameStart:
if (startMarkup(eventsWanted().wantInstanceMarkup(), currentLocation()))
currentMarkup()->addDelim(Syntax::dMDO);
Syntax::ReservedName name;
Boolean result;
unsigned startLevel;
// Remember the input level at which the declaration started; it is
// passed to skipDeclaration() when the declaration has to be skipped.
startLevel = inputLevel();
if (parseDeclarationName(&name)) {
switch (name) {
case Syntax::rUSEMAP:
if (afterDocumentElement())
message(ParserMessages::declarationAfterDocumentElement,
StringMessageArg(syntax().reservedName(name)));
result = parseUsemapDecl();
break;
case Syntax::rUSELINK:
if (afterDocumentElement())
message(ParserMessages::declarationAfterDocumentElement,
StringMessageArg(syntax().reservedName(name)));
result = parseUselinkDecl();
break;
// Declarations that are reported as instanceDeclaration.
case Syntax::rDOCTYPE:
case Syntax::rLINKTYPE:
case Syntax::rELEMENT:
case Syntax::rATTLIST:
case Syntax::rENTITY:
case Syntax::rNOTATION:
case Syntax::rSHORTREF:
case Syntax::rLINK:
case Syntax::rIDLINK:
message(ParserMessages::instanceDeclaration,
StringMessageArg(syntax().reservedName(name)));
result = 0;
break;
default:
message(ParserMessages::noSuchDeclarationType,
StringMessageArg(syntax().reservedName(name)));
result = 0;
break;
}
}
else
result = 0;
if (!result)
skipDeclaration(startLevel);
noteMarkup();
break;
case tokenMdoMdc:
// empty comment
emptyCommentDecl();
noteMarkup();
break;
// Comment declaration.
case tokenMdoCom:
parseCommentDecl();
noteMarkup();
break;
// Marked section start.
case tokenMdoDso:
if (afterDocumentElement())
message(ParserMessages::markedSectionAfterDocumentElement);
parseMarkedSectionDeclStart();
noteMarkup();
break;
// Marked section end.
case tokenMscMdc:
handleMarkedSectionEnd();
noteMarkup();
break;
// Null end-tag.
case tokenNet:
parseNullEndTag();
break;
// Processing instruction.
case tokenPio:
parseProcessingInstruction();
break;
// Start-tags.
case tokenStagoNameStart:
parseStartTag();
break;
case tokenStagoTagc:
parseEmptyStartTag();
break;
case tokenStagoGrpo:
parseGroupStartTag();
break;
// Record end: accepted as pcdata and queued via queueRe().
case tokenRe:
acceptPcdata(currentLocation());
queueRe(currentLocation());
break;
// Record start: accepted as pcdata, noted, and reported only as an
// ignoredRs event when instance markup is wanted.
case tokenRs:
acceptPcdata(currentLocation());
noteRs();
if (eventsWanted().wantInstanceMarkup())
eventHandler().ignoredRs(new (eventAllocator())
IgnoredRsEvent(currentChar(),
currentLocation()));
break;
// Separator characters; the token is first extended by extendContentS().
case tokenS:
extendContentS();
if (eventsWanted().wantInstanceMarkup())
eventHandler().sSep(new (eventAllocator())
SSepEvent(currentInput()->currentTokenStart(),
currentInput()->currentTokenLength(),
currentLocation(),
0));
break;
// Ignored characters, reported only when marked sections are wanted.
case tokenIgnoredChar:
extendData();
if (eventsWanted().wantMarkedSections())
eventHandler().ignoredChars(new (eventAllocator())
IgnoredCharsEvent(currentInput()->currentTokenStart(),
currentInput()->currentTokenLength(),
currentLocation(),
0));
break;
case tokenUnrecognized:
reportNonSgmlCharacter();
parsePcdata();
break;
// A delimiter character occurring as data: report it, then treat it as
// ordinary data.
case tokenCharDelim:
message(ParserMessages::dataCharDelim,
StringMessageArg(StringC(currentInput()->currentTokenStart(),
currentInput()->currentTokenLength())));
// fall through
case tokenChar:
parsePcdata();
break;
// Every remaining token is a short reference; its index is the token
// number relative to tokenFirstShortref.
default:
ASSERT(token >= tokenFirstShortref);
handleShortref(token - tokenFirstShortref);
break;
}
} while (eventQueueEmpty());
}
void Parser::skipDeclaration(unsigned startLevel)
{
const unsigned skipMax = 250;
unsigned skipCount = 0;
for (;;) {
Token token = getToken(mdMode);
if (inputLevel() == startLevel)
skipCount++;
switch (token) {
case tokenUnrecognized:
(void)getChar();
break;
case tokenEe:
if (inputLevel() <= startLevel)
return;
popInputStack();
return;
case tokenMdc:
if (inputLevel() == startLevel)
return;
break;
case tokenS:
if (inputLevel() == startLevel && skipCount >= skipMax
&& currentChar() == syntax().standardFunction(Syntax::fRE))
return;
break;
default:
break;
}
}
}
void Parser::handleShortref(int index)
{
const ConstPtr<Entity> &entity
= currentElement().map()->entity(index);
if (!entity.isNull()) {
Owner<Markup> markupPtr;
if (eventsWanted().wantInstanceMarkup()) {
markupPtr = new Markup;
markupPtr->addShortref(currentInput());
}
Ptr<EntityOrigin> origin
= EntityOrigin::make(internalAllocator(),
entity,
currentLocation(),
currentInput()->currentTokenLength(),
markupPtr);
entity->contentReference(*this, origin);
return;
}
InputSource *in = currentInput();
size_t length = in->currentTokenLength();
const Char *s = in->currentTokenStart();
size_t i = 0;
if (currentMode() == econMode || currentMode() == econnetMode) {
// FIXME do this in advance (what about B sequence?)
for (i = 0; i < length && syntax().isS(s[i]); i++)
;
if (i > 0 && eventsWanted().wantInstanceMarkup())
eventHandler().sSep(new (eventAllocator())
SSepEvent(s, i, currentLocation(), 0));
}
if (i < length) {
Location location(currentLocation());
location += i;
s += i;
length -= i;
acceptPcdata(location);
if (sd().keeprsre()) {
noteData();
eventHandler().data(new (eventAllocator())
ImmediateDataEvent(Event::characterData, s, length,
location, 0));
return;
}
// FIXME speed this up
for (; length > 0; location += 1, length--, s++) {
if (*s == syntax().standardFunction(Syntax::fRS)) {
noteRs();
if (eventsWanted().wantInstanceMarkup())
eventHandler().ignoredRs(new (eventAllocator())
IgnoredRsEvent(*s, location));
}
else if (*s == syntax().standardFunction(Syntax::fRE))
queueRe(location);
else {
noteData();
eventHandler().data(new (eventAllocator())
ImmediateDataEvent(Event::characterData, s, 1,
location, 0));
}
}
}
}
void Parser::parsePcdata()
{
extendData();
acceptPcdata(currentLocation());
noteData();
eventHandler().data(new (eventAllocator())
ImmediateDataEvent(Event::characterData,
currentInput()->currentTokenStart(),
currentInput()->currentTokenLength(),
currentLocation(),
0));
}
void Parser::parseStartTag()
{
InputSource *in = currentInput();
Markup *markup = startMarkup(eventsWanted().wantInstanceMarkup(),
in->currentLocation());
in->discardInitial();
extendNameToken(syntax().namelen(), ParserMessages::nameLength);
if (markup) {
markup->addDelim(Syntax::dSTAGO);
markup->addName(in);
}
StringC &name = nameBuffer();
getCurrentToken(syntax().generalSubstTable(), name);
ElementType *e = currentDtdNonConst().lookupElementType(name);
if (sd().rank()) {
if (!e)
e = completeRankStem(name);
else if (e->isRankedElement())
handleRankedElement(e);
}
if (!e)
e = lookupCreateUndefinedElement(name, currentLocation(), currentDtdNonConst());
Boolean netEnabling;
AttributeList *attributes = allocAttributeList(e->attributeDef(), 0);
Token closeToken = getToken(tagMode);
if (closeToken == tokenTagc) {
if (name.size() > syntax().taglen())
checkTaglen(markupLocation().index());
attributes->finish(*this);
netEnabling = 0;
if (markup)
markup->addDelim(Syntax::dTAGC);
}
else {
in->ungetToken();
Ptr<AttributeDefinitionList> newAttDef;
if (parseAttributeSpec(0, *attributes, netEnabling, newAttDef)) {
// The difference between the indices will be the difference
// in offsets plus 1 for each named character reference.
if (in->currentLocation().index() - markupLocation().index()
> syntax().taglen())
checkTaglen(markupLocation().index());
}
else
netEnabling = 0;
if (!newAttDef.isNull()) {
newAttDef->setIndex(currentDtdNonConst().allocAttributeDefinitionListIndex());
e->setAttributeDef(newAttDef);
}
}
acceptStartTag(e,
new (eventAllocator())
StartElementEvent(e,
currentDtdPointer(),
attributes,
markupLocation(),
markup),
netEnabling);
}
// Try to interpret name as a rank stem of the current DTD.
// When it is one and a current rank can be appended to the stem's name,
// the element type with the completed name is looked up and returned.
// Returns 0 when name is not a rank stem, or, after reporting
// noCurrentRank, when no current rank could be appended.
ElementType *Parser::completeRankStem(const StringC &name)
{
  const RankStem *stem = currentDtd().lookupRankStem(name);
  if (!stem)
    return 0;
  StringC rankedName(stem->name());
  if (appendCurrentRank(rankedName, stem))
    return currentDtdNonConst().lookupElementType(rankedName);
  message(ParserMessages::noCurrentRank, StringMessageArg(rankedName));
  return 0;
}
void Parser::handleRankedElement(const ElementType *e)
{
StringC rankSuffix(e->definition()->rankSuffix());
const RankStem *rankStem = e->rankedElementRankStem();
for (size_t i = 0; i < rankStem->nDefinitions(); i++) {
const ElementDefinition *def = rankStem->definition(i);
for (size_t j = 0; j < def->nRankStems(); j++)
setCurrentRank(def->rankStem(j), rankSuffix);
}
}
// Report taglen when the distance, in input source offsets, between the
// current location and the position just after the STAGO delimiter of
// the tag starting at tagStartIndex exceeds the TAGLEN quantity.
void Parser::checkTaglen(Index tagStartIndex)
{
  const InputSourceOrigin *origin
    = currentLocation().origin()->asInputSourceOrigin();
  ASSERT(origin != 0);
  // Index of the first character after the start-tag open delimiter.
  const Index afterStago
    = tagStartIndex + syntax().delimGeneral(Syntax::dSTAGO).size();
  if (origin->startOffset(currentLocation().index())
      - origin->startOffset(afterStago)
      <= syntax().taglen())
    return;
  message(ParserMessages::taglen, NumberMessageArg(syntax().taglen()));
}
void Parser::parseEmptyStartTag()
{
if (options().warnEmptyTag)
message(ParserMessages::emptyStartTag);
// FIXME error if not in base.
const ElementType *e = 0;
if (!sd().omittag())
e = lastEndedElementType();
else if (tagLevel() > 0)
e = currentElement().type();
if (!e)
e = currentDtd().documentElementType();
AttributeList *attributes = allocAttributeList(e->attributeDef(), 0);
attributes->finish(*this);
Markup *markup = startMarkup(eventsWanted().wantInstanceMarkup(),
currentLocation());
if (markup) {
markup->addDelim(Syntax::dSTAGO);
markup->addDelim(Syntax::dTAGC);
}
acceptStartTag(e,
new (eventAllocator())
StartElementEvent(e,
currentDtdPointer(),
attributes,
markupLocation(),
markup),
0);
}
void Parser::parseGroupStartTag()
{
if (startMarkup(eventsWanted().wantInstanceMarkup(), currentLocation())) {
currentMarkup()->addDelim(Syntax::dSTAGO);
currentMarkup()->addDelim(Syntax::dGRPO);
}
Boolean active;
if (!parseTagNameGroup(active))
return;
InputSource *in = currentInput();
// Location startLocation = in->currentLocation();
in->startToken();
Xchar c = in->tokenChar(messenger());
if (!syntax().isNameStartCharacter(c)) {
message(ParserMessages::startTagMissingName);
return;
}
in->discardInitial();
extendNameToken(syntax().namelen(), ParserMessages::nameLength);
if (currentMarkup())
currentMarkup()->addName(currentInput());
skipAttributeSpec();
if (currentMarkup())
eventHandler().ignoredMarkup(new (eventAllocator())
IgnoredMarkupEvent(markupLocation(),
currentMarkup()));
noteMarkup();
}
// Parse an end-tag that begins with a name group (ETAGO GRPO ...).
// Such a tag does not end an element: after the name group a name is
// required (endTagMissingName otherwise), the close of the end-tag is
// parsed, and the tag is reported as ignored markup when markup is being
// collected.  Nothing further is done when the name group cannot be
// parsed or the name is missing.
void Parser::parseGroupEndTag()
{
  if (startMarkup(eventsWanted().wantInstanceMarkup(), currentLocation())) {
    // This function is dispatched for tokenEtagoGrpo, so the delimiter
    // that opened the tag is ETAGO (the end-tag open delimiter), not
    // STAGO.
    currentMarkup()->addDelim(Syntax::dETAGO);
    currentMarkup()->addDelim(Syntax::dGRPO);
  }
  Boolean active;
  if (!parseTagNameGroup(active))
    return;
  InputSource *in = currentInput();
  in->startToken();
  Xchar c = in->tokenChar(messenger());
  if (!syntax().isNameStartCharacter(c)) {
    message(ParserMessages::endTagMissingName);
    return;
  }
  in->discardInitial();
  extendNameToken(syntax().namelen(), ParserMessages::nameLength);
  if (currentMarkup())
    currentMarkup()->addName(currentInput());
  parseEndTagClose();
  if (currentMarkup())
    eventHandler().ignoredMarkup(new (eventAllocator())
                                 IgnoredMarkupEvent(markupLocation(),
                                                    currentMarkup()));
  noteMarkup();
}
void Parser::acceptPcdata(const Location &startLocation)
{
if (currentElement().tryTransitionPcdata())
return;
// Need to test here since implying tags may turn off pcdataRecovering.
if (pcdataRecovering())
return;
IList<Undo> undoList;
IList<Event> eventList;
unsigned startImpliedCount = 0;
unsigned attributeListIndex = 0;
keepMessages();
while (tryImplyTag(startLocation, startImpliedCount, attributeListIndex,
undoList, eventList))
if (currentElement().tryTransitionPcdata()) {
queueElementEvents(eventList);
return;
}
discardKeptMessages();
undo(undoList);
if (validate() || afterDocumentElement())
message(ParserMessages::pcdataNotAllowed);
pcdataRecover();
}
void Parser::acceptStartTag(const ElementType *e,
StartElementEvent *event,
Boolean netEnabling)
{
if (e->definition()->undefined() && !implydefElement())
message(ParserMessages::undefinedElement, StringMessageArg(e->name()));
if (elementIsExcluded(e)) {
keepMessages();
if (validate())
checkExclusion(e);
}
else {
if (currentElement().tryTransition(e)) {
pushElementCheck(e, event, netEnabling);
return;
}
if (elementIsIncluded(e)) {
event->setIncluded();
pushElementCheck(e, event, netEnabling);
return;
}
keepMessages();
}
IList<Undo> undoList;
IList<Event> eventList;
unsigned startImpliedCount = 0;
unsigned attributeListIndex = 1;
while (tryImplyTag(event->location(), startImpliedCount,
attributeListIndex, undoList, eventList))
if (tryStartTag(e, event, netEnabling, eventList))
return;
discardKeptMessages();
undo(undoList);
if (validate() && !e->definition()->undefined())
handleBadStartTag(e, event, netEnabling);
else {
if (validate() ? implydefElement() : afterDocumentElement())
message(ParserMessages::elementNotAllowed, StringMessageArg(e->name()));
// If element couldn't occur because it was excluded, then
// do the transition here.
(void)currentElement().tryTransition(e);
pushElementCheck(e, event, netEnabling);
}
}
// Take each Undo record off the front of undoList in turn, apply it to
// this parser and delete it, leaving the list empty.
void Parser::undo(IList<Undo> &undoList)
{
  for (; !undoList.empty(); ) {
    Undo *step = undoList.get();
    step->undo(this);
    delete step;
  }
}
// Release the kept messages and hand the element events in events to
// the event handler.  The list is drained in reverse order, so events
// are delivered last-to-first relative to the order in which get()
// returns them from events.  Every event that is not a start element
// event is treated as an end element event.
void Parser::queueElementEvents(IList<Event> &events)
{
  releaseKeptMessages();
  // FIXME provide IList<T>::reverse function
  // Reverse the list by moving each element to the front of a new one.
  IList<Event> reversed;
  for (; !events.empty(); )
    reversed.insert(events.get());
  for (; !reversed.empty(); ) {
    Event *ev = reversed.get();
    if (ev->type() != Event::startElement) {
      EndElementEvent *end = (EndElementEvent *)ev;
      noteEndElement(end->included());
      eventHandler().endElement(end);
    }
    else {
      StartElementEvent *start = (StartElementEvent *)ev;
      noteStartElement(start->included());
      eventHandler().startElement(start);
    }
  }
}
void Parser::checkExclusion(const ElementType *e)
{
const LeafContentToken *token = currentElement().invalidExclusion(e);
if (token)
message(ParserMessages::invalidExclusion,
OrdinalMessageArg(token->typeIndex() + 1),
StringMessageArg(token->elementType()->name()),
StringMessageArg(currentElement().type()->name()));
}
// After tags have been implied, try once more to open element e.
// Returns 1 and queues impliedEvents followed by the element itself
// when e is now allowed by the content model or is an applicable
// inclusion.  Returns 0 otherwise; an excluded element is also checked
// with checkExclusion().
Boolean Parser::tryStartTag(const ElementType *e,
                            StartElementEvent *event,
                            Boolean netEnabling,
                            IList<Event> &impliedEvents)
{
  if (elementIsExcluded(e)) {
    checkExclusion(e);
    return 0;
  }
  const Boolean allowedByModel = currentElement().tryTransition(e);
  if (!allowedByModel && !elementIsIncluded(e))
    return 0;
  queueElementEvents(impliedEvents);
  if (!allowedByModel)
    event->setIncluded();
  pushElementCheck(e, event, netEnabling);
  return 1;
}
// Try to imply one tag at loc.
// When the current element is finished, its end-tag is implied
// (provided an element is open and its definition allows end-tag
// omission).  Otherwise the start-tag of the element required next by
// the content model is implied.  The event for the implied tag is
// inserted into eventList and a record for reversing the change into
// undo.  startImpliedCount counts implied start-tags that have not yet
// been matched by an implied end-tag; attributeListIndex selects, and
// is advanced past, the attribute list used for an implied start-tag.
// Returns 1 if a tag was implied, 0 if not (always 0 without OMITTAG).
Boolean Parser::tryImplyTag(const Location &loc,
                            unsigned &startImpliedCount,
                            unsigned &attributeListIndex,
                            IList<Undo> &undo,
                            IList<Event> &eventList)
{
  if (!sd().omittag())
    return 0;
  if (currentElement().isFinished()) {
    if (tagLevel() == 0)
      return 0;
#if 1
    const ElementDefinition *def = currentElement().type()->definition();
    if (def && !def->canOmitEndTag())
      return 0;
#endif
    // imply an end tag
    if (startImpliedCount > 0) {
      // The element being closed is one whose start-tag was itself
      // implied.
      message(ParserMessages::startTagEmptyElement,
              StringMessageArg(currentElement().type()->name()));
      startImpliedCount--;
    }
#if 0
    const ElementDefinition *def = currentElement().type()->definition();
    if (def && !def->canOmitEndTag())
      message(ParserMessages::omitEndTagDeclare,
              StringMessageArg(currentElement().type()->name()),
              currentElement().startLocation());
#endif
    EndElementEvent *impliedEnd
      = new (eventAllocator()) EndElementEvent(currentElement().type(),
                                               currentDtdPointer(),
                                               loc,
                                               0);
    eventList.insert(impliedEnd);
    undo.insert(new (internalAllocator()) UndoEndTag(popSaveElement()));
    return 1;
  }
  // The current element is not finished: imply a required start-tag.
  const LeafContentToken *required = currentElement().impliedStartTag();
  if (!required)
    return 0;
  const ElementType *implied = required->elementType();
  if (elementIsExcluded(implied))
    message(ParserMessages::requiredElementExcluded,
            OrdinalMessageArg(required->typeIndex() + 1),
            StringMessageArg(implied->name()),
            StringMessageArg(currentElement().type()->name()));
  if (tagLevel() != 0)
    undo.insert(new (internalAllocator())
                UndoTransition(currentElement().matchState()));
  currentElement().doRequiredTransition();
  const ElementDefinition *impliedDef = implied->definition();
  if (impliedDef->declaredContent() != ElementDefinition::modelGroup
      && impliedDef->declaredContent() != ElementDefinition::any)
    message(ParserMessages::omitStartTagDeclaredContent,
            StringMessageArg(implied->name()));
  if (impliedDef->undefined())
    message(ParserMessages::undefinedElement,
            StringMessageArg(implied->name()));
  else if (!impliedDef->canOmitStartTag())
    message(ParserMessages::omitStartTagDeclare,
            StringMessageArg(implied->name()));
  AttributeList *attributes
    = allocAttributeList(implied->attributeDef(),
                         attributeListIndex++);
  // this will give an error if the element has a required attribute
  attributes->finish(*this);
  startImpliedCount++;
  StartElementEvent *impliedStart
    = new (eventAllocator()) StartElementEvent(implied,
                                               currentDtdPointer(),
                                               attributes,
                                               loc,
                                               0);
  pushElementCheck(implied, impliedStart, undo, eventList);
  const int implyCheckLimit = 30; // this is fairly arbitrary
  if (startImpliedCount <= implyCheckLimit)
    return 1;
  // Many start-tags implied in a row: let checkImplyLoop() decide
  // whether implying may continue.
  return checkImplyLoop(startImpliedCount) ? 1 : 0;
}
void Parser::pushElementCheck(const ElementType *e, StartElementEvent *event,
Boolean netEnabling)
{
if (tagLevel() == syntax().taglvl())
message(ParserMessages::taglvlOpenElements, NumberMessageArg(syntax().taglvl()));
noteStartElement(event->included());
if (event->mustOmitEnd()) {
if (sd().emptyElementNormal()) {
Boolean included = event->included();
Location loc(event->location());
eventHandler().startElement(event);
endTagEmptyElement(e, netEnabling, included, loc);
}
else {
EndElementEvent *end
= new (eventAllocator()) EndElementEvent(e,
currentDtdPointer(),
event->location(),
0);
if (event->included()) {
end->setIncluded();
noteEndElement(1);
}
else
noteEndElement(0);
eventHandler().startElement(event);
eventHandler().endElement(end);
}
}
else {
const ShortReferenceMap *map = e->map();
if (!map)
map = currentElement().map();
pushElement(new (internalAllocator()) OpenElement(e,
netEnabling,
event->included(),
map,
event->location()));
// Can't access event after it's passed to the event handler.
eventHandler().startElement(event);
}
}
// Find the end of an element whose end-tag must be omitted, after its
// start element event has been reported (called from pushElementCheck()
// when sd().emptyElementNormal() is set).
//   e            the element type that was just started
//   netEnabling  whether the start-tag was net-enabling; selects the
//                mode in which the following token is scanned
//   included     whether the element occurred as an inclusion
//   startLoc     location of the start-tag, used for messages
// The token following the start-tag is examined:
//  - a NET (for a net-enabling start-tag) or an empty end-tag ends e;
//  - an end-tag naming e ends e;
//  - an end-tag naming another open element implies the end of e and is
//    then processed by acceptEndTag();
//  - an end-tag naming an element that is not open is reported and
//    dropped, and the end of e is implied;
//  - anything else is pushed back and the end of e is implied.
void Parser::endTagEmptyElement(const ElementType *e,
                                Boolean netEnabling,
                                Boolean included,
                                const Location &startLoc)
{
  Token token = getToken(netEnabling ? econnetMode : econMode);
  switch (token) {
  case tokenNet:
    if (netEnabling) {
      Markup *markup = startMarkup(eventsWanted().wantInstanceMarkup(),
                                   currentLocation());
      if (markup)
        markup->addDelim(Syntax::dNET);
      EndElementEvent *end
        = new (eventAllocator()) EndElementEvent(e,
                                                 currentDtdPointer(),
                                                 currentLocation(),
                                                 markup);
      if (included)
        end->setIncluded();
      eventHandler().endElement(end);
      noteEndElement(included);
      return;
    }
    break;
  case tokenEtagoTagc:
    {
      if (options().warnEmptyTag)
        message(ParserMessages::emptyEndTag);
      Markup *markup = startMarkup(eventsWanted().wantInstanceMarkup(),
                                   currentLocation());
      if (markup) {
        markup->addDelim(Syntax::dETAGO);
        markup->addDelim(Syntax::dTAGC);
      }
      EndElementEvent *end
        = new (eventAllocator()) EndElementEvent(e,
                                                 currentDtdPointer(),
                                                 currentLocation(),
                                                 markup);
      if (included)
        end->setIncluded();
      eventHandler().endElement(end);
      noteEndElement(included);
      return;
    }
  case tokenEtagoNameStart:
    {
      EndElementEvent *end = parseEndTag();
      if (end->elementType() == e) {
        if (included)
          end->setIncluded();
        eventHandler().endElement(end);
        noteEndElement(included);
        return;
      }
      if (!elementIsOpen(end->elementType())) {
        message(ParserMessages::elementNotOpen,
                StringMessageArg(end->elementType()->name()));
        delete end;
        // The stray end-tag has been consumed in full by parseEndTag().
        // Do not fall into the ungetToken() below: that would push back
        // the last token of the end-tag (normally its TAGC) and make it
        // be re-scanned as content.  Just imply the end of e, as
        // acceptEndTag() simply drops an end-tag for an unopened element.
        implyEmptyElementEnd(e, included, startLoc);
        return;
      }
      implyEmptyElementEnd(e, included, startLoc);
      acceptEndTag(end);
      return;
    }
  default:
    break;
  }
  // The token does not end e: imply the end-tag and let the token be
  // scanned again as ordinary content.
  implyEmptyElementEnd(e, included, startLoc);
  currentInput()->ungetToken();
}
void Parser::implyEmptyElementEnd(const ElementType *e,
Boolean included,
const Location &startLoc)
{
if (!sd().omittag())
message(ParserMessages::omitEndTagOmittag,
StringMessageArg(e->name()),
startLoc);
else {
const ElementDefinition *def = e->definition();
if (def && !def->canOmitEndTag())
message(ParserMessages::omitEndTagDeclare,
StringMessageArg(e->name()),
startLoc);
}
EndElementEvent *end
= new (eventAllocator()) EndElementEvent(e,
currentDtdPointer(),
currentLocation(),
0);
if (included)
end->setIncluded();
noteEndElement(included);
eventHandler().endElement(end);
}
void Parser::pushElementCheck(const ElementType *e, StartElementEvent *event,
IList<Undo> &undoList,
IList<Event> &eventList)
{
if (tagLevel() == syntax().taglvl())
message(ParserMessages::taglvlOpenElements, NumberMessageArg(syntax().taglvl()));
eventList.insert(event);
if (event->mustOmitEnd()) {
EndElementEvent *end
= new (eventAllocator()) EndElementEvent(e,
currentDtdPointer(),
event->location(),
0);
if (event->included())
end->setIncluded();
eventList.insert(end);
}
else {
undoList.insert(new (internalAllocator()) UndoStartTag);
const ShortReferenceMap *map = e->map();
if (!map)
map = currentElement().map();
pushElement(new (internalAllocator()) OpenElement(e,
0,
event->included(),
map,
event->location()));
}
}
// Parse an end-tag that begins with ETAGO followed by a name and return
// the corresponding, not yet accepted, EndElementEvent.
// The name is resolved to an element type, completing a rank stem or
// creating an undefined element type when necessary.
EndElementEvent *Parser::parseEndTag()
{
  Markup *markup = startMarkup(eventsWanted().wantInstanceMarkup(),
                               currentLocation());
  currentInput()->discardInitial();
  extendNameToken(syntax().namelen(), ParserMessages::nameLength);
  if (markup) {
    markup->addDelim(Syntax::dETAGO);
    markup->addName(currentInput());
  }
  StringC &gi = nameBuffer();
  getCurrentToken(syntax().generalSubstTable(), gi);
  const ElementType *type = currentDtd().lookupElementType(gi);
  if (sd().rank() && !type)
    type = completeRankStem(gi);
  if (!type)
    type = lookupCreateUndefinedElement(gi, currentLocation(),
                                        currentDtdNonConst());
  parseEndTagClose();
  EndElementEvent *endEvent
    = new (eventAllocator()) EndElementEvent(type,
                                             currentDtdPointer(),
                                             markupLocation(),
                                             markup);
  return endEvent;
}
void Parser::parseEndTagClose()
{
for (;;) {
Token token = getToken(tagMode);
switch (token) {
case tokenUnrecognized:
if (!reportNonSgmlCharacter())
message(ParserMessages::endTagCharacter, StringMessageArg(currentToken()));
return;
case tokenEe:
message(ParserMessages::endTagEntityEnd);
return;
case tokenEtago:
case tokenStago:
if (!sd().endTagUnclosed())
message(ParserMessages::unclosedEndTagShorttag);
currentInput()->ungetToken();
return;
case tokenTagc:
if (currentMarkup())
currentMarkup()->addDelim(Syntax::dTAGC);
return;
case tokenS:
if (currentMarkup())
currentMarkup()->addS(currentChar());
break;
default:
message(ParserMessages::endTagInvalidToken,
TokenMessageArg(token, tagMode, syntaxPointer(), sdPointer()));
return;
}
}
}
void Parser::parseEmptyEndTag()
{
if (options().warnEmptyTag)
message(ParserMessages::emptyEndTag);
// FIXME what to do if not in base
if (tagLevel() == 0)
message(ParserMessages::emptyEndTagNoOpenElements);
else {
Markup *markup = startMarkup(eventsWanted().wantInstanceMarkup(),
currentLocation());
if (markup) {
markup->addDelim(Syntax::dETAGO);
markup->addDelim(Syntax::dTAGC);
}
acceptEndTag(new (eventAllocator()) EndElementEvent(currentElement().type(),
currentDtdPointer(),
currentLocation(),
markup));
}
}
void Parser::parseNullEndTag()
{
// If a null end tag was recognized, then there must be a net enabling
// element on the stack.
for (;;) {
ASSERT(tagLevel() > 0);
if (currentElement().netEnabling())
break;
if (!currentElement().isFinished() && validate())
message(ParserMessages::elementNotFinished,
StringMessageArg(currentElement().type()->name()));
implyCurrentElementEnd(currentLocation());
}
if (!currentElement().isFinished() && validate())
message(ParserMessages::elementEndTagNotFinished,
StringMessageArg(currentElement().type()->name()));
Markup *markup = startMarkup(eventsWanted().wantInstanceMarkup(),
currentLocation());
if (markup)
markup->addDelim(Syntax::dNET);
acceptEndTag(new (eventAllocator()) EndElementEvent(currentElement().type(),
currentDtdPointer(),
currentLocation(),
markup));
}
// End every element that is still open by implying its end-tag,
// reporting elementNotFinishedDocumentEnd for each unfinished one.
// Afterwards, when validating, noDocumentElement is reported if the
// remaining current element is not finished.
void Parser::endAllElements()
{
  for (; tagLevel() > 0; implyCurrentElementEnd(currentLocation())) {
    if (currentElement().isFinished())
      continue;
    message(ParserMessages::elementNotFinishedDocumentEnd,
            StringMessageArg(currentElement().type()->name()));
  }
  if (validate() && !currentElement().isFinished())
    message(ParserMessages::noDocumentElement);
}
// Accept the end-tag described by event.
// An end-tag for an element that is not open is reported and the event
// deleted.  Otherwise open elements are ended by implication until the
// element named by the end-tag is current, and that element is then
// ended with event.
void Parser::acceptEndTag(EndElementEvent *event)
{
  const ElementType *closing = event->elementType();
  if (!elementIsOpen(closing)) {
    message(ParserMessages::elementNotOpen,
            StringMessageArg(closing->name()));
    delete event;
    return;
  }
  // Close intervening elements whose end-tags were omitted.
  while (currentElement().type() != closing) {
    if (!currentElement().isFinished() && validate())
      message(ParserMessages::elementNotFinished,
              StringMessageArg(currentElement().type()->name()));
    implyCurrentElementEnd(event->location());
  }
  if (!currentElement().isFinished() && validate())
    message(ParserMessages::elementEndTagNotFinished,
            StringMessageArg(currentElement().type()->name()));
  if (currentElement().included())
    event->setIncluded();
  noteEndElement(event->included());
  eventHandler().endElement(event);
  popElement();
}
void Parser::implyCurrentElementEnd(const Location &loc)
{
if (!sd().omittag())
message(ParserMessages::omitEndTagOmittag,
StringMessageArg(currentElement().type()->name()),
currentElement().startLocation());
else {
const ElementDefinition *def = currentElement().type()->definition();
if (def && !def->canOmitEndTag())
message(ParserMessages::omitEndTagDeclare,
StringMessageArg(currentElement().type()->name()),
currentElement().startLocation());
}
EndElementEvent *event
= new (eventAllocator()) EndElementEvent(currentElement().type(),
currentDtdPointer(),
loc,
0);
if (currentElement().included())
event->setIncluded();
noteEndElement(event->included());
eventHandler().endElement(event);
popElement();
}
void Parser::extendData()
{
XcharMap<PackedBoolean> isNormal(normalMap());
InputSource *in = currentInput();
size_t length = in->currentTokenLength();
// This is one of the parser's inner loops, so it needs to be fast.
while (isNormal[in->tokenChar(messenger())])
length++;
in->endToken(length);
}
void Parser::extendContentS()
{
InputSource *in = currentInput();
size_t length = in->currentTokenLength();
XcharMap<PackedBoolean> isNormal(normalMap());
for (;;) {
Xchar ch = in->tokenChar(messenger());
if (!syntax().isS(ch) || !isNormal[ch])
break;
length++;
}
in->endToken(length);
}
void Parser::handleBadStartTag(const ElementType *e,
StartElementEvent *event,
Boolean netEnabling)
{
IList<Undo> undoList;
IList<Event> eventList;
keepMessages();
for (;;) {
Vector<const ElementType *> missing;
findMissingTag(e, missing);
if (missing.size() == 1) {
queueElementEvents(eventList);
const ElementType *m = missing[0];
message(ParserMessages::missingElementInferred,
StringMessageArg(e->name()),
StringMessageArg(m->name()));
AttributeList *attributes
= allocAttributeList(m->attributeDef(), 1);
// this will give an error if the element has a required attribute
attributes->finish(*this);
StartElementEvent *inferEvent
= new (eventAllocator()) StartElementEvent(m,
currentDtdPointer(),
attributes,
event->location(),
0);
if (!currentElement().tryTransition(m))
inferEvent->setIncluded();
pushElementCheck(m, inferEvent, 0);
if (!currentElement().tryTransition(e))
event->setIncluded();
pushElementCheck(e, event, netEnabling);
return;
}
if (missing.size() > 0) {
queueElementEvents(eventList);
Vector<StringC> missingNames;
for (size_t i = 0; i < missing.size(); i++)
missingNames.push_back(missing[i]->name());
message(ParserMessages::missingElementMultiple,
StringMessageArg(e->name()),
StringVectorMessageArg(missingNames));
pushElementCheck(e, event, netEnabling);
return;
}
if (!sd().omittag()
|| !currentElement().isFinished()
|| tagLevel() == 0
|| !currentElement().type()->definition()->canOmitEndTag())
break;
EndElementEvent *endEvent
= new (eventAllocator()) EndElementEvent(currentElement().type(),
currentDtdPointer(),
event->location(),
0);
eventList.insert(endEvent);
undoList.insert(new (internalAllocator()) UndoEndTag(popSaveElement()));
}
discardKeptMessages();
undo(undoList);
message(ParserMessages::elementNotAllowed, StringMessageArg(e->name()));
// If element couldn't occur because it was excluded, then
// do the transition here.
(void)currentElement().tryTransition(e);
pushElementCheck(e, event, netEnabling);
}
void Parser::findMissingTag(const ElementType *e,
Vector<const ElementType *> &v)
{
if (!currentElement().currentPosition()) {
if (!e)
v.push_back((const ElementType *)0);
return;
}
if (elementIsExcluded(e))
return;
size_t newSize = 0;
currentElement().matchState().possibleTransitions(v);
// FIXME also get currentInclusions
for (size_t i = 0; i < v.size(); i++) {
if (v[i] && !elementIsExcluded(v[i])) {
Boolean success = 0;
switch (v[i]->definition()->declaredContent()) {
case ElementDefinition::modelGroup:
{
const CompiledModelGroup *grp
= v[i]->definition()->compiledModelGroup();
MatchState state(grp);
if (!e) {
if (state.tryTransitionPcdata())
success = 1;
}
else {
if (state.tryTransition(e))
success = 1;
if (!success) {
for (size_t j = 0; j < v[i]->definition()->nInclusions(); j++)
if (v[i]->definition()->inclusion(j) == e) {
success = 1;
break;
}
}
if (success) {
for (size_t j = 0; j < v[i]->definition()->nExclusions(); j++)
if (v[i]->definition()->exclusion(j) == e) {
success = 0;
break;
}
}
}
}
break;
#if 0
case ElementDefinition::any:
success = 1;
break;
#endif
case ElementDefinition::cdata:
case ElementDefinition::rcdata:
if (e == 0)
success = 1;
break;
default:
break;
}
if (success)
v[newSize++] = v[i];
}
}
v.resize(newSize);
// Sort them according to the order of their occurrence in the DTD.
// Do an insertion sort.
for (size_t i = 1; i < v.size(); i++) {
const ElementType *tem = v[i];
size_t j;
for (j = i; j > 0 && v[j - 1]->index() > tem->index(); j--)
v[j] = v[j - 1];
v[j] = tem;
}
}
#if 0
// Disabled code: nothing between this #if 0 and the matching #endif is
// compiled.
// This produces messages that are too verbose
// This doesn't try to be very efficient.
// Collects into v the element types allowed at the current point, also
// looking at what tags could be implied.
// 0 for #pcdata
void Parser::getAllowedElementTypes(Vector<const ElementType *> &v)
{
v.clear();
// FIXME get a list of all inclusions first
// getCurrentInclusions(v);
// x says whether each element of v was excluded
Vector<PackedBoolean> x(v.size(), 0);
unsigned startImpliedCount = 0;
IList<Undo> undoList;
for (;;) {
if (currentElement().currentPosition()) {
// have a model group
size_t i = v.size();
currentElement().matchState().possibleTransitions(v);
x.resize(v.size());
// Flag the newly appended entries that are excluded.
for (size_t j = i; j < v.size(); j++)
x[j] = (v[j] && elementIsExcluded(v[j]));
if (!sd().omittag())
break;
// Try to imply a tag
if (currentElement().isFinished()) {
if (tagLevel() == 0)
break;
if (startImpliedCount)
break;
const ElementDefinition *def = currentElement().type()->definition();
// Pop the element (recording an undo) only if its end-tag is omissible.
if (def && def->canOmitEndTag())
undoList.insert(new (internalAllocator())
UndoEndTag(popSaveElement()));
else
break;
}
else {
const LeafContentToken *token = currentElement().impliedStartTag();
if (!token)
break;
const ElementType *e = token->elementType();
if (elementIsExcluded(e))
break;
const ElementDefinition *def = e->definition();
if (!def
|| def->undefined()
|| (def->declaredContent() != ElementDefinition::modelGroup
&& def->declaredContent() != ElementDefinition::any)
|| !def->canOmitStartTag())
break;
undoList.insert(new (internalAllocator()) UndoStartTag);
startImpliedCount++;
pushElement(new (internalAllocator()) OpenElement(e,
0,
0,
0,
Location()));
// NOTE(review): tryImplyTag() stops implying when checkImplyLoop()
// returns false, whereas this breaks when it returns true -- confirm
// the intended polarity before re-enabling this code.
if (checkImplyLoop(startImpliedCount))
break;
for (size_t i = 0; i < def->nInclusions(); i++)
if (!elementIsExcluded(def->inclusion(i))) {
v.push_back(def->inclusion(i));
x.push_back(0);
}
}
}
else {
// must be allowed #pcdata
v.push_back((const ElementType *)0);
x.push_back((PackedBoolean)0);
break;
}
}
// Restore the element stack to the state it had on entry.
undo(undoList);
// Remove exclusions and duplicates and undefined
size_t newSize = 0;
for (size_t i = 0; i < v.size(); i++)
if (!x[i] && (!v[i] || !v[i]->definition()->undefined())) {
Boolean dup = 0;
for (size_t j = 0; j < newSize; j++)
if (v[i] == v[j]) {
dup = 1;
break;
}
if (!dup)
v[newSize++] = v[i];
}
v.resize(newSize);
}
#endif
#ifdef SP_NAMESPACE
}
#endif