parseSd.cxx revision 7c478bd95313f5f23a4c958a745db2134aa03244
// Copyright (c) 1994, 1995, 1997 James Clark
// See the file COPYING for copying permission.
#pragma ident "%Z%%M% %I% %E% SMI"
#include "splib.h"
#include "Parser.h"
#include "macros.h"
#include "SdFormalError.h"
#include "MessageBuilder.h"
#include "ParserMessages.h"
#include "MessageArg.h"
#include "CharsetRegistry.h"
#include "ISetIter.h"
#include "token.h"
#include "TokenMessageArg.h"
#include "constant.h"
#include "SdText.h"
#include "NumericCharRefOrigin.h"
#ifdef SP_NAMESPACE
namespace SP_NAMESPACE {
#endif
class CharSwitcher {
public:
CharSwitcher();
void addSwitch(WideChar from, WideChar to);
SyntaxChar subst(WideChar c);
size_t nSwitches() const;
Boolean switchUsed(size_t i) const;
WideChar switchFrom(size_t i) const;
WideChar switchTo(size_t i) const;
private:
Vector<PackedBoolean> switchUsed_;
Vector<WideChar> switches_;
};
// Information about the SGML declaration being built.
struct SdBuilder {
SdBuilder();
void addFormalError(const Location &, const MessageType1 &, const StringC &);
Ptr<Sd> sd;
Ptr<Syntax> syntax;
CharsetDecl syntaxCharsetDecl;
CharsetInfo syntaxCharset;
CharSwitcher switcher;
Boolean externalSyntax;
Boolean enr;
Boolean www;
Boolean valid;
Boolean external;
IList<SdFormalError> formalErrorList;
};
class CharsetMessageArg : public MessageArg {
public:
CharsetMessageArg(const ISet<WideChar> &set);
MessageArg *copy() const;
void append(MessageBuilder &) const;
private:
ISet<WideChar> set_;
};
struct SdParam {
typedef unsigned char Type;
enum {
invalid,
eE,
minimumLiteral,
mdc,
minus,
number,
capacityName,
name,
paramLiteral,
systemIdentifier,
generalDelimiterName,
referenceReservedName,
quantityName,
reservedName // Sd::ReservedName is added to this
};
Type type;
StringC token;
Text literalText;
String<SyntaxChar> paramLiteralText;
union {
Number n;
Sd::Capacity capacityIndex;
Syntax::Quantity quantityIndex;
Syntax::ReservedName reservedNameIndex;
Syntax::DelimGeneral delimGeneralIndex;
};
};
class AllowedSdParams {
public:
AllowedSdParams(SdParam::Type,
SdParam::Type = SdParam::invalid,
SdParam::Type = SdParam::invalid,
SdParam::Type = SdParam::invalid,
SdParam::Type = SdParam::invalid,
SdParam::Type = SdParam::invalid);
Boolean param(SdParam::Type) const;
SdParam::Type get(int i) const;
private:
enum { maxAllow = 6 };
SdParam::Type allow_[maxAllow];
};
class AllowedSdParamsMessageArg : public MessageArg {
public:
AllowedSdParamsMessageArg(const AllowedSdParams &allow,
const ConstPtr<Sd> &sd);
MessageArg *copy() const;
void append(MessageBuilder &) const;
private:
AllowedSdParams allow_;
ConstPtr<Sd> sd_;
};
struct StandardSyntaxSpec {
struct AddedFunction {
const char *name;
Syntax::FunctionClass functionClass;
SyntaxChar syntaxChar;
};
const AddedFunction *addedFunction;
size_t nAddedFunction;
Boolean shortref;
};
static StandardSyntaxSpec::AddedFunction coreFunctions[] = {
{ "TAB", Syntax::cSEPCHAR, 9 },
};
static StandardSyntaxSpec coreSyntax = {
coreFunctions, SIZEOF(coreFunctions), 0
};
static StandardSyntaxSpec refSyntax = {
coreFunctions, SIZEOF(coreFunctions), 1
};
void Parser::doInit()
{
if (cancelled()) {
allDone();
return;
}
// When document entity doesn't exist, don't give any errors
// other than the cannot open error.
if (currentInput()->get(messenger()) == InputSource::eE) {
if (currentInput()->accessError()) {
allDone();
return;
}
}
else
currentInput()->ungetToken();
const CharsetInfo &initCharset = sd().internalCharset();
ISet<WideChar> missing;
findMissingMinimum(initCharset, missing);
if (!missing.isEmpty()) {
message(ParserMessages::sdMissingCharacters, CharsetMessageArg(missing));
giveUp();
return;
}
Boolean found = 0;
StringC systemId;
if (scanForSgmlDecl(initCharset)) {
if (options().warnExplicitSgmlDecl)
message(ParserMessages::explicitSgmlDecl);
found = 1;
}
else {
currentInput()->ungetToken();
if (entityCatalog().sgmlDecl(initCharset, messenger(), systemId)) {
InputSource *in = entityManager().open(systemId,
sd().docCharset(),
InputSourceOrigin::make(),
0,
messenger());
if (in) {
pushInput(in);
if (scanForSgmlDecl(initCharset))
found = 1;
else {
message(ParserMessages::badDefaultSgmlDecl);
popInputStack();
}
}
}
}
if (found) {
if (startMarkup(eventsWanted().wantPrologMarkup(), currentLocation())) {
size_t nS = currentInput()->currentTokenLength() - 6;
for (size_t i = 0; i < nS; i++)
currentMarkup()->addS(currentInput()->currentTokenStart()[i]);
currentMarkup()->addDelim(Syntax::dMDO);
currentMarkup()->addSdReservedName(Sd::rSGML,
currentInput()->currentTokenStart()
+ (currentInput()->currentTokenLength() - 4),
4);
}
Syntax *syntaxp = new Syntax(sd());
CharSwitcher switcher;
if (!setStandardSyntax(*syntaxp, refSyntax, sd().internalCharset(), switcher, 0)) {
giveUp();
return;
}
syntaxp->implySgmlChar(sd());
setSyntax(syntaxp);
compileSdModes();
ConstPtr<Sd> refSd(sdPointer());
ConstPtr<Syntax> refSyntax(syntaxPointer());
if (!parseSgmlDecl()) {
giveUp();
return;
}
// queue an SGML declaration event
eventHandler().sgmlDecl(new (eventAllocator())
SgmlDeclEvent(sdPointer(),
syntaxPointer(),
instanceSyntaxPointer(),
refSd,
refSyntax,
currentInput()->nextIndex(),
systemId,
markupLocation(),
currentMarkup()));
if (inputLevel() == 2) {
// FIXME perhaps check for junk after SGML declaration
popInputStack();
}
}
else {
if (!implySgmlDecl()) {
giveUp();
return;
}
currentInput()->willNotSetDocCharset();
// queue an SGML declaration event
eventHandler().sgmlDecl(new (eventAllocator())
SgmlDeclEvent(sdPointer(),
syntaxPointer()));
}
// Now we have sd and syntax set up, prepare to parse the prolog.
compilePrologModes();
setPhase(prologPhase);
}
Boolean Parser::implySgmlDecl()
{
Syntax *syntaxp = new Syntax(sd());
const StandardSyntaxSpec *spec;
if (options().shortref)
spec = &refSyntax;
else
spec = &coreSyntax;
CharSwitcher switcher;
if (!setStandardSyntax(*syntaxp, *spec, sd().internalCharset(), switcher, 0))
return 0;
syntaxp->implySgmlChar(sd());
for (int i = 0; i < Syntax::nQuantity; i++)
syntaxp->setQuantity(i, options().quantity[i]);
setSyntax(syntaxp);
return 1;
}
Boolean Parser::setStandardSyntax(Syntax &syn,
const StandardSyntaxSpec &spec,
const CharsetInfo &internalCharset,
CharSwitcher &switcher,
Boolean www)
{
static UnivCharsetDesc::Range syntaxCharsetRanges[] = {
{ 0, 128, 0 },
};
static UnivCharsetDesc syntaxCharsetDesc(syntaxCharsetRanges,
SIZEOF(syntaxCharsetRanges));
static CharsetInfo syntaxCharset(syntaxCharsetDesc);
Boolean valid = 1;
if (!checkSwitches(switcher, syntaxCharset))
valid = 0;
size_t i;
for (i = 0; i < switcher.nSwitches(); i++)
if (switcher.switchTo(i) >= 128)
message(ParserMessages::switchNotInCharset,
NumberMessageArg(switcher.switchTo(i)));
static const Char shunchar[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
127, 255
};
for (i = 0; i < SIZEOF(shunchar); i++)
syn.addShunchar(shunchar[i]);
syn.setShuncharControls();
static Syntax::StandardFunction standardFunctions[3] = {
Syntax::fRE, Syntax::fRS, Syntax::fSPACE
};
static SyntaxChar functionChars[3] = { 13, 10, 32 };
for (i = 0; i < 3; i++) {
Char docChar;
if (translateSyntax(switcher,
syntaxCharset,
internalCharset,
functionChars[i],
docChar)
&& checkNotFunction(syn, docChar))
syn.setStandardFunction(standardFunctions[i], docChar);
else
valid = 0;
}
for (i = 0; i < spec.nAddedFunction; i++) {
Char docChar;
if (translateSyntax(switcher,
syntaxCharset,
internalCharset,
spec.addedFunction[i].syntaxChar,
docChar)
&& checkNotFunction(syn, docChar))
syn.addFunctionChar(internalCharset.execToDesc(spec.addedFunction[i].name),
spec.addedFunction[i].functionClass,
docChar);
else
valid = 0;
}
static SyntaxChar nameChars[2] = { 45, 46 }; // '-' '.'
ISet<Char> nameCharSet;
for (i = 0; i < 2; i++) {
Char docChar;
if (translateSyntax(switcher,
syntaxCharset,
internalCharset,
nameChars[i],
docChar))
nameCharSet.add(docChar);
else
valid = 0;
}
if (!checkNmchars(nameCharSet, syn))
valid = 0;
else
syn.addNameCharacters(nameCharSet);
syn.setNamecaseGeneral(1);
syn.setNamecaseEntity(0);
if (!setRefDelimGeneral(syn, syntaxCharset, internalCharset, switcher))
valid = 0;
setRefNames(syn, internalCharset, www);
syn.enterStandardFunctionNames();
if (spec.shortref
&& !addRefDelimShortref(syn, syntaxCharset, internalCharset, switcher))
valid = 0;
return valid;
}
Boolean Parser::setRefDelimGeneral(Syntax &syntax,
const CharsetInfo &syntaxCharset,
const CharsetInfo &internalCharset,
CharSwitcher &switcher)
{
// Column 3 from Figure 3
static const char delims[][2] = {
{ 38 },
{ 45, 45 },
{ 38, 35 },
{ 93 },
{ 91 },
{ 93 },
{ 91 },
{ 38 },
{ 60, 47 },
{ 41 },
{ 40 },
{ 0 }, // HCRO
{ 34 },
{ 39 },
{ 62 },
{ 60, 33 },
{ 45 },
{ 93, 93 },
{ 47 },
{ 47 }, // NESTC
{ 63 },
{ 124 },
{ 37 },
{ 62 },
{ 60, 63 },
{ 43 },
{ 59 },
{ 42 },
{ 35 },
{ 44 },
{ 60 },
{ 62 },
{ 61 },
};
Boolean valid = 1;
ISet<WideChar> missing;
for (int i = 0; i < Syntax::nDelimGeneral; i++)
if (syntax.delimGeneral(i).size() == 0) {
StringC delim;
size_t j;
for (j = 0; j < 2 && delims[i][j] != '\0'; j++) {
UnivChar univChar = translateUniv(delims[i][j], switcher,
syntaxCharset);
Char c;
if (univToDescCheck(internalCharset, univChar, c))
delim += c;
else {
missing += univChar;
valid = 0;
}
}
if (delim.size() == j) {
if (checkGeneralDelim(syntax, delim))
syntax.setDelimGeneral(i, delim);
else
valid = 0;
}
}
if (!missing.isEmpty())
message(ParserMessages::missingSignificant646, CharsetMessageArg(missing));
return valid;
}
void Parser::setRefNames(Syntax &syntax, const CharsetInfo &internalCharset,
Boolean www)
{
static const char *const referenceNames[] = {
"ALL",
"ANY",
"ATTLIST",
"CDATA",
"CONREF",
"CURRENT",
"DATA",
"DEFAULT",
"DOCTYPE",
"ELEMENT",
"EMPTY",
"ENDTAG",
"ENTITIES",
"ENTITY",
"FIXED",
"ID",
"IDLINK",
"IDREF",
"IDREFS",
"IGNORE",
"IMPLICIT",
"IMPLIED",
"INCLUDE",
"INITIAL",
"LINK",
"LINKTYPE",
"MD",
"MS",
"NAME",
"NAMES",
"NDATA",
"NMTOKEN",
"NMTOKENS",
"NOTATION",
"NUMBER",
"NUMBERS",
"NUTOKEN",
"NUTOKENS",
"O",
"PCDATA",
"PI",
"POSTLINK",
"PUBLIC",
"RCDATA",
"RE",
"REQUIRED",
"RESTORE",
"RS",
"SDATA",
"SHORTREF",
"SIMPLE",
"SPACE",
"STARTTAG",
"SUBDOC",
"SYSTEM",
"TEMP",
"USELINK",
"USEMAP"
};
for (int i = 0; i < Syntax::nNames; i++) {
switch (i) {
case Syntax::rDATA:
case Syntax::rIMPLICIT:
if (!www)
break;
// fall through
case Syntax::rALL:
if (!www && options().errorAfdr)
break;
// fall through
default:
{
StringC docName(internalCharset.execToDesc(referenceNames[i]));
Syntax::ReservedName tem;
if (syntax.lookupReservedName(docName, &tem))
message(ParserMessages::nameReferenceReservedName,
StringMessageArg(docName));
if (syntax.reservedName(Syntax::ReservedName(i)).size() == 0)
syntax.setName(i, docName);
break;
}
}
}
}
Boolean Parser::addRefDelimShortref(Syntax &syntax,
const CharsetInfo &syntaxCharset,
const CharsetInfo &internalCharset,
CharSwitcher &switcher)
{
// Column 2 from Figure 4
static const char delimShortref[][3] = {
{ 9 },
{ 13 },
{ 10 },
{ 10, 66 },
{ 10, 13 },
{ 10, 66, 13 },
{ 66, 13 },
{ 32 },
{ 66, 66 },
{ 34 },
{ 35 },
{ 37 },
{ 39 },
{ 40 },
{ 41 },
{ 42 },
{ 43 },
{ 44 },
{ 45 },
{ 45, 45 },
{ 58 },
{ 59 },
{ 61 },
{ 64 },
{ 91 },
{ 93 },
{ 94 },
{ 95 },
{ 123 },
{ 124 },
{ 125 },
{ 126 },
};
ISet<WideChar> missing;
for (size_t i = 0; i < SIZEOF(delimShortref); i++) {
StringC delim;
size_t j;
for (j = 0; j < 3 && delimShortref[i][j] != '\0'; j++) {
Char c;
UnivChar univChar = translateUniv(delimShortref[i][j], switcher,
syntaxCharset);
if (univToDescCheck(internalCharset, univChar, c))
delim += c;
else
missing += univChar;
}
if (delim.size() == j) {
if (switcher.nSwitches() > 0 && syntax.isValidShortref(delim))
message(ParserMessages::duplicateDelimShortref,
StringMessageArg(delim));
else
syntax.addDelimShortref(delim, internalCharset);
}
}
if (!missing.isEmpty())
message(ParserMessages::missingSignificant646, CharsetMessageArg(missing));
return 1;
}
// Determine whether the document starts with an SGML declaration.
// There is no current syntax at this point.
Boolean Parser::scanForSgmlDecl(const CharsetInfo &initCharset)
{
Char rs;
if (!univToDescCheck(initCharset, UnivCharsetDesc::rs, rs))
return 0;
Char re;
if (!univToDescCheck(initCharset, UnivCharsetDesc::re, re))
return 0;
Char space;
if (!univToDescCheck(initCharset, UnivCharsetDesc::space, space))
return 0;
Char tab;
if (!univToDescCheck(initCharset, UnivCharsetDesc::tab, tab))
return 0;
InputSource *in = currentInput();
Xchar c = in->get(messenger());
while (c == rs || c == space || c == re || c == tab)
c = in->tokenChar(messenger());
if (c != initCharset.execToDesc('<'))
return 0;
if (in->tokenChar(messenger()) != initCharset.execToDesc('!'))
return 0;
c = in->tokenChar(messenger());
if (c != initCharset.execToDesc('S')
&& c != initCharset.execToDesc('s'))
return 0;
c = in->tokenChar(messenger());
if (c != initCharset.execToDesc('G')
&& c != initCharset.execToDesc('g'))
return 0;
c = in->tokenChar(messenger());
if (c != initCharset.execToDesc('M')
&& c != initCharset.execToDesc('m'))
return 0;
c = in->tokenChar(messenger());
if (c != initCharset.execToDesc('L')
&& c != initCharset.execToDesc('l'))
return 0;
c = in->tokenChar(messenger());
// Don't recognize this if SGML is followed by a name character.
if (c == InputSource::eE)
return 1;
in->endToken(in->currentTokenLength() - 1);
if (c == initCharset.execToDesc('-'))
return 0;
if (c == initCharset.execToDesc('.'))
return 0;
UnivChar univ;
if (!initCharset.descToUniv(c, univ))
return 1;
if (UnivCharsetDesc::a <= univ && univ < UnivCharsetDesc::a + 26)
return 0;
if (UnivCharsetDesc::A <= univ && univ < UnivCharsetDesc::A + 26)
return 0;
if (UnivCharsetDesc::zero <= univ && univ < UnivCharsetDesc::zero + 10)
return 0;
return 1;
}
void Parser::findMissingMinimum(const CharsetInfo &charset,
ISet<WideChar> &missing)
{
Char to;
size_t i;
for (i = 0; i < 26; i++) {
if (!univToDescCheck(charset, UnivCharsetDesc::A + i, to))
missing += UnivCharsetDesc::A + i;
if (!univToDescCheck(charset, UnivCharsetDesc::a + i, to))
missing += UnivCharsetDesc::a + i;
}
for (i = 0; i < 10; i++) {
Char to;
if (!univToDescCheck(charset, UnivCharsetDesc::zero + i, to))
missing += UnivCharsetDesc::zero + i;
}
static const UnivChar special[] = {
39, 40, 41, 43, 44, 45, 46, 47, 58, 61, 63
};
for (i = 0; i < SIZEOF(special); i++)
if (!univToDescCheck(charset, special[i], to))
missing += special[i];
}
Boolean Parser::parseSgmlDecl()
{
SdParam parm;
SdBuilder sdBuilder;
if (!parseSdParam(AllowedSdParams(SdParam::minimumLiteral, SdParam::name), parm))
return 0;
if (parm.type == SdParam::name) {
sdBuilder.external = 1;
Location loc(currentLocation());
StringC name;
parm.token.swap(name);
ExternalId externalId;
if (!sdParseSgmlDeclRef(sdBuilder, parm, externalId))
return 0;
ExternalEntity *entity
= new ExternalTextEntity(name, EntityDecl::sgml, loc, externalId);
ConstPtr<Entity> entityPtr(entity);
entity->generateSystemId(*this);
if (entity->externalId().effectiveSystemId().size() == 0) {
message(ParserMessages::cannotGenerateSystemIdSgml);
return 0;
}
Ptr<EntityOrigin> origin(EntityOrigin::make(internalAllocator(), entityPtr, loc));
if (currentMarkup())
currentMarkup()->addEntityStart(origin);
pushInput(entityManager().open(entity->externalId().effectiveSystemId(),
sd().docCharset(),
origin.pointer(),
0,
messenger()));
if (!parseSdParam(AllowedSdParams(SdParam::minimumLiteral), parm))
return 0;
}
StringC version(sd().execToInternal("ISO 8879:1986"));
StringC enrVersion(sd().execToInternal("ISO 8879:1986 (ENR)"));
StringC wwwVersion(sd().execToInternal("ISO 8879:1986 (WWW)"));
if (parm.literalText.string() == enrVersion)
sdBuilder.enr = 1;
else if (parm.literalText.string() == wwwVersion) {
sdBuilder.enr = 1;
sdBuilder.www = 1;
}
else if (parm.literalText.string() != version)
message(ParserMessages::standardVersion,
StringMessageArg(parm.literalText.string()));
if (sdBuilder.external && !sdBuilder.www)
message(ParserMessages::sgmlDeclRefRequiresWww);
sdBuilder.sd = new Sd(entityManagerPtr());
if (sdBuilder.www)
sdBuilder.sd->setWww(1);
typedef Boolean (Parser::*SdParser)(SdBuilder &, SdParam &);
static SdParser parsers[] = {
&Parser::sdParseDocumentCharset,
&Parser::sdParseCapacity,
&Parser::sdParseScope,
&Parser::sdParseSyntax,
&Parser::sdParseFeatures,
&Parser::sdParseAppinfo,
&Parser::sdParseSeealso,
};
for (size_t i = 0; i < SIZEOF(parsers); i++) {
if (!(this->*(parsers[i]))(sdBuilder, parm))
return 0;
if (!sdBuilder.valid)
return 0;
}
setSdOverrides(*sdBuilder.sd);
if (sdBuilder.sd->formal()) {
while (!sdBuilder.formalErrorList.empty()) {
SdFormalError *p = sdBuilder.formalErrorList.get();
ParserState *state = this; // work around lcc 3.0 bug
p->send(*state);
delete p;
}
}
setSd(sdBuilder.sd.pointer());
currentInput()->setDocCharset(sd().docCharset(), entityManager().charset());
if (sdBuilder.sd->scopeInstance()) {
Syntax *proSyntax = new Syntax(sd());
CharSwitcher switcher;
setStandardSyntax(*proSyntax, refSyntax, sd().internalCharset(), switcher, sdBuilder.www);
proSyntax->setSgmlChar(*sdBuilder.syntax->charSet(Syntax::sgmlChar));
ISet<WideChar> invalidSgmlChar;
proSyntax->checkSgmlChar(*sdBuilder.sd,
sdBuilder.syntax.pointer(),
1, // get results in document character set
invalidSgmlChar);
sdBuilder.syntax->checkSgmlChar(*sdBuilder.sd,
proSyntax,
1, // get results in document character set
invalidSgmlChar);
if (!invalidSgmlChar.isEmpty())
message(ParserMessages::invalidSgmlChar, CharsetMessageArg(invalidSgmlChar));
setSyntaxes(proSyntax, sdBuilder.syntax.pointer());
}
else
setSyntax(sdBuilder.syntax.pointer());
if (syntax().multicode())
currentInput()->setMarkupScanTable(syntax().markupScanTable());
return 1;
}
Boolean Parser::sdParseSgmlDeclRef(SdBuilder &sdBuilder, SdParam &parm,
ExternalId &id)
{
id.setLocation(currentLocation());
if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSYSTEM,
SdParam::reservedName + Sd::rPUBLIC,
SdParam::mdc),
parm))
return 0;
if (parm.type == SdParam::mdc)
return 1;
if (parm.type == SdParam::reservedName + Sd::rPUBLIC) {
if (!parseSdParam(AllowedSdParams(SdParam::minimumLiteral), parm))
return 0;
const MessageType1 *err;
PublicId::TextClass textClass;
if (!id.setPublic(parm.literalText, sd().internalCharset(), syntax().space(), err))
sdBuilder.addFormalError(currentLocation(), *err, id.publicId()->string());
else if (id.publicId()->getTextClass(textClass)
&& textClass != PublicId::SD)
sdBuilder.addFormalError(currentLocation(),
ParserMessages::sdTextClass,
id.publicId()->string());
}
if (!parseSdParam(AllowedSdParams(SdParam::systemIdentifier, SdParam::mdc), parm))
return 0;
if (parm.type == SdParam::mdc)
return 1;
id.setSystem(parm.literalText);
return parseSdParam(AllowedSdParams(SdParam::mdc), parm);
}
Boolean Parser::sdParseDocumentCharset(SdBuilder &sdBuilder, SdParam &parm)
{
if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rCHARSET),
parm))
return 0;
if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rBASESET),
parm))
return 0;
CharsetDecl decl;
UnivCharsetDesc desc;
if (!sdParseCharset(sdBuilder, parm, 1, decl, desc))
return 0;
ISet<WideChar> missing;
findMissingMinimum(desc, missing);
if (!missing.isEmpty()) {
message(ParserMessages::missingMinimumChars,
CharsetMessageArg(missing));
return 0;
}
ISet<Char> sgmlChar;
decl.usedSet(sgmlChar);
sdBuilder.sd->setDocCharsetDesc(desc);
sdBuilder.sd->setDocCharsetDecl(decl);
sdBuilder.syntax = new Syntax(*sdBuilder.sd);
if (sd().internalCharsetIsDocCharset())
sdBuilder.syntax->setSgmlChar(sgmlChar);
else {
ISet<Char> internalSgmlChar;
translateDocSet(sdBuilder.sd->docCharset(), sdBuilder.sd->internalCharset(),
sgmlChar, internalSgmlChar);
sdBuilder.syntax->setSgmlChar(internalSgmlChar);
}
return 1;
}
void Parser::translateDocSet(const CharsetInfo &fromCharset,
const CharsetInfo &toCharset,
const ISet<Char> &fromSet,
ISet<Char> &toSet)
{
ISetIter<Char> iter(fromSet);
Char min, max;
while (iter.next(min, max)) {
do {
UnivChar univChar;
Char internalChar;
WideChar count2, alsoMax;
if (!fromCharset.descToUniv(min, univChar, alsoMax)) {
if (alsoMax >= max)
break;
min = alsoMax;
}
else {
// FIXME better not to use univToDescCheck here
// Maybe OK if multiple internal chars corresponding to doc char
int nMap = univToDescCheck(toCharset, univChar, internalChar, count2);
if (alsoMax > max)
alsoMax = max;
if (alsoMax - min > count2 - 1)
alsoMax = min + (count2 - 1);
if (nMap)
toSet.addRange(internalChar, internalChar + (alsoMax - min));
min = alsoMax;
}
} while (min++ != max);
}
}
Boolean Parser::sdParseCharset(SdBuilder &sdBuilder,
SdParam &parm,
Boolean isDocument,
CharsetDecl &decl,
UnivCharsetDesc &desc)
{
decl.clear();
ISet<WideChar> multiplyDeclared;
// This is for checking whether the syntax reference character set
// is ISO 646 when SCOPE is INSTANCE.
Boolean maybeISO646 = 1;
do {
if (!parseSdParam(AllowedSdParams(SdParam::minimumLiteral), parm))
return 0;
UnivCharsetDesc baseDesc;
PublicId id;
Boolean found;
PublicId::TextClass textClass;
const MessageType1 *err;
if (!id.init(parm.literalText, sd().internalCharset(), syntax().space(), err))
sdBuilder.addFormalError(currentLocation(),
*err,
id.string());
else if (id.getTextClass(textClass)
&& textClass != PublicId::CHARSET)
sdBuilder.addFormalError(currentLocation(),
ParserMessages::basesetTextClass,
id.string());
Boolean givenError;
if (referencePublic(id, PublicId::CHARSET, givenError))
found = sdParseExternalCharset(*sdBuilder.sd, baseDesc);
else if (!givenError) {
found = 0;
PublicId::OwnerType ownerType;
if (id.getOwnerType(ownerType) && ownerType == PublicId::ISO) {
StringC sequence;
if (id.getDesignatingSequence(sequence)) {
CharsetRegistry::ISORegistrationNumber number
= CharsetRegistry::getRegistrationNumber(sequence, sd().internalCharset());
if (number != CharsetRegistry::UNREGISTERED) {
Owner<CharsetRegistry::Iter> iter(CharsetRegistry::makeIter(number));
if (iter) {
found = 1;
WideChar min;
WideChar max;
UnivChar univ;
while (iter->next(min, max, univ))
baseDesc.addRange(min, max, univ);
}
}
}
}
if (!found)
message(ParserMessages::unknownBaseset, StringMessageArg(id.string()));
}
else
found = 0;
if (!found)
maybeISO646 = 0;
decl.addSection(id);
if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rDESCSET),
parm))
return 0;
if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
return 0;
do {
WideChar min = parm.n;
if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
return 0;
Number count = parm.n;
Number adjCount;
if (options().warnSgmlDecl && count == 0)
message(ParserMessages::zeroNumberOfCharacters);
decl.rangeDeclared(min, count, multiplyDeclared);
if (isDocument
&& count > 0
&& (min > charMax || count - 1 > charMax - min)) {
message(ParserMessages::documentCharMax, NumberMessageArg(charMax));
adjCount = min > charMax ? 0 : 1 + (charMax - min);
maybeISO646 = 0;
}
else
adjCount = count;
if (!parseSdParam(AllowedSdParams(SdParam::number,
SdParam::minimumLiteral,
SdParam::reservedName + Sd::rUNUSED),
parm))
return 0;
switch (parm.type) {
case SdParam::number:
decl.addRange(min, count, parm.n);
if (found && adjCount > 0) {
ISet<WideChar> baseMissing;
desc.addBaseRange(baseDesc, min, min + (adjCount - 1), parm.n,
baseMissing);
if (!baseMissing.isEmpty() && options().warnSgmlDecl)
message(ParserMessages::basesetCharsMissing,
CharsetMessageArg(baseMissing));
}
break;
case SdParam::reservedName + Sd::rUNUSED:
decl.addRange(min, count);
break;
case SdParam::minimumLiteral:
{
UnivChar c = charNameToUniv(*sdBuilder.sd, parm.literalText.string());
if (adjCount > 256) {
message(ParserMessages::tooManyCharsMinimumLiteral);
adjCount = 256;
}
for (Number i = 0; i < adjCount; i++)
desc.addRange(min + i, min + i, c);
}
maybeISO646 = 0;
decl.addRange(min, count, parm.literalText.string());
break;
default:
CANNOT_HAPPEN();
}
SdParam::Type follow = (isDocument
? SdParam::reservedName + Sd::rCAPACITY
: SdParam::reservedName + Sd::rFUNCTION);
if (!parseSdParam(AllowedSdParams(SdParam::number,
SdParam::reservedName + Sd::rBASESET,
follow),
parm))
return 0;
} while (parm.type == SdParam::number);
} while (parm.type == SdParam::reservedName + Sd::rBASESET);
if (!multiplyDeclared.isEmpty())
message(ParserMessages::duplicateCharNumbers,
CharsetMessageArg(multiplyDeclared));
ISet<WideChar> declaredSet;
decl.declaredSet(declaredSet);
ISetIter<WideChar> iter(declaredSet);
WideChar min, max, lastMax;
if (iter.next(min, max)) {
ISet<WideChar> holes;
lastMax = max;
while (iter.next(min, max)) {
if (min - lastMax > 1)
holes.addRange(lastMax + 1, min - 1);
lastMax = max;
}
if (!holes.isEmpty())
message(ParserMessages::codeSetHoles, CharsetMessageArg(holes));
}
if (!isDocument && sdBuilder.sd->scopeInstance()) {
// If scope is INSTANCE, syntax reference character set
// must be same as reference.
UnivCharsetDescIter iter(desc);
WideChar descMin, descMax;
UnivChar univMin;
Char nextDescMin = 0;
while (maybeISO646) {
if (!iter.next(descMin, descMax, univMin)) {
if (nextDescMin != 128)
maybeISO646 = 0;
break;
}
if (descMin != nextDescMin || univMin != descMin)
maybeISO646 = 0;
nextDescMin = descMax + 1;
}
if (!maybeISO646)
message(ParserMessages::scopeInstanceSyntaxCharset);
}
return 1;
}
Boolean Parser::sdParseExternalCharset(Sd &sd, UnivCharsetDesc &desc)
{
SdParam parm;
for (;;) {
if (!parseSdParam(AllowedSdParams(SdParam::number, SdParam::eE),
parm))
break;
if (parm.type == SdParam::eE)
return 1;
WideChar min = parm.n;
if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
break;
Number count = parm.n;
if (!parseSdParam(AllowedSdParams(SdParam::number,
SdParam::minimumLiteral,
SdParam::reservedName + Sd::rUNUSED),
parm))
break;
if (parm.type == SdParam::number) {
if (count > 0)
desc.addRange(min, min + (count - 1), parm.n);
}
else if (parm.type == SdParam::minimumLiteral) {
UnivChar c = charNameToUniv(sd, parm.literalText.string());
if (count > 256) {
message(ParserMessages::tooManyCharsMinimumLiteral);
count = 256;
}
for (Number i = 0; i < count; i++)
desc.addRange(min + i, min + i, c);
}
}
popInputStack();
return 0;
}
UnivChar Parser::charNameToUniv(Sd &sd, const StringC &name)
{
UnivChar univ;
if (entityCatalog().lookupChar(name, sd.internalCharset(), messenger(), univ))
return univ;
else
return sd.nameToUniv(name);
}
Boolean Parser::sdParseCapacity(SdBuilder &sdBuilder, SdParam &parm)
{
if (!parseSdParam(sdBuilder.www
? AllowedSdParams(SdParam::reservedName + Sd::rNONE,
SdParam::reservedName + Sd::rPUBLIC,
SdParam::reservedName + Sd::rSGMLREF)
: AllowedSdParams(SdParam::reservedName + Sd::rPUBLIC,
SdParam::reservedName + Sd::rSGMLREF),
parm))
return 0;
#if _MSC_VER == 1100
// Workaround for Visual C++ 5.0 bug
int
#else
Boolean
#endif
pushed = 0;
if (parm.type == SdParam::reservedName + Sd::rNONE)
return parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSCOPE),
parm);
if (parm.type == SdParam::reservedName + Sd::rPUBLIC) {
if (!parseSdParam(AllowedSdParams(SdParam::minimumLiteral), parm))
return 0;
PublicId id;
PublicId::TextClass textClass;
const MessageType1 *err;
if (!id.init(parm.literalText, sd().internalCharset(), syntax().space(), err))
sdBuilder.addFormalError(currentLocation(),
*err,
id.string());
else if (id.getTextClass(textClass)
&& textClass != PublicId::CAPACITY)
sdBuilder.addFormalError(currentLocation(),
ParserMessages::capacityTextClass,
id.string());
const StringC &str = id.string();
if (str != sd().execToInternal("ISO 8879-1986//CAPACITY Reference//EN")
&& str != sd().execToInternal("ISO 8879:1986//CAPACITY Reference//EN")) {
Boolean givenError;
if (referencePublic(id, PublicId::CAPACITY, givenError))
pushed = 1;
else if (!givenError)
message(ParserMessages::unknownCapacitySet, StringMessageArg(str));
}
if (!pushed)
return parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSCOPE),
parm);
}
PackedBoolean capacitySpecified[Sd::nCapacity];
int i;
for (i = 0; i < Sd::nCapacity; i++)
capacitySpecified[i] = 0;
if (!parseSdParam(AllowedSdParams(SdParam::capacityName), parm))
return 0;
do {
Sd::Capacity capacityIndex = parm.capacityIndex;
if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
return 0;
if (!capacitySpecified[capacityIndex]) {
sdBuilder.sd->setCapacity(capacityIndex, parm.n);
capacitySpecified[capacityIndex] = 1;
}
else if (options().warnSgmlDecl)
message(ParserMessages::duplicateCapacity,
StringMessageArg(sd().capacityName(i)));
int final = pushed ? int(SdParam::eE) : SdParam::reservedName + Sd::rSCOPE;
if (!parseSdParam(AllowedSdParams(SdParam::capacityName, final),
parm))
return 0;
} while (parm.type == SdParam::capacityName);
Number totalcap = sdBuilder.sd->capacity(0);
for (i = 1; i < Sd::nCapacity; i++)
if (sdBuilder.sd->capacity(i) > totalcap)
message(ParserMessages::capacityExceedsTotalcap,
StringMessageArg(sd().capacityName(i)));
if (pushed)
return parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSCOPE),
parm);
return 1;
}
Boolean Parser::referencePublic(const PublicId &id,
PublicId::TextClass entityType,
Boolean &givenError)
{
givenError = 0;
StringC sysid;
if (entityCatalog().lookupPublic(id.string(),
sd().internalCharset(),
messenger(),
sysid)) {
Location loc = currentLocation();
eventHandler().sgmlDeclEntity(new (eventAllocator())
SgmlDeclEntityEvent(id,
entityType,
sysid,
loc));
Ptr<EntityOrigin> origin(EntityOrigin::make(internalAllocator(),
ConstPtr<Entity>(0),
loc));
if (currentMarkup())
currentMarkup()->addEntityStart(origin);
InputSource *in = entityManager().open(sysid,
sd().docCharset(),
origin.pointer(),
0,
messenger());
if (!in) {
givenError = 1;
return 0;
}
pushInput(in);
return 1;
}
return 0;
}
Boolean Parser::sdParseScope(SdBuilder &sdBuilder, SdParam &parm)
{
if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rINSTANCE,
SdParam::reservedName + Sd::rDOCUMENT),
parm))
return 0;
if (parm.type == SdParam::reservedName + Sd::rINSTANCE)
sdBuilder.sd->setScopeInstance();
return 1;
}
Boolean Parser::sdParseSyntax(SdBuilder &sdBuilder, SdParam &parm)
{
if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSYNTAX),
parm))
return 0;
if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSHUNCHAR,
SdParam::reservedName + Sd::rPUBLIC),
parm))
return 0;
if (parm.type == SdParam::reservedName + Sd::rPUBLIC) {
if (!parseSdParam(AllowedSdParams(SdParam::minimumLiteral), parm))
return 0;
PublicId id;
const MessageType1 *err;
PublicId::TextClass textClass;
if (!id.init(parm.literalText, sd().internalCharset(), syntax().space(), err))
sdBuilder.addFormalError(currentLocation(),
*err,
id.string());
else if (id.getTextClass(textClass)
&& textClass != PublicId::SYNTAX)
sdBuilder.addFormalError(currentLocation(),
ParserMessages::syntaxTextClass,
id.string());
if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rFEATURES,
SdParam::reservedName + Sd::rSWITCHES),
parm))
return 0;
Vector<UnivChar> charSwitches;
if (parm.type == SdParam::reservedName + Sd::rSWITCHES) {
if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
return 0;
for (;;) {
SyntaxChar c = parm.n;
if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
return 0;
sdBuilder.switcher.addSwitch(c, parm.n);
if (!parseSdParam(AllowedSdParams(SdParam::number,
SdParam::reservedName
+ Sd::rFEATURES),
parm))
return 0;
if (parm.type != SdParam::number)
break;
}
}
const StandardSyntaxSpec *spec = lookupSyntax(id);
if (spec) {
if (!setStandardSyntax(*sdBuilder.syntax,
*spec,
sdBuilder.sd->internalCharset(),
sdBuilder.switcher,
sdBuilder.www))
sdBuilder.valid = 0;
}
else {
Boolean givenError;
if (referencePublic(id, PublicId::SYNTAX, givenError)) {
sdBuilder.externalSyntax = 1;
SdParam parm2;
if (!parseSdParam(AllowedSdParams(SdParam::reservedName
+ Sd::rSHUNCHAR),
parm2))
return 0;
if (!sdParseExplicitSyntax(sdBuilder, parm2))
return 0;
}
else {
if (!givenError)
message(ParserMessages::unknownPublicSyntax,
StringMessageArg(id.string()));
sdBuilder.valid = 0;
}
}
}
else {
if (!sdParseExplicitSyntax(sdBuilder, parm))
return 0;
}
if (!sdBuilder.sd->scopeInstance()) {
// we know the significant chars now
ISet<WideChar> invalidSgmlChar;
sdBuilder.syntax->checkSgmlChar(*sdBuilder.sd,
0,
1,
invalidSgmlChar);
if (!invalidSgmlChar.isEmpty())
message(ParserMessages::invalidSgmlChar, CharsetMessageArg(invalidSgmlChar));
}
checkSyntaxNamelen(*sdBuilder.syntax);
checkSwitchesMarkup(sdBuilder.switcher);
return 1;
}
Boolean Parser::sdParseExplicitSyntax(SdBuilder &sdBuilder,
SdParam &parm)
{
typedef Boolean (Parser::*SdParser)(SdBuilder &, SdParam &);
static SdParser parsers[] = {
&Parser::sdParseShunchar,
&Parser::sdParseSyntaxCharset,
&Parser::sdParseFunction,
&Parser::sdParseNaming,
&Parser::sdParseDelim,
&Parser::sdParseNames,
&Parser::sdParseQuantity
};
for (size_t i = 0; i < SIZEOF(parsers); i++)
if (!(this->*(parsers[i]))(sdBuilder, parm))
return 0;
return 1;
}
const StandardSyntaxSpec *Parser::lookupSyntax(const PublicId &id)
{
PublicId::OwnerType ownerType;
if (!id.getOwnerType(ownerType) || ownerType != PublicId::ISO)
return 0;
StringC str;
if (!id.getOwner(str))
return 0;
if (str != sd().execToInternal("ISO 8879:1986")
&& str != sd().execToInternal("ISO 8879-1986"))
return 0;
PublicId::TextClass textClass;
if (!id.getTextClass(textClass) || textClass != PublicId::SYNTAX)
return 0;
if (!id.getDescription(str))
return 0;
if (str == sd().execToInternal("Reference"))
return &refSyntax;
if (str == sd().execToInternal("Core"))
return &coreSyntax;
return 0;
}
Boolean Parser::sdParseSyntaxCharset(SdBuilder &sdBuilder, SdParam &parm)
{
UnivCharsetDesc desc;
if (!sdParseCharset(sdBuilder, parm, 0, sdBuilder.syntaxCharsetDecl, desc))
return 0;
sdBuilder.syntaxCharset.set(desc);
checkSwitches(sdBuilder.switcher, sdBuilder.syntaxCharset);
for (size_t i = 0; i < sdBuilder.switcher.nSwitches(); i++)
if (!sdBuilder.syntaxCharsetDecl.charDeclared(sdBuilder.switcher.switchTo(i)))
message(ParserMessages::switchNotInCharset,
NumberMessageArg(sdBuilder.switcher.switchTo(i)));
ISet<WideChar> missing;
findMissingMinimum(sdBuilder.syntaxCharset, missing);
if (!missing.isEmpty())
message(ParserMessages::missingMinimumChars,
CharsetMessageArg(missing));
return 1;
}
Boolean Parser::sdParseShunchar(SdBuilder &sdBuilder, SdParam &parm)
{
if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rNONE,
SdParam::reservedName + Sd::rCONTROLS,
SdParam::number), parm))
return 0;
if (parm.type == SdParam::reservedName + Sd::rNONE) {
if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rBASESET),
parm))
return 0;
return 1;
}
if (parm.type == SdParam::reservedName + Sd::rCONTROLS)
sdBuilder.syntax->setShuncharControls();
else {
if (parm.n <= charMax)
sdBuilder.syntax->addShunchar(Char(parm.n));
}
for (;;) {
if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rBASESET,
SdParam::number), parm))
return 0;
if (parm.type != SdParam::number)
break;
if (parm.n <= charMax)
sdBuilder.syntax->addShunchar(Char(parm.n));
}
return 1;
}
Boolean Parser::sdParseFunction(SdBuilder &sdBuilder, SdParam &parm)
{
static Sd::ReservedName standardNames[3] = {
Sd::rRE, Sd::rRS, Sd::rSPACE
};
for (int i = 0; i < 3; i++) {
if (!parseSdParam(AllowedSdParams(SdParam::reservedName
+ standardNames[i]),
parm))
return 0;
if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
return 0;
Char c;
if (translateSyntax(sdBuilder, parm.n, c)) {
if (checkNotFunction(*sdBuilder.syntax, c))
sdBuilder.syntax->setStandardFunction(Syntax::StandardFunction(i), c);
else
sdBuilder.valid = 0;
}
}
Boolean haveMsichar = 0;
Boolean haveMsochar = 0;
for (;;) {
if (!parseSdParam(sdBuilder.externalSyntax
? AllowedSdParams(SdParam::name, SdParam::paramLiteral)
: AllowedSdParams(SdParam::name),
parm))
return 0;
Boolean nameWasLiteral;
size_t nameMarkupIndex;
if (currentMarkup())
nameMarkupIndex = currentMarkup()->size() - 1;
Boolean invalidName = 0;
StringC name;
if (parm.type == SdParam::paramLiteral) {
nameWasLiteral = 1;
if (!translateSyntax(sdBuilder, parm.paramLiteralText, name))
invalidName = 1;
}
else {
parm.token.swap(name);
nameWasLiteral = 0;
}
if (!parseSdParam(nameWasLiteral
? AllowedSdParams(SdParam::reservedName + Sd::rFUNCHAR,
SdParam::reservedName + Sd::rMSICHAR,
SdParam::reservedName + Sd::rMSOCHAR,
SdParam::reservedName + Sd::rMSSCHAR,
SdParam::reservedName + Sd::rSEPCHAR)
: AllowedSdParams(SdParam::reservedName + Sd::rFUNCHAR,
SdParam::reservedName + Sd::rMSICHAR,
SdParam::reservedName + Sd::rMSOCHAR,
SdParam::reservedName + Sd::rMSSCHAR,
SdParam::reservedName + Sd::rSEPCHAR,
SdParam::reservedName + Sd::rLCNMSTRT),
parm))
return 0;
if (parm.type == SdParam::reservedName + Sd::rLCNMSTRT) {
if (name != sd().reservedName(Sd::rNAMING))
message(ParserMessages::namingBeforeLcnmstrt,
StringMessageArg(name));
else if (currentMarkup())
currentMarkup()->changeToSdReservedName(nameMarkupIndex, Sd::rNAMING);
break;
}
if (!nameWasLiteral) {
StringC tem;
name.swap(tem);
if (!translateName(sdBuilder, tem, name))
invalidName = 1;
}
Syntax::FunctionClass functionClass;
switch (parm.type) {
case SdParam::reservedName + Sd::rFUNCHAR:
functionClass = Syntax::cFUNCHAR;
break;
case SdParam::reservedName + Sd::rMSICHAR:
haveMsichar = 1;
functionClass = Syntax::cMSICHAR;
break;
case SdParam::reservedName + Sd::rMSOCHAR:
haveMsochar = 1;
functionClass = Syntax::cMSOCHAR;
break;
case SdParam::reservedName + Sd::rMSSCHAR:
functionClass = Syntax::cMSSCHAR;
break;
case SdParam::reservedName + Sd::rSEPCHAR:
functionClass = Syntax::cSEPCHAR;
break;
default:
CANNOT_HAPPEN();
}
if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
return 0;
Char c;
if (translateSyntax(sdBuilder, parm.n, c)
&& checkNotFunction(*sdBuilder.syntax, c)
&& !invalidName) {
Char tem;
if (sdBuilder.syntax->lookupFunctionChar(name, &tem))
message(ParserMessages::duplicateFunctionName, StringMessageArg(name));
else
sdBuilder.syntax->addFunctionChar(name, functionClass, c);
}
}
if (haveMsochar && !haveMsichar)
message(ParserMessages::msocharRequiresMsichar);
return 1;
}
Boolean Parser::sdParseNaming(SdBuilder &sdBuilder, SdParam &parm)
{
static Sd::ReservedName keys[6] = {
Sd::rUCNMSTRT, Sd::rNAMESTRT, Sd::rLCNMCHAR, Sd::rUCNMCHAR, Sd::rNAMECHAR,
Sd::rNAMECASE
};
int isNamechar = 0;
ISet<Char> nameStartChar;
ISet<Char> nameChar;
do {
String<SyntaxChar> lc;
Vector<size_t> rangeIndex;
enum PrevParam {
paramNone,
paramNumber,
paramOther
} prevParam = paramNone;
for (;;) {
switch (prevParam) {
case paramNone:
if (!parseSdParam(AllowedSdParams(SdParam::paramLiteral, SdParam::number),
parm))
return 0;
break;
case paramNumber:
if (!parseSdParam(AllowedSdParams(SdParam::reservedName
+ keys[isNamechar * 3],
SdParam::paramLiteral,
SdParam::number,
SdParam::minus),
parm))
return 0;
break;
case paramOther:
if (!parseSdParam(AllowedSdParams(SdParam::reservedName
+ keys[isNamechar * 3],
SdParam::paramLiteral,
SdParam::number),
parm))
return 0;
break;
}
switch (parm.type) {
case SdParam::paramLiteral:
if (prevParam == paramNone)
break;
// fall through
case SdParam::number:
if (!sdBuilder.externalSyntax && !sdBuilder.enr) {
message(ParserMessages::enrRequired);
sdBuilder.enr = 1;
}
break;
default:
break;
}
prevParam = (parm.type == SdParam::number ? paramNumber : paramOther);
if (parm.type == SdParam::minus) {
if (!parseSdParam(AllowedSdParams(SdParam::number),
parm))
return 0;
if (parm.n < lc[lc.size() - 1])
message(ParserMessages::sdInvalidRange);
else {
if (parm.n > lc[lc.size() - 1] + 1)
rangeIndex.push_back(lc.size() - 1);
lc += SyntaxChar(parm.n);
}
}
else {
sdParamConvertToLiteral(parm);
if (parm.type != SdParam::paramLiteral)
break;
lc += parm.paramLiteralText;
}
}
size_t lcPos = 0;
size_t rangeIndexPos = 0;
unsigned long rangeLeft = 0;
SyntaxChar nextRangeChar;
ISet<Char> &set = isNamechar ? nameChar : nameStartChar;
String<SyntaxChar> chars;
Boolean runOut = 0;
prevParam = paramNone;
for (;;) {
switch (prevParam) {
case paramNone:
if (!parseSdParam(AllowedSdParams(SdParam::paramLiteral, SdParam::number),
parm))
return 0;
break;
case paramNumber:
if (!parseSdParam(AllowedSdParams(SdParam::reservedName
+ keys[isNamechar * 3 + 1],
SdParam::reservedName
+ keys[isNamechar * 3 + 2],
SdParam::paramLiteral,
SdParam::number,
SdParam::minus),
parm))
return 0;
break;
case paramOther:
if (!parseSdParam(AllowedSdParams(SdParam::reservedName
+ keys[isNamechar * 3 + 1],
SdParam::reservedName
+ keys[isNamechar * 3 + 2],
SdParam::paramLiteral,
SdParam::number),
parm))
return 0;
break;
}
switch (parm.type) {
case SdParam::paramLiteral:
if (prevParam == paramNone)
break;
// fall through
case SdParam::number:
if (!sdBuilder.externalSyntax && !sdBuilder.enr) {
message(ParserMessages::enrRequired);
sdBuilder.enr = 1;
}
break;
default:
break;
}
prevParam = (parm.type == SdParam::number ? paramNumber : paramOther);
if (parm.type == SdParam::minus) {
if (!parseSdParam(AllowedSdParams(SdParam::number),
parm))
return 0;
ASSERT(chars.size() == 1);
SyntaxChar start = chars[0];
SyntaxChar end = parm.n;
if (start > end)
message(ParserMessages::sdInvalidRange);
else {
size_t count = end + 1 - start;
while (count > 0) {
if (rangeLeft == 0
&& rangeIndexPos < rangeIndex.size()
&& rangeIndex[rangeIndexPos] == lcPos) {
rangeLeft = 1 + lc[lcPos + 1] - lc[lcPos];
nextRangeChar = lc[lcPos];
lcPos += 2;
rangeIndexPos += 1;
}
Char c;
if (rangeLeft > 0) {
rangeLeft--;
c = nextRangeChar++;
}
else if (lcPos < lc.size())
c = lc[lcPos++];
else {
c = start;
runOut = 1;
}
if (c == start && count > 1 && (runOut || rangeLeft > 0)) {
size_t n;
if (runOut)
n = count;
else if (rangeLeft < count) {
// rangeLeft + 1 <= count
n = rangeLeft + 1;
rangeLeft = 0;
}
else {
// count < rangeLeft + 1
n = count;
rangeLeft -= n - 1;
nextRangeChar += n - 1;
}
translateRange(sdBuilder, start, start + (n - 1), set);
count -= n;
start += n;
}
else {
Char transLc, transUc;
if (translateSyntax(sdBuilder, c, transLc)
&& translateSyntax(sdBuilder, start, transUc)) {
set.add(transLc);
if (transLc != transUc) {
set.add(transUc);
sdBuilder.syntax->addSubst(transLc, transUc);
}
}
count--;
start++;
}
}
}
chars.resize(0);
}
else {
for (size_t i = 0; i < chars.size(); i++) {
if (rangeLeft == 0
&& rangeIndexPos < rangeIndex.size()
&& rangeIndex[rangeIndexPos] == lcPos) {
rangeLeft = 1 + lc[lcPos + 1] - lc[lcPos];
nextRangeChar = lc[lcPos];
lcPos += 2;
rangeIndexPos += 1;
}
Char c;
if (rangeLeft > 0) {
rangeLeft--;
c = nextRangeChar++;
}
else if (lcPos < lc.size())
c = lc[lcPos++];
else {
runOut = 1;
c = chars[i];
}
// map from c to chars[i]
Char transLc, transUc;
if (translateSyntax(sdBuilder, c, transLc)
&& translateSyntax(sdBuilder, chars[i], transUc)) {
set.add(transLc);
if (transLc != transUc) {
set.add(transUc);
sdBuilder.syntax->addSubst(transLc, transUc);
}
}
}
sdParamConvertToLiteral(parm);
if (parm.type != SdParam::paramLiteral)
break;
parm.paramLiteralText.swap(chars);
}
}
if ((runOut && !sdBuilder.externalSyntax)
|| rangeLeft > 0 || lcPos < lc.size())
message(isNamechar
? ParserMessages::nmcharLength
: ParserMessages::nmstrtLength);
if (parm.type == SdParam::reservedName + keys[isNamechar * 3 + 1]) {
if (!sdBuilder.externalSyntax && !sdBuilder.enr) {
message(ParserMessages::enrRequired);
sdBuilder.enr = 1;
}
prevParam = paramNone;
for (;;) {
switch (prevParam) {
case paramNone:
if (!parseSdParam(AllowedSdParams(SdParam::paramLiteral, SdParam::number),
parm))
return 0;
break;
case paramNumber:
if (!parseSdParam(AllowedSdParams(SdParam::reservedName
+ keys[isNamechar * 3 + 2],
SdParam::paramLiteral,
SdParam::number,
SdParam::minus),
parm))
return 0;
break;
case paramOther:
if (!parseSdParam(AllowedSdParams(SdParam::reservedName
+ keys[isNamechar * 3 + 2],
SdParam::paramLiteral,
SdParam::number),
parm))
return 0;
break;
}
prevParam = (parm.type == SdParam::number ? paramNumber : paramOther);
if (parm.type == SdParam::minus) {
SyntaxChar prevNumber = parm.n;
if (!parseSdParam(AllowedSdParams(SdParam::number),
parm))
return 0;
if (parm.n < prevNumber)
message(ParserMessages::sdInvalidRange);
else if (parm.n > prevNumber)
translateRange(sdBuilder, prevNumber + 1, parm.n, set);
}
else {
sdParamConvertToLiteral(parm);
if (parm.type != SdParam::paramLiteral)
break;
for (size_t i = 0; i < parm.paramLiteralText.size(); i++) {
Char trans;
if (translateSyntax(sdBuilder, parm.paramLiteralText[i], trans))
set.add(trans);
}
}
}
}
if (!checkNmchars(set, *sdBuilder.syntax))
sdBuilder.valid = 0;
} while (!isNamechar++);
ISet<WideChar> bad;
intersectCharSets(nameStartChar, nameChar, bad);
if (!bad.isEmpty()) {
sdBuilder.valid = 0;
message(ParserMessages::nmcharNmstrt, CharsetMessageArg(bad));
}
sdBuilder.syntax->addNameStartCharacters(nameStartChar);
sdBuilder.syntax->addNameCharacters(nameChar);
if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rGENERAL),
parm))
return 0;
if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rNO,
SdParam::reservedName + Sd::rYES),
parm))
return 0;
sdBuilder.syntax->setNamecaseGeneral(parm.type
== SdParam::reservedName + Sd::rYES);
if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rENTITY),
parm))
return 0;
if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rNO,
SdParam::reservedName + Sd::rYES),
parm))
return 0;
sdBuilder.syntax->setNamecaseEntity(parm.type
== SdParam::reservedName + Sd::rYES);
return 1;
}
Boolean Parser::checkNmchars(const ISet<Char> &set, const Syntax &syntax)
{
Boolean valid = 1;
ISet<WideChar> bad;
intersectCharSets(set, *syntax.charSet(Syntax::nameStart), bad);
if (!bad.isEmpty()) {
message(ParserMessages::nmcharLetter, CharsetMessageArg(bad));
valid = 0;
bad.clear();
}
intersectCharSets(set, *syntax.charSet(Syntax::digit), bad);
if (!bad.isEmpty()) {
message(ParserMessages::nmcharDigit, CharsetMessageArg(bad));
valid = 0;
bad.clear();
}
Char funChar;
if (syntax.getStandardFunction(Syntax::fRE, funChar)
&& set.contains(funChar)) {
message(ParserMessages::nmcharRe, NumberMessageArg(funChar));
valid = 0;
}
if (syntax.getStandardFunction(Syntax::fRS, funChar)
&& set.contains(funChar)) {
message(ParserMessages::nmcharRs, NumberMessageArg(funChar));
valid = 0;
}
if (syntax.getStandardFunction(Syntax::fSPACE, funChar)
&& set.contains(funChar)) {
message(ParserMessages::nmcharSpace, NumberMessageArg(funChar));
valid = 0;
}
intersectCharSets(set, *syntax.charSet(Syntax::sepchar), bad);
if (!bad.isEmpty()) {
message(ParserMessages::nmcharSepchar, CharsetMessageArg(bad));
valid = 0;
}
return valid;
}
// Result is a ISet<WideChar>, so it can be used with CharsetMessageArg.
void Parser::intersectCharSets(const ISet<Char> &s1, const ISet<Char> &s2,
ISet<WideChar> &inter)
{
ISetIter<Char> i1(s1);
ISetIter<Char> i2(s2);
Char min1, max1, min2, max2;
if (!i1.next(min1, max1))
return;
if (!i2.next(min2, max2))
return;
for (;;) {
if (max1 < min2) {
if (!i1.next(min1, max1))
break;
}
else if (max2 < min1) {
if (!i2.next(min2, max2))
break;
}
else {
// min2 <= max1
// min1 <= max2
Char min = min1 > min2 ? min1 : min2;
Char max = max1 < max2 ? max1 : max2;
inter.addRange(min, max);
if (max2 > max) {
if (!i1.next(min1, max1))
break;
}
else {
if (!i2.next(min2, max2))
break;
}
}
}
}
Boolean Parser::sdParseDelim(SdBuilder &sdBuilder, SdParam &parm)
{
if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rDELIM),
parm))
return 0;
if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rGENERAL),
parm))
return 0;
if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSGMLREF),
parm))
return 0;
PackedBoolean delimGeneralSpecified[Syntax::nDelimGeneral];
for (int i = 0; i < Syntax::nDelimGeneral; i++)
delimGeneralSpecified[i] = 0;
for (;;) {
if (!parseSdParam(AllowedSdParams(SdParam::generalDelimiterName,
SdParam::reservedName + Sd::rSHORTREF),
parm))
return 0;
if (parm.type == SdParam::reservedName + Sd::rSHORTREF)
break;
Syntax::DelimGeneral delimGeneral = parm.delimGeneralIndex;
if (delimGeneralSpecified[delimGeneral])
message(ParserMessages::duplicateDelimGeneral,
StringMessageArg(sd().generalDelimiterName(delimGeneral)));
switch (delimGeneral) {
case Syntax::dHCRO:
case Syntax::dNESTC:
requireWWW(sdBuilder);
break;
default:
break;
}
if (!parseSdParam(sdBuilder.externalSyntax
? AllowedSdParams(SdParam::paramLiteral,
SdParam::number)
: AllowedSdParams(SdParam::paramLiteral),
parm))
return 0;
sdParamConvertToLiteral(parm);
StringC str;
if (parm.paramLiteralText.size() == 0)
message(ParserMessages::sdEmptyDelimiter);
else if (translateSyntax(sdBuilder, parm.paramLiteralText, str)) {
const SubstTable<Char> *table = sdBuilder.syntax->generalSubstTable();
for (size_t i = 0; i < str.size(); i++)
table->subst(str[i]);
if (checkGeneralDelim(*sdBuilder.syntax, str)
&& !delimGeneralSpecified[delimGeneral])
sdBuilder.syntax->setDelimGeneral(delimGeneral, str);
else
sdBuilder.valid = 0;
}
delimGeneralSpecified[delimGeneral] = 1;
}
if (sdBuilder.syntax->delimGeneral(Syntax::dNET).size()
&& !sdBuilder.syntax->delimGeneral(Syntax::dNESTC).size())
sdBuilder.syntax->setDelimGeneral(Syntax::dNESTC,
sdBuilder.syntax->delimGeneral(Syntax::dNET));
if (!setRefDelimGeneral(*sdBuilder.syntax,
sdBuilder.syntaxCharset,
sdBuilder.sd->internalCharset(),
sdBuilder.switcher))
sdBuilder.valid = 0;
if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSGMLREF,
SdParam::reservedName + Sd::rNONE),
parm))
return 0;
if (parm.type == SdParam::reservedName + Sd::rSGMLREF) {
if (!addRefDelimShortref(*sdBuilder.syntax,
sdBuilder.syntaxCharset,
sdBuilder.sd->internalCharset(),
sdBuilder.switcher))
sdBuilder.valid = 0;
}
String<SyntaxChar> lastLiteral;
for (;;) {
if (!parseSdParam(sdBuilder.externalSyntax
? AllowedSdParams(SdParam::paramLiteral,
SdParam::number,
SdParam::minus,
SdParam::reservedName + Sd::rNAMES)
: AllowedSdParams(SdParam::paramLiteral,
SdParam::reservedName + Sd::rNAMES),
parm))
return 0;
sdParamConvertToLiteral(parm);
if (parm.type == SdParam::minus) {
if (!parseSdParam(AllowedSdParams(SdParam::paramLiteral,
SdParam::number),
parm))
return 0;
sdParamConvertToLiteral(parm);
if (parm.paramLiteralText.size() == 0)
message(ParserMessages::sdEmptyDelimiter);
else if (lastLiteral.size() != 1
|| parm.paramLiteralText.size() != 1)
message(ParserMessages::sdRangeNotSingleChar);
else if (parm.paramLiteralText[0] < lastLiteral[0])
message(ParserMessages::sdInvalidRange);
else if (parm.paramLiteralText[0] != lastLiteral[0]) {
ISet<Char> shortrefChars;
translateRange(sdBuilder,
lastLiteral[0] + 1,
parm.paramLiteralText[0],
shortrefChars);
ISet<WideChar> duplicates;
intersectCharSets(shortrefChars,
sdBuilder.syntax->delimShortrefSimple(),
duplicates);
int nComplexShortrefs = sdBuilder.syntax->nDelimShortrefComplex();
for (int i = 0; i < nComplexShortrefs; i++) {
const StringC &delim = sdBuilder.syntax->delimShortrefComplex(i);
if (delim.size() == 1 && shortrefChars.contains(delim[0]))
duplicates.add(delim[0]);
}
if (!duplicates.isEmpty())
message(ParserMessages::duplicateDelimShortrefSet,
CharsetMessageArg(duplicates));
sdBuilder.syntax->addDelimShortrefs(shortrefChars,
sdBuilder.sd->internalCharset());
}
lastLiteral.resize(0);
}
else if (parm.type == SdParam::paramLiteral) {
parm.paramLiteralText.swap(lastLiteral);
StringC str;
if (lastLiteral.size() == 0)
message(ParserMessages::sdEmptyDelimiter);
else if (translateSyntax(sdBuilder, lastLiteral, str)) {
const SubstTable<Char> *table = sdBuilder.syntax->generalSubstTable();
for (size_t i = 0; i < str.size(); i++)
table->subst(str[i]);
if (str.size() == 1
|| checkShortrefDelim(*sdBuilder.syntax,
sdBuilder.sd->internalCharset(),
str)) {
if (sdBuilder.syntax->isValidShortref(str))
message(ParserMessages::duplicateDelimShortref,
StringMessageArg(str));
else
sdBuilder.syntax->addDelimShortref(str,
sdBuilder.sd->internalCharset());
}
}
}
else
break;
}
return 1;
}
Boolean Parser::sdParseNames(SdBuilder &sdBuilder, SdParam &parm)
{
if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSGMLREF),
parm))
return 0;
for (;;) {
if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rQUANTITY,
SdParam::referenceReservedName),
parm))
return 0;
if (parm.type == SdParam::reservedName + Sd::rQUANTITY)
break;
Syntax::ReservedName reservedName = parm.reservedNameIndex;
switch (reservedName) {
case Syntax::rALL:
case Syntax::rDATA:
case Syntax::rIMPLICIT:
requireWWW(sdBuilder);
break;
default:
break;
}
if (!parseSdParam(sdBuilder.externalSyntax
? AllowedSdParams(SdParam::name, SdParam::paramLiteral)
: AllowedSdParams(SdParam::name),
parm))
return 0;
StringC transName;
if (parm.type == SdParam::name
? translateName(sdBuilder, parm.token, transName)
: translateSyntax(sdBuilder, parm.paramLiteralText, transName)) {
Syntax::ReservedName tem;
if (sdBuilder.syntax->lookupReservedName(transName, &tem))
message(ParserMessages::ambiguousReservedName,
StringMessageArg(transName));
else {
if (transName.size() == 0
|| !sdBuilder.syntax->isNameStartCharacter(transName[0])) {
message(ParserMessages::reservedNameSyntax,
StringMessageArg(transName));
transName.resize(0);
}
size_t i;
// Check that its a valid name in the declared syntax
// (- and . might not be name characters).
for (i = 1; i < transName.size(); i++)
if (!sdBuilder.syntax->isNameCharacter(transName[i])) {
message(ParserMessages::reservedNameSyntax,
StringMessageArg(transName));
transName.resize(0);
break;
}
for (i = 0; i < transName.size(); i++)
sdBuilder.syntax->generalSubstTable()->subst(transName[i]);
if (sdBuilder.syntax->reservedName(reservedName).size() > 0)
message(ParserMessages::duplicateReservedName,
StringMessageArg(syntax().reservedName(reservedName)));
else if (transName.size() > 0)
sdBuilder.syntax->setName(reservedName, transName);
else
sdBuilder.valid = 0;
}
}
}
setRefNames(*sdBuilder.syntax, sdBuilder.sd->internalCharset(), sdBuilder.www);
static Syntax::ReservedName functionNameIndex[3] = {
Syntax::rRE, Syntax::rRS, Syntax::rSPACE
};
for (int i = 0; i < 3; i++) {
const StringC &functionName
= sdBuilder.syntax->reservedName(functionNameIndex[i]);
Char tem;
if (sdBuilder.syntax->lookupFunctionChar(functionName, &tem))
message(ParserMessages::duplicateFunctionName, StringMessageArg(functionName));
}
sdBuilder.syntax->enterStandardFunctionNames();
return 1;
}
Boolean Parser::sdParseQuantity(SdBuilder &sdBuilder, SdParam &parm)
{
if (!parseSdParam(sdBuilder.www
? AllowedSdParams(SdParam::reservedName + Sd::rNONE,
SdParam::reservedName + Sd::rSGMLREF)
: AllowedSdParams(SdParam::reservedName + Sd::rSGMLREF),
parm))
return 0;
int final = (sdBuilder.externalSyntax
? int(SdParam::eE)
: SdParam::reservedName + Sd::rFEATURES);
if (parm.type == SdParam::reservedName + Sd::rNONE) {
for (int i = 0; i < Syntax::nQuantity; i++) {
if (i != Syntax::qNORMSEP)
sdBuilder.syntax->setQuantity(Syntax::Quantity(i), Syntax::unlimited);
}
if (!parseSdParam(AllowedSdParams(final, SdParam::reservedName + Sd::rENTITIES), parm))
return 0;
}
else {
for (;;) {
if (!parseSdParam(sdBuilder.www
? AllowedSdParams(SdParam::quantityName,
final,
SdParam::reservedName + Sd::rENTITIES)
: AllowedSdParams(SdParam::quantityName, final),
parm))
return 0;
if (parm.type != SdParam::quantityName)
break;
Syntax::Quantity quantity = parm.quantityIndex;
if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
return 0;
sdBuilder.syntax->setQuantity(quantity, parm.n);
}
if (sdBuilder.sd->scopeInstance()) {
for (int i = 0; i < Syntax::nQuantity; i++)
if (sdBuilder.syntax->quantity(Syntax::Quantity(i))
< syntax().quantity(Syntax::Quantity(i)))
message(ParserMessages::scopeInstanceQuantity,
StringMessageArg(sd().quantityName(Syntax::Quantity(i))));
}
}
if (parm.type == SdParam::reservedName + Sd::rENTITIES)
return sdParseEntities(sdBuilder, parm);
else
return 1;
}
Boolean Parser::sdParseEntities(SdBuilder &sdBuilder, SdParam &parm)
{
int final = (sdBuilder.externalSyntax
? int(SdParam::eE)
: SdParam::reservedName + Sd::rFEATURES);
for (;;) {
if (!parseSdParam(AllowedSdParams(final, SdParam::paramLiteral), parm))
return 0;
if (parm.type != SdParam::paramLiteral)
break;
StringC name;
if (!translateSyntax(sdBuilder, parm.paramLiteralText, name))
name.resize(0);
else if (name.size() == 0
|| !sdBuilder.syntax->isNameStartCharacter(name[0])) {
message(ParserMessages::entityNameSyntax, StringMessageArg(name));
name.resize(0);
}
else {
// Check that its a valid name in the declared syntax
for (size_t i = 1; i < name.size(); i++)
if (!sdBuilder.syntax->isNameCharacter(name[i])) {
message(ParserMessages::entityNameSyntax, StringMessageArg(name));
name.resize(0);
break;
}
}
if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
return 0;
Char c;
if (translateSyntax(sdBuilder, parm.n, c) && name.size())
sdBuilder.syntax->addEntity(name, c);
}
return 1;
}
Boolean Parser::sdParseFeatures(SdBuilder &sdBuilder, SdParam &parm)
{
struct FeatureInfo {
Sd::ReservedName name;
enum {
none,
boolean,
number,
netenabl
} arg;
};
static FeatureInfo features[] = {
{ Sd::rMINIMIZE, FeatureInfo::none },
{ Sd::rDATATAG, FeatureInfo::boolean },
{ Sd::rOMITTAG, FeatureInfo::boolean },
{ Sd::rRANK, FeatureInfo::boolean },
{ Sd::rSHORTTAG, FeatureInfo::none },
{ Sd::rSTARTTAG, FeatureInfo::none },
{ Sd::rEMPTY, FeatureInfo::boolean },
{ Sd::rUNCLOSED, FeatureInfo::boolean },
{ Sd::rNETENABL, FeatureInfo::netenabl },
{ Sd::rENDTAG, FeatureInfo::none },
{ Sd::rEMPTY, FeatureInfo::boolean },
{ Sd::rUNCLOSED, FeatureInfo::boolean },
{ Sd::rATTRIB, FeatureInfo::none },
{ Sd::rDEFAULT, FeatureInfo::boolean },
{ Sd::rOMITNAME, FeatureInfo::boolean },
{ Sd::rVALUE, FeatureInfo::boolean },
{ Sd::rEMPTYNRM, FeatureInfo::boolean },
{ Sd::rIMPLYDEF, FeatureInfo::none },
{ Sd::rATTLIST, FeatureInfo::boolean },
{ Sd::rDOCTYPE, FeatureInfo::boolean },
{ Sd::rELEMENT, FeatureInfo::boolean },
{ Sd::rENTITY, FeatureInfo::boolean },
{ Sd::rNOTATION, FeatureInfo::boolean },
{ Sd::rLINK, FeatureInfo::none },
{ Sd::rSIMPLE, FeatureInfo::number },
{ Sd::rIMPLICIT, FeatureInfo::boolean },
{ Sd::rEXPLICIT, FeatureInfo::number },
{ Sd::rOTHER, FeatureInfo::none },
{ Sd::rCONCUR, FeatureInfo::number },
{ Sd::rSUBDOC, FeatureInfo::number },
{ Sd::rFORMAL, FeatureInfo::boolean },
{ Sd::rURN, FeatureInfo::boolean },
{ Sd::rKEEPRSRE, FeatureInfo::boolean },
{ Sd::rVALIDITY, FeatureInfo::none },
};
int booleanFeature = 0;
int numberFeature = 0;
for (size_t i = 0; i < SIZEOF(features); i++) {
switch (features[i].name) {
case Sd::rSTARTTAG:
// SHORTTAG
if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSTARTTAG,
SdParam::reservedName + Sd::rNO,
SdParam::reservedName + Sd::rYES),
parm))
return 0;
if (parm.type == SdParam::reservedName + Sd::rSTARTTAG)
break;
sdBuilder.sd->setShorttag(parm.type == SdParam::reservedName + Sd::rYES);
while (features[++i].name != Sd::rEMPTYNRM)
if (features[i].arg == FeatureInfo::boolean)
booleanFeature++;
// fall through
case Sd::rEMPTYNRM:
if (!parseSdParam(AllowedSdParams(SdParam::reservedName
+ features[i].name,
SdParam::reservedName
+ features[i + 7].name), parm))
return 0;
if (parm.type == SdParam::reservedName + features[i].name)
requireWWW(sdBuilder);
else {
booleanFeature += 6;
i += 7;
}
break;
case Sd::rURN:
if (!parseSdParam(AllowedSdParams(SdParam::reservedName + features[i].name,
SdParam::reservedName + Sd::rAPPINFO), parm))
return 0;
if (parm.type == SdParam::reservedName + Sd::rAPPINFO)
return 1;
requireWWW(sdBuilder);
break;
default:
if (!parseSdParam(AllowedSdParams(SdParam::reservedName
+ features[i].name), parm))
return 0;
break;
}
switch (features[i].arg) {
case FeatureInfo::number:
if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rNO,
SdParam::reservedName + Sd::rYES),
parm))
return 0;
if (parm.type == SdParam::reservedName + Sd::rYES) {
if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
return 0;
sdBuilder.sd->setNumberFeature(Sd::NumberFeature(numberFeature++),
parm.n);
}
else
sdBuilder.sd->setNumberFeature(Sd::NumberFeature(numberFeature++),
0);
break;
case FeatureInfo::netenabl:
if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rNO,
SdParam::reservedName + Sd::rIMMEDNET,
SdParam::reservedName + Sd::rALL),
parm))
return 0;
switch (parm.type) {
case SdParam::reservedName + Sd::rNO:
sdBuilder.sd->setStartTagNetEnable(Sd::netEnableNo);
break;
case SdParam::reservedName + Sd::rIMMEDNET:
sdBuilder.sd->setStartTagNetEnable(Sd::netEnableImmednet);
break;
case SdParam::reservedName + Sd::rALL:
sdBuilder.sd->setStartTagNetEnable(Sd::netEnableAll);
break;
}
break;
case FeatureInfo::boolean:
if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rNO,
SdParam::reservedName + Sd::rYES),
parm))
return 0;
switch (features[i].name) {
#if 0
case Sd::rDATATAG:
if (parm.type == SdParam::reservedName + Sd::rYES)
message(ParserMessages::datatagNotImplemented);
break;
#endif
case Sd::rEMPTYNRM:
if (parm.type == SdParam::reservedName + Sd::rNO
&& sdBuilder.sd->startTagNetEnable() == Sd::netEnableImmednet) {
message(ParserMessages::immednetRequiresEmptynrm);
sdBuilder.valid = 0;
}
break;
}
sdBuilder.sd->setBooleanFeature(Sd::BooleanFeature(booleanFeature++),
parm.type == (SdParam::reservedName
+ Sd::rYES));
break;
}
}
if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rNOASSERT,
SdParam::reservedName + Sd::rTYPE),
parm))
return 0;
switch (parm.type) {
case SdParam::reservedName + Sd::rNOASSERT:
sdBuilder.sd->setTypeValid(0);
break;
case SdParam::reservedName + Sd::rTYPE:
sdBuilder.sd->setTypeValid(1);
break;
}
if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rENTITIES), parm))
return 0;
if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rNOASSERT,
SdParam::reservedName + Sd::rREF),
parm))
return 0;
if (parm.type == SdParam::reservedName + Sd::rNOASSERT) {
sdBuilder.sd->setIntegrallyStored(0);
sdBuilder.sd->setEntityRef(Sd::entityRefAny);
}
else {
if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rNONE,
SdParam::reservedName + Sd::rINTERNAL,
SdParam::reservedName + Sd::rANY),
parm))
return 0;
switch (parm.type) {
case SdParam::reservedName + Sd::rNONE:
sdBuilder.sd->setEntityRef(Sd::entityRefNone);
break;
case SdParam::reservedName + Sd::rINTERNAL:
sdBuilder.sd->setEntityRef(Sd::entityRefInternal);
break;
case SdParam::reservedName + Sd::rANY:
sdBuilder.sd->setEntityRef(Sd::entityRefAny);
break;
}
if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rINTEGRAL), parm))
return 0;
if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rNO,
SdParam::reservedName + Sd::rYES),
parm))
return 0;
sdBuilder.sd->setIntegrallyStored(parm.type == (SdParam::reservedName + Sd::rYES));
}
return parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rAPPINFO),
parm);
}
Boolean Parser::sdParseAppinfo(SdBuilder &, SdParam &parm)
{
Location location(currentLocation());
if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rNONE,
SdParam::minimumLiteral),
parm))
return 0;
AppinfoEvent *event;
if (parm.type == SdParam::minimumLiteral)
event = new (eventAllocator()) AppinfoEvent(parm.literalText, location);
else
event = new (eventAllocator()) AppinfoEvent(location);
eventHandler().appinfo(event);
return 1;
}
Boolean Parser::sdParseSeealso(SdBuilder &sdBuilder, SdParam &parm)
{
SdParam::Type final = sdBuilder.external ? SdParam::eE : SdParam::mdc;
if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSEEALSO, final), parm))
return 0;
if (parm.type == final)
return 1;
requireWWW(sdBuilder);
if (!parseSdParam(AllowedSdParams(SdParam::minimumLiteral,
SdParam::reservedName + Sd::rNONE), parm))
return 0;
if (parm.type == SdParam::reservedName + Sd::rNONE)
return parseSdParam(AllowedSdParams(final), parm);
do {
if (!parseSdParam(AllowedSdParams(SdParam::minimumLiteral, final), parm))
return 0;
} while (parm.type != final);
return 1;
}
Boolean Parser::translateSyntax(CharSwitcher &switcher,
const CharsetInfo &syntaxCharset,
const CharsetInfo &internalCharset,
WideChar syntaxChar,
Char &docChar)
{
syntaxChar = switcher.subst(syntaxChar);
UnivChar univChar;
if (syntaxCharset.descToUniv(syntaxChar, univChar)
&& univToDescCheck(internalCharset, univChar, docChar))
return 1;
message(sd().internalCharsetIsDocCharset()
? ParserMessages::translateSyntaxCharDoc
: ParserMessages::translateSyntaxCharInternal,
NumberMessageArg(syntaxChar));
return 0;
}
void Parser::translateRange(SdBuilder &sdBuilder, SyntaxChar start,
SyntaxChar end, ISet<Char> &chars)
{
#if 0
do {
Char docChar;
if (!translateSyntax(sdBuilder, start, docChar))
break;
chars.add(docChar);
} while (start++ != end);
#endif
for (;;) {
SyntaxChar doneUpTo = end;
Boolean gotSwitch = 0;
WideChar firstSwitch;
for (size_t i = 0; i < sdBuilder.switcher.nSwitches(); i++) {
WideChar c = sdBuilder.switcher.switchFrom(i);
if (start <= c && c <= end) {
if (!gotSwitch) {
gotSwitch = 1;
firstSwitch = c;
}
else if (c < firstSwitch)
firstSwitch = c;
}
}
if (gotSwitch && firstSwitch == start) {
doneUpTo = start;
Char docChar;
if (translateSyntax(sdBuilder, start, docChar))
chars.add(docChar);
}
else {
if (gotSwitch)
doneUpTo = firstSwitch - 1;
Char docChar;
Number count;
if (translateSyntaxNoSwitch(sdBuilder, start, docChar, count)) {
if (count - 1 < doneUpTo - start)
doneUpTo = start + (count - 1);
chars.addRange(docChar, docChar + (doneUpTo - start));
}
}
if (doneUpTo == end)
break;
start = doneUpTo + 1;
}
}
Boolean Parser::translateSyntax(SdBuilder &sdBuilder,
WideChar syntaxChar, Char &docChar)
{
Number count;
return translateSyntaxNoSwitch(sdBuilder,
sdBuilder.switcher.subst(syntaxChar),
docChar,
count);
}
Boolean Parser::translateSyntaxNoSwitch(SdBuilder &sdBuilder,
WideChar syntaxChar, Char &docChar,
Number &count)
{
Number n;
StringC str;
CharsetDeclRange::Type type;
const PublicId *id;
if (sdBuilder.sd->internalCharsetIsDocCharset()
&& sdBuilder.syntaxCharsetDecl.getCharInfo(syntaxChar,
id,
type,
n,
str,
count)) {
ISet<WideChar> docChars;
switch (type) {
case CharsetDeclRange::unused:
break;
case CharsetDeclRange::string:
sdBuilder.sd->docCharsetDecl().stringToChar(str, docChars);
break;
case CharsetDeclRange::number:
{
Number count2;
sdBuilder.sd->docCharsetDecl().numberToChar(id, n, docChars, count2);
if (!docChars.isEmpty() && count2 < count)
count = count2;
}
break;
default:
CANNOT_HAPPEN();
}
if (!docChars.isEmpty()) {
if (!docChars.isSingleton() && options().warnSgmlDecl)
message(ParserMessages::ambiguousDocCharacter,
CharsetMessageArg(docChars));
ISetIter<WideChar> iter(docChars);
WideChar min, max;
if (iter.next(min, max) && min <= charMax) {
docChar = Char(min);
return 1;
}
}
}
UnivChar univChar;
WideChar alsoMax, count2;
if (sdBuilder.syntaxCharset.descToUniv(syntaxChar, univChar, alsoMax)
&& univToDescCheck(sdBuilder.sd->internalCharset(), univChar, docChar,
count2)) {
count = (alsoMax - syntaxChar) + 1;
if (count2 < count)
count = count2;
return 1;
}
sdBuilder.valid = 0;
message(sd().internalCharsetIsDocCharset()
? ParserMessages::translateSyntaxCharDoc
: ParserMessages::translateSyntaxCharInternal,
NumberMessageArg(syntaxChar));
return 0;
}
Boolean Parser::translateSyntax(SdBuilder &sdBuilder,
const String<SyntaxChar> &syntaxString,
StringC &docString)
{
docString.resize(0);
int ret = 1;
for (size_t i = 0; i < syntaxString.size(); i++) {
Char c;
if (translateSyntax(sdBuilder, syntaxString[i], c))
docString += c;
else
ret = 0;
}
return ret;
}
Boolean Parser::translateName(SdBuilder &sdBuilder,
const StringC &name,
StringC &str)
{
str.resize(name.size());
for (size_t i = 0; i < name.size(); i++) {
UnivChar univChar;
Boolean ret = sd().internalCharset().descToUniv(name[i], univChar);
// Might switch hyphen or period.
univChar = translateUniv(univChar, sdBuilder.switcher,
sdBuilder.syntaxCharset);
ASSERT(ret != 0);
if (!univToDescCheck(sdBuilder.sd->internalCharset(), univChar, str[i])) {
message(ParserMessages::translateDocChar, NumberMessageArg(univChar));
sdBuilder.valid = 0;
return 0;
}
}
return 1;
}
UnivChar Parser::translateUniv(UnivChar univChar,
CharSwitcher &switcher,
const CharsetInfo &syntaxCharset)
{
WideChar syntaxChar;
ISet<WideChar> syntaxChars;
if (syntaxCharset.univToDesc(univChar, syntaxChar, syntaxChars) != 1) {
message(ParserMessages::missingSyntaxChar,
NumberMessageArg(univChar));
return univChar;
}
SyntaxChar tem = switcher.subst(syntaxChar);
if (tem != syntaxChar && !syntaxCharset.descToUniv(tem, univChar))
message(sd().internalCharsetIsDocCharset()
? ParserMessages::translateSyntaxCharDoc
: ParserMessages::translateSyntaxCharInternal,
NumberMessageArg(tem));
return univChar;
}
Boolean Parser::checkNotFunction(const Syntax &syn, Char c)
{
if (syn.charSet(Syntax::functionChar)->contains(c)) {
message(ParserMessages::oneFunction, NumberMessageArg(c));
return 0;
}
else
return 1;
}
// Check that it has at most one B sequence and that it
// is not adjacent to a blank sequence.
Boolean Parser::checkShortrefDelim(const Syntax &syn,
const CharsetInfo &charset,
const StringC &delim)
{
Boolean hadB = 0;
Char letterB = charset.execToDesc('B');
const ISet<Char> *bSet = syn.charSet(Syntax::blank);
for (size_t i = 0; i < delim.size(); i++)
if (delim[i] == letterB) {
if (hadB) {
message(ParserMessages::multipleBSequence, StringMessageArg(delim));
return 0;
}
hadB = 1;
if (i > 0 && bSet->contains(delim[i - 1])) {
message(ParserMessages::blankAdjacentBSequence,
StringMessageArg(delim));
return 0;
}
while (i + 1 < delim.size() && delim[i + 1] == letterB)
i++;
if (i < delim.size() - 1 && bSet->contains(delim[i + 1])) {
message(ParserMessages::blankAdjacentBSequence,
StringMessageArg(delim));
return 0;
}
}
return 1;
}
Boolean Parser::checkGeneralDelim(const Syntax &syn, const StringC &delim)
{
const ISet<Char> *functionSet = syn.charSet(Syntax::functionChar);
if (delim.size() > 0) {
Boolean allFunction = 1;
for (size_t i = 0; i < delim.size(); i++)
if (!functionSet->contains(delim[i]))
allFunction = 0;
if (allFunction) {
message(ParserMessages::generalDelimAllFunction,
StringMessageArg(delim));
return 0;
}
}
return 1;
}
Boolean Parser::checkSwitches(CharSwitcher &switcher,
const CharsetInfo &syntaxCharset)
{
Boolean valid = 1;
for (size_t i = 0; i < switcher.nSwitches(); i++) {
WideChar c[2];
c[0] = switcher.switchFrom(i);
c[1] = switcher.switchTo(i);
for (int j = 0; j < 2; j++) {
UnivChar univChar;
if (syntaxCharset.descToUniv(c[j], univChar)) {
// Check that it is not Digit Lcletter or Ucletter
if ((UnivCharsetDesc::a <= univChar
&& univChar < UnivCharsetDesc::a + 26)
|| (UnivCharsetDesc::A <= univChar
&& univChar < UnivCharsetDesc::A + 26)
|| (UnivCharsetDesc::zero <= univChar
&& univChar < UnivCharsetDesc::zero + 10)) {
message(ParserMessages::switchLetterDigit,
NumberMessageArg(univChar));
valid = 0;
}
}
}
}
return valid;
}
Boolean Parser::checkSwitchesMarkup(CharSwitcher &switcher)
{
Boolean valid = 1;
size_t nSwitches = switcher.nSwitches();
for (size_t i = 0; i < nSwitches; i++)
if (!switcher.switchUsed(i)) {
// If the switch wasn't used,
// then the character wasn't a markup character.
message(ParserMessages::switchNotMarkup,
NumberMessageArg(switcher.switchFrom(i)));
valid = 0;
}
return valid;
}
void Parser::checkSyntaxNamelen(const Syntax &syn)
{
size_t namelen = syn.namelen();
int i;
for (i = 0; i < Syntax::nDelimGeneral; i++)
if (syn.delimGeneral(i).size() > namelen)
message(ParserMessages::delimiterLength,
StringMessageArg(syn.delimGeneral(i)),
NumberMessageArg(namelen));
for (i = 0; i < syn.nDelimShortrefComplex(); i++)
if (syn.delimShortrefComplex(i).size() > namelen)
message(ParserMessages::delimiterLength,
StringMessageArg(syn.delimShortrefComplex(i)),
NumberMessageArg(namelen));
for (i = 0; i < Syntax::nNames; i++)
if (syn.reservedName(Syntax::ReservedName(i)).size() > namelen
&& options().warnSgmlDecl)
message(ParserMessages::reservedNameLength,
StringMessageArg(syn.reservedName(Syntax::ReservedName(i))),
NumberMessageArg(namelen));
}
Boolean Parser::univToDescCheck(const CharsetInfo &charset, UnivChar from,
Char &to)
{
WideChar c;
ISet<WideChar> descSet;
unsigned ret = charset.univToDesc(from, c, descSet);
if (ret > 1) {
if (options().warnSgmlDecl)
message(ParserMessages::ambiguousDocCharacter,
CharsetMessageArg(descSet));
ret = 1;
}
if (ret && c <= charMax) {
to = Char(c);
return 1;
}
return 0;
}
Boolean Parser::univToDescCheck(const CharsetInfo &charset, UnivChar from,
Char &to, WideChar &count)
{
WideChar c;
ISet<WideChar> descSet;
unsigned ret = charset.univToDesc(from, c, descSet, count);
if (ret > 1) {
if (options().warnSgmlDecl)
message(ParserMessages::ambiguousDocCharacter,
CharsetMessageArg(descSet));
ret = 1;
}
if (ret && c <= charMax) {
to = Char(c);
return 1;
}
return 0;
}
Boolean Parser::parseSdParam(const AllowedSdParams &allow,
SdParam &parm)
{
for (;;) {
Token token = getToken(sdMode);
switch (token) {
case tokenUnrecognized:
if (reportNonSgmlCharacter())
break;
{
message(ParserMessages::markupDeclarationCharacter,
StringMessageArg(currentToken()),
AllowedSdParamsMessageArg(allow, sdPointer()));
}
return 0;
case tokenEe:
if (allow.param(SdParam::eE)) {
parm.type = SdParam::eE;
if (currentMarkup())
currentMarkup()->addEntityEnd();
popInputStack();
return 1;
}
message(ParserMessages::sdEntityEnd,
AllowedSdParamsMessageArg(allow, sdPointer()));
return 0;
case tokenS:
if (currentMarkup())
currentMarkup()->addS(currentChar());
break;
case tokenCom:
if (!parseComment(sdcomMode))
return 0;
break;
case tokenDso:
case tokenGrpo:
case tokenMinusGrpo:
case tokenPlusGrpo:
case tokenRni:
case tokenPeroNameStart:
case tokenPeroGrpo:
sdParamInvalidToken(token, allow);
return 0;
case tokenMinus:
if (allow.param(SdParam::minus)) {
parm.type = SdParam::minus;
return 1;
}
sdParamInvalidToken(tokenMinus, allow);
return 0;
case tokenLita:
case tokenLit:
{
Boolean lita = (token == tokenLita);
if (allow.param(SdParam::minimumLiteral)) {
if (!parseMinimumLiteral(lita, parm.literalText))
return 0;
parm.type = SdParam::minimumLiteral;
if (currentMarkup())
currentMarkup()->addLiteral(parm.literalText);
}
else if (allow.param(SdParam::paramLiteral)) {
if (!parseSdParamLiteral(lita, parm.paramLiteralText))
return 0;
parm.type = SdParam::paramLiteral;
}
else if (allow.param(SdParam::systemIdentifier)) {
if (!parseSdSystemIdentifier(lita, parm.literalText))
return 0;
parm.type = SdParam::systemIdentifier;
}
else {
sdParamInvalidToken(token, allow);
return 0;
}
return 1;
}
case tokenMdc:
if (allow.param(SdParam::mdc)) {
parm.type = SdParam::mdc;
if (currentMarkup())
currentMarkup()->addDelim(Syntax::dMDC);
return 1;
}
sdParamInvalidToken(tokenMdc, allow);
return 0;
case tokenNameStart:
{
extendNameToken(syntax().namelen(), ParserMessages::nameLength);
getCurrentToken(syntax().generalSubstTable(), parm.token);
if (allow.param(SdParam::capacityName)) {
if (sd().lookupCapacityName(parm.token, parm.capacityIndex)) {
parm.type = SdParam::capacityName;
if (currentMarkup())
currentMarkup()->addName(currentInput());
return 1;
}
}
if (allow.param(SdParam::referenceReservedName)) {
if (syntax().lookupReservedName(parm.token,
&parm.reservedNameIndex)) {
parm.type = SdParam::referenceReservedName;
if (currentMarkup())
currentMarkup()->addName(currentInput());
return 1;
}
}
if (allow.param(SdParam::generalDelimiterName)) {
if (sd().lookupGeneralDelimiterName(parm.token,
parm.delimGeneralIndex)) {
parm.type = SdParam::generalDelimiterName;
if (currentMarkup())
currentMarkup()->addName(currentInput());
return 1;
}
}
if (allow.param(SdParam::quantityName)) {
if (sd().lookupQuantityName(parm.token, parm.quantityIndex)) {
parm.type = SdParam::quantityName;
if (currentMarkup())
currentMarkup()->addName(currentInput());
return 1;
}
}
for (int i = 0;; i++) {
SdParam::Type t = allow.get(i);
if (t == SdParam::invalid)
break;
if (t >= SdParam::reservedName) {
Sd::ReservedName sdReservedName
= Sd::ReservedName(t - SdParam::reservedName);
if (parm.token == sd().reservedName(sdReservedName)) {
parm.type = t;
if (currentMarkup())
currentMarkup()->addSdReservedName(sdReservedName,
currentInput());
return 1;
}
}
}
if (allow.param(SdParam::name)) {
parm.type = SdParam::name;
if (currentMarkup())
currentMarkup()->addName(currentInput());
return 1;
}
{
message(ParserMessages::sdInvalidNameToken,
StringMessageArg(parm.token),
AllowedSdParamsMessageArg(allow, sdPointer()));
}
return 0;
}
case tokenDigit:
if (allow.param(SdParam::number)) {
extendNumber(syntax().namelen(), ParserMessages::numberLength);
parm.type = SdParam::number;
unsigned long n;
if (!stringToNumber(currentInput()->currentTokenStart(),
currentInput()->currentTokenLength(),
n)
|| n > Number(-1)) {
message(ParserMessages::numberTooBig,
StringMessageArg(currentToken()));
parm.n = Number(-1);
}
else {
if (currentMarkup())
currentMarkup()->addNumber(currentInput());
parm.n = Number(n);
}
Token token = getToken(sdMode);
if (token == tokenNameStart)
message(ParserMessages::psRequired);
currentInput()->ungetToken();
return 1;
}
sdParamInvalidToken(tokenDigit, allow);
return 0;
default:
CANNOT_HAPPEN();
}
}
}
// This is a separate function, because we might want SyntaxChar
// to be bigger than Char.
Boolean Parser::parseSdParamLiteral(Boolean lita, String<SyntaxChar> &str)
{
Location loc(currentLocation());
loc += 1;
SdText text(loc, lita); // first character of content
str.resize(0);
const unsigned refLitlen = Syntax::referenceQuantity(Syntax::qLITLEN);
Mode mode = lita ? sdplitaMode : sdplitMode;
for (;;) {
Token token = getToken(mode);
switch (token) {
case tokenEe:
message(ParserMessages::literalLevel);
return 0;
case tokenUnrecognized:
if (reportNonSgmlCharacter())
break;
if (options().errorSignificant)
message(ParserMessages::sdLiteralSignificant,
StringMessageArg(currentToken()));
text.addChar(currentChar(), currentLocation());
break;
case tokenCroDigit:
{
InputSource *in = currentInput();
Location startLocation = currentLocation();
in->discardInitial();
extendNumber(syntax().namelen(), ParserMessages::numberLength);
unsigned long n;
Boolean valid;
if (!stringToNumber(in->currentTokenStart(),
in->currentTokenLength(),
n)
|| n > syntaxCharMax) {
message(ParserMessages::syntaxCharacterNumber,
StringMessageArg(currentToken()));
valid = 0;
}
else
valid = 1;
Owner<Markup> markupPtr;
if (eventsWanted().wantPrologMarkup()) {
markupPtr = new Markup;
markupPtr->addDelim(Syntax::dCRO);
markupPtr->addNumber(in);
switch (getToken(refMode)) {
case tokenRefc:
markupPtr->addDelim(Syntax::dREFC);
break;
case tokenRe:
markupPtr->addRefEndRe();
if (options().warnRefc)
message(ParserMessages::refc);
break;
default:
if (options().warnRefc)
message(ParserMessages::refc);
break;
}
}
else if (options().warnRefc) {
if (getToken(refMode) != tokenRefc)
message(ParserMessages::refc);
}
else
(void)getToken(refMode);
if (valid)
text.addChar(SyntaxChar(n),
Location(new NumericCharRefOrigin(startLocation,
currentLocation().index()
+ currentInput()->currentTokenLength()
- startLocation.index(),
markupPtr),
0));
}
break;
case tokenCroNameStart:
if (!parseNamedCharRef())
return 0;
break;
case tokenLit:
case tokenLita:
goto done;
case tokenPeroNameStart:
case tokenPeroGrpo:
message(ParserMessages::sdParameterEntity);
{
Location loc(currentLocation());
const Char *p = currentInput()->currentTokenStart();
for (size_t count = currentInput()->currentTokenLength();
count > 0;
count--) {
text.addChar(*p++, loc);
loc += 1;
}
}
break;
case tokenChar:
if (text.string().size() > refLitlen
&& currentChar() == syntax().standardFunction(Syntax::fRE)) {
message(ParserMessages::parameterLiteralLength, NumberMessageArg(refLitlen));
// guess that the closing delimiter has been omitted
message(ParserMessages::literalClosingDelimiter);
return 0;
}
text.addChar(currentChar(), currentLocation());
break;
}
}
done:
if (text.string().size() > refLitlen)
message(ParserMessages::parameterLiteralLength,
NumberMessageArg(refLitlen));
str = text.string();
if (currentMarkup())
currentMarkup()->addSdLiteral(text);
return 1;
}
Boolean Parser::parseSdSystemIdentifier(Boolean lita, Text &text)
{
text.addStartDelim(currentLocation());
const unsigned refLitlen = Syntax::referenceQuantity(Syntax::qLITLEN);
Mode mode = lita ? sdslitaMode : sdslitMode;
for (;;) {
Token token = getToken(mode);
switch (token) {
case tokenEe:
message(ParserMessages::literalLevel);
return 0;
case tokenUnrecognized:
if (reportNonSgmlCharacter())
break;
if (options().errorSignificant)
message(ParserMessages::sdLiteralSignificant,
StringMessageArg(currentToken()));
text.addChar(currentChar(), currentLocation());
break;
case tokenLit:
case tokenLita:
text.addEndDelim(currentLocation(), token == tokenLita);
goto done;
case tokenChar:
text.addChar(currentChar(), currentLocation());
break;
default:
CANNOT_HAPPEN();
}
}
done:
if (text.string().size() > refLitlen)
message(ParserMessages::systemIdentifierLength,
NumberMessageArg(refLitlen));
if (currentMarkup())
currentMarkup()->addLiteral(text);
return 1;
}
Boolean Parser::stringToNumber(const Char *s, size_t length,
unsigned long &result)
{
unsigned long n = 0;
for (; length > 0; length--, s++) {
int val = sd().digitWeight(*s);
if (n <= ULONG_MAX/10 && (n *= 10) <= ULONG_MAX - val)
n += val;
else
return 0;
}
result = n;
return 1;
}
void Parser::sdParamInvalidToken(Token token,
const AllowedSdParams &allow)
{
message(ParserMessages::sdParamInvalidToken,
TokenMessageArg(token, sdMode, syntaxPointer(), sdPointer()),
AllowedSdParamsMessageArg(allow, sdPointer()));
}
void Parser::sdParamConvertToLiteral(SdParam &parm)
{
if (parm.type == SdParam::number) {
parm.type = SdParam::paramLiteral;
parm.paramLiteralText.resize(1);
parm.paramLiteralText[0] = parm.n;
}
}
void Parser::requireWWW(SdBuilder &sdBuilder)
{
if (!sdBuilder.www) {
message(ParserMessages::wwwRequired);
sdBuilder.www = 1;
}
}
AllowedSdParams::AllowedSdParams(SdParam::Type arg1, SdParam::Type arg2,
SdParam::Type arg3, SdParam::Type arg4,
SdParam::Type arg5, SdParam::Type arg6)
{
allow_[0] = arg1;
allow_[1] = arg2;
allow_[2] = arg3;
allow_[3] = arg4;
allow_[4] = arg5;
allow_[5] = arg6;
}
Boolean AllowedSdParams::param(SdParam::Type t) const
{
for (int i = 0; i < maxAllow && allow_[i] != SdParam::invalid; i++)
if (t == allow_[i])
return 1;
return 0;
}
SdParam::Type AllowedSdParams::get(int i) const
{
return i < 0 || i >= maxAllow ? SdParam::Type(SdParam::invalid) : allow_[i];
}
AllowedSdParamsMessageArg::AllowedSdParamsMessageArg(
const AllowedSdParams &allow,
const ConstPtr<Sd> &sd)
: allow_(allow), sd_(sd)
{
}
MessageArg *AllowedSdParamsMessageArg::copy() const
{
return new AllowedSdParamsMessageArg(*this);
}
void AllowedSdParamsMessageArg::append(MessageBuilder &builder) const
{
for (int i = 0;; i++) {
SdParam::Type type = allow_.get(i);
if (type == SdParam::invalid)
break;
if (i != 0)
builder.appendFragment(ParserMessages::listSep);
switch (type) {
case SdParam::eE:
builder.appendFragment(ParserMessages::entityEnd);
break;
case SdParam::minimumLiteral:
builder.appendFragment(ParserMessages::minimumLiteral);
break;
case SdParam::mdc:
{
builder.appendFragment(ParserMessages::delimStart);
Char c = sd_->execToInternal('>');
builder.appendChars(&c, 1);
builder.appendFragment(ParserMessages::delimEnd);
}
break;
case SdParam::number:
builder.appendFragment(ParserMessages::number);
break;
case SdParam::name:
builder.appendFragment(ParserMessages::name);
break;
case SdParam::paramLiteral:
builder.appendFragment(ParserMessages::parameterLiteral);
break;
case SdParam::systemIdentifier:
builder.appendFragment(ParserMessages::systemIdentifier);
break;
case SdParam::capacityName:
builder.appendFragment(ParserMessages::capacityName);
break;
case SdParam::generalDelimiterName:
builder.appendFragment(ParserMessages::generalDelimiteRoleName);
break;
case SdParam::referenceReservedName:
builder.appendFragment(ParserMessages::referenceReservedName);
break;
case SdParam::quantityName:
builder.appendFragment(ParserMessages::quantityName);
break;
case SdParam::minus:
{
StringC str(sd_->execToInternal("..."));
builder.appendChars(str.data(), str.size());
break;
}
default:
{
StringC str(sd_->reservedName(type - SdParam::reservedName));
builder.appendChars(str.data(), str.size());
break;
}
}
}
}
SdBuilder::SdBuilder()
: valid(1), externalSyntax(0), enr(0), www(0), external(0)
{
}
void SdBuilder::addFormalError(const Location &location,
const MessageType1 &message,
const StringC &id)
{
formalErrorList.insert(new SdFormalError(location, message, id));
}
SdFormalError::SdFormalError(const Location &location,
const MessageType1 &message,
const StringC &id)
: location_(location),
message_(&message),
id_(id)
{
}
void SdFormalError::send(ParserState &parser)
{
parser.Messenger::setNextLocation(location_);
parser.message(*message_, StringMessageArg(id_));
}
CharSwitcher::CharSwitcher()
{
}
void CharSwitcher::addSwitch(WideChar from, WideChar to)
{
switches_.push_back(from);
switches_.push_back(to);
switchUsed_.push_back(0);
}
SyntaxChar CharSwitcher::subst(WideChar c)
{
for (size_t i = 0; i < switches_.size(); i += 2)
if (switches_[i] == c) {
switchUsed_[i/2] = 1;
return switches_[i + 1];
}
return c;
}
size_t CharSwitcher::nSwitches() const
{
return switchUsed_.size();
}
Boolean CharSwitcher::switchUsed(size_t i) const
{
return switchUsed_[i];
}
WideChar CharSwitcher::switchFrom(size_t i) const
{
return switches_[i*2];
}
WideChar CharSwitcher::switchTo(size_t i) const
{
return switches_[i*2 + 1];
}
CharsetMessageArg::CharsetMessageArg(const ISet<WideChar> &set)
: set_(set)
{
}
MessageArg *CharsetMessageArg::copy() const
{
return new CharsetMessageArg(*this);
}
void CharsetMessageArg::append(MessageBuilder &builder) const
{
ISetIter<WideChar> iter(set_);
WideChar min, max;
Boolean first = 1;
while (iter.next(min, max)) {
if (first)
first = 0;
else
builder.appendFragment(ParserMessages::listSep);
builder.appendNumber(min);
if (max != min) {
builder.appendFragment(max == min + 1
? ParserMessages::listSep
: ParserMessages::rangeSep);
builder.appendNumber(max);
}
}
}
#ifdef SP_NAMESPACE
}
#endif