parseSd.cxx revision 7c478bd95313f5f23a4c958a745db2134aa03244
// Copyright (c) 1994, 1995, 1997 James Clark
// See the file COPYING for copying permission.
#pragma ident "%Z%%M% %I% %E% SMI"
#include "splib.h"
#include "Parser.h"
#include "macros.h"
#include "SdFormalError.h"
#include "MessageBuilder.h"
#include "ParserMessages.h"
#include "MessageArg.h"
#include "CharsetRegistry.h"
#include "ISetIter.h"
#include "token.h"
#include "TokenMessageArg.h"
#include "constant.h"
#include "SdText.h"
#include "NumericCharRefOrigin.h"
#ifdef SP_NAMESPACE
namespace SP_NAMESPACE {
#endif
// CharSwitcher: as visible here, the class declares only a default
// constructor, and its private section is empty.
// NOTE(review): switches_ and switchUsed_, which are read further down in
// this file, are presumably members of this class -- confirm against the
// full declaration.
class CharSwitcher {
public:
// Default constructor.
CharSwitcher();
private:
};
// Information about the SGML declaration being built.
// Only the default constructor is declared in the portion visible here.
// NOTE(review): later code passes an `sdBuilder` argument to helper
// functions; presumably it is an instance of this struct -- confirm.
struct SdBuilder {
// Default constructor.
SdBuilder();
};
// A MessageArg subclass that overrides nothing beyond what is declared
// here: a const copy() and a const append().
// NOTE(review): a comment elsewhere in this file says an ISet<WideChar>
// can be used with CharsetMessageArg, so it presumably formats a set of
// characters for a message -- confirm against the constructor.
class CharsetMessageArg : public MessageArg {
public:
// Returns a pointer to a MessageArg; does not modify this object.
MessageArg *copy() const;
// Takes the MessageBuilder by non-const reference; does not modify
// this object.
void append(MessageBuilder &) const;
private:
};
// NOTE(review): presumably describes one parameter read while parsing the
// SGML declaration -- confirm against the callers.
// The enumerator list and the union shown here may be partial: other
// SdParam:: enumerators are referenced elsewhere in this file.
struct SdParam {
// Type is an unsigned char; code in this file compares values of this
// type against the enumerators below.
typedef unsigned char Type;
enum {
eE,
mdc,
name,
// Base value for reserved names: code in this file tests
// `t >= SdParam::reservedName` to recognize such a value.
reservedName // Sd::ReservedName is added to this
};
union {
// Numeric payload; read elsewhere in this file as `parm.n`.
Number n;
};
};
// AllowedSdParams: the public section is empty in the portion visible here.
// NOTE(review): code further down compares a type against allow_[i];
// allow_ is presumably a member of this class -- confirm.
class AllowedSdParams {
public:
private:
// NOTE(review): presumably the capacity of the allow_ array, i.e. the
// largest number of parameter types that can be allowed at once -- confirm.
enum { maxAllow = 6 };
};
// A MessageArg subclass declaring a const copy() and a const append().
// NOTE(review): presumably used to list the allowed SdParam types in an
// error message -- confirm against the constructor and append() body.
class AllowedSdParamsMessageArg : public MessageArg {
public:
// Returns a pointer to a MessageArg; does not modify this object.
MessageArg *copy() const;
// Takes the MessageBuilder by non-const reference; does not modify
// this object.
void append(MessageBuilder &) const;
private:
};
// Description of a standard syntax; the file defines two static instances,
// coreSyntax and refSyntax.
// NOTE(review): code in this file also reads `spec.nAddedFunction`, which
// is not among the members visible here, so this view of the struct is
// probably partial -- confirm.
struct StandardSyntaxSpec {
// One added function, identified here only by a C-string name.
struct AddedFunction {
const char *name;
};
// Pointer to AddedFunction entries.
// NOTE(review): presumably an array whose length is nAddedFunction -- confirm.
const AddedFunction *addedFunction;
};
};
// File-local StandardSyntaxSpec instance; its address is taken elsewhere in
// this file (`spec = &coreSyntax`, `return &coreSyntax`).
// The initializer list is empty in the portion visible here.
static StandardSyntaxSpec coreSyntax = {
};
// File-local StandardSyntaxSpec instance; its address is returned elsewhere
// in this file (`return &refSyntax`).
// The initializer list is empty in the portion visible here.
static StandardSyntaxSpec refSyntax = {
};
{
if (cancelled()) {
allDone();
return;
}
// When document entity doesn't exist, don't give any errors
// other than the cannot open error.
if (currentInput()->accessError()) {
allDone();
return;
}
}
else
currentInput()->ungetToken();
giveUp();
return;
}
if (scanForSgmlDecl(initCharset)) {
if (options().warnExplicitSgmlDecl)
found = 1;
}
else {
currentInput()->ungetToken();
sd().docCharset(),
0,
messenger());
if (in) {
if (scanForSgmlDecl(initCharset))
found = 1;
else {
}
}
}
}
if (found) {
4);
}
giveUp();
return;
}
if (!parseSgmlDecl()) {
giveUp();
return;
}
// queue an SGML declaration event
currentInput()->nextIndex(),
currentMarkup()));
if (inputLevel() == 2) {
// FIXME perhaps check for junk after SGML declaration
}
}
else {
if (!implySgmlDecl()) {
giveUp();
return;
}
// queue an SGML declaration event
syntaxPointer()));
}
// Now we have sd and syntax set up, prepare to parse the prolog.
}
{
const StandardSyntaxSpec *spec;
else
spec = &coreSyntax;
return 0;
return 1;
}
const StandardSyntaxSpec &spec,
const CharsetInfo &internalCharset,
{
{ 0, 128, 0 },
};
valid = 0;
size_t i;
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
127, 255
};
};
for (i = 0; i < 3; i++) {
if (translateSyntax(switcher,
functionChars[i],
else
valid = 0;
}
for (i = 0; i < spec.nAddedFunction; i++) {
if (translateSyntax(switcher,
docChar);
else
valid = 0;
}
for (i = 0; i < 2; i++) {
if (translateSyntax(switcher,
nameChars[i],
docChar))
else
valid = 0;
}
valid = 0;
else
syn.setNamecaseEntity(0);
valid = 0;
valid = 0;
return valid;
}
const CharsetInfo &syntaxCharset,
const CharsetInfo &internalCharset,
{
// Column 3 from Figure 3
static const char delims[][2] = {
{ 38 },
{ 45, 45 },
{ 38, 35 },
{ 93 },
{ 91 },
{ 93 },
{ 91 },
{ 38 },
{ 60, 47 },
{ 41 },
{ 40 },
{ 0 }, // HCRO
{ 34 },
{ 39 },
{ 62 },
{ 60, 33 },
{ 45 },
{ 93, 93 },
{ 47 },
{ 47 }, // NESTC
{ 63 },
{ 124 },
{ 37 },
{ 62 },
{ 60, 63 },
{ 43 },
{ 59 },
{ 42 },
{ 35 },
{ 44 },
{ 60 },
{ 62 },
{ 61 },
};
for (int i = 0; i < Syntax::nDelimGeneral; i++)
size_t j;
Char c;
delim += c;
else {
valid = 0;
}
}
else
valid = 0;
}
}
return valid;
}
{
static const char *const referenceNames[] = {
"ALL",
"ANY",
"ATTLIST",
"CDATA",
"CONREF",
"CURRENT",
"DATA",
"DEFAULT",
"DOCTYPE",
"ELEMENT",
"EMPTY",
"ENDTAG",
"ENTITIES",
"ENTITY",
"FIXED",
"ID",
"IDLINK",
"IDREF",
"IDREFS",
"IGNORE",
"IMPLICIT",
"IMPLIED",
"INCLUDE",
"INITIAL",
"LINK",
"LINKTYPE",
"MD",
"MS",
"NAME",
"NAMES",
"NDATA",
"NMTOKEN",
"NMTOKENS",
"NOTATION",
"NUMBER",
"NUMBERS",
"NUTOKEN",
"NUTOKENS",
"O",
"PCDATA",
"PI",
"POSTLINK",
"PUBLIC",
"RCDATA",
"RE",
"REQUIRED",
"RESTORE",
"RS",
"SDATA",
"SHORTREF",
"SIMPLE",
"SPACE",
"STARTTAG",
"SUBDOC",
"SYSTEM",
"TEMP",
"USELINK",
"USEMAP"
};
switch (i) {
if (!www)
break;
// fall through
break;
// fall through
default:
{
break;
}
}
}
}
const CharsetInfo &syntaxCharset,
const CharsetInfo &internalCharset,
{
// Column 2 from Figure 4
static const char delimShortref[][3] = {
{ 9 },
{ 13 },
{ 10 },
{ 10, 66 },
{ 10, 13 },
{ 10, 66, 13 },
{ 66, 13 },
{ 32 },
{ 66, 66 },
{ 34 },
{ 35 },
{ 37 },
{ 39 },
{ 40 },
{ 41 },
{ 42 },
{ 43 },
{ 44 },
{ 45 },
{ 45, 45 },
{ 58 },
{ 59 },
{ 61 },
{ 64 },
{ 91 },
{ 93 },
{ 94 },
{ 95 },
{ 123 },
{ 124 },
{ 125 },
{ 126 },
};
size_t j;
Char c;
delim += c;
else
}
else
}
}
return 1;
}
// Determine whether the document starts with an SGML declaration.
// There is no current syntax at this point.
{
return 0;
return 0;
return 0;
return 0;
return 0;
return 0;
return 0;
return 0;
return 0;
return 0;
// Don't recognize this if SGML is followed by a name character.
if (c == InputSource::eE)
return 1;
return 0;
return 0;
return 1;
return 0;
return 0;
return 0;
return 1;
}
{
size_t i;
for (i = 0; i < 26; i++) {
missing += UnivCharsetDesc::A + i;
missing += UnivCharsetDesc::a + i;
}
for (i = 0; i < 10; i++) {
}
39, 40, 41, 43, 44, 45, 46, 47, 58, 61, 63
};
}
{
return 0;
return 0;
entity->generateSystemId(*this);
return 0;
}
if (currentMarkup())
sd().docCharset(),
0,
messenger()));
return 0;
}
}
};
return 0;
return 0;
}
delete p;
}
}
1, // get results in document character set
1, // get results in document character set
if (!invalidSgmlChar.isEmpty())
}
else
return 1;
}
ExternalId &id)
{
parm))
return 0;
return 1;
return 0;
const MessageType1 *err;
}
return 0;
return 1;
}
{
parm))
return 0;
parm))
return 0;
return 0;
return 0;
}
if (sd().internalCharsetIsDocCharset())
else {
}
return 1;
}
const CharsetInfo &toCharset,
{
do {
break;
}
else {
// FIXME better not to use univToDescCheck here
// Maybe OK if multiple internal chars corresponding to doc char
if (nMap)
}
}
}
{
// This is for checking whether the syntax reference character set
// is ISO 646 when SCOPE is INSTANCE.
do {
return 0;
const MessageType1 *err;
*err,
else if (!givenError) {
found = 0;
if (iter) {
found = 1;
}
}
}
}
if (!found)
}
else
found = 0;
if (!found)
maybeISO646 = 0;
parm))
return 0;
return 0;
do {
return 0;
if (isDocument
&& count > 0
maybeISO646 = 0;
}
else
parm))
return 0;
}
break;
break;
case SdParam::minimumLiteral:
{
if (adjCount > 256) {
adjCount = 256;
}
}
maybeISO646 = 0;
break;
default:
}
follow),
parm))
return 0;
if (!multiplyDeclared.isEmpty())
}
}
// If scope is INSTANCE, syntax reference character set
// must be same as reference.
Char nextDescMin = 0;
while (maybeISO646) {
if (nextDescMin != 128)
maybeISO646 = 0;
break;
}
maybeISO646 = 0;
}
if (!maybeISO646)
}
return 1;
}
{
for (;;) {
parm))
break;
return 1;
break;
parm))
break;
if (count > 0)
}
if (count > 256) {
count = 256;
}
}
}
return 0;
}
{
return univ;
else
}
{
parm))
return 0;
#if _MSC_VER == 1100
// Workaround for Visual C++ 5.0 bug
int
#else
#endif
pushed = 0;
parm);
return 0;
const MessageType1 *err;
*err,
pushed = 1;
else if (!givenError)
}
if (!pushed)
parm);
}
int i;
capacitySpecified[i] = 0;
return 0;
do {
return 0;
if (!capacitySpecified[capacityIndex]) {
}
else if (options().warnSgmlDecl)
parm))
return 0;
if (pushed)
parm);
return 1;
}
{
givenError = 0;
sd().internalCharset(),
messenger(),
sysid)) {
loc));
loc));
if (currentMarkup())
sd().docCharset(),
0,
messenger());
if (!in) {
givenError = 1;
return 0;
}
return 1;
}
return 0;
}
{
parm))
return 0;
return 1;
}
{
parm))
return 0;
parm))
return 0;
return 0;
const MessageType1 *err;
*err,
parm))
return 0;
return 0;
for (;;) {
SyntaxChar c = parm.n;
return 0;
parm))
return 0;
break;
}
}
if (spec) {
*spec,
}
else {
parm2))
return 0;
return 0;
}
else {
if (!givenError)
}
}
}
else {
return 0;
}
// we know the significant chars now
0,
1,
if (!invalidSgmlChar.isEmpty())
}
return 1;
}
{
};
return 0;
return 1;
}
{
return 0;
return 0;
return 0;
return 0;
return 0;
return &refSyntax;
return &coreSyntax;
return 0;
}
{
return 0;
return 1;
}
{
return 0;
parm))
return 0;
return 1;
}
else {
}
for (;;) {
return 0;
break;
}
return 1;
}
{
};
for (int i = 0; i < 3; i++) {
+ standardNames[i]),
parm))
return 0;
return 0;
Char c;
else
}
}
Boolean haveMsichar = 0;
Boolean haveMsochar = 0;
for (;;) {
parm))
return 0;
if (currentMarkup())
Boolean invalidName = 0;
nameWasLiteral = 1;
invalidName = 1;
}
else {
nameWasLiteral = 0;
}
parm))
return 0;
else if (currentMarkup())
break;
}
if (!nameWasLiteral) {
invalidName = 1;
}
break;
haveMsichar = 1;
break;
haveMsochar = 1;
break;
break;
break;
default:
}
return 0;
Char c;
&& !invalidName) {
else
}
}
if (haveMsochar && !haveMsichar)
return 1;
}
{
};
int isNamechar = 0;
do {
enum PrevParam {
for (;;) {
switch (prevParam) {
case paramNone:
parm))
return 0;
break;
case paramNumber:
parm))
return 0;
break;
case paramOther:
parm))
return 0;
break;
}
case SdParam::paramLiteral:
break;
// fall through
}
break;
default:
break;
}
parm))
return 0;
else {
}
}
else {
break;
}
}
size_t rangeIndexPos = 0;
unsigned long rangeLeft = 0;
for (;;) {
switch (prevParam) {
case paramNone:
parm))
return 0;
break;
case paramNumber:
parm))
return 0;
break;
case paramOther:
parm))
return 0;
break;
}
case SdParam::paramLiteral:
break;
// fall through
}
break;
default:
break;
}
parm))
return 0;
else {
while (count > 0) {
if (rangeLeft == 0
lcPos += 2;
rangeIndexPos += 1;
}
Char c;
if (rangeLeft > 0) {
rangeLeft--;
c = nextRangeChar++;
}
else {
c = start;
runOut = 1;
}
size_t n;
if (runOut)
n = count;
// rangeLeft + 1 <= count
n = rangeLeft + 1;
rangeLeft = 0;
}
else {
// count < rangeLeft + 1
n = count;
rangeLeft -= n - 1;
nextRangeChar += n - 1;
}
count -= n;
start += n;
}
else {
}
}
count--;
start++;
}
}
}
}
else {
if (rangeLeft == 0
lcPos += 2;
rangeIndexPos += 1;
}
Char c;
if (rangeLeft > 0) {
rangeLeft--;
c = nextRangeChar++;
}
else {
runOut = 1;
c = chars[i];
}
// map from c to chars[i]
}
}
}
break;
}
}
}
for (;;) {
switch (prevParam) {
case paramNone:
parm))
return 0;
break;
case paramNumber:
parm))
return 0;
break;
case paramOther:
parm))
return 0;
break;
}
parm))
return 0;
if (parm.n < prevNumber)
else if (parm.n > prevNumber)
}
else {
break;
}
}
}
}
} while (!isNamechar++);
}
parm))
return 0;
parm))
return 0;
parm))
return 0;
parm))
return 0;
return 1;
}
{
valid = 0;
}
valid = 0;
}
valid = 0;
}
valid = 0;
}
valid = 0;
}
valid = 0;
}
return valid;
}
// Result is an ISet<WideChar>, so it can be used with CharsetMessageArg.
{
return;
return;
for (;;) {
break;
}
break;
}
else {
// min2 <= max1
// min1 <= max2
break;
}
else {
break;
}
}
}
}
{
parm))
return 0;
parm))
return 0;
parm))
return 0;
for (int i = 0; i < Syntax::nDelimGeneral; i++)
delimGeneralSpecified[i] = 0;
for (;;) {
parm))
return 0;
break;
switch (delimGeneral) {
break;
default:
break;
}
parm))
return 0;
else
}
}
parm))
return 0;
}
for (;;) {
parm))
return 0;
parm))
return 0;
lastLiteral[0] + 1,
parm.paramLiteralText[0],
for (int i = 0; i < nComplexShortrefs; i++) {
}
if (!duplicates.isEmpty())
}
lastLiteral.resize(0);
}
if (lastLiteral.size() == 0)
str)) {
else
}
}
}
else
break;
}
return 1;
}
{
parm))
return 0;
for (;;) {
parm))
return 0;
break;
switch (reservedName) {
break;
default:
break;
}
parm))
return 0;
else {
}
size_t i;
// Check that it's a valid name in the declared syntax
// (- and . might not be name characters).
break;
}
else
}
}
}
};
for (int i = 0; i < 3; i++) {
const StringC &functionName
}
return 1;
}
{
parm))
return 0;
}
return 0;
}
else {
for (;;) {
parm))
return 0;
break;
return 0;
}
}
}
else
return 1;
}
{
for (;;) {
return 0;
break;
}
else {
// Check that it's a valid name in the declared syntax
break;
}
}
return 0;
Char c;
}
return 1;
}
{
struct FeatureInfo {
enum {
none,
} arg;
};
static FeatureInfo features[] = {
};
int booleanFeature = 0;
int numberFeature = 0;
// SHORTTAG
parm))
return 0;
break;
// fall through
return 0;
else {
booleanFeature += 6;
i += 7;
}
break;
return 0;
return 1;
break;
default:
return 0;
break;
}
case FeatureInfo::number:
parm))
return 0;
return 0;
parm.n);
}
else
0);
break;
case FeatureInfo::netenabl:
parm))
return 0;
break;
break;
break;
}
break;
case FeatureInfo::boolean:
parm))
return 0;
#if 0
break;
#endif
}
break;
}
break;
}
}
parm))
return 0;
break;
break;
}
return 0;
parm))
return 0;
}
else {
parm))
return 0;
break;
break;
break;
}
return 0;
parm))
return 0;
}
parm);
}
{
parm))
return 0;
else
return 1;
}
{
return 0;
return 1;
return 0;
do {
return 0;
return 1;
}
const CharsetInfo &syntaxCharset,
const CharsetInfo &internalCharset,
{
return 1;
return 0;
}
{
#if 0
do {
break;
#endif
for (;;) {
if (!gotSwitch) {
gotSwitch = 1;
firstSwitch = c;
}
else if (c < firstSwitch)
firstSwitch = c;
}
}
}
else {
if (gotSwitch)
}
}
break;
}
}
{
return translateSyntaxNoSwitch(sdBuilder,
count);
}
{
Number n;
id,
type,
n,
str,
count)) {
switch (type) {
case CharsetDeclRange::unused:
break;
case CharsetDeclRange::string:
break;
case CharsetDeclRange::number:
{
}
break;
default:
}
return 1;
}
}
}
count2)) {
return 1;
}
return 0;
}
{
int ret = 1;
Char c;
docString += c;
else
ret = 0;
}
return ret;
}
{
// Might switch hyphen or period.
return 0;
}
}
return 1;
}
const CharsetInfo &syntaxCharset)
{
return univChar;
}
return univChar;
}
{
return 0;
}
else
return 1;
}
// Check that it has at most one B sequence and that it
// is not adjacent to a blank sequence.
const CharsetInfo &charset,
{
if (hadB) {
return 0;
}
hadB = 1;
return 0;
}
i++;
return 0;
}
}
return 1;
}
{
allFunction = 0;
if (allFunction) {
return 0;
}
}
return 1;
}
const CharsetInfo &syntaxCharset)
{
WideChar c[2];
c[0] = switcher.switchFrom(i);
for (int j = 0; j < 2; j++) {
// Check that it is not a Digit, Lcletter or Ucletter
if ((UnivCharsetDesc::a <= univChar
|| (UnivCharsetDesc::A <= univChar
valid = 0;
}
}
}
}
return valid;
}
{
if (!switcher.switchUsed(i)) {
// If the switch wasn't used,
// then the character wasn't a markup character.
valid = 0;
}
return valid;
}
{
int i;
for (i = 0; i < Syntax::nDelimGeneral; i++)
for (i = 0; i < syn.nDelimShortrefComplex(); i++)
&& options().warnSgmlDecl)
}
{
WideChar c;
if (ret > 1) {
if (options().warnSgmlDecl)
ret = 1;
}
return 1;
}
return 0;
}
{
WideChar c;
if (ret > 1) {
if (options().warnSgmlDecl)
ret = 1;
}
return 1;
}
return 0;
}
{
for (;;) {
switch (token) {
case tokenUnrecognized:
if (reportNonSgmlCharacter())
break;
{
}
return 0;
case tokenEe:
if (currentMarkup())
currentMarkup()->addEntityEnd();
return 1;
}
return 0;
case tokenS:
if (currentMarkup())
break;
case tokenCom:
if (!parseComment(sdcomMode))
return 0;
break;
case tokenDso:
case tokenGrpo:
case tokenMinusGrpo:
case tokenPlusGrpo:
case tokenRni:
case tokenPeroNameStart:
case tokenPeroGrpo:
return 0;
case tokenMinus:
return 1;
}
return 0;
case tokenLita:
case tokenLit:
{
return 0;
if (currentMarkup())
}
return 0;
}
return 0;
}
else {
return 0;
}
return 1;
}
case tokenMdc:
if (currentMarkup())
return 1;
}
return 0;
case tokenNameStart:
{
if (currentMarkup())
return 1;
}
}
&parm.reservedNameIndex)) {
if (currentMarkup())
return 1;
}
}
if (currentMarkup())
return 1;
}
}
if (currentMarkup())
return 1;
}
}
for (int i = 0;; i++) {
break;
if (t >= SdParam::reservedName) {
if (currentMarkup())
currentInput());
return 1;
}
}
}
if (currentMarkup())
return 1;
}
{
}
return 0;
}
case tokenDigit:
unsigned long n;
n)
|| n > Number(-1)) {
}
else {
if (currentMarkup())
}
if (token == tokenNameStart)
currentInput()->ungetToken();
return 1;
}
return 0;
default:
}
}
}
// This is a separate function, because we might want SyntaxChar
// to be bigger than Char.
{
loc += 1;
for (;;) {
switch (token) {
case tokenEe:
return 0;
case tokenUnrecognized:
if (reportNonSgmlCharacter())
break;
if (options().errorSignificant)
break;
case tokenCroDigit:
{
in->discardInitial();
unsigned long n;
in->currentTokenLength(),
n)
|| n > syntaxCharMax) {
valid = 0;
}
else
valid = 1;
if (eventsWanted().wantPrologMarkup()) {
case tokenRefc:
break;
case tokenRe:
markupPtr->addRefEndRe();
break;
default:
break;
}
}
}
else
if (valid)
currentLocation().index()
+ currentInput()->currentTokenLength()
- startLocation.index(),
0));
}
break;
case tokenCroNameStart:
if (!parseNamedCharRef())
return 0;
break;
case tokenLit:
case tokenLita:
goto done;
case tokenPeroNameStart:
case tokenPeroGrpo:
{
count > 0;
count--) {
loc += 1;
}
}
break;
case tokenChar:
// guess that the closing delimiter has been omitted
return 0;
}
break;
}
}
done:
if (currentMarkup())
return 1;
}
{
for (;;) {
switch (token) {
case tokenEe:
return 0;
case tokenUnrecognized:
if (reportNonSgmlCharacter())
break;
if (options().errorSignificant)
break;
case tokenLit:
case tokenLita:
goto done;
case tokenChar:
break;
default:
}
}
done:
if (currentMarkup())
return 1;
}
unsigned long &result)
{
unsigned long n = 0;
n += val;
else
return 0;
}
result = n;
return 1;
}
const AllowedSdParams &allow)
{
}
{
}
}
{
}
}
{
}
{
if (t == allow_[i])
return 1;
return 0;
}
{
}
const AllowedSdParams &allow,
{
}
{
return new AllowedSdParamsMessageArg(*this);
}
{
for (int i = 0;; i++) {
break;
if (i != 0)
switch (type) {
break;
case SdParam::minimumLiteral:
break;
{
}
break;
break;
break;
case SdParam::paramLiteral:
break;
case SdParam::systemIdentifier:
break;
case SdParam::capacityName:
break;
case SdParam::generalDelimiterName:
break;
case SdParam::referenceReservedName:
break;
case SdParam::quantityName:
break;
{
break;
}
default:
{
break;
}
}
}
}
{
}
const MessageType1 &message,
{
}
const MessageType1 &message,
{
}
{
}
{
}
{
switchUsed_.push_back(0);
}
{
if (switches_[i] == c) {
return switches_[i + 1];
}
return c;
}
{
return switchUsed_.size();
}
{
return switchUsed_[i];
}
{
return switches_[i*2];
}
{
}
{
}
{
return new CharsetMessageArg(*this);
}
{
if (first)
first = 0;
else
: ParserMessages::rangeSep);
}
}
}
#ifdef SP_NAMESPACE
}
#endif