// xmlreader.cpp revision 6cd2e86330e1049942b9ce57d4f10bbe2542067d
/*
* Phoebe DOM Implementation.
*
* This is a C++ approximation of the W3C DOM model, which follows
* fairly closely the specifications in the various .idl files, copies of
* which are provided for reference. Most important is this one:
*
*
* Authors:
* Bob Jamison
*
* Copyright (C) 2005-2008 Bob Jamison
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "xmlreader.h"
#include "ucd.h"
#include "domimpl.h"
#include <stdio.h>
#include <stdarg.h>
namespace org
{
namespace w3c
{
namespace dom
{
//#########################################################################
//# E N T I T Y T A B L E
//#########################################################################
/**
 * One predefined XML entity reference.
 *
 * escape       - the escaped spelling as it appears in the input,
 *                including the leading '&' and trailing ';'
 * escapeLength - number of characters in 'escape'; the entity parser
 *                advances its position by this amount on a hit
 * value        - the literal text the entity stands for
 */
struct EntityInfo
{
    const char *escape;
    int escapeLength;
    const char *value;
};

/**
 * The five entities predefined by XML 1.0. The list is terminated by an
 * entry whose 'escape' is NULL.
 *
 * Each escape must be spelled out in full ("&amp;", not "&"): the stored
 * lengths (5, 4, 4, 6, 6) describe the escaped spellings, and a bare '&',
 * '<', etc. would make the table disagree with its own length column.
 */
static EntityInfo entityTable[] =
{
    { "&amp;"  , 5 , "&"  },
    { "&lt;"   , 4 , "<"  },
    { "&gt;"   , 4 , ">"  },
    { "&apos;" , 6 , "'"  },
    { "&quot;" , 6 , "\"" },
    { NULL     , 0 , "\0" }
};
//#########################################################################
//# M E S S A G E S
//#########################################################################
/**
* Message/error reporting routine (presumably error(), which other code in
* this file calls with a printf-style format string, e.g.
* error("unterminated entity")).
*
* NOTE(review): neither the function signature nor any body statement is
* present in this copy of the file; only the empty braces remain. Restore
* the definition from version control before building.
*/
{
}
//#########################################################################
//# U T I L I T Y
//#########################################################################
/**
* Utility routine operating on a range bounded by 'start' and 'end'.
* Returns immediately when len is less than 1.
*
* NOTE(review): the function signature, the loop headers that enclosed the
* two 'break' statements, and the statements of the final block are not
* present in this copy, so what is computed from start/end cannot be
* determined from here. Presumably a whitespace trim -- confirm against the
* original source.
*/
{
if (len<1)
return;
int start = 0;
int end = 0;
{
break;
}
{
break;
}
{
}
}
//#########################################################################
//# P A R S I N G
//#########################################################################
/**
* Get the character at position p and record the fact by updating the
* line/column counters.
*
* Returns -1 when p is at or beyond len. Otherwise one branch resets colNr
* to 0 and increments lineNr, the other increments colNr, and ch is
* returned.
*
* NOTE(review): the function signature, the declaration of 'ch' and the
* condition selecting the line-advance branch (presumably a newline test)
* are not present in this copy of the file.
*/
{
if (p >= len)
return -1;
//printf("%c", ch);
{
colNr = 0;
lineNr++;
}
else
colNr++;
return ch;
}
/**
* Look at the character at position p without touching the line/column
* counters.
*
* Returns -1 when p is at or beyond len, otherwise ch.
*
* NOTE(review): the function signature and the declaration of 'ch' are not
* present in this copy of the file.
*/
{
if (p >= len)
return -1;
return ch;
}
/**
* Test if the given substring exists at the given position
* in parsebuf. Use peek() in case of out-of-bounds.
*
* Walks the NUL-terminated string 'str'; returns false from inside the
* loop and true once the end of 'str' is reached.
*
* NOTE(review): the function signature and the comparison guarding
* 'return false' are not present in this copy; as shown, the loop would
* return false for any non-empty 'str'.
*/
{
while (*str)
{
return false;
}
return true;
}
/**
* Test if the given substring exists at the given position
* in a given buffer
*/
/*
static bool bufMatch(const DOMString &buf, int pos, char *str)
{
while (*str)
{
if (buf[pos++] != *str++)
return false;
}
return true;
}
*/
/**
* Skip whitespace starting at position p.
*
* While p is below len, reads the character with get(p) and stops at the
* first one for which uni_is_space() is false; otherwise advances p.
* Returns the resulting value of p.
*
* Characters are read with get() rather than peek(), so skipped characters
* go through get()'s bookkeeping.
*
* NOTE(review): the function signature is not present in this copy of the
* file (presumably skipwhite(), which is called throughout this file).
*/
{
while (p < len)
{
int b = get(p);
if (!uni_is_space(b))
break;
p++;
}
return p;
}
/**
* Scan a word starting at position p and return the position where it ends.
*
* Scanning stops at the first character that is <= ' ' (space or control),
* or is one of '/', '>' or '='.
*
* TODO (from the original author): modify this to allow all chars for an
* element or attribute name.
*
* NOTE(review): the function signature is not present in this copy, and no
* statement storing the scanned characters is visible -- confirm against
* the original source.
*/
{
while (p<len)
{
int b = get(p);
if (b<=' ' || b=='/' || b=='>' || b=='=')
break;
p++;
}
return p;
}
/**
* Get a name and prefix, if any.
*
* Scans from position p with the same terminators as the plain word
* scanner (a character <= ' ', or '/', '>' or '='). On ':' the local
* 'shortWord' is reset to the empty string.
*
* NOTE(review): the function signature is not present in this copy. The
* ':' branch shows no statement advancing p, and the 'else' before the
* final return has no matching 'if'; lines appear to be missing here, so
* this block does not compile as shown.
*/
{
while (p<len)
{
int b = get(p);
if (b<=' ' || b=='/' || b=='>' || b=='=')
break;
else if (b == ':')
{
shortWord = "";
}
else
p++;
}
else
return p;
}
/**
* Read a quoted string. Assumes that we are starting on a quote. Ends on
* the char after the final quote.
*
* Entity references ('&...') inside the string are handed to
* parseEntity(); if that reports an error (negative result) the original
* position p0 is returned.
*
* NOTE(review): the function signature is not present in this copy. The
* 'return p0;' directly after 'int p = p0;' is unconditional as shown,
* which makes the rest of the body unreachable -- its guarding condition
* appears to be missing. The final 'else' block is also empty.
*/
{
int p = p0;
return p0;
int b = get(p++); //go to next char
while (p<len )
{
b = get(p++);
// NOTE(review): either quote character ends the string, regardless of
// which one opened it -- confirm this is intended.
if (b=='"' || b=='\'')
break;
else if (b=='&')
{
p = parseEntity(p, result);
if (p < 0)
return p0;
}
else
{
}
}
//printf("quoted text:'%s'\n", buf.c_str());
return p;
}
/**
* Parse an XML declaration ("<?xml ... ?>"). Assumes the current char is
* '<'; ends on the char after '>'.
*
* Returns p0 unchanged when the input at p0 does not start with "<?xml",
* when p2 is behind p, or when "?>" is not found at the expected position.
* On success returns the position just past "?>".
*
* NOTE(review): the function signature and the statement that assigns p2
* (presumably an attribute-parsing call using quickCloseDummy) are not
* present in this copy of the file.
*/
{
int p = p0;
if (!match(p, "<?xml"))
return p0;
p += 5;
colNr += 5;
bool quickCloseDummy;
if (p2 < p)
{
//smart ptr!!do not delete node;
return p0;
}
p = p2;
//get the attributes that we need
{ /*document->setXmlEncoding(attr->getNodeValue());*/ }
//#now we should be pointing at '?>'
if (!match(p, "?>"))
{
return p0;
}
//skip over '?>' (two get() calls so both characters are recorded)
get(p++);
get(p++);
return p;
}
/**
* Parse a <!DOCTYPE> tag. Expects '<' on start. Ends pointing at the char
* after '>'.
*
* Returns p0 unchanged when the input does not start with "<!DOCTYPE",
* when 'doctype' is null, or when any sub-step fails to advance (p2 <= p).
* After the root name, the loop runs until '>' and recognises a bracketed
* section ending at ']', a PUBLIC clause and a SYSTEM clause.
*
* NOTE(review): the existing comment said "doctype may be null", but the
* code returns p0 when 'doctype' is null -- confirm which is intended.
* NOTE(review): the function signature, every statement assigning p2, the
* declaration of 'ch' and the 'if' opening the bracketed-section branch
* are not present in this copy of the file.
*/
{
int p = p0;
if (!match(p, "<!DOCTYPE"))
return p0;
p += 9;
colNr += 9;
if (!doctype)
return p0;
//### get the root name of the document
p = skipwhite(p);
if (p2 <= p)
return p0;
p = p2;
//printf("doctype root '%s'\n", rootName.c_str());
while (p < len)
{
p = skipwhite(p);
if (peek(p) == '>')
break;
{
while (p < len)
{
if (ch == ']')
break;
}
p++;
}
else if (match(p, "PUBLIC"))
{
p += 6;
colNr += 6;
p = skipwhite(p);
if (p2 <= p)
return p0;
p = p2;
p = skipwhite(p);
if (p2 <= p)
return p0;
p = p2;
//printf("PUBLIC \"%s\" \"%s\" \n",
// pubIdLiteral.c_str(), systemLiteral.c_str());
}
else if (match(p, "SYSTEM"))
{
p += 6;
colNr += 6;
p = skipwhite(p);
if (p2 <= p)
return p0;
p = p2;
//printf("SYSTEM \"%s\" \n", systemLiteral.c_str());
}
}
//skip over '>'
get(p++);
return p;
}
/**
* Parse a comment ("<!-- ... -->"). Expects '<' on startup, ends on the
* char after '>'.
*
* Returns p0 unchanged when the input at p0 does not start with "<!--".
* Otherwise skips the opener, scans for "-->" and returns the position
* just past it.
*
* NOTE(review): the function signature is not present in this copy, and no
* statement advancing p (or collecting the comment text) is visible inside
* the loop.
* NOTE(review): the loop bound is p < len-3, so a "-->" that occupies the
* last three characters of the buffer would not be consumed -- confirm.
*/
{
int p = p0;
if (!match(p, "<!--"))
return p0;
colNr += 4;
p += 4;
while (p<len-3)
{
if (match(p, "-->"))
{
p += 3;
colNr += 3;
break;
}
}
return p;
}
/**
* Parse a CDATA section ("<![CDATA[ ... ]]>").
*
* Returns p0 unchanged when the input at p0 does not start with
* "<![CDATA[". Otherwise skips the opener, scans for "]]>" and returns the
* position just past it (or the position reached when the buffer ends
* first).
*
* NOTE(review): the function signature is not present in this copy, and no
* statement advancing p (or collecting the section text) is visible inside
* the loop.
*/
{
int p = p0;
if (!match(p, "<![CDATA["))
return p0;
colNr += 9;
p += 9;
while (p<len)
{
if (match(p, "]]>"))
{
p +=3;
colNr += 3;
break;
}
}
/*printf("Got CDATA:%s\n",buf.c_str());*/
return p;
}
/**
* Parse character data up to the next '<'.
*
* An '&' is handed to parseEntity(), which appends to 'buf'; if it reports
* an error (negative result) p0 is returned. Scanning stops, without
* consuming it, at the first '<'. Returns the position reached.
*
* NOTE(review): the function signature is not present in this copy, and
* the final 'else' block (ordinary characters) is empty as shown, so no
* statement advancing p for plain text is visible.
*/
{
int p = p0;
while (p<len)
{
if (peek(p) == '&')
{
p = parseEntity(p, buf);
if (p < 0) //error?
return p0;
}
else if (peek(p) == '<')
{
break;
}
else
{
}
}
/*printf("Got Text:%s\n",buf.c_str());*/
return p;
}
/**
* Parses attributes of a node. Should end pointing at either the
* '?' of a version or doctype tag, or a '>' of a normal tag.
*
* *quickClose is set to false on entry. It is set to true, and the
* position after '>' is returned, in the branch that advances one
* character, skips whitespace and then finds '>'. Each attribute is
* expected as name, optional whitespace, '=', optional whitespace, value;
* the loop stops when no name is found or no '=' follows.
*
* NOTE(review): the function signature, the declarations/assignments of
* 'ch' and 'p2', and several conditions are not present in this copy. In
* particular the 'break' after the first skipwhite() is unconditional as
* shown, and the block following it has lost its 'if'.
*/
{
*quickClose = false;
int p = p0;
while (p<len)
{
/*printf("ch:%c\n",ch);*/
p = skipwhite(p);
/*printf("ch:%c\n",ch);*/
break;
{
p++;
p = skipwhite(p);
if (ch == '>')
{
p++;
*quickClose = true;
/*printf("quick close\n");*/
return p;
}
}
if (p2 <= p)
break;
/*printf("name:%s",buf);*/
p = p2;
p = skipwhite(p);
/*printf("ch:%c\n",ch);*/
if (ch != '=')
break;
p++;
p = skipwhite(p);
/*ch = parsebuf[p];*/
/*printf("ch:%c\n",ch);*/
p = p2;
/*printf("name:'%s' value:'%s'\n",buf,buf2);*/
//## Now let us make the attribute and give it to the node
}//while p<len
return p;
}
/**
* Appends the value of an entity to the buffer.
*
* On a hit the position is advanced by the matched entry's escapeLength
* and returned. When nothing matches, error("unterminated entity") is
* reported and -1 is returned; callers in this file test for a negative
* result.
*
* NOTE(review): the function signature, the loop over the entity entries
* ('info'), the match test and the statement appending the entity value
* are not present in this copy of the file.
*/
{
int p = p0;
{
{
p += info->escapeLength;
return p;
}
}
error("unterminated entity");
return -1;
}
//#########################################################################
//# P A R S E A N O D E
//#########################################################################
/**
* Parse one element starting at p0, preserving the original structure as
* much as possible.
*
* Visible flow: require '<', read the tag name, read the attributes
* (returning early for a self-closing tag), then consume the element's
* content -- comments, an XML declaration, a DOCTYPE, CDATA sections,
* child or closing tags, and text -- and finally require the closing tag
* ('/', name, '>'). Returns the position after the closing '>' on success
* and p0 on any failure.
*
* For comment, CDATA and text content the code branches on parseAsData;
* both branches are empty in this copy.
*
* NOTE(review): the function signature, the declarations/assignments of
* 'ch' and 'p2', the content-loop header, the calls that produce p2 for
* comments/CDATA/text/child elements, the close-tag name comparison and
* the argument line of the "Mismatched closing tag" error call are not
* present in this copy. The block does not compile as shown.
*/
{
int p = p0;
//### OPEN TAG
if (ch != '<')
return p0;
p = skipwhite(p);
if (p2 <= p)
return p0;
p = p2;
p = skipwhite(p);
//printf("qualifiedName:%s\n", openTagQualifiedName.c_str());
//### ATTRIBUTES
bool quickClose;
if (quickClose) //trivial tag: <name/>
return p;
p++; //skip over '>'
/* ### Get intervening data ### */
{
//### COMMENT
if (match(p, "<!--"))
{
if (p2 <= p)
return p0;
p = p2;
if (parseAsData)
{ //throw away
//delete comment;
}
else
{
}
}
//### VERSION
else if (match(p, "<?xml"))
{
p2 = parseVersion(p);
if (p2 <= p)
return p0;
}
//### DOCTYPE
else if (match(p, "<!DOCTYPE"))
{
p2 = parseDoctype(p);
if (p2 <= p)
return p0;
}
//### CDATA
else if (match(p, "<![CDATA["))
{
if (p2 <= p)
return p0;
p = p2;
if (parseAsData)
{
//delete cdata;
}
else
{
}
}
//### OPEN OR CLOSE TAG
else if (peek(p) == '<')
{
{
p = p2;
break;
}
else
{
/*Add element to tree*/
if (p2 <= p)
{
/*printf("problem on element:%ls. p2:%d p:%d\n",n->name, p2, p);*/
return p0;
}
p = p2;
}
}
//### TEXT
else
{
if (p2 <= p)
return p0;
p = p2;
if (parseAsData)
{
//delete text;
}
else
{
}
}
}//while (p<len)
//printf("%d : nodeValue:'%s'\n", p, nodeValue.c_str());
//### get close tag. we should be pointing at '/'
p = skipwhite(p);
if (ch != '/')
{
error("no / on end tag");
return p0;
}
p++;
//### get word after '/'
p = skipwhite(p);
{
error("Mismatched closing tag. Expected </%s>. Got '%s'.",
return p0;
}
p = skipwhite(p);
if (parsebuf[p] != '>')
{
error("no > on end tag");
return p0;
}
p++;
/*printf("close element:%ls\n",buf);*/
return p;
}
/**
* Top-level parse: processes the document prolog starting at bufferOffset
* and returns 'document'.
*
* keepGoing is set to true for the duration of the parse and back to false
* before the normal return. The prolog loop skips whitespace and handles
* comments, the XML declaration and the DOCTYPE; anything else ends the
* loop. If a sub-parser fails to advance (p2 <= p), 'document' is returned
* immediately, leaving keepGoing set to true.
*
* NOTE(review): the function signature, the prolog-loop header, the call
* that produces p2 for comments, and whatever followed the final
* skipwhite() (presumably the root-element parse) are not present in this
* copy of the file.
*/
{
keepGoing = true;
//document = new svg::SVGDocumentImpl(domImpl, "", "", NULL);
int p = bufferOffset;
int p2 = 0;
{
p = skipwhite(p);
//### COMMENT
if (match(p, "<!--"))
{
if (p2 <= p)
return document;
p = p2;
if (parseAsData)
{ //throw away
//delete comment;
}
else
{
}
}
//### VERSION
else if (match(p, "<?xml"))
{
p2 = parseVersion(p);
if (p2 <= p)
return document;
p = p2;
}
//### DOCTYPE
else if (match(p, "<!DOCTYPE"))
{
p2 = parseDoctype(p);
if (p2 <= p)
return document;
p = p2;
}
else
{
break;
}
}
p = skipwhite(p);
keepGoing = false;
return document;
}
/**
* Parse wrapper that normalizes the resulting document.
*
* If 'doc' is null it is returned as is; otherwise
* doc->normalizeDocument() is called and 'doc' is returned.
*
* NOTE(review): the function signature and the statement that produces
* 'doc' are not present in this copy of the file.
*/
{
if (!doc)
return doc;
doc->normalizeDocument();
return doc;
}
/**
* Parse wrapper that returns 'doc' (presumably the file-based entry point,
* given the loader that follows -- confirm).
*
* NOTE(review): the function signature, the declaration of 'doc', the
* condition guarding the first return and the statement that assigns 'doc'
* are not present in this copy. As shown, the first return is
* unconditional.
*/
{
return doc; /*doc still null*/
return doc;
}
//#########################################################################
//# S T R E A M R E A D I N G
//#########################################################################
/**
* Read a file through the stdio stream 'f' and return 'buf'.
*
* If 'f' is null, 'buf' is returned without reading. Otherwise the read
* loop runs until feof(f) or until a negative character value is seen,
* then the stream is closed with fclose() and 'buf' is returned.
*
* NOTE(review): the function signature, the declarations of 'buf' and 'f',
* the condition guarding the first 'return buf;', and the read/append
* statements inside the loop are not present in this copy. As shown, the
* first return is unconditional.
*/
{
return buf;
if (!f)
{
//error here
return buf;
}
while (!feof(f))
{
if (ch<0)
break;
}
fclose(f);
return buf;
}
//#########################################################################
//# C O N S T R U C T O R / D E S T R U C T O R
//#########################################################################
/**
* Constructor: default-constructs 'document' and 'parsebuf', clears
* keepGoing and parseAsData, zeroes pos and len, and starts position
* tracking at line 1, column 0.
*
* NOTE(review): the constructor's signature line is not present in this
* copy of the file; only the member-initializer list and empty body
* remain.
*/
document(),
parsebuf(),
keepGoing(false),
parseAsData(false),
pos(0),
len(0),
lineNr(1),
colNr(0)
{
}
/**
* Constructor: default-constructs 'document' and 'parsebuf', clears
* keepGoing, zeroes pos and len, and starts position tracking at line 1,
* column 0.
*
* NOTE(review): the constructor's signature line is not present in this
* copy. Unlike the other initializer list in this file, no initializer for
* parseAsData is visible here -- confirm it was not lost, otherwise that
* member is left uninitialized by this constructor.
*/
document(),
parsebuf(),
keepGoing(false),
pos(0),
len(0),
lineNr(1),
colNr(0)
{
}
/**
* Empty body; presumably the destructor, given its place in the
* constructor/destructor section.
*
* NOTE(review): the signature line is not present in this copy of the
* file.
*/
{
}
} //namespace dom
} //namespace w3c
} //namespace org
//#########################################################################
//# E N D O F F I L E
//#########################################################################