cssreader.cpp revision 0c6b51649b501e4e378921d918c6a113ab8e2bce
/*
* Phoebe DOM Implementation.
*
* This is a C++ approximation of the W3C DOM model, which follows
* fairly closely the specifications in the various .idl files, copies of
* which are provided for reference. Most important is this one:
*
*
* Authors:
* Bob Jamison
*
* Copyright (C) 2005-2008 Bob Jamison
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "cssreader.h"
#include "ucd.h"
#include <stdio.h>
#include <stdarg.h>
namespace org
{
namespace w3c
{
namespace dom
{
namespace css
{
//#########################################################################
//# M E S S A G E S
//#########################################################################
/**
* Get the column and row number of the given character position.
*
* Walks the first p characters of the input, counting newlines to derive
* the row (starting at 1), the column within that row, and the index of
* the most recent newline seen.
*
* NOTE(review): the function signature, the per-iteration load of `ch`
* and the code that hands col/row/lastnl back to the caller are not
* visible in this listing -- confirm against the full source.
*/
{
int col = 1;
int row = 1;
int lastnl = 0;
for (int i=0 ; i<p ; i++)
{
if (ch == '\n')
{
lastnl = i;
row++;
// NOTE(review): col starts at 1 on the first row but restarts at 0
// after a newline -- confirm this asymmetry is intended
col=0;
}
else
col++;
}
}
/**
* Presumably the error-reporting routine of the MESSAGES section.
*
* NOTE(review): only the local declarations and a disabled debugging
* snippet are visible in this listing; the signature and the statements
* that format and emit the message are missing, and the `for` header at
* the end of the body has no visible loop body.
*/
{
int lineNr;
int colNr;
int lastNL;
/*
Disabled debugging aid: echoes the text between the last newline and
lastPosition to stderr, then prints a caret under the error column.

int lineLen = lastPosition - lastNL;
printf("lineLen:%d lastNL:%d\n", lineLen, lastNL);
for (int i=lastNL+1 ; i<lastPosition ; i++)
fprintf(stderr, "%c", parsebuf[i]);
fprintf(stderr, "\n");
for (int i=0 ; i<lineLen-1 ; i++)
fprintf(stderr, " ");
fprintf(stderr, "^\n");
*/
for (int i=0 ; i<lastPosition ; i++)
}
//#########################################################################
//# P A R S I N G
//#########################################################################
/**
* Get the character at the position and record the fact.
*
* Returns 0 when p is at or beyond parselen; otherwise stores p in
* lastPosition and returns the character.
*
* NOTE(review): the signature and the statement that loads `ch` are not
* visible in this listing.
*/
{
// out of bounds: report 0 and leave lastPosition untouched
if (p >= parselen)
return 0;
//printf("%c", ch);
lastPosition = p;
return ch;
}
/**
* Test if the given substring exists at the given position
* in parsebuf. Use get() in case of out-of-bounds.
*
* Returns true when the end of str is reached without a mismatch,
* false otherwise.
*
* NOTE(review): the signature and the per-character comparison/advance
* inside the loop are missing from this listing; as shown, the loop
* returns false for any non-empty str.
*/
{
while (*str)
{
return false;
}
return true;
}
/**
* Skip over whitespace and comments starting at position p.
*
* Both XML-style comments and C-style comments are consumed. Scanning
* stops at the first character that is neither whitespace nor the start
* of a comment, or at the end of the buffer.
*
* Returns the new position, or -1 after reporting an unterminated
* comment. lastPosition is updated to the position reached.
*/
{
    while (p < parselen)
        {
        //# XML comment: look for the closing marker
        if (match(p, "<!--"))
            {
            bool closed = false;
            for (p += 4 ; p < parselen ; p++)
                {
                if (match(p, "-->"))
                    {
                    p += 3;
                    closed = true;
                    break;
                    }
                }
            lastPosition = p;
            if (!closed)
                {
                error("unterminated <!-- .. --> comment");
                return -1;
                }
            continue;
            }

        //# C comment: look for the closing marker
        if (match(p, "/*"))
            {
            bool closed = false;
            for (p += 2 ; p < parselen ; p++)
                {
                if (match(p, "*/"))
                    {
                    p += 2;
                    closed = true;
                    break;
                    }
                }
            lastPosition = p;
            if (!closed)
                {
                error("unterminated /* .. */ comment");
                return -1;
                }
            continue;
            }

        //# anything that is not whitespace ends the skip
        if (!uni_is_space(get(p)))
            break;
        p++;
        }

    lastPosition = p;
    return p;
}
/**
* Get a word from the buffer.
*
* A word must start with a letter (uni_is_letter); otherwise p is
* returned unchanged. Inside the word a backslash skips itself and the
* following character. Returns the position after the word.
*
* NOTE(review): the signature, the loads of `ch` and the condition of the
* first branch inside the loop are not visible in this listing.
*/
{
if (!uni_is_letter(ch))
return p;
p++;
while (p < parselen)
{
{
p++;
}
else if (ch == '\\')
{
// backslash escape: skip the backslash and the escaped character
p+=2;
}
else
break;
}
return p;
}
/**
* Get a number from the buffer (the fall-through path below is marked
* "not a number").
*
* Returns p0 when nothing was consumed, otherwise a position past the
* consumed text.
*
* NOTE(review): the signature and the conditions guarding the `break`s,
* the second scanning block and the successful return are not visible in
* this listing -- presumably the two loops scan the integer and the
* fractional digits; confirm against the full source.
*/
{
int p=p0;
while (p < parselen)
{
break;
p++;
}
{
p++;
while (p < parselen)
{
break;
p++;
}
}
if (p>p0)
{
{
return p;
}
}
//not a number
return p0;
}
/**
* Assume that we are starting on a quote. Ends on the char
* after the final '"'.
*
* Returns -1 after reporting "unterminated quoted string" when the
* closing quote is never found; otherwise the position past it.
*
* NOTE(review): the signature, the guard of the early `return p0` and
* the condition that detects the closing quote are not visible in this
* listing; as shown the early return is unconditional.
*/
{
int p = p0;
return p0;
p++;
bool done = false;
while (p<parselen )
{
{
done = true;
p++;
break;
}
else
{
}
p++;
}
if (!done)
{
error("unterminated quoted string");
return -1;
}
return p;
}
/**
* Not in api. Replaces URI return by lexer.
*
* Parses a "url(" ... ")" token starting at p0. Returns p0 when the
* input does not start with "url(", -1 after reporting a missing ')',
* otherwise the position just past the ')'.
*
* NOTE(review): the signature, the loads of `ch` and the condition that
* ends the inner scan are not visible in this listing.
*/
{
int p = p0;
if (!match(p, "url("))
return p0;
p+=4;
p = skipwhite(p);
while (p < parselen)
{
break;
p++;
}
p = skipwhite(p);
if (ch != ')')
{
error("no closing ')' on url spec");
return -1;
}
p++;
return p;
}
/**
* Skip to the end of the block.
*
* Advances from p0 until a '}' is seen and returns the position just
* past it, or the position reached when the buffer ends first.
*
* NOTE(review): the signature and the load of `ch` inside the loop are
* not visible in this listing.
*/
{
int p = p0;
while (p < parselen)
{
if (ch == '}')
{
// consume the closing brace and stop
p++;
break;
}
else
{
p++;
}
}
return p;
}
//#########################################################################
//# P R O D U C T I O N S
//#########################################################################
/**
* stylesheet
* : [ CHARSET_SYM S* STRING S* ';' ]?
* [S|CDO|CDC]* [ import [S|CDO|CDC]* ]*
* [ [ ruleset | media | page ] [S|CDO|CDC]* ]*
* ;
*
* Top-level production: an optional @charset, any number of imports,
* then any number of rulesets / media / page rules.
*
* Returns -1 as soon as a sub-parser reports an error, otherwise the
* position reached when no further rule matches.
*
* NOTE(review): the signature, the loads of `ch` and most of the calls
* that assign p2 (quoted string, import, media, page) are not visible in
* this listing; only the getRuleSet() call survives.
*/
{
int p = p0;
int p2 = p;
//# CHARSET 0 or 1
if (match(p, "@charset"))
{
p+=8;
p = skipwhite(p);
if (p2<=p)
{
error("quoted string required after @charset");
return -1;
}
if (ch !=';')
{
error("';' required after @charset declaration");
return -1;
}
p++;
p = skipwhite(p);
}
//# IMPORT 0 to many
while (true)
{
if (p2<0)
{
return -1;
}
// no progress means no (further) import
if (p2<=p)
break;
p = p2;
}
//# RULESET | MEDIA | PAGE 0 to many
while (true)
{
//Ruleset
p2 = getRuleSet(p);
if (p2<0)
{
return -1;
}
if (p2>p)
{
p = p2;
continue;
}
//Media
if (p2<0)
{
return -1;
}
if (p2>p)
{
p = p2;
continue;
}
//Page
if (p2<0)
{
return -1;
}
if (p2>p)
{
p = p2;
continue;
}
//none of the above
break;
}
return p;
}
/**
* import
* : IMPORT_SYM S*
* [STRING|URI] S* [ medium [ COMMA S* medium]* ]? ';' S*
* ;
*
* Returns p0 when the input at p0 is not "@import", -1 after an error,
* otherwise the position just past the terminating ';'.
*
* NOTE(review): the signature, the loads of `ch` and the calls that
* assign p2 (string, URI, medium) are not visible in this listing.
*/
{
int p = p0;
if (!match(p, "@import"))
return p0;
p+=7;
p = skipwhite(p);
//# STRING | URI
if (p2<0)
{
return -1;
}
// first alternative made no progress: try the second one
if (p2<=p)
{
if (p2<0)
{
return -1;
}
if (p2<=p)
{
error("quoted string or URI required after @import");
return -1;
}
}
p = p2;
if (p2<0)
return -1;
p = p2;
p = skipwhite(p);
if (ch != ';')
{
error("@import must be terminated with ';'");
return -1;
}
p++;
return p;
}
/**
* media
* : MEDIA_SYM S* medium [ COMMA S* medium ]* LBRACE S* ruleset* '}' S*
* ;
*
* Parse an "@media" rule starting at p0.
*
* Returns p0 when the input at p0 is not "@media", -1 after reporting a
* syntax error, otherwise the position just past the closing '}'.
* (The success path used to return p0, which made a fully parsed rule
* indistinguishable from "not an @media rule" for callers that test
* for progress.)
*
* NOTE(review): the signature, the loads of `ch` and the calls that
* assign p2 for the medium list are not visible in this listing.
*/
{
int p = p0;
if (!match(p, "@media"))
return p0;
p+=6;
p = skipwhite(p);
//# MEDIUM LIST
if (p2<0)
return -1;
if (p2<=p)
{
error("@media must be followed by medium");
return -1;
}
p = p2;
while (true)
{
if (ch != ',')
break;
if (p2<0)
return -1;
if (p2<=p)
{
error("',' in medium list must be followed by medium");
return -1;
}
p = p2;
}
p = skipwhite(p);
if (ch!='{')
{
error("@media requires '{' for ruleset");
return -1;
}
p++;
p2 = getRuleSet(p);
if (p2<0)
return -1;
if (p2<=p)
{
error("@media requires ruleset after '{'");
return -1;
}
p = p2;
if (ch != '}')
{
error("@media requires '}' after ruleset");
return -1;
}
p++;
// report the position after the rule so the caller sees progress
return p;
}
/**
* medium
* : IDENT S*
* ;
*
* Returns -1 on error, p0 when no identifier was consumed, otherwise
* the position after it.
*
* NOTE(review): the signature and the call that assigns p2 (and fills
* `ident`) are not visible in this listing.
*/
{
int p = p0;
p = skipwhite(p);
DOMString ident;
if (p2<0)
return -1;
if (p2<=p)
return p0;
p = p2;
return p;
}
/**
* page
* : PAGE_SYM S* pseudo_page? S*
* LBRACE S* declaration [ ';' S* declaration ]* '}' S*
* ;
*
* Parse an "@page" rule starting at p0.
*
* Returns p0 when the input at p0 (after whitespace) is not "@page",
* -1 after reporting a syntax error, otherwise the position just past
* the closing '}'.
*
* The brace checks test the character at p (via get()) rather than the
* position value itself, and a missing brace is fatal like every other
* syntax error in this parser.
*
* NOTE(review): the signature, the loads of `ch` and the calls that
* assign p2 for the declarations are not visible in this listing.
*/
{
int p = p0;
//# @PAGE
p = skipwhite(p);
if (!match(p, "@page"))
return p0;
p+= 5;
//#PSEUDO PAGE 0 or 1
p = skipwhite(p);
int p2 = getPseudoPage(p);
if (p2<0)
return -1;
if (p2>p)
{
p = p2;
}
//# {
p=skipwhite(p);
if (get(p) != '{')
{
error("@page requires '{' before declarations");
return -1;
}
p++;
//# DECLARATION LIST
p = skipwhite(p);
if (p2<0)
return -1;
if (p2<=p)
{
error("@page requires declaration(s) after '{'");
return -1;
}
while (true)
{
if (ch != ';')
break;
p++;
p = skipwhite(p);
if (p2<0)
return -1;
if (p2<= p)
{
error("@page requires declaration after ';'");
return -1;
}
}
//# }
p=skipwhite(p);
if (get(p) != '}')
{
error("@page requires '}' after declarations");
return -1;
}
p++;
return p;
}
/**
* pseudo_page
* : ':' IDENT
* ;
*
* Returns p0 when the input at p0 is not ':', -1 on error (including a
* ':' that is not followed by an identifier), otherwise the position
* after the identifier.
*
* NOTE(review): the signature and the call that assigns p2 are not
* visible in this listing.
*/
{
int p = p0;
if (!match(p, ":"))
return p0;
p++;
if (p2<0)
return -1;
if (p2<=p)
{
error("pseudo-page requires identifier after ':'");
return -1;
}
p = p2;
return p;
}
/**
* ruleset
* : selector [ COMMA S* selector ]*
* LBRACE S* declaration [ ';' S* declaration ]* '}' S*
* ;
*
* Returns p0 when no selector is found at p0, -1 after a syntax error,
* otherwise the position after the closing '}' and any trailing
* whitespace/comments.
*
* NOTE(review): the signature, the loads of `ch` and the calls that
* assign p2 for the declarations are not visible in this listing.
*/
{
int p = p0;
//## SELECTOR
p = skipwhite(p);
int p2 = getSelector(p);
if (p2<0)
return -1;
if (p2<=p) //no selector
{
return p0;//not me
}
p = p2;
while (true)
{
p = skipwhite(p);
if (ch != ',')
break;
p++;
p = skipwhite(p);
// note: this declaration shadows the outer p2 for the rest of the loop body
int p2 = getSelector(p);
if (p2<0)
return -1;
if (p2<=p)
{
error("selector required after ',' in list");
return -1;
}
p = p2;
}
//## {
if (ch != '{')
{
error("'{' required before declarations of ruleset");
return -1;
}
p++;
//## DECLARATIONS ( 0 to many )
p = skipwhite(p);
if (p2<0)
return -1;
if (p2>p)
{
p = p2;
while (true)
{
p = skipwhite(p);
if (ch != ';')
break;
p++;
p = skipwhite(p);
if (p2<0)
return -1;
if (p2<=p)
{
// a trailing ';' with no declaration after it is tolerated
//apparently this is ok
//error("declaration required after ';' in ruleset");
//return -1;
break;
}
p = p2;
}
}
//## }
if (ch != '}')
{
error("ruleset requires closing '}'");
return -1;
}
p++;
p = skipwhite(p);
return p;
}
/**
* selector
* : simple_selector [ combinator simple_selector ]*
* ;
*
* Returns p0 when no simple selector is found, -1 on error, otherwise
* the position after the last simple selector consumed.
*
* NOTE(review): the signature, the loads of `ch` and the definition of
* `wasSpace` are not visible in this listing.
*/
{
int p = p0;
//## SIMPLE SELECTOR
p = skipwhite(p);
int p2 = getSimpleSelector(p);
if (p2<0)
return -1;
if (p2<=p)
return p0; //not me
p = p2;
//## COMBINATORS + MORE SELECTORS
while (true)
{
p = skipwhite(p);
//# Combinators
//easier to do here than have a getCombinator()
// set when an explicit '+' or '>' was seen, which makes a following
// simple selector mandatory
int visibleCombinator = false;
if (ch == '+')
{
visibleCombinator = true;
p++;
}
else if (ch == '>')
{
visibleCombinator = true;
p++;
}
else if (wasSpace)
{
}
else
{
break;
}
p = skipwhite(p);
p2 = getSimpleSelector(p);
if (p2<0)
return -1;
if (p2<=p)
{
if (visibleCombinator)
{
error("need simple selector after combinator");
return -1;
}
else
{
break;
}
}
p = p2;
}
return p;
}
/**
* simple_selector
* : element_name [ HASH | class | attrib | pseudo ]*
* | [ HASH | class | attrib | pseudo ]+
* ;
*
* Parse one simple selector starting at p0: an optional element name
* (or '*') followed by any number of '#hash', '.class', '[attrib]' and
* ':pseudo' items.
*
* Returns -1 after a syntax error, the position after the selector when
* selectorItems is positive, otherwise p0. An attrib without a closing
* ']' is reported and then skipped to the end of the block.
*
* The error reported for a nameless attrib now says "attrib" instead of
* the copy-pasted "class".
*
* NOTE(review): the signature, the loads of `ch`, the calls that assign
* p2, the updates of selectorItems and the header of the string-literal
* branch on the attrib right-hand side are not visible in this listing.
*/
{
int p = p0;
int p2;
p = skipwhite(p);
int selectorItems = 0;
//######################
//# Note: do NOT skipwhite between items. Only within the
//# pseudo function and attrib below
//######################
//#Element name 0 or 1
if (uni_is_letter(ch))
{
if (p2<0)
return -1;
if (p2<=p)
{
error("null element name");
return -1;
}
p = p2;
}
else if (ch == '*')
{
str = "*";
p++;
}
//## HASH, CLASS, ATTRIB, PSEUDO (0 to many with elem name, 1 to many without)
while (true)
{
//# HASH
if (ch == '#')
{
p++;
if (p2<0)
return -1;
if (p2<=p)
{
error("no name for hash");
return -1;
}
p = p2;
}
//# CLASS
else if (ch == '.')
{
p++;
if (p2<0)
return -1;
if (p2<=p)
{
error("no name for class");
return -1;
}
p = p2;
}
//# ATTRIB
else if (ch == '[')
{
p++;
p = skipwhite(p);
if (p2<0)
return -1;
if (p2<=p)
{
error("no name for attrib");
return -1;
}
// an attrib may carry a right-hand side after '=', '~=' or '|='
bool getRHS=false;
if (match(p, "="))
{
p++;
getRHS=true;
}
else if (match(p, "~="))
{
p+=2;
getRHS=true;
}
else if (match(p, "|="))
{
p+=2;
getRHS=true;
}
if (getRHS)
{
p = skipwhite(p);
if (uni_is_letter(ch))
{
if (p2<0)
return -1;
if (p2<=p)
{
error("null ident on rhs of attrib");
return -1;
}
p = p2;
}
{
if (p2<0)
return -1;
if (p2<=p)
{
error("null literal string on rhs of attrib");
return -1;
}
p = p2;
}
}//getRHS
p = skipwhite(p);
if (ch != ']')
{
error("attrib needs closing ']'");
//return -1;
// recover by skipping the rest of the block instead of failing
p = skipBlock(p);
return p;
}
p++;
}
//# PSEUDO
else if (ch == ':')
{
p++;
if (p2<0)
return -1;
if (p2<=p)
{
error("no name for pseudo");
return -1;
}
p = p2;
if (ch == '(')
{
p++;
p = skipwhite(p);
if (uni_is_letter(ch))
{
if (p2<0)
return -1;
if (p2<=p)
{
error("null function parameter in pseudo");
return -1;
}
}
if (ch != ')')
{
error("function in pseudo needs ')'");
return -1;
}
p++;
}// ch==( -function-
}//pseudo
//# none of the above
else
{
break;
}
}//while
if (selectorItems > 0)
return p;
return p0;
}
/**
* declaration
* : property ':' S* expr prio?
* | {empty}
* ;
*
* Returns p0 when the input (after whitespace) does not start with a
* letter, -1 after a syntax error, otherwise the position after the
* value and optional priority.
*
* NOTE(review): the signature, the loads of `ch` and the calls that
* assign p2 (property name, expression, priority) are not visible in
* this listing.
*/
{
int p = p0;
//## PROPERTY
p = skipwhite(p);
if (!uni_is_letter(ch))
return p0; //not me
if (p2<0)
return -1;
//## ':'
if (ch != ':')
{
error("declaration requires ':' between name and value");
return -1;
}
p++;
//## EXPR
p = skipwhite(p);
if (p2<0)
return -1;
if (p2<=p)
{
error("declaration requires value after ':'");
return -1;
}
// NOTE(review): the original loop body is not visible; as written, the
// assignment on the next line is the body of this loop
for (int i=p ; i<p2 ; i++) //get our substring
p = p2;
//## PRIO (optional)
p = skipwhite(p);
if (p2<0)
return -1;
if (p2>p)
{
//do something
p = p2;
}
return p;
}
/**
* prio
* : IMPORTANT_SYM S*
* ;
*
* Recognises '!' followed (after optional whitespace/comments) by
* "important". Returns p0 when there is no '!', -1 when '!' is not
* followed by "important", otherwise the position after the keyword;
* on success `val` is set to "important".
*
* NOTE(review): the signature (including where `val` comes from) and the
* load of `ch` are not visible in this listing.
*/
{
int p = p0;
//## '!"
p = skipwhite(p);
if (ch != '!')
return p0;
p++;
//## "important"
p = skipwhite(p);
if (!match(p, "important"))
{
error("priority symbol is 'important'");
return -1;
}
p += 9;
val = "important";
return p;
}
/**
* expr
* : term [ operator term ]*
* ;
*
* Returns p0 when no term is found, -1 on error (including an operator
* that is not followed by a term), otherwise the position after the
* last term consumed.
*
* NOTE(review): the signature, the loads of `ch` and the calls that
* assign p2 are not visible in this listing.
*/
{
int p = p0;
//## TERM
p = skipwhite(p);
if (p2<0)
return -1;
if (p2<=p)
return p0; //not me
p = p2;
while (p < parselen)
{
p = skipwhite(p);
//#Operator. do this instead of getOperator()
// set when an explicit '/' or ',' was seen, which makes a following
// term mandatory
int visibleTerm = false;
if (ch == '/')
{
visibleTerm = true;
p++;
}
else if (ch == ',')
{
visibleTerm = true;
p++;
}
else
{
//just space. this is allowable between terms,
// so we still need to check for another term
}
p = skipwhite(p);
if (p2<0)
return -1;
if (p2<=p)
{
if (visibleTerm)
{
error("expression requires term after operator");
return -1;
}
else
{
break;
}
}
p = p2;
}
return p;
}
/**
* term
* : unary_operator?
* [ NUMBER S* | PERCENTAGE S* | LENGTH S* | EMS S* | EXS S* | ANGLE S* |
* TIME S* | FREQ S* | function ]
* | STRING S* | IDENT S* | URI S* | hexcolor
* ;
*
* Parse one term starting at p0: an optional leading '+' or '-' and then
* a number with optional unit suffix, a URI, a function, a string, an
* identifier or a hex color, tried in that order.
*
* Returns p0 when nothing matches, -1 after an error (a unary sign is
* rejected on URIs, strings, identifiers and hex colors), otherwise the
* position after the term.
*
* The "kHz" suffix now advances by its full three characters; it used to
* advance by two and leave the 'z' unconsumed.
*
* NOTE(review): the signature, the loads of `ch`, most calls that assign
* p2 and the header of the branch between the unit chain and the
* "plain number" case are not visible in this listing.
*/
{
int p = p0;
p = skipwhite(p);
//# Unary operator
bool hasUnary = false;
if (ch == '-')
{
p++;
hasUnary = true;
}
else if (ch == '+')
{
p++;
hasUnary = true;
}
//# NUMERIC
double numVal;
if (p2<0)
return -1;
if (p2>p)
{
p = p2;
// optional unit suffix; each branch advances by the suffix length
if (match(p, "%"))
{
p++;
}
else if (match(p, "em"))
{
p+=2;
}
else if (match(p, "ex"))
{
p+=2;
}
else if (match(p, "px"))
{
p+=2;
}
else if (match(p, "cm"))
{
p+=2;
}
else if (match(p, "mm"))
{
p+=2;
}
else if (match(p, "in"))
{
p+=2;
}
else if (match(p, "pt"))
{
p+=2;
}
else if (match(p, "pc"))
{
p+=2;
}
else if (match(p, "deg"))
{
p+=3;
}
else if (match(p, "rad"))
{
p+=3;
}
else if (match(p, "grad"))
{
p+=4;
}
else if (match(p, "ms"))
{
p+=2;
}
else if (match(p, "s"))
{
p+=1;
}
else if (match(p, "Hz"))
{
p+=2;
}
else if (match(p, "kHz"))
{
p+=3;
}
{
if (p2<0)
return -1;
p = p2;
}
else //plain number
{
}
return p;
}
//## URI --do before function, as syntax is similar
if (p2<0)
return -1;
if (p2>p)
{
if (hasUnary)
{
error("+ or - not allowed on URI");
return -1;
}
p = p2;
return p;
}
//## FUNCTION
p2 = getFunction(p);
if (p2<0)
return -1;
if (p2>p)
{
p = p2;
return p;
}
//## STRING
{
if (p2<0)
return -1;
if (p2>p)
{
if (hasUnary)
{
error("+ or - not allowed on a string");
return -1;
}
p = p2;
return p;
}
}
//## IDENT
if (uni_is_letter(ch))
{
if (p2<0)
return -1;
if (p2>p)
{
if (hasUnary)
{
error("+ or - not allowed on an identifier");
return -1;
}
p = p2;
return p;
}
}
//## HEXCOLOR
p2 = getHexColor(p);
if (p2<0)
return -1;
if (p2>p)
{
if (hasUnary)
{
error("+ or - not allowed on hex color");
return -1;
}
p = p2;
return p;
}
return p0;
}
/**
* function
* : FUNCTION S* expr ')' S*
* ;
*
* Returns p0 when the input is not a function call, -1 after a syntax
* error, otherwise the position after the closing ')' and any trailing
* whitespace/comments.
*
* NOTE(review): the signature, the loads of `ch`, the calls that assign
* p2 and the guard of the second "not me" return are not visible in this
* listing; as shown that return is unconditional.
*/
{
int p = p0;
//## IDENT + ( (both)
p = skipwhite(p);
if (p2<0)
return -1;
if (p2<=p)
return p0; //not me
return p0; //not me
if (ch != '(')
return p0; //still not me
p++;
//## EXPR
p = skipwhite(p);
if (p2<0)
return -1;
if (p2<=p)
{
error("function requires expression");
return -1;
}
p = p2;
//## ')'
p = skipwhite(p);
if (ch != ')')
{
error("function requires closing ')'");
return -1;
}
p++;
p = skipwhite(p);
return p;
}
/**
* There is a constraint on the color that it must
* have either 3 or 6 hex-digits (i.e., [0-9a-fA-F])
* after the "#"; e.g., "#000" is OK, but "#abcd" is not.
* hexcolor
* : HASH S*
* ;
*
* Returns p0 when the input (after whitespace) does not start with '#',
* -1 after reporting a wrong digit count, otherwise the position after
* the hex digits.
*
* NOTE(review): the signature, the load of `b`, the accumulation into
* hexVal and the condition guarding the digit-count error are not
* visible in this listing; as shown the error block is unconditional.
*/
{
int p = p0;
//## '#'
p = skipwhite(p);
if (!match(p, "#"))
return p0;
p++;
//## HEX
long hexVal = 0;
// consume consecutive hex digits of either case
while (p < parselen)
{
if (b>='0' && b<='9')
{
p++;
}
else if (b>='a' && b<='f')
{
p++;
}
else if (b>='A' && b<='F')
{
p++;
}
else
{
break;
}
}
{
error("exactly 3 or 6 hex digits are required after '#'");
return -1;
}
return p;
}
/**
* Parse entry point: resets lastPosition, runs the stylesheet production
* from position 0 and checks that the whole buffer was consumed.
*
* Returns false (after reporting "Not everything parsed") when the
* position returned by getStyleSheet() is short of parselen, true
* otherwise.
*
* NOTE(review): the signature and the statements that set up parsebuf
* and parselen are not visible in this listing.
*/
{
/*
Disabled pre-pass that would copy str into parsebuf while dropping
backslash escapes.

int len = str.size();
for (int i=0 ; i<len ; i++)
{
XMLCh ch = str[i];
if (ch == '\\' && i<(len-1)) //escape!
{
i++;
}
else
parsebuf.push_back(ch);
}
*/
//printf("==============================\n%s\n========================\n", str.c_str());
lastPosition = 0;
int p = getStyleSheet(0);
if (p < parselen)
{
error("Not everything parsed");
return false;
}
return true;
}
/**
* File-based entry point: reads from the stream f until end of file,
* closes it and returns `ret`. Returns false immediately when f is null.
*
* NOTE(review): the signature, the statement that opens f, the read that
* assigns `ch`, and the computation of `ret` are not visible in this
* listing.
*/
{
if (!f)
{
return false;
}
while (!feof(f))
{
// a negative value ends the read loop early
if (ch<0)
break;
}
fclose(f);
return ret;
}
} // namespace css
} // namespace dom
} // namespace w3c
} // namespace org
#ifdef CSSTEST
/* Sample stylesheet file names; only compiled when CSSTEST is defined. */
static const char *fileNames[] =
    { "001.css", "acid.css", "base.css", "inkscape.css", "meyerweb.css" };
/**
* Run the self-tests. Returns false (after printing "Test failed") when
* a test fails, true otherwise.
*
* NOTE(review): the loop header and the condition that detects a failure
* are not visible in this listing; as shown the failure block is
* unconditional.
*/
bool doTests()
{
{
{
printf("Test failed\n");
return false;
}
}
return true;
}
// Test driver body: yields 1 when doTests() fails and 0 otherwise.
// NOTE(review): the function signature is not visible in this listing.
{
if (!doTests())
return 1;
return 0;
}
#endif /* CSSTEST */
//#########################################################################
//# E N D O F F I L E
//#########################################################################