uri.cpp revision 92eb663500564c06258bc74260952f9fe89258d1
/*
* Phoebe DOM Implementation.
*
* This is a C++ approximation of the W3C DOM model, which follows
* fairly closely the specifications in the various .idl files, copies of
* which are provided for reference. Most important is this one:
*
* http://www.w3.org/TR/2004/REC-DOM-Level-3-Core-20040407/idl-definitions.html
*
* Authors:
* Bob Jamison
*
* Copyright (C) 2005-2008 Bob Jamison
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "uri.h"
#include "ucd.h"
#include <cstdarg>
#include <cstdio>
#include <cstdlib>
#include <vector>
namespace org
{
namespace w3c
{
namespace dom
{
/**
 * One row of a static lookup table (see the 'schemes' array).
 * Members are kept in this order so that positional aggregate
 * initialisers such as { 0, NULL, 0 } keep working.
 */
struct LookupEntry
{
    /** Integer value associated with this row. */
    int ival;

    /** String associated with this row; may be NULL. */
    const char *sval;

    /** Port number associated with this row. */
    int port;
};
/**
* File-local table of LookupEntry rows.
* The only row present is all-zero with a NULL string.
* NOTE(review): that row looks like an end-of-table sentinel; confirm how
* the table is scanned before adding or reordering entries.
*/
static LookupEntry schemes[] =
{
{ 0, NULL, 0 }
};
//#########################################################################
//# C O N S T R U C T O R
//#########################################################################
/**
*
*/
{
init();
}
/**
*
*/
{
init();
}
/**
*
*/
{
init();
}
/**
*
*/
{
init();
}
/**
*
*/
{
init();
return *this;
}
/**
*
*/
{
}
/**
* Resets the parse state to its defaults: parse length and port become 0,
* and the portSpecified, absolute and opaque flags become false.
*/
{
parselen = 0;
port = 0;
portSpecified = false;
absolute = false;
opaque = false;
}
/**
*
*/
{
}
//#########################################################################
//# A T T R I B U T E S
//#########################################################################
static const char *hexChars = "0123456789abcdef";
{
{
else
{
}
}
return buf;
}
{
{
}
{
}
{
}
return str;
}
{
return scheme;
}
{
return schemeStr;
}
{
if (portSpecified && port>=0)
{
char buf[7];
}
return ret;
}
{
return str;
}
{
return port;
}
{
return str;
}
{
#ifdef __WIN32__
unsigned int firstChar = 0;
{
if (pathStr[0] == '/' &&
firstChar++;
}
{
if (ch == '/')
else
}
#else
#endif
return npath;
}
/**
* Reports the current value of the 'absolute' flag.
* The flag is cleared when the parse state is reset and is set to true
* by the hierarchical-part parsing code in this file.
* @return the value of 'absolute'
*/
bool URI::isAbsolute() const
{
return absolute;
}
{
return opaque;
}
{
return str;
}
{
return str;
}
{
{
return i;
}
return -1;
}
{
/**
* The loop index here must be a signed int. An earlier version used an
* unsigned int (to match str.size()), which made the i>=0 loop
* condition always true.
*/
{
return i;
}
return -1;
}
{
char *c = (char *)key;
{
if (! (*c))
return false;
if (*c != str[i])
return false;
}
return true;
}
{
for (int i=0 ; i<len ; i++)
{
break;
}
return buf;
}
{
//### According to w3c, this is handled in 3 cases
//## 1
return other;
//## 2
{
return fragUri;
}
//## 3 http://www.ietf.org/rfc/rfc2396.txt, section 5.2
//# 3.1
{
//# 3.2
}
else
{
//# 3.3
{
}
else
{
if (pos >= 0)
{
//# append my path up to and including the '/'
for (int i = 0; i<=pos ; i++)
//# append other path
}
else
}
}
return newUri;
}
/**
* This follows the Java URI algorithm:
* 1. All "." segments are removed.
* 2. If a ".." segment is preceded by a non-".." segment
* then both of these segments are removed. This step
* is repeated until it is no longer applicable.
* 3. If the path is relative, and if its first segment
* contains a colon character (':'), then a "." segment
* is prepended. This prevents a relative URI with a path
* such as "a:b/c/d" from later being re-parsed as an
* opaque URI with a scheme of "a" and a scheme-specific
* part of "b/c/d". (Deviation from RFC 2396)
*/
{
//## Collect segments
return;
bool abs = false;
int pos=0;
if (path[0]=='/')
{
abs = true;
pos++;
}
{
if (pos2 < 0)
{
//printf("last segment:%s\n", toStr(seg).c_str());
break;
}
{
//printf("segment:%s\n", toStr(seg).c_str());
}
pos++;
}
//## Clean up (normalize) segments
bool edited = false;
{
if (sequ(s,"."))
{
edited = true;
}
{
--iter; //back up, then erase two entries
edited = true;
}
else
++iter;
}
//## Rebuild path, if necessary
if (edited)
{
if (abs)
{
}
{
}
}
}
//#########################################################################
//# M E S S A G E S
//#########################################################################
{
}
{
}
//#########################################################################
//# P A R S I N G
//#########################################################################
{
// Positions outside [0, parselen) yield -1 instead of reading parsebuf.
if (p<0 || p>=parselen)
return -1;
// In range: hand back the element stored at that position.
return parsebuf[p];
}
{
int p = p0;
while (p < parselen)
{
if (*key == '\0')
return p;
break;
p++; key++;
}
return p0;
}
//#########################################################################
//# Parsing is performed according to:
//# http://www.gbiv.com/protocols/uri/rfc/rfc3986.html#components
//#########################################################################
{
int p = p0;
int val = 0;
//# Upper 4
else
{
return -1;
}
p++;
val <<= 4;
//# Lower 4
else
{
return -1;
}
p++;
return p;
}
{
int p = p0;
if (ch != '&')
return p0;
p++;
if (!match(p, "#x"))
{
error("parseEntity: expected '#x'");
return -1;
}
p += 2;
int val;
if (p<0)
return -1;
if (ch != ';')
{
error("parseEntity: expected ';'");
return -1;
}
p++;
return p;
}
{
int p = p0;
if (ch != '%')
return p0;
p++;
int val;
if (p<0)
return -1;
return p;
}
{
int p = p0;
{
if (p2 > p)
{
p = p2;
return p;
}
}
return p;
}
{
int p = p0;
int ch;
//# Authority field (host and port, for example)
if (p2 > p)
{
p = p2;
portSpecified = false;
while (p < parselen)
{
if (ch == '/')
break;
{
int val;
if (p2<p)
{
return -1;
}
p = p2;
}
{
int val;
if (p2<p)
{
return -1;
}
p = p2;
}
else if (ch == ':')
{
portSpecified = true;
p++;
}
else if (portSpecified)
{
p++;
}
else
{
p++;
}
}
{
char *endStr;
}
}
//# Are we absolute?
{
absolute = true;
}
else if (ch == '/')
{
absolute = true;
if (p>p0) //in other words, if '/' is not the first char
opaque = true;
p++;
}
while (p < parselen)
{
break;
{
int val;
if (p2<p)
{
return -1;
}
p = p2;
}
{
int val;
if (p2<p)
{
return -1;
}
p = p2;
}
else
{
p++;
}
}
//trace("path:%s", toStr(path).c_str());
return p;
}
{
int p = p0;
if (ch != '?')
return p0;
p++;
while (p < parselen)
{
if (ch == '#')
break;
p++;
}
return p;
}
{
int p = p0;
if (ch != '#')
return p0;
p++;
while (p < parselen)
{
if (ch == '?')
break;
p++;
}
return p;
}
{
// Runs the component parsers in a fixed order, starting at p0:
// scheme, hierarchical part, query, fragment.
// The value each parser returns becomes the start position for the next.
// A negative return from any of them is reported through error() with the
// component's name, and the whole parse returns -1.
int p = p0;
int p2 = parseScheme(p);
if (p2 < 0)
{
error("Scheme");
return -1;
}
p = p2;
p2 = parseHierarchicalPart(p);
if (p2 < 0)
{
error("Hierarchical part");
return -1;
}
p = p2;
p2 = parseQuery(p);
if (p2 < 0)
{
error("Query");
return -1;
}
p = p2;
p2 = parseFragment(p);
if (p2 < 0)
{
error("Fragment");
return -1;
}
p = p2;
// On success, return the position reached after the last component.
return p;
}
{
if (!parsebuf)
{
error("parse : could not allocate parsebuf");
return false;
}
unsigned int i=0;
{
if (ch == '\\')
parsebuf[i++] = '/';
else
}
int p = parse(0);
normalize();
delete[] parsebuf;
if (p < 0)
{
error("Syntax error");
return false;
}
//printf("uri:%s\n", toString().c_str());
//printf("parse:%s\n", toStr(path).c_str());
return true;
}
} //namespace dom
} //namespace w3c
} //namespace org
//#########################################################################
//# E N D O F F I L E
//#########################################################################