uri-util.c revision 14383bf2be0296954609df5afd3c63c6555815f9
/* Copyright (c) 2010-2016 Dovecot authors, see the included COPYING file */
#include "lib.h"
#include "array.h"
#include "str.h"
#include "net.h"
#include "uri-util.h"
#include <ctype.h>
/* [URI-GEN] RFC3986 Appendix A:
URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
absolute-URI = scheme ":" hier-part [ "?" query ]
scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
URI-reference = URI / relative-ref
relative-ref = relative-part [ "?" query ] [ "#" fragment ]
relative-part = "//" authority path-abempty
/ path-absolute
/ path-noscheme
/ path-empty
hier-part = "//" authority path-abempty
/ path-absolute
/ path-rootless
/ path-empty
authority = [ userinfo "@" ] host [ ":" port ]
userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
host = IP-literal / IPv4address / reg-name
port = *DIGIT
IP-literal = "[" ( IPv6address / IPvFuture ) "]"
IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
IPv6address = 6( h16 ":" ) ls32
/ "::" 5( h16 ":" ) ls32
/ [ h16 ] "::" 4( h16 ":" ) ls32
/ [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
/ [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
/ [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
/ [ *4( h16 ":" ) h16 ] "::" ls32
/ [ *5( h16 ":" ) h16 ] "::" h16
/ [ *6( h16 ":" ) h16 ] "::"
h16 = 1*4HEXDIG
ls32 = ( h16 ":" h16 ) / IPv4address
IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
dec-octet = DIGIT ; 0-9
/ %x31-39 DIGIT ; 10-99
/ "1" 2DIGIT ; 100-199
/ "2" %x30-34 DIGIT ; 200-249
/ "25" %x30-35 ; 250-255
reg-name = *( unreserved / pct-encoded / sub-delims )
path = path-abempty ; begins with "/" or is empty
/ path-absolute ; begins with "/" but not "//"
/ path-noscheme ; begins with a non-colon segment
/ path-rootless ; begins with a segment
/ path-empty ; zero characters
path-abempty = *( "/" segment )
path-absolute = "/" [ segment-nz *( "/" segment ) ]
path-noscheme = segment-nz-nc *( "/" segment )
path-rootless = segment-nz *( "/" segment )
path-empty = 0<pchar>
segment = *pchar
segment-nz = 1*pchar
segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
; non-zero-length segment without any colon ":"
pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
query = *( pchar / "/" / "?" )
fragment = *( pchar / "/" / "?" )
pct-encoded = "%" HEXDIG HEXDIG
unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
reserved = gen-delims / sub-delims
gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
/ "*" / "+" / "," / ";" / "="
*/
#define URI_MAX_SCHEME_NAME_LEN 64
/* Character lookup table
*
* unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" [bit0]
* sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
* / "*" / "+" / "," / ";" / "=" [bit1]
* gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" [bit2]
* pchar = unreserved / sub-delims / ":" / "@" [bit0|bit1|bit3]
* 'pfchar' = unreserved / sub-delims / ":" / "@" / "/"
* [bit0|bit1|bit3|bit5]
* 'uchar' = unreserved / sub-delims / ":" [bit0|bit1|bit4]
* 'qchar' = pchar / "/" / "?" [bit0|bit1|bit3|bit5|bit6]
*
*/
#define CHAR_MASK_UNRESERVED (1<<0)
/*
 * Character class lookup table, indexed by octet value.
 *
 * Every entry is a bitmask of the classes the octet belongs to:
 *   0x01  unreserved
 *   0x02  sub-delims
 *   0x04  gen-delims
 *   0x08  additionally allowed in pchar   (":" and "@")
 *   0x10  additionally allowed in 'uchar' (":")
 *   0x20  additionally allowed in 'pfchar' ("/")
 *   0x40  additionally allowed in 'qchar' ("?")
 *
 * Only the 7-bit range is spelled out; the initializers for the upper 128
 * octets are omitted, so those entries are zero and belong to no class.
 */
static const unsigned char _uri_char_lookup[256] = {
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x00: control
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x08: control
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x10: control
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x18: control
	0x00, 0x02, 0x00, 0x04, 0x02, 0x00, 0x02, 0x02, // 0x20: SP ! " # $ % & '
	0x02, 0x02, 0x02, 0x02, 0x02, 0x01, 0x01, 0x24, // 0x28: ( ) * + , - . /
	0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x30: 0 1 2 3 4 5 6 7
	0x01, 0x01, 0x1c, 0x02, 0x00, 0x02, 0x00, 0x44, // 0x38: 8 9 : ; < = > ?
	0x0c, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x40: @ A B C D E F G
	0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x48: H I J K L M N O
	0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x50: P Q R S T U V W
	0x01, 0x01, 0x01, 0x04, 0x00, 0x04, 0x00, 0x01, // 0x58: X Y Z [ \ ] ^ _
	0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x60: ` a b c d e f g
	0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x68: h i j k l m n o
	0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x70: p q r s t u v w
	0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, // 0x78: x y z { | } ~ DEL
};
/*
 * Decode a single hexadecimal digit.
 *
 * Accepts '0'-'9', 'a'-'f' and 'A'-'F'.
 *
 * Returns the numeric value of the digit (0..15), or -1 when the character
 * is not a hexadecimal digit.
 */
static inline int _decode_hex_digit(const unsigned char digit)
{
	switch (digit) {
	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7': case '8': case '9':
		return digit - '0';

	/* The letter cases must yield 10..15; without an explicit return
	   they would fall out of the switch and be rejected as invalid. */
	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
		return digit - 'a' + 0x0a;

	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
		return digit - 'A' + 0x0a;

	default:
		break;
	}
	return -1;
}
static int
const unsigned char **p, const unsigned char *pend,
{
int value;
return 0;
*p += 1;
return -1;
}
if ((value = _decode_hex_digit(**p)) < 0) {
"Expecting hex digit after '%%', but found '%c'", **p);
return -1;
}
*p += 1;
if ((value = _decode_hex_digit(**p)) < 0) {
"Expecting hex digit after '%%%c', but found '%c'", *((*p)-1), **p);
return -1;
}
*p += 1;
if (*ch_r == '\0') {
"Percent encoding is not allowed to encode NUL character";
return -1;
}
return 1;
}
unsigned char *ch_r)
{
return uri_parse_pct_encoded_data
}
static int
{
return 0;
return 1;
}
return 0;
}
{
int len = 0;
int ret;
unsigned char ch = 0;
return -1;
if (ret == 0)
break;
len++;
}
return len > 0 ? 1 : 0;
}
{
int len = 0;
int ret;
unsigned char ch = 0;
return -1;
else if (ret == 0 &&
return -1;
if (ret == 0)
break;
len++;
}
return len > 0 ? 1 : 0;
}
{
const unsigned char *p = (const unsigned char *)data;
int ret;
/* NULL means unlimited; solely rely on '\0' */
}
if (p >= pend || *p == '\0') {
*decoded_r = "";
return TRUE;
}
while (p < pend && *p != '\0') {
unsigned char ch;
if (ret < 0)
return FALSE;
} else {
str_append_c(decoded, *p);
p++;
}
}
return TRUE;
}
{
/* RFC 3986:
* scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
*/
return 0;
while (len < URI_MAX_SCHEME_NAME_LEN &&
break;
len++;
}
return -1;
}
return 1;
}
{
struct uri_parser parser;
return -1;
return 0;
}
static int
{
unsigned int octet = 0;
int count = 0;
/* RFC 3986:
*
* dec-octet = DIGIT ; 0-9
* / %x31-39 DIGIT ; 10-99
* / "1" 2DIGIT ; 100-199
* / "2" %x30-34 DIGIT ; 200-249
* / "25" %x30-35 ; 250-255
*/
if (octet > 255)
return -1;
count++;
}
if (count > 0) {
return 1;
}
return 0;
}
static int
{
int ret;
int i;
/* RFC 3986:
*
* IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
*/
return ret;
return -1;
return -1;
}
return 1;
}
static int
{
/* RFC 3986:
*
* reg-name = *( unreserved / pct-encoded / sub-delims )
*/
int ret;
unsigned char c;
/* unreserved / pct-encoded */
return -1;
else if (ret == 0 &&
return -1;
if (ret > 0) {
str_append_c(reg_name, c);
continue;
}
/* sub-delims */
continue;
}
break;
}
return 0;
}
{
int ret;
/* RFC 3986, Section 3.2.2:
A registered name intended for lookup in the DNS uses the syntax
defined in Section 3.5 of [RFC1034] and Section 2.1 of [RFC1123].
Such a name consists of a sequence of domain labels separated by ".",
each domain label starting and ending with an alphanumeric character
and possibly also containing "-" characters. The rightmost domain
label of a fully qualified domain name in DNS may be followed by a
single "." and should be if it is necessary to distinguish between
the complete domain name and some local domain.
RFC 2396, Section 3.2.2 (old URI specification):
hostname = *( domainlabel "." ) toplabel [ "." ]
domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
toplabel = alpha | alpha *( alphanum | "-" ) alphanum
The description in RFC 3986 is more liberal, so:
hostname = *( domainlabel "." ) domainlabel [ "." ]
domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
We also support percent encoding in spirit of the generic reg-name,
even though this should explicitly not be used according to the RFC.
It is, however, not strictly forbidden (unlike older RFC), so we
support it.
*/
for (;;) {
const unsigned char *offset;
/* alphanum */
break;
return -1;
} else if (ret > 0) {
return -1;
} else {
break;
}
/* *( alphanum | "-" ) alphanum */
do {
return -1;
} else if (ret > 0) {
break;
}
} else {
break;
}
return -1;
}
}
/* "." */
break;
}
return 0;
/* remove trailing '.' */
}
return 1;
}
const char **host_name_r)
{
int ret;
if (host_name_r != NULL)
return ret;
if (host_name_r != NULL)
return 1;
}
static int
{
const unsigned char *p;
const char *address;
int ret;
/* IP-literal = "[" ( IPv6address / IPvFuture ) "]"
* IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
* IPv6address = ; Syntax not relevant: parsed using inet_pton()
*/
/* "[" already verified */
/* Scan for end of address */
if (*p == ']')
break;
}
return -1;
}
if (*address == '\0') {
return -1;
}
if (*address == 'v') {
"Future IP host address '%s' not supported", address);
return -1;
}
"Invalid IPv6 host address '%s'", address);
return -1;
}
return 1;
}
{
const unsigned char *preserve;
int ret;
/* RFC 3986:
*
* host = IP-literal / IPv4address / reg-name
*/
/* IP-literal / */
return -1;
}
return 1;
}
/* IPv4address /
*
* If it fails to parse, we try to parse it as a reg-name
*/
}
return ret;
}
str_truncate(literal, 0);
/* reg-name */
if (dns_name) {
return -1;
return -1;
return 0;
}
static int
{
const unsigned char *first;
/* RFC 3986:
*
* port = *DIGIT
*/
return 0;
return -1;
}
return 1;
}
{
const unsigned char *p;
int ret;
/*
* authority = [ userinfo "@" ] host [ ":" port ]
*/
/* Scan ahead to check whether there is a [userinfo "@"] uri component */
/* refuse 8bit characters */
if ((*p & 0x80) != 0)
break;
/* break at first delimiter */
break;
}
/* Extract userinfo */
}
/* host */
if (uri_parse_host(parser,
return -1;
return 1;
case ':': case '/': case '?': case '#':
break;
default:
return -1;
}
/* [":" port] */
return ret;
return 1;
case '/': case '?': case '#':
break;
default:
return -1;
}
}
return 1;
}
{
/* "//" authority */
return 0;
}
{
if (*p == '%') {
p++;
continue;
}
break;
p++;
}
*p != '/' && *p != '?' && *p != '#' ) {
"Path component contains invalid character";
return -1;
}
return 0;
return 1;
}
int *relative_r, const char *const **path_r)
{
unsigned int count;
int relative = 1;
int ret;
count = 0;
else
/* check for a leading '/' and indicate absolute path
when it is present
*/
relative = 0;
}
/* parse first segment */
return -1;
for (;;) {
if (ret > 0) {
/* strip dot segments */
if (segment[0] == '.') {
/* '..' -> skip and... */
/* ... pop last segment (if any) */
if (count > 0) {
}
count--;
} else if ( relative > 0 ) {
relative++;
}
}
/* '.' -> skip */
}
}
} else {
segment = "";
}
count++;
}
break;
/* parse next path segment */
return -1;
}
if (relative_r != NULL)
*relative_r = relative;
/* path part of URI is empty */
return 0;
}
/* special treatment for a trailing '..' or '.' */
segment = "";
}
}
return -1;
}
return 1;
}
{
/* RFC 3986:
*
* URI = { ... } [ "?" query ] { ... }
* query = *( pchar / "/" / "?" )
* pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
*/
return 0;
p++;
if (*p == '%') {
p++;
continue;
}
break;
p++;
}
return -1;
}
return 1;
}
{
/* RFC 3986:
*
* URI = { ... } [ "#" fragment ]
* fragment = *( pchar / "/" / "?" )
* pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
*/
return 0;
p++;
if (*p == '%') {
p++;
continue;
}
break;
p++;
}
return -1;
}
if (fragment_r != NULL)
return 1;
}
{
}
{
}
{
else
}
/*
* Generic URI manipulation
*/
{
/* create host name literal if caller is lazy */
}
}
/*
* Generic URI construction
*/
const unsigned char esc_table[256],
const char *data)
{
const unsigned char *pbegin, *p;
while (*p != '\0') {
if ((p - pbegin) > 0)
p++;
pbegin = p;
} else {
p++;
}
}
if ((p - pbegin) > 0)
}
{
}
const char *data)
{
}
{
}
{
}
{
return;
}
}
{
/* assume IPv6 literal if starts with '['; avoid encoding */
else
} else
}
{
if (port != 0)
}
const char *data)
{
}
{
if (*segment != '\0')
}
const char *data)
{
}
{
if (*path != '\0')
}
const char *data)
{
}
{
if (*query != '\0')
}
const char *data)
{
}
{
if (*fragment != '\0')
}