uri-util.c revision 601eea4261fc7aaf76328842e6c864f73fb308df
/* Copyright (c) 2010-2014 Dovecot authors, see the included COPYING file */
#include "lib.h"
#include "array.h"
#include "str.h"
#include "net.h"
#include "uri-util.h"
#include <ctype.h>
/*
* Generic URI parsing.
*
* [URI-GEN] RFC3986 Appendix A:
*
* host = IP-literal / IPv4address / reg-name
* port = *DIGIT
* reg-name = *( unreserved / pct-encoded / sub-delims )
* unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
* pct-encoded = "%" HEXDIG HEXDIG
* sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
* / "*" / "+" / "," / ";" / "="
* IP-literal = "[" ( IPv6address / IPvFuture ) "]"
* IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
* IPv6address = 6( h16 ":" ) ls32
* / "::" 5( h16 ":" ) ls32
* / [ h16 ] "::" 4( h16 ":" ) ls32
* / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
* / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
* / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
* / [ *4( h16 ":" ) h16 ] "::" ls32
* / [ *5( h16 ":" ) h16 ] "::" h16
* / [ *6( h16 ":" ) h16 ] "::"
* h16 = 1*4HEXDIG
* ls32 = ( h16 ":" h16 ) / IPv4address
* IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
* dec-octet = DIGIT ; 0-9
* / %x31-39 DIGIT ; 10-99
* / "1" 2DIGIT ; 100-199
* / "2" %x30-34 DIGIT ; 200-249
* / "25" %x30-35 ; 250-255
*/
#define URI_MAX_SCHEME_NAME_LEN 64
/* Character lookup table
*
* unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" [bit0]
* sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
* / "*" / "+" / "," / ";" / "=" [bit1]
* gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" [bit2]
* pchar = unreserved / sub-delims / ":" / "@" [bit0|bit1|bit3]
* 'pfchar' = unreserved / sub-delims / ":" / "@" / "/"
* [bit0|bit1|bit3|bit5]
* 'uchar' = unreserved / sub-delims / ":" [bit0|bit1|bit4]
* 'qchar' = pchar / "/" / "?" [bit0|bit1|bit3|bit5|bit6]
*
*/
/* Bit 0 of a lookup entry: character belongs to the 'unreserved' class */
#define CHAR_MASK_UNRESERVED (1 << 0)

/*
 * Character class bitmask, indexed by byte value. Only the 7-bit range is
 * listed explicitly; the entries for 0x80..0xff are implicitly zero.
 */
static const unsigned char _uri_char_lookup[256] = {
	/* 0x00 - 0x1f: control characters, no class */
	 0,  0,  0,  0,  0,  0,  0,  0,
	 0,  0,  0,  0,  0,  0,  0,  0,
	 0,  0,  0,  0,  0,  0,  0,  0,
	 0,  0,  0,  0,  0,  0,  0,  0,
	/* 0x20 - 0x27: space through single quote */
	 0,  2,  0,  4,  2,  0,  2,  2,
	/* 0x28 - 0x2f: '(' through '/' */
	 2,  2,  2,  2,  2,  1,  1, 36,
	/* 0x30 - 0x37: digits '0' through '7' */
	 1,  1,  1,  1,  1,  1,  1,  1,
	/* 0x38 - 0x3f: '8', '9', then ':' through '?' */
	 1,  1, 28,  2,  0,  2,  0, 68,
	/* 0x40 - 0x47: '@', then 'A' through 'G' */
	12,  1,  1,  1,  1,  1,  1,  1,
	/* 0x48 - 0x4f: 'H' through 'O' */
	 1,  1,  1,  1,  1,  1,  1,  1,
	/* 0x50 - 0x57: 'P' through 'W' */
	 1,  1,  1,  1,  1,  1,  1,  1,
	/* 0x58 - 0x5f: 'X' through 'Z', then '[' through '_' */
	 1,  1,  1,  4,  0,  4,  0,  1,
	/* 0x60 - 0x67: backtick, then 'a' through 'g' */
	 0,  1,  1,  1,  1,  1,  1,  1,
	/* 0x68 - 0x6f: 'h' through 'o' */
	 1,  1,  1,  1,  1,  1,  1,  1,
	/* 0x70 - 0x77: 'p' through 'w' */
	 1,  1,  1,  1,  1,  1,  1,  1,
	/* 0x78 - 0x7f: 'x' through 'z', then '{' through DEL */
	 1,  1,  1,  0,  0,  0,  1,  0,
};
/* Decode a single ASCII hexadecimal digit.
 *
 * Returns the numeric value (0..15) for '0'-'9', 'a'-'f' and 'A'-'F',
 * or -1 when the character is not a hexadecimal digit.
 */
static inline int _decode_hex_digit(const unsigned char digit)
{
	switch (digit) {
	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7': case '8': case '9':
		return digit - '0';
	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
		/* lower-case letters map to 10..15 */
		return digit - 'a' + 10;
	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
		/* upper-case letters map to 10..15 as well */
		return digit - 'A' + 10;
	default:
		break;
	}
	/* not a hexadecimal digit */
	return -1;
}
static int ATTR_NULL(3)
{
int value;
return -1;
}
if ((value = _decode_hex_digit(**p)) < 0) {
"Expecting hex digit after '%%', but found '%c'", **p);
return -1;
}
*p += 1;
if ((value = _decode_hex_digit(**p)) < 0) {
"Expecting hex digit after '%%%c', but found '%c'", *((*p)-1), **p);
return -1;
}
*p += 1;
if (*ch_r == '\0') {
"Percent encoding is not allowed to encode NUL character";
return -1;
}
return 1;
}
static int
{
return -1;
return 1;
}
return 0;
return 1;
}
return 0;
}
{
int len = 0;
int ret;
unsigned char ch = 0;
return -1;
if (ret == 0)
break;
len++;
}
return len > 0 ? 1 : 0;
}
{
const unsigned char *p = (const unsigned char *)data;
/* NULL means unlimited; solely rely on '\0' */
}
if (p >= pend || *p == '\0') {
*decoded_r = "";
return TRUE;
}
while (p < pend && *p != '\0') {
unsigned char ch;
if (*p == '%') {
p++;
return FALSE;
} else {
str_append_c(decoded, *p);
p++;
}
}
return TRUE;
}
{
const char *p = *uri_p;
/* RFC 3986:
* scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
*/
if (!i_isalpha(*p))
return -1;
p++;
break;
p++;
len++;
}
if (*p != ':')
return -1;
*uri_p = p + 1;
return 0;
}
{
const char *p;
return 0;
if (uri_cut_scheme(&p, scheme_r) < 0)
return 0;
return 1;
}
static int
{
int count = 0;
/* RFC 3986:
*
* dec-octet = DIGIT ; 0-9
* / %x31-39 DIGIT ; 10-99
* / "1" 2DIGIT ; 100-199
* / "2" %x30-34 DIGIT ; 200-249
* / "25" %x30-35 ; 250-255
*/
return -1;
count++;
}
if (count > 0) {
return 1;
}
return 0;
}
static int
{
int ret;
int i;
/* RFC 3986:
*
* IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
*/
return ret;
return -1;
return -1;
}
return 1;
}
{
int len = 0;
/* RFC 3986:
*
* reg-name = *( unreserved / pct-encoded / sub-delims )
*/
int ret;
unsigned char c;
/* unreserved / pct-encoded */
return -1;
if (ret > 0) {
str_append_c(reg_name, c);
len++;
continue;
}
/* sub-delims */
len++;
continue;
}
break;
}
return len > 0 ? 1 : 0;
}
#ifdef HAVE_IPV6
static int
{
const unsigned char *p;
const char *address;
int ret;
/* IP-literal = "[" ( IPv6address / IPvFuture ) "]"
* IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
* IPv6address = ; Syntax not relevant: parsed using inet_pton()
*/
/* "[" already verified */
/* Scan for end of address */
if (*p == ']')
break;
}
return -1;
}
if (*address == '\0') {
return -1;
}
if (*address == 'v') {
"Future IP host address '%s' not supported", address);
return -1;
}
"Invalid IPv6 host address '%s'", address);
return -1;
}
return 1;
}
#endif
{
const unsigned char *preserve;
int ret;
/* RFC 3986:
*
* host = IP-literal / IPv4address / reg-name
*/
/* IP-literal / */
#ifdef HAVE_IPV6
return -1;
}
return 1;
#else
return -1;
#endif
}
/* IPv4address /
*
* If it fails to parse, we try to parse it as a reg-name
*/
}
return ret;
}
str_truncate(literal, 0);
/* reg-name */
}
return ret;
}
return 0;
}
{
unsigned long port = 0;
int count = 0;
/* RFC 3986:
*
* port = *DIGIT
*/
return -1;
}
count++;
}
if (count > 0) {
}
return 1;
}
return 0;
}
struct uri_authority *auth)
{
const unsigned char *p;
int ret;
/*
* authority = [ userinfo "@" ] host [ ":" port ]
*/
/* Scan ahead to check whether there is a [userinfo "@"] uri component */
/* refuse 8bit characters */
if ((*p & 0x80) != 0)
break;
/* break at first delimiter */
break;
}
/* Extract userinfo */
}
/* host */
if (ret == 0) {
else
return -1;
}
return ret;
}
/* [":" ... */
return 1;
/* ... port] */
return ret;
return 1;
}
struct uri_authority *auth)
{
/* "//" authority */
return 0;
}
{
if (*p == '%') {
p++;
continue;
}
break;
p++;
}
return 0;
return 1;
}
int *relative_r, const char *const **path_r)
{
unsigned int count;
int relative = 1;
int ret;
/* check for a leading '/' and indicate absolute path
when it is present
*/
relative = 0;
}
/* parse first segment */
return -1;
for (;;) {
if (ret > 0) {
/* strip dot segments */
if (segment[0] == '.') {
/* '..' -> skip and... */
/* ... pop last segment (if any) */
if (count > 0) {
} else if ( relative > 0 ) {
relative++;
}
}
/* '.' -> skip */
}
}
} else {
segment = "";
}
break;
/* parse next path segment */
return -1;
}
*relative_r = relative;
/* path part of URI is empty */
return 0;
}
/* special treatment for a trailing '..' or '.' */
segment = "";
}
return 1;
}
{
/* RFC 3986:
*
* URI = { ... } [ "?" query ] { ... }
* query = *( pchar / "/" / "?" )
* pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
*/
return 0;
p++;
if (*p == '%') {
p++;
continue;
}
break;
p++;
}
return 1;
}
{
/* RFC 3986:
*
* URI = { ... } [ "#" fragment ]
* fragment = *( pchar / "/" / "?" )
* pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
*/
return 0;
p++;
if (*p == '%') {
p++;
continue;
}
break;
p++;
}
if (fragment_r != NULL)
return 1;
}
{
}
{
else
}
/*
* Generic URI construction
*/
static void
{
const unsigned char *p = (const unsigned char *)data;
while (*p != '\0') {
} else {
str_append_c(out, *p);
}
p++;
}
}
{
}
const char *data)
{
}
{
}
{
}
{
return;
}
}
{
}
const char *data)
{
}
{
if (*segment != '\0')
}
const char *data)
{
}
{
if (*path != '\0')
}
const char *data)
{
}
{
if (*query != '\0')
}
const char *data)
{
}
{
if (*fragment != '\0')
}