/* Copyright (c) 2010-2018 Dovecot authors, see the included COPYING file */
#include "lib.h"
#include "array.h"
#include "str.h"
#include "net.h"
#include "uri-util.h"
#include <ctype.h>
/* [URI-GEN] RFC3986 Appendix A:
URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
absolute-URI = scheme ":" hier-part [ "?" query ]
scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
URI-reference = URI / relative-ref
relative-ref = relative-part [ "?" query ] [ "#" fragment ]
relative-part = "//" authority path-abempty
/ path-absolute
/ path-noscheme
/ path-empty
hier-part = "//" authority path-abempty
/ path-absolute
/ path-rootless
/ path-empty
authority = [ userinfo "@" ] host [ ":" port ]
userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
host = IP-literal / IPv4address / reg-name
port = *DIGIT
IP-literal = "[" ( IPv6address / IPvFuture ) "]"
IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
IPv6address = 6( h16 ":" ) ls32
/ "::" 5( h16 ":" ) ls32
/ [ h16 ] "::" 4( h16 ":" ) ls32
/ [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
/ [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
/ [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
/ [ *4( h16 ":" ) h16 ] "::" ls32
/ [ *5( h16 ":" ) h16 ] "::" h16
/ [ *6( h16 ":" ) h16 ] "::"
h16 = 1*4HEXDIG
ls32 = ( h16 ":" h16 ) / IPv4address
IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
dec-octet = DIGIT ; 0-9
/ %x31-39 DIGIT ; 10-99
/ "1" 2DIGIT ; 100-199
/ "2" %x30-34 DIGIT ; 200-249
/ "25" %x30-35 ; 250-255
reg-name = *( unreserved / pct-encoded / sub-delims )
path = path-abempty ; begins with "/" or is empty
/ path-absolute ; begins with "/" but not "//"
/ path-noscheme ; begins with a non-colon segment
/ path-rootless ; begins with a segment
/ path-empty ; zero characters
path-abempty = *( "/" segment )
path-absolute = "/" [ segment-nz *( "/" segment ) ]
path-noscheme = segment-nz-nc *( "/" segment )
path-rootless = segment-nz *( "/" segment )
path-empty = 0<pchar>
segment = *pchar
segment-nz = 1*pchar
segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
; non-zero-length segment without any colon ":"
pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
query = *( pchar / "/" / "?" )
fragment = *( pchar / "/" / "?" )
pct-encoded = "%" HEXDIG HEXDIG
unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
reserved = gen-delims / sub-delims
gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
/ "*" / "+" / "," / ";" / "="
*/
/* Upper bound on the number of scheme characters uri_parse_scheme() scans
   before it expects the terminating ':' */
#define URI_MAX_SCHEME_NAME_LEN 64
/* Character lookup table
*
* unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" [bit0]
* sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
* / "*" / "+" / "," / ";" / "=" [bit1]
* gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" [bit2]
* pchar = unreserved / sub-delims / ":" / "@" [bit0|bit1|bit3]
* 'pfchar' = unreserved / sub-delims / ":" / "@" / "/"
* [bit0|bit1|bit3|bit5]
* 'uchar' = unreserved / sub-delims / ":" [bit0|bit1|bit4]
* 'qchar' = pchar / "/" / "?" [bit0|bit1|bit3|bit5|bit6]
*
*/
/* Bit masks applied to _uri_char_lookup entries; the composite masks are
   built from the narrower ones so the relationship between the character
   classes stays visible. */
#define CHAR_MASK_UNRESERVED (1<<0)
#define CHAR_MASK_SUB_DELIMS (1<<1)
#define CHAR_MASK_PCHAR \
	(CHAR_MASK_UNRESERVED|CHAR_MASK_SUB_DELIMS|(1<<3))
#define CHAR_MASK_PFCHAR (CHAR_MASK_PCHAR|(1<<5))
#define CHAR_MASK_UCHAR \
	(CHAR_MASK_UNRESERVED|CHAR_MASK_SUB_DELIMS|(1<<4))
#define CHAR_MASK_QCHAR (CHAR_MASK_PFCHAR|(1<<6))
/* Per-byte character class bits. Only the first 128 entries are listed;
   the remaining entries (0x80-0xff) are implicitly zero. */
static const unsigned char _uri_char_lookup[256] = {
	/* 0x00 - 0x0f: control characters */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/* 0x10 - 0x1f: control characters */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/* 0x20 - 0x2f: SP ! " # $ % & ' ( ) * + , - . / */
	0x00, 0x02, 0x00, 0x04, 0x02, 0x00, 0x02, 0x02,
	0x02, 0x02, 0x02, 0x02, 0x02, 0x01, 0x01, 0x24,
	/* 0x30 - 0x3f: 0-9 : ; < = > ? */
	0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
	0x01, 0x01, 0x1c, 0x02, 0x00, 0x02, 0x00, 0x44,
	/* 0x40 - 0x4f: @ A-O */
	0x0c, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
	0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
	/* 0x50 - 0x5f: P-Z [ \ ] ^ _ */
	0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
	0x01, 0x01, 0x01, 0x04, 0x00, 0x04, 0x00, 0x01,
	/* 0x60 - 0x6f: ` a-o */
	0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
	0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
	/* 0x70 - 0x7f: p-z { | } ~ DEL */
	0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
	0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00,
};
/* Convert one ASCII hexadecimal digit (either case) to its value 0..15.
   Returns -1 when the byte is not a hexadecimal digit. */
static inline int _decode_hex_digit(const unsigned char digit)
{
	if (digit >= '0' && digit <= '9')
		return digit - '0';
	if (digit >= 'a' && digit <= 'f')
		return digit - 'a' + 0x0a;
	if (digit >= 'A' && digit <= 'F')
		return digit - 'A' + 0x0A;
	return -1;
}
/* Decode one percent-encoded triplet ("%" HEXDIG HEXDIG) located at *p.
 *
 * pend is the end of the readable data, or NULL when the data is delimited
 * only by a terminating NUL byte. The boundary is always checked before a
 * byte is read, so nothing at or beyond pend is dereferenced.
 *
 * Returns 1 and stores the decoded byte in *ch_r when a triplet was consumed
 * (*p then points just past it); 0 when *p is at pend or does not point at
 * '%' (*p is left untouched); -1 with parser->error set when the triplet is
 * truncated, contains a non-hex digit, or encodes NUL while
 * parser->allow_pct_nul is not set.
 */
static int
uri_parse_pct_encoded_data(struct uri_parser *parser,
			   const unsigned char **p, const unsigned char *pend,
			   unsigned char *ch_r) ATTR_NULL(3)
{
	int value;

	if ((pend != NULL && *p >= pend) || **p != '%')
		return 0;
	*p += 1;

	/* Two more bytes are needed; with a NUL-delimited input the second
	   byte is only inspected once the first is known not to be NUL. */
	if ((pend != NULL && pend - *p < 2) ||
	    **p == 0 || *(*p+1) == 0) {
		parser->error = "Unexpected URI boundary after '%'";
		return -1;
	}

	if ((value = _decode_hex_digit(**p)) < 0) {
		parser->error = p_strdup_printf(parser->pool,
			"Expecting hex digit after '%%', but found '%c'", **p);
		return -1;
	}

	*ch_r = (value & 0x0f) << 4;
	*p += 1;

	if ((value = _decode_hex_digit(**p)) < 0) {
		parser->error = p_strdup_printf(parser->pool,
			"Expecting hex digit after '%%%c', but found '%c'", *((*p)-1), **p);
		return -1;
	}

	*ch_r |= (value & 0x0f);
	*p += 1;

	if (!parser->allow_pct_nul && *ch_r == '\0') {
		parser->error =
			"Percent encoding is not allowed to encode NUL character";
		return -1;
	}
	return 1;
}
/* Decode a percent-encoded triplet at the parser's current position, using
   the parser's end pointer as the boundary. Returns whatever
   uri_parse_pct_encoded_data() returns. */
int uri_parse_pct_encoded(struct uri_parser *parser,
			  unsigned char *ch_r)
{
	const unsigned char **cur_p = &parser->cur;

	return uri_parse_pct_encoded_data(parser, cur_p, parser->end, ch_r);
}
/* Consume a single 'unreserved' character at the current position.
   Returns 1 and stores it in *ch_r when the byte is 7-bit and flagged as
   unreserved in the lookup table; returns 0 (consuming nothing) otherwise.
   The byte at parser->cur is read without a boundary check here. */
static int
uri_parse_unreserved_char(struct uri_parser *parser, unsigned char *ch_r)
{
	const unsigned char c = *parser->cur;

	if ((c & 0x80) != 0 ||
	    (_uri_char_lookup[c] & CHAR_MASK_UNRESERVED) == 0)
		return 0;

	*ch_r = c;
	parser->cur++;
	return 1;
}
/* Consume a run of 'unreserved' characters, appending them to part when it
   is not NULL. Returns 1 if at least one character was consumed, 0 if none
   was, and -1 if the character parser reports an error. */
int uri_parse_unreserved(struct uri_parser *parser, string_t *part)
{
	int consumed = 0;

	for (; parser->cur < parser->end; consumed++) {
		unsigned char ch = 0;
		int ret = uri_parse_unreserved_char(parser, &ch);

		if (ret < 0)
			return -1;
		if (ret == 0)
			break;
		if (part != NULL)
			str_append_c(part, ch);
	}
	return consumed > 0 ? 1 : 0;
}
/* Consume a run of 'unreserved' characters and percent-encoded triplets,
   appending the (decoded) bytes to part when it is not NULL. Returns 1 if
   at least one item was consumed, 0 if none was, -1 on a decoding error. */
int uri_parse_unreserved_pct(struct uri_parser *parser, string_t *part)
{
	int consumed = 0;

	while (parser->cur < parser->end) {
		unsigned char ch = 0;
		int ret = uri_parse_pct_encoded(parser, &ch);

		if (ret < 0)
			return -1;
		if (ret == 0) {
			/* not a triplet; try a plain unreserved character */
			ret = uri_parse_unreserved_char(parser, &ch);
			if (ret < 0)
				return -1;
			if (ret == 0)
				break;
		}
		if (part != NULL)
			str_append_c(part, ch);
		consumed++;
	}
	return consumed > 0 ? 1 : 0;
}
/* Percent-decode the string starting at data.
 *
 * Decoding stops at until, or at the first NUL byte when until is NULL
 * (a NUL byte before until also ends the input). The limit applies to
 * percent-encoded triplets as well: a triplet that does not fit completely
 * before until is reported as an error instead of being decoded from bytes
 * beyond the limit.
 *
 * Returns TRUE on success; when decoded_r is not NULL it receives "" for
 * empty input, or otherwise a copy of the decoded data allocated from
 * parser->pool. Returns FALSE when a triplet cannot be decoded (the triplet
 * decoder sets parser->error).
 */
bool uri_data_decode(struct uri_parser *parser, const char *data,
		     const char *until, const char **decoded_r)
{
	const unsigned char *p = (const unsigned char *)data;
	const unsigned char *pend = (const unsigned char *)until;
	string_t *decoded;
	int ret;

	/* pend == NULL means unlimited; solely rely on '\0' */
	if ((pend != NULL && p >= pend) || *p == '\0') {
		if (decoded_r != NULL)
			*decoded_r = "";
		return TRUE;
	}

	decoded = uri_parser_get_tmpbuf(parser, 256);
	while ((pend == NULL || p < pend) && *p != '\0') {
		unsigned char ch;

		if ((ret=uri_parse_pct_encoded_data
			(parser, &p, pend, &ch)) != 0) {
			if (ret < 0)
				return FALSE;
			str_append_c(decoded, ch);
		} else {
			str_append_c(decoded, *p);
			p++;
		}
	}

	if (decoded_r != NULL)
		*decoded_r = p_strdup(parser->pool, str_c(decoded));
	return TRUE;
}
/* Parse a URI scheme followed by ':'.
 *
 * RFC 3986:
 *   scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
 *
 * Returns 0 (consuming nothing) when the input does not start with a
 * letter, -1 with parser->error set when the scheme characters are not
 * followed by ':', and 1 on success. On success the parser is positioned
 * after the ':' and, when scheme_r is not NULL, it receives the scheme name
 * as returned by t_strndup(). At most URI_MAX_SCHEME_NAME_LEN scheme
 * characters are scanned.
 */
int uri_parse_scheme(struct uri_parser *parser, const char **scheme_r)
{
	const unsigned char *start = parser->cur;
	size_t n;

	if (parser->cur >= parser->end || !i_isalpha(*parser->cur))
		return 0;
	parser->cur++;

	for (n = 1; n < URI_MAX_SCHEME_NAME_LEN &&
		     parser->cur < parser->end; n++) {
		if (!i_isalnum(*parser->cur) &&
		    *parser->cur != '+' && *parser->cur != '-' &&
		    *parser->cur != '.')
			break;
		parser->cur++;
	}

	if (parser->cur >= parser->end || *parser->cur != ':') {
		parser->error = "Invalid URI scheme";
		return -1;
	}

	if (scheme_r != NULL)
		*scheme_r = t_strndup(start, parser->cur - start);
	parser->cur++;
	return 1;
}
/* Parse the scheme at the start of *uri_p. On success returns 0, advances
   *uri_p past the "scheme:" prefix and passes scheme_r on to
   uri_parse_scheme(). Returns -1 (leaving *uri_p untouched) when no valid
   scheme is found. */
int uri_cut_scheme(const char **uri_p, const char **scheme_r)
{
	struct uri_parser parser;
	int ret;

	uri_parser_init(&parser, NULL, *uri_p);
	ret = uri_parse_scheme(&parser, scheme_r);
	if (ret <= 0)
		return -1;

	*uri_p = (const char *)parser.cur;
	return 0;
}
/* Parse a run of decimal digits as one octet of an IPv4 address.
 *
 * RFC 3986:
 *   dec-octet = DIGIT                 ; 0-9
 *             / %x31-39 DIGIT         ; 10-99
 *             / "1" 2DIGIT            ; 100-199
 *             / "2" %x30-34 DIGIT     ; 200-249
 *             / "25" %x30-35          ; 250-255
 *
 * Consumed digits are appended to literal when it is not NULL. Returns 1
 * and stores the value in *octet_r when at least one digit was read, 0 when
 * there is no digit at the current position, and -1 as soon as the
 * accumulated value exceeds 255 (the offending digit is not consumed).
 */
static int
uri_parse_dec_octet(struct uri_parser *parser, string_t *literal,
		    uint8_t *octet_r) ATTR_NULL(2)
{
	unsigned int value = 0;
	int digits = 0;

	for (; parser->cur < parser->end && i_isdigit(*parser->cur);
	     parser->cur++, digits++) {
		value = value * 10 + (parser->cur[0] - '0');
		if (value > 255)
			return -1;
		if (literal != NULL)
			str_append_c(literal, *parser->cur);
	}

	if (digits == 0)
		return 0;
	*octet_r = value;
	return 1;
}
/* Parse a dotted-quad IPv4 address.
 *
 * RFC 3986:
 *   IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
 *
 * All four octets are required: input that ends after fewer octets (for
 * example "1.2") is rejected with -1 rather than being accepted as a short
 * address, so that the caller can retry the text as a registered name.
 *
 * The textual form is appended to literal when it is not NULL. Returns 1 on
 * success (storing the address in network byte order in *ip4_r when it is
 * not NULL), the first octet parser's result (0 or -1) when the input does
 * not start with a valid octet, and -1 for any later mismatch. The parser
 * position is not restored on failure.
 */
static int
uri_parse_ipv4address(struct uri_parser *parser, string_t *literal,
		      struct in_addr *ip4_r) ATTR_NULL(2,3)
{
	uint8_t octet;
	uint32_t ip = 0;
	int ret;
	int i;

	if ((ret = uri_parse_dec_octet(parser, literal, &octet)) <= 0)
		return ret;
	ip = octet;

	for (i = 0; i < 3; i++) {
		/* each remaining octet must be introduced by a '.' */
		if (parser->cur >= parser->end || *parser->cur != '.')
			return -1;

		if (literal != NULL)
			str_append_c(literal, '.');
		parser->cur++;

		if (uri_parse_dec_octet(parser, literal, &octet) <= 0)
			return -1;
		ip = (ip << 8) + octet;
	}

	if (ip4_r != NULL)
		ip4_r->s_addr = htonl(ip);
	return 1;
}
/* Consume a registered name, appending the decoded bytes to reg_name when
 * it is not NULL.
 *
 * RFC 3986:
 *   reg-name = *( unreserved / pct-encoded / sub-delims )
 *
 * Returns -1 when a percent-encoded triplet cannot be decoded and 0
 * otherwise, whether or not any character was consumed.
 */
static int
uri_do_parse_reg_name(struct uri_parser *parser,
		      string_t *reg_name) ATTR_NULL(2)
{
	while (parser->cur < parser->end) {
		unsigned char ch;
		int ret;

		/* pct-encoded */
		ret = uri_parse_pct_encoded(parser, &ch);
		if (ret < 0)
			return -1;

		/* unreserved */
		if (ret == 0) {
			ret = uri_parse_unreserved_char(parser, &ch);
			if (ret < 0)
				return -1;
		}

		/* sub-delims */
		if (ret == 0) {
			ch = *parser->cur;
			if ((ch & 0x80) != 0 ||
			    (_uri_char_lookup[ch] & CHAR_MASK_SUB_DELIMS) == 0)
				break;
			parser->cur++;
		}

		if (reg_name != NULL)
			str_append_c(reg_name, ch);
	}
	return 0;
}
int uri_parse_reg_name(struct uri_parser *parser,
const char **reg_name_r)
{
string_t *reg_name = NULL;
int ret;
if (reg_name_r != NULL)
reg_name = uri_parser_get_tmpbuf(parser, 256);
if ((ret=uri_do_parse_reg_name(parser, reg_name)) <= 0)
return ret;
if (reg_name_r != NULL)
*reg_name_r = str_c(reg_name);
return 1;
}
/* Parse a DNS-style host name, appending the decoded name (without a
 * trailing '.') to host_name when it is not NULL.
 *
 * Returns 1 when at least one byte was consumed, 0 when nothing was
 * consumed, and -1 on invalid input: a failed percent-decoding, a
 * percent-encoded first label character that is not alphanumeric, or a
 * label that does not end in an alphanumeric character (only the last case
 * sets parser->error here).
 */
static int uri_do_parse_host_name(struct uri_parser *parser,
				  string_t *host_name) ATTR_NULL(2)
{
	const unsigned char *first, *part;
	int ret;

	/* RFC 3986, Section 3.2.2:

	   A registered name intended for lookup in the DNS uses the syntax
	   defined in Section 3.5 of [RFC1034] and Section 2.1 of [RFC1123].
	   Such a name consists of a sequence of domain labels separated by ".",
	   each domain label starting and ending with an alphanumeric character
	   and possibly also containing "-" characters. The rightmost domain
	   label of a fully qualified domain name in DNS may be followed by a
	   single "." and should be if it is necessary to distinguish between
	   the complete domain name and some local domain.

	   RFC 2396, Section 3.2.2 (old URI specification):

	   hostname = *( domainlabel "." ) toplabel [ "." ]
	   domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
	   toplabel = alpha | alpha *( alphanum | "-" ) alphanum

	   The description in RFC 3986 is more liberal, so:

	   hostname = *( domainlabel "." ) domainlabel [ "." ]
	   domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum

	   We also support percent encoding in spirit of the generic reg-name,
	   even though this should explicitly not be used according to the RFC.
	   It is, however, not strictly forbidden (unlike older RFC), so we
	   support it.
	 */

	/* 'part' marks the start of raw input not yet copied to host_name */
	first = part = parser->cur;
	for (;;) {
		const unsigned char *offset;
		unsigned char ch, pch;

		/* check the boundary before reading the next byte */
		if (parser->cur >= parser->end)
			break;

		/* alphanum */
		offset = parser->cur;
		ch = pch = *parser->cur;
		if ((ret=uri_parse_pct_encoded(parser, &ch)) < 0) {
			return -1;
		} else if (ret > 0) {
			if (!i_isalnum(ch))
				return -1;
			if (host_name != NULL)
				str_append_c(host_name, ch);
			part = parser->cur;
			/* the previous label character is the decoded one,
			   not the raw '%' */
			pch = ch;
		} else {
			if (!i_isalnum(*parser->cur))
				break;
			parser->cur++;
		}

		if (parser->cur < parser->end) {
			/* *( alphanum | "-" ) alphanum */
			do {
				offset = parser->cur;
				if ((ret=uri_parse_pct_encoded(parser, &ch)) < 0) {
					return -1;
				} else if (ret > 0) {
					if (!i_isalnum(ch) && ch != '-')
						break;
					if (host_name != NULL) {
						if (offset > part)
							str_append_n(host_name, part, offset - part);
						str_append_c(host_name, ch);
					}
					part = parser->cur;
				} else {
					ch = *parser->cur;
					if (!i_isalnum(ch) && ch != '-')
						break;
					parser->cur++;
				}
				pch = ch;
			} while (parser->cur < parser->end);

			/* a label must end in an alphanumeric character */
			if (!i_isalnum(pch)) {
				parser->error = "Invalid domain label in hostname";
				return -1;
			}
		}

		if (host_name != NULL && parser->cur > part)
			str_append_n(host_name, part, parser->cur - part);

		/* "." */
		if (parser->cur >= parser->end || ch != '.')
			break;
		if (host_name != NULL)
			str_append_c(host_name, '.');
		/* a raw '.' has not been consumed yet; an encoded one has */
		if (parser->cur == offset)
			parser->cur++;
		part = parser->cur;
	}

	if (parser->cur == first)
		return 0;

	/* remove trailing '.' */
	if (host_name != NULL) {
		const char *name = str_c(host_name);

		i_assert(str_len(host_name) > 0);
		if (name[str_len(host_name)-1] == '.')
			str_truncate(host_name, str_len(host_name)-1);
	}
	return 1;
}
/* Parse a host name at the current position. Returns the helper's result
   when it is 0 or negative, and 1 on success. On success *host_name_r (when
   not NULL) points at the decoded name held in the parser's temporary
   buffer. */
int uri_parse_host_name(struct uri_parser *parser,
			const char **host_name_r)
{
	string_t *buf = NULL;
	int ret;

	if (host_name_r != NULL)
		buf = uri_parser_get_tmpbuf(parser, 256);

	ret = uri_do_parse_host_name(parser, buf);
	if (ret <= 0)
		return ret;

	if (host_name_r != NULL)
		*host_name_r = str_c(buf);
	return 1;
}
/* Parse a bracketed IP literal; the caller has already verified that the
 * current byte is '['.
 *
 *   IP-literal  = "[" ( IPv6address / IPvFuture  ) "]"
 *   IPvFuture   = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
 *   IPv6address = ; Syntax not relevant: parsed using inet_pton()
 *
 * The whole bracketed text is appended to literal when it is not NULL.
 * Returns 1 on success (storing the address in *ip6_r when it is not NULL)
 * and -1 with parser->error set when the closing ']' is missing, the
 * address is empty, it starts with 'v' (IPvFuture is not supported) or
 * inet_pton() rejects it.
 */
static int
uri_parse_ip_literal(struct uri_parser *parser, string_t *literal,
		     struct in6_addr *ip6_r) ATTR_NULL(2,3)
{
	const unsigned char *closing;
	const char *address;
	struct in6_addr ip6;

	/* locate the closing bracket */
	closing = parser->cur + 1;
	while (closing < parser->end && *closing != ']')
		closing++;

	if (closing >= parser->end) {
		parser->error = "Expecting ']' at end of IP-literal";
		return -1;
	}

	if (literal != NULL)
		str_append_n(literal, parser->cur, closing-parser->cur+1);
	address = t_strdup_until(parser->cur+1, closing);
	parser->cur = closing + 1;

	if (*address == '\0') {
		parser->error = "Empty IPv6 host address";
		return -1;
	}
	if (*address == 'v') {
		parser->error = p_strdup_printf(parser->pool,
			"Future IP host address '%s' not supported", address);
		return -1;
	}
	if (inet_pton(AF_INET6, address, &ip6) <= 0) {
		parser->error = p_strdup_printf(parser->pool,
			"Invalid IPv6 host address '%s'", address);
		return -1;
	}

	if (ip6_r != NULL)
		*ip6_r = ip6;
	return 1;
}
/* Parse the host part of an authority.
 *
 * RFC 3986:
 *   host = IP-literal / IPv4address / reg-name
 *
 * host (when not NULL) is zeroed first and then filled in: name is always a
 * copy allocated from parser->pool, and ip is set only for IP hosts. When
 * host_name is TRUE the name is parsed as a DNS-style host name, otherwise
 * as a generic reg-name.
 *
 * Returns 1 when an IP address was parsed, 0 when a (possibly empty) name
 * was parsed, and -1 on error.
 */
static int
uri_do_parse_host(struct uri_parser *parser,
		  struct uri_host *host, bool host_name)
	ATTR_NULL(2)
{
	const unsigned char *start;
	struct in_addr ip4;
	struct in6_addr ip6;
	string_t *literal;
	int ret;

	if (host != NULL)
		i_zero(host);

	literal = uri_parser_get_tmpbuf(parser, 256);

	/* IP-literal */
	if (parser->cur < parser->end && *parser->cur == '[') {
		if (uri_parse_ip_literal(parser, literal, &ip6) <= 0)
			return -1;

		if (host != NULL) {
			host->name = p_strdup(parser->pool, str_c(literal));
			host->ip.family = AF_INET6;
			host->ip.u.ip6 = ip6;
		}
		return 1;
	}

	/* IPv4address; when this fails the same text is retried as a name */
	start = parser->cur;
	ret = uri_parse_ipv4address(parser, literal, &ip4);
	if (ret > 0) {
		if (host != NULL) {
			host->name = p_strdup(parser->pool, str_c(literal));
			host->ip.family = AF_INET;
			host->ip.u.ip4 = ip4;
		}
		return ret;
	}
	parser->cur = start;
	str_truncate(literal, 0);

	/* reg-name */
	ret = host_name ?
		uri_do_parse_host_name(parser, literal) :
		uri_do_parse_reg_name(parser, literal);
	if (ret < 0)
		return -1;

	if (host != NULL)
		host->name = p_strdup(parser->pool, str_c(literal));
	return 0;
}
/* Parse a host, treating a non-IP host as a DNS-style host name. */
int uri_parse_host(struct uri_parser *parser,
		   struct uri_host *host)
{
	int ret = uri_do_parse_host(parser, host, TRUE);

	return ret;
}
/* Parse the port number of an authority.
 *
 * RFC 3986:
 *   port = *DIGIT
 *
 * Returns 0 when there is no digit at the current position, -1 with
 * parser->error set when net_str2port() rejects the digits, and 1 on
 * success (storing the port in auth->port when auth is not NULL). The
 * digits are consumed in both of the latter cases.
 */
static int
uri_parse_port(struct uri_parser *parser,
	       struct uri_authority *auth) ATTR_NULL(2)
{
	const unsigned char *first = parser->cur;
	const unsigned char *p = first;
	in_port_t port;

	while (p < parser->end && i_isdigit(*p))
		p++;
	if (p == first)
		return 0;
	parser->cur = p;

	if (net_str2port(t_strdup_until(first, p), &port) < 0) {
		parser->error = "Invalid port number";
		return -1;
	}

	if (auth != NULL)
		auth->port = port;
	return 1;
}
/* Parse an authority component.
 *
 *   authority = [ userinfo "@" ] host [ ":" port ]
 *
 * auth (when not NULL) is zeroed and filled in; enc_userinfo is stored
 * still encoded, copied from the input into parser->pool. host_name selects
 * how a non-IP host is parsed (see uri_do_parse_host()).
 *
 * Returns 1 on success and -1 on error. The authority must be followed by
 * the end of the data or by one of '/', '?' and '#'.
 */
static int
uri_do_parse_authority(struct uri_parser *parser,
		       struct uri_authority *auth, bool host_name) ATTR_NULL(2)
{
	const unsigned char *p = parser->cur;
	unsigned char c;
	int ret;

	if (auth != NULL)
		i_zero(auth);

	/* Scan ahead over userinfo characters; 8bit characters and any
	   other delimiter end the scan */
	while (p < parser->end) {
		if ((*p & 0x80) != 0)
			break;
		if (*p != '%' && (_uri_char_lookup[*p] & CHAR_MASK_UCHAR) == 0)
			break;
		p++;
	}

	/* The scanned text is userinfo only if it ends at an '@' */
	if (p < parser->end && *p == '@') {
		if (auth != NULL)
			auth->enc_userinfo = p_strdup_until(parser->pool, parser->cur, p);
		parser->cur = p+1;
	}

	/* host */
	if (uri_do_parse_host(parser,
		(auth == NULL ? NULL : &auth->host), host_name) < 0)
		return -1;
	if (parser->cur == parser->end)
		return 1;

	c = *parser->cur;
	if (c != ':' && c != '/' && c != '?' && c != '#') {
		parser->error = "Invalid host identifier";
		return -1;
	}
	if (c != ':')
		return 1;

	/* ":" port */
	parser->cur++;
	if ((ret = uri_parse_port(parser, auth)) < 0)
		return ret;
	if (parser->cur == parser->end)
		return 1;

	c = *parser->cur;
	if (c != '/' && c != '?' && c != '#') {
		parser->error = "Invalid host port";
		return -1;
	}
	return 1;
}
/* Parse "//" authority. Returns 0 without consuming anything unless more
   than two bytes remain and the first two are "//"; otherwise skips the
   "//" and returns the result of uri_do_parse_authority(). */
static int
uri_do_parse_slashslash_authority(struct uri_parser *parser,
				  struct uri_authority *auth, bool host_name)
	ATTR_NULL(2)
{
	if ((parser->end - parser->cur) <= 2)
		return 0;
	if (parser->cur[0] != '/' || parser->cur[1] != '/')
		return 0;

	parser->cur += 2;
	return uri_do_parse_authority(parser, auth, host_name);
}
/* Parse an authority whose non-IP host is a generic reg-name. */
int uri_parse_authority(struct uri_parser *parser,
			struct uri_authority *auth)
{
	int ret = uri_do_parse_authority(parser, auth, FALSE);

	return ret;
}
/* Parse "//" followed by an authority whose non-IP host is a generic
   reg-name. */
int uri_parse_slashslash_authority(struct uri_parser *parser,
				   struct uri_authority *auth)
{
	int ret = uri_do_parse_slashslash_authority(parser, auth, FALSE);

	return ret;
}
/* Parse an authority whose non-IP host must be a DNS-style host name. */
int uri_parse_host_authority(struct uri_parser *parser,
			     struct uri_authority *auth)
{
	int ret = uri_do_parse_authority(parser, auth, TRUE);

	return ret;
}
/* Parse "//" followed by an authority whose non-IP host must be a
   DNS-style host name. */
int uri_parse_slashslash_host_authority(struct uri_parser *parser,
					struct uri_authority *auth)
{
	int ret = uri_do_parse_slashslash_authority(parser, auth, TRUE);

	return ret;
}
/* Parse one path segment (a run of pchar characters and percent-encoded
 * triplets).
 *
 * Returns -1 when a triplet is invalid or when the segment is followed by
 * a byte other than '/', '?' or '#' (parser->error is set in the latter
 * case), 0 when the segment is empty, and 1 otherwise. On success
 * *segment_r (when not NULL) receives the still-encoded segment text
 * copied into parser->pool.
 */
int uri_parse_path_segment(struct uri_parser *parser, const char **segment_r)
{
	const unsigned char *start = parser->cur;

	while (parser->cur < parser->end) {
		const unsigned char c = *parser->cur;

		if (c == '%') {
			unsigned char decoded = 0;
			int ret = uri_parse_pct_encoded(parser, &decoded);

			if (ret < 0)
				return -1;
			if (ret > 0)
				continue;
		}

		if ((c & 0x80) != 0 ||
		    (_uri_char_lookup[c] & CHAR_MASK_PCHAR) == 0)
			break;
		parser->cur++;
	}

	if (parser->cur < parser->end) {
		const unsigned char next = *parser->cur;

		if (next != '/' && next != '?' && next != '#') {
			parser->error =
				"Path component contains invalid character";
			return -1;
		}
	}

	if (parser->cur == start)
		return 0;

	if (segment_r != NULL)
		*segment_r = p_strdup_until(parser->pool, start, parser->cur);
	return 1;
}
/* Parse a URI path into its segments, resolving "." and ".." segments.
*
* relative_r (when not NULL) receives 0 when the path starts with '/'.
* Otherwise it receives 1 plus the number of ".." segments that were
* encountered while no collected segment was left to pop.
*
* path_r (when not NULL) is set to NULL when the path is empty; otherwise it
* receives a NULL-terminated array of the still-encoded segments. The array
* is created with parser->pool.
*
* Returns 0 when nothing was consumed, 1 when a path was parsed and -1 on
* error (an invalid segment, or a path followed by a byte other than '?'
* or '#').
*/
int uri_parse_path(struct uri_parser *parser,
int *relative_r, const char *const **path_r)
{
const unsigned char *pbegin = parser->cur;
ARRAY_TYPE(const_string) segments;
const char *segment = NULL;
unsigned int count;
int relative = 1;
int ret;
/* count tracks the number of collected segments even when path_r is NULL
and no array is being filled */
count = 0;
if (path_r != NULL)
p_array_init(&segments, parser->pool, 16);
else
i_zero(&segments);
/* check for a leading '/' and indicate absolute path
when it is present
*/
if (parser->cur < parser->end && *parser->cur == '/') {
parser->cur++;
relative = 0;
}
/* parse first segment */
if ((ret = uri_parse_path_segment(parser, &segment)) < 0)
return -1;
for (;;) {
if (ret > 0) {
/* strip dot segments */
if (segment[0] == '.') {
if (segment[1] == '.') {
if (segment[2] == '\0') {
/* '..' -> skip and... */
segment = NULL;
/* ... pop last segment (if any) */
if (count > 0) {
if (path_r != NULL) {
i_assert(count == array_count(&segments));
array_delete(&segments, count-1, 1);
}
count--;
} else if ( relative > 0 ) {
/* nothing left to pop in a relative path: record one more level up */
relative++;
}
}
} else if (segment[1] == '\0') {
/* '.' -> skip */
segment = NULL;
}
}
} else {
/* an empty segment is recorded as an empty string */
segment = "";
}
if (segment != NULL) {
if (path_r != NULL)
array_append(&segments, &segment, 1);
count++;
}
/* a '/' introduces another segment; anything else ends the path */
if (parser->cur >= parser->end || *parser->cur != '/')
break;
parser->cur++;
/* parse next path segment */
if ((ret = uri_parse_path_segment(parser, &segment)) < 0)
return -1;
}
if (relative_r != NULL)
*relative_r = relative;
if (path_r != NULL)
*path_r = NULL;
if (parser->cur == pbegin) {
/* path part of URI is empty */
return 0;
}
if (path_r != NULL) {
/* special treatment for a trailing '..' or '.' */
if (segment == NULL) {
segment = "";
array_append(&segments, &segment, 1);
}
/* terminate the array with a zero entry before handing it out */
array_append_zero(&segments);
*path_r = array_get(&segments, &count);
}
if (parser->cur < parser->end &&
*parser->cur != '?' && *parser->cur != '#') {
parser->error = "Path component contains invalid character";
return -1;
}
return 1;
}
/* Parse an optional query component.
 *
 * RFC 3986:
 *   URI   = { ... } [ "?" query ] { ... }
 *   query = *( pchar / "/" / "?" )
 *   pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
 *
 * Returns 0 when the current byte is not '?', -1 when a triplet is invalid
 * or the query is followed by a byte other than '#' (parser->error is set
 * in the latter case), and 1 on success. On success *query_r (when not
 * NULL) receives the still-encoded query, without the leading '?', copied
 * into parser->pool.
 */
int uri_parse_query(struct uri_parser *parser, const char **query_r)
{
	const unsigned char *qmark = parser->cur;

	if (parser->cur >= parser->end || *parser->cur != '?')
		return 0;
	parser->cur++;

	while (parser->cur < parser->end) {
		const unsigned char c = *parser->cur;

		if (c == '%') {
			unsigned char decoded = 0;
			int ret = uri_parse_pct_encoded(parser, &decoded);

			if (ret < 0)
				return -1;
			if (ret > 0)
				continue;
		}

		if ((c & 0x80) != 0 ||
		    (_uri_char_lookup[c] & CHAR_MASK_QCHAR) == 0)
			break;
		parser->cur++;
	}

	if (parser->cur < parser->end && *parser->cur != '#') {
		parser->error = "Query component contains invalid character";
		return -1;
	}

	if (query_r != NULL)
		*query_r = p_strdup_until(parser->pool, qmark+1, parser->cur);
	return 1;
}
/* Parse an optional fragment component.
 *
 * RFC 3986:
 *   URI      = { ... } [ "#" fragment ]
 *   fragment = *( pchar / "/" / "?" )
 *   pchar    = unreserved / pct-encoded / sub-delims / ":" / "@"
 *
 * Returns 0 when the current byte is not '#', -1 when a triplet is invalid
 * or the fragment does not extend to the end of the data (parser->error is
 * set in the latter case), and 1 on success. On success *fragment_r (when
 * not NULL) receives the still-encoded fragment, without the leading '#',
 * copied into parser->pool.
 */
int uri_parse_fragment(struct uri_parser *parser, const char **fragment_r)
{
	const unsigned char *hash = parser->cur;

	if (parser->cur >= parser->end || *parser->cur != '#')
		return 0;
	parser->cur++;

	while (parser->cur < parser->end) {
		const unsigned char c = *parser->cur;

		if (c == '%') {
			unsigned char decoded = 0;
			int ret = uri_parse_pct_encoded(parser, &decoded);

			if (ret < 0)
				return -1;
			if (ret > 0)
				continue;
		}

		if ((c & 0x80) != 0 ||
		    (_uri_char_lookup[c] & CHAR_MASK_QCHAR) == 0)
			break;
		parser->cur++;
	}

	if (parser->cur < parser->end) {
		parser->error = "Fragment component contains invalid character";
		return -1;
	}

	if (fragment_r != NULL)
		*fragment_r = p_strdup_until(parser->pool, hash+1, parser->cur);
	return 1;
}
/* Reset the parser and point it at size bytes of input starting at data.
   pool is stored as the parser's allocation pool. */
void uri_parser_init_data(struct uri_parser *parser,
			  pool_t pool, const unsigned char *data, size_t size)
{
	i_zero(parser);

	parser->begin = data;
	parser->cur = data;
	parser->end = data + size;
	parser->pool = pool;
}
/* Initialize the parser for a NUL-terminated URI string; the terminating
   NUL is not part of the parsed data. */
void uri_parser_init(struct uri_parser *parser,
		     pool_t pool, const char *uri)
{
	const unsigned char *data = (const unsigned char *)uri;

	uri_parser_init_data(parser, pool, data, strlen(uri));
}
/* Return the parser's temporary string buffer, emptied. The buffer is
   created from parser->pool on first use with the given initial size;
   later calls truncate and return the same buffer, so earlier contents are
   lost. */
string_t *uri_parser_get_tmpbuf(struct uri_parser *parser, size_t size)
{
	if (parser->tmpbuf != NULL) {
		str_truncate(parser->tmpbuf, 0);
		return parser->tmpbuf;
	}

	parser->tmpbuf = str_new(parser->pool, size);
	return parser->tmpbuf;
}
/* Validate a complete generic URI without extracting any component.
 *
 *   URI           = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
 *   hier-part     = "//" authority path-abempty
 *                 / path-absolute
 *                 / path-rootless
 *                 / path-empty
 *   path-abempty  = *( "/" segment )
 *   path-absolute = "/" [ segment-nz *( "/" segment ) ]
 *   path-rootless = segment-nz *( "/" segment )
 *   path-empty    = 0<pchar>
 *   segment       = *pchar
 *   segment-nz    = 1*pchar
 *
 * With URI_PARSE_SCHEME_EXTERNAL the scheme is not parsed here. A fragment
 * is rejected unless URI_PARSE_ALLOW_FRAGMENT_PART is set.
 *
 * Returns 0 when the whole input is a valid URI and -1 otherwise; an error
 * message is left in parser->error by the failing step where it provides
 * one.
 */
int uri_parse_absolute_generic(struct uri_parser *parser,
			       enum uri_parse_flags flags)
{
	int relative, aret, ret = 0;

	/* scheme ":" */
	if ((flags & URI_PARSE_SCHEME_EXTERNAL) == 0) {
		ret = uri_parse_scheme(parser, NULL);
		if (ret <= 0) {
			if (ret == 0)
				parser->error = "Missing scheme";
			return -1;
		}
	}

	/* "//" authority */
	aret = uri_parse_slashslash_authority(parser, NULL);
	if (aret < 0)
		return -1;

	if (aret == 0) {
		/* path-absolute / path-rootless / path-empty */
		ret = uri_parse_path(parser, &relative, NULL);
	} else if (parser->cur < parser->end && *parser->cur == '/') {
		/* path-abempty */
		ret = uri_parse_path(parser, &relative, NULL);
		i_assert(ret <= 0 || relative == 0);
	}
	if (ret < 0)
		return -1;

	/* [ "?" query ] */
	if (uri_parse_query(parser, NULL) < 0)
		return -1;

	/* [ "#" fragment ] */
	ret = uri_parse_fragment(parser, NULL);
	if (ret < 0)
		return ret;
	if (ret > 0 && (flags & URI_PARSE_ALLOW_FRAGMENT_PART) == 0) {
		parser->error = "Fragment part not allowed";
		return -1;
	}

	i_assert(parser->cur == parser->end);
	return 0;
}
/*
* Generic URI manipulation
*/
/* Copy src into dest, duplicating the host name into pool. When src has no
   name but does have an IP address, the name is generated from the
   address with net_ip2addr(). */
void uri_host_copy(pool_t pool, struct uri_host *dest,
		   const struct uri_host *src)
{
	const char *name = src->name;

	if (name == NULL && src->ip.family != 0) {
		/* create host name literal if caller is lazy */
		name = net_ip2addr(&src->ip);
		i_assert(*name != '\0');
	}

	*dest = *src;
	dest->name = p_strdup(pool, name);
}
/*
* Check generic URI
*/
/* Check whether size bytes at data form a valid generic URI. Returns the
   result of uri_parse_absolute_generic(); *error_r always receives the
   parser's error field afterwards. The parser uses a pool obtained from
   pool_datastack_create(). */
int uri_check_data(const unsigned char *data, size_t size,
		   enum uri_parse_flags flags, const char **error_r)
{
	struct uri_parser parser;
	int result;

	i_zero(&parser);
	parser.begin = data;
	parser.cur = data;
	parser.end = data + size;
	parser.pool = pool_datastack_create();

	result = uri_parse_absolute_generic(&parser, flags);
	*error_r = parser.error;
	return result;
}
/* Check whether a NUL-terminated string is a valid generic URI; see
   uri_check_data(). */
int uri_check(const char *uri, enum uri_parse_flags flags,
	      const char **error_r)
{
	size_t size = strlen(uri);

	return uri_check_data((unsigned char *)uri, size, flags, error_r);
}
/*
* Generic URI construction
*/
/* Append data to out, percent-encoding every byte that may not appear
 * literally.
 *
 * A byte is encoded (as '%' plus two lowercase hex digits) when it has the
 * high bit set, when esc_table[byte] has none of the bits in esc_mask, or
 * when it occurs in esc_extra (if not NULL). All other bytes are copied
 * unchanged, in runs.
 */
void uri_data_encode(string_t *out,
		     const unsigned char esc_table[256],
		     unsigned char esc_mask, const char *esc_extra,
		     const char *data)
{
	const unsigned char *run = (const unsigned char *)data;
	const unsigned char *p;

	for (p = run; *p != '\0'; p++) {
		if ((*p & 0x80) == 0 && (esc_table[*p] & esc_mask) != 0 &&
		    (esc_extra == NULL || strchr(esc_extra, (char)*p) == NULL))
			continue;

		/* flush the pending literal run, then the escaped byte */
		if (p > run)
			str_append_n(out, run, p - run);
		str_printfa(out, "%%%02x", *p);
		run = p + 1;
	}

	if (p > run)
		str_append_n(out, run, p - run);
}
/* Append "<scheme>:" to out; the scheme is written as given. */
void uri_append_scheme(string_t *out, const char *scheme)
{
	const char separator = ':';

	str_append(out, scheme);
	str_append_c(out, separator);
}
/* Append data encoded for use inside userinfo: bytes outside the 'uchar'
   class, and any bytes listed in esc, are percent-encoded. */
void uri_append_user_data(string_t *out, const char *esc,
			  const char *data)
{
	const unsigned char mask = CHAR_MASK_UCHAR;

	uri_data_encode(out, _uri_char_lookup, mask, esc, data);
}
/* Append the encoded userinfo followed by the '@' delimiter. */
void uri_append_userinfo(string_t *out, const char *userinfo)
{
	const char delimiter = '@';

	uri_append_user_data(out, NULL, userinfo);
	str_append_c(out, delimiter);
}
/* Append a host name, percent-encoding every byte that is neither
   'unreserved' nor a sub-delimiter. */
void uri_append_host_name(string_t *out, const char *name)
{
	const unsigned char mask =
		CHAR_MASK_UNRESERVED | CHAR_MASK_SUB_DELIMS;

	uri_data_encode(out, _uri_char_lookup, mask, NULL, name);
}
/* Append an IP address as a URI host: an IPv4 address is written as is,
   an IPv6 address is wrapped in '[' and ']'. The address family must be
   AF_INET or AF_INET6 (asserted). */
void uri_append_host_ip(string_t *out, const struct ip_addr *host_ip)
{
	const char *addr = net_ip2addr(host_ip);

	i_assert(host_ip->family != 0);

	if (host_ip->family != AF_INET) {
		i_assert(host_ip->family == AF_INET6);
		str_append_c(out, '[');
		str_append(out, addr);
		str_append_c(out, ']');
		return;
	}
	str_append(out, addr);
}
/* Append a host. The name is used when there is one; otherwise the IP
   address is formatted. */
void uri_append_host(string_t *out, const struct uri_host *host)
{
	if (host->name == NULL) {
		uri_append_host_ip(out, &host->ip);
		return;
	}

	/* assume IPv6 literal if starts with '['; avoid encoding */
	if (*host->name == '[')
		str_append(out, host->name);
	else
		uri_append_host_name(out, host->name);
}
/* Append ":<port>" to out; nothing is written when port is 0. */
void uri_append_port(string_t *out, in_port_t port)
{
	if (port == 0)
		return;
	str_printfa(out, ":%u", port);
}
/* Append data encoded for use inside a single path segment: bytes outside
   the 'pchar' class (which includes '/'), and any bytes listed in esc, are
   percent-encoded. */
void uri_append_path_segment_data(string_t *out, const char *esc,
				  const char *data)
{
	uri_data_encode(out, _uri_char_lookup, CHAR_MASK_PCHAR, esc, data);
}

/* Append '/' followed by one encoded path segment. The segment is encoded
   with the segment rules, so a '/' inside it is escaped and the output
   remains a single segment. */
void uri_append_path_segment(string_t *out, const char *segment)
{
	str_append_c(out, '/');
	if (*segment != '\0')
		uri_append_path_segment_data(out, NULL, segment);
}
/* Append data encoded for use as (part of) a path: '/' is kept literal,
   while bytes outside the 'pfchar' class, and any bytes listed in esc, are
   percent-encoded. */
void uri_append_path_data(string_t *out, const char *esc,
			  const char *data)
{
	const unsigned char mask = CHAR_MASK_PFCHAR;

	uri_data_encode(out, _uri_char_lookup, mask, esc, data);
}
/* Append '/' followed by the encoded path; an empty path yields just
   the '/'. */
void uri_append_path(string_t *out, const char *path)
{
	str_append_c(out, '/');
	if (*path == '\0')
		return;
	uri_append_path_data(out, NULL, path);
}
/* Append data encoded for use inside a query: bytes outside the 'qchar'
   class, and any bytes listed in esc, are percent-encoded. */
void uri_append_query_data(string_t *out, const char *esc,
			   const char *data)
{
	const unsigned char mask = CHAR_MASK_QCHAR;

	uri_data_encode(out, _uri_char_lookup, mask, esc, data);
}
/* Append '?' followed by the encoded query; an empty query yields just
   the '?'. */
void uri_append_query(string_t *out, const char *query)
{
	str_append_c(out, '?');
	if (*query == '\0')
		return;
	uri_append_query_data(out, NULL, query);
}
/* Append data encoded for use inside a fragment: bytes outside the 'qchar'
   class, and any bytes listed in esc, are percent-encoded. */
void uri_append_fragment_data(string_t *out, const char *esc,
			      const char *data)
{
	const unsigned char mask = CHAR_MASK_QCHAR;

	uri_data_encode(out, _uri_char_lookup, mask, esc, data);
}
/* Append '#' followed by the encoded fragment; an empty fragment yields
   just the '#'. */
void uri_append_fragment(string_t *out, const char *fragment)
{
	str_append_c(out, '#');
	if (*fragment == '\0')
		return;
	uri_append_fragment_data(out, NULL, fragment);
}