uri-util.c revision 3ec5378aadaa699c38b2e02be30aae1add36eb7c
2e37d45867d081db150ab78dad303b9077aea24fTimo Sirainen/* Copyright (c) 2010-2013 Dovecot authors, see the included COPYING file */
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen * Generic URI parsing.
9dd1c256910f1fb42823116a641e7edb3ad11970Timo Sirainen * [URI-GEN] RFC3986 Appendix A:
d477acb83e14a776ece4ca94dcd1869e75d0c6eeTimo Sirainen * host = IP-literal / IPv4address / reg-name
9f0f2de10e4ea0c99052bf4b2bef8179f2536228Timo Sirainen * port = *DIGIT
9f0f2de10e4ea0c99052bf4b2bef8179f2536228Timo Sirainen * reg-name = *( unreserved / pct-encoded / sub-delims )
daa7e7459749ae8f82cd3eed9c44522d81c609a3Timo Sirainen * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
bbadd5331f534017cf62d5183003b3d9fdad079eTimo Sirainen * pct-encoded = "%" HEXDIG HEXDIG
373492be949e159fda651807b3acda2c5c077027Timo Sirainen * sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
889437fa2b6f44ffe0a8a7bcac94c00b71856767Timo Sirainen * / "*" / "+" / "," / ";" / "="
9f0f2de10e4ea0c99052bf4b2bef8179f2536228Timo Sirainen * IP-literal = "[" ( IPv6address / IPvFuture ) "]"
9f0f2de10e4ea0c99052bf4b2bef8179f2536228Timo Sirainen * IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
9f0f2de10e4ea0c99052bf4b2bef8179f2536228Timo Sirainen * IPv6address = 6( h16 ":" ) ls32
9f0f2de10e4ea0c99052bf4b2bef8179f2536228Timo Sirainen * / "::" 5( h16 ":" ) ls32
9f0f2de10e4ea0c99052bf4b2bef8179f2536228Timo Sirainen * / [ h16 ] "::" 4( h16 ":" ) ls32
373492be949e159fda651807b3acda2c5c077027Timo Sirainen * / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
bbadd5331f534017cf62d5183003b3d9fdad079eTimo Sirainen * / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
bbadd5331f534017cf62d5183003b3d9fdad079eTimo Sirainen * / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
9f0f2de10e4ea0c99052bf4b2bef8179f2536228Timo Sirainen * / [ *4( h16 ":" ) h16 ] "::" ls32
9f0f2de10e4ea0c99052bf4b2bef8179f2536228Timo Sirainen * / [ *5( h16 ":" ) h16 ] "::" h16
9f0f2de10e4ea0c99052bf4b2bef8179f2536228Timo Sirainen * / [ *6( h16 ":" ) h16 ] "::"
9f0f2de10e4ea0c99052bf4b2bef8179f2536228Timo Sirainen * h16 = 1*4HEXDIG
9f0f2de10e4ea0c99052bf4b2bef8179f2536228Timo Sirainen * ls32 = ( h16 ":" h16 ) / IPv4address
9f0f2de10e4ea0c99052bf4b2bef8179f2536228Timo Sirainen * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
9f0f2de10e4ea0c99052bf4b2bef8179f2536228Timo Sirainen * dec-octet = DIGIT ; 0-9
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainen * / %x31-39 DIGIT ; 10-99
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainen * / "1" 2DIGIT ; 100-199
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainen * / "2" %x30-34 DIGIT ; 200-249
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainen * / "25" %x30-35 ; 250-255
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainen/* Character lookup table
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainen * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" [bit0]
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainen * sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainen * / "*" / "+" / "," / ";" / "=" [bit1]
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainen * gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" [bit2]
99430beb12dfbc6c9c160f08e2102aeab38a589cTimo Sirainen * pchar = unreserved / sub-delims / ":" / "@" [bit0|bit1|bit3]
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainen * 'pfchar' = unreserved / sub-delims / ":" / "@" / "/"
93a7d1ee4b518b5c85f9721dc6539e4dab6aae00Timo Sirainen * [bit0|bit1|bit3|bit5]
f7f25f9e1a38678d0e97d2e609beac16285fac6bTimo Sirainen * 'uchar' = unreserved / sub-delims / ":" [bit0|bit1|bit4]
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainen * 'qchar' = pchar / "/" / "?" [bit0|bit1|bit3|bit5|bit6]
9f0f2de10e4ea0c99052bf4b2bef8179f2536228Timo Sirainen#define CHAR_MASK_PFCHAR ((1<<0)|(1<<1)|(1<<3)|(1<<5))
9f0f2de10e4ea0c99052bf4b2bef8179f2536228Timo Sirainen#define CHAR_MASK_QCHAR ((1<<0)|(1<<1)|(1<<3)|(1<<5)|(1<<6))
635df5b4cbcd7b24c825e01d9dd66d3a4274c4c7Timo Sirainenstatic unsigned const char _uri_char_lookup[256] = {
9f0f2de10e4ea0c99052bf4b2bef8179f2536228Timo Sirainen 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 00
9f0f2de10e4ea0c99052bf4b2bef8179f2536228Timo Sirainen 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 10
9f0f2de10e4ea0c99052bf4b2bef8179f2536228Timo Sirainen 0, 2, 0, 4, 2, 0, 2, 2, 2, 2, 2, 2, 2, 1, 1, 36, // 20
9f0f2de10e4ea0c99052bf4b2bef8179f2536228Timo Sirainen 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 28, 2, 0, 2, 0, 68, // 30
9f0f2de10e4ea0c99052bf4b2bef8179f2536228Timo Sirainen 12, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 40
9f0f2de10e4ea0c99052bf4b2bef8179f2536228Timo Sirainen 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 0, 4, 0, 1, // 50
9f0f2de10e4ea0c99052bf4b2bef8179f2536228Timo Sirainen 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 60
9f0f2de10e4ea0c99052bf4b2bef8179f2536228Timo Sirainen 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, // 70
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainenstatic inline int _decode_hex_digit(const unsigned char digit)
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainen case '0': case '1': case '2': case '3': case '4':
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainen case '5': case '6': case '7': case '8': case '9':
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainen case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainen case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
f7f25f9e1a38678d0e97d2e609beac16285fac6bTimo Sirainenuri_parse_pct_encoded(struct uri_parser *parser, const unsigned char **p,
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainen const unsigned char *pend, unsigned char *ch_r)
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainen if (**p == 0 || *(*p+1) == 0 || (pend != NULL && *p+1 >= pend)) {
9f0f2de10e4ea0c99052bf4b2bef8179f2536228Timo Sirainen parser->error = "Unexpected URI boundary after '%'";
7744586e3e0fd60158abfbb03a233d3bd8d6c48bTimo Sirainen "Expecting hex digit after '%%', but found '%c'", **p);
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen "Expecting hex digit after '%%%c', but found '%c'", *((*p)-1), **p);
3cf67672fdc87583cb23ce088c95bb5dee60e74dTimo Sirainen "Percent encoding is not allowed to encode NUL character";
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainenuri_parse_unreserved_char(struct uri_parser *parser, unsigned char *ch_r)
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainen if (uri_parse_pct_encoded(parser, &parser->cur,
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen if ((_uri_char_lookup[*parser->cur] & CHAR_MASK_UNRESERVED) != 0) {
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainenint uri_parse_unreserved(struct uri_parser *parser, string_t *part)
04052d7cacaa866a3f00afb4e104fa46c04c1dd7Timo Sirainen unsigned char ch = 0;
3cf67672fdc87583cb23ce088c95bb5dee60e74dTimo Sirainen if ((ret = uri_parse_unreserved_char(parser, &ch)) < 0)
57d2429fae575e96ca276355af675deb66b76d00Timo Sirainenbool uri_data_decode(struct uri_parser *parser, const char *data,
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen const unsigned char *p = (const unsigned char *)data;
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen const unsigned char *pend = (const unsigned char *)until;
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen /* NULL means unlimited; solely rely on '\0' */
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen unsigned char ch;
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen if (*p == '%') {
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen if (uri_parse_pct_encoded(parser, &p, NULL, &ch) <= 0)
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainenint uri_cut_scheme(const char **uri_p, const char **scheme_r)
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen const char *p = *uri_p;
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen * scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainen while (len < URI_MAX_SCHEME_NAME_LEN && *p != '\0') {
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainen if (!i_isalnum(*p) && *p != '+' && *p != '-' && *p != '.')
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainen if (*p != ':')
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainenint uri_parse_scheme(struct uri_parser *parser, const char **scheme_r)
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainen const char *p;
7744586e3e0fd60158abfbb03a233d3bd8d6c48bTimo Sirainenuri_parse_dec_octet(struct uri_parser *parser, string_t *literal,
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainen * dec-octet = DIGIT ; 0-9
383d0e8c24451468d6bea17e4b55d74de744abe6Timo Sirainen * / %x31-39 DIGIT ; 10-99
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainen * / "1" 2DIGIT ; 100-199
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen * / "2" %x30-34 DIGIT ; 200-249
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen * / "25" %x30-35 ; 250-255
4fc74bba3548987b7e8597491cd9fafc1f701be6Timo Sirainen while (parser->cur < parser->end && i_isdigit(*parser->cur)) {
7289c5600711b45f30fe289ab5b0293b51d87041Timo Sirainenuri_parse_ipv4address(struct uri_parser *parser, string_t *literal,
7289c5600711b45f30fe289ab5b0293b51d87041Timo Sirainen * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
d477acb83e14a776ece4ca94dcd1869e75d0c6eeTimo Sirainen if ((ret = uri_parse_dec_octet(parser, literal, &octet)) <= 0)
9dd1c256910f1fb42823116a641e7edb3ad11970Timo Sirainen for (i = 0; i < 3 && parser->cur < parser->end; i++) {
9dd1c256910f1fb42823116a641e7edb3ad11970Timo Sirainen if ((ret = uri_parse_dec_octet(parser, literal, &octet)) <= 0)
9dd1c256910f1fb42823116a641e7edb3ad11970Timo Sirainenstatic int uri_parse_reg_name(struct uri_parser *parser, string_t *reg_name)
d477acb83e14a776ece4ca94dcd1869e75d0c6eeTimo Sirainen * reg-name = *( unreserved / pct-encoded / sub-delims )
5d60e31c7b701b606067a20bc88dcc8a6de7bbd6Timo Sirainen unsigned char c;
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen /* unreserved / pct-encoded */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen if ((ret = uri_parse_unreserved_char(parser, &c)) < 0)
9ed2951bd0bb1878a27437d7c00611b2baadd614Timo Sirainen /* sub-delims */
9ed2951bd0bb1878a27437d7c00611b2baadd614Timo Sirainen if ((c & 0x80) == 0 && (_uri_char_lookup[c] & CHAR_MASK_SUB_DELIMS) != 0) {
#ifdef HAVE_IPV6
const char *address;
int ret;
const unsigned char *preserve;
int ret;
#ifdef HAVE_IPV6
return ret;
return ret;
int count = 0;
count++;
if (count > 0) {
int ret;
if (ret == 0) {
return ret;
return ret;
unsigned int count;
int ret;
relative = 0;
if (ret > 0) {
if (count > 0) {
} else if ( relative > 0 ) {
relative++;
const unsigned char *p = (const unsigned char *)data;
const char *data)
const char *data)
const char *data)
const char *data)
const char *data)