uri-util.c revision 05262e3132642bbdc4a8087c17b0903cf2ff22d2
02c335c23bf5fa225a467c19f2c063fb0dc7b8c3Timo Sirainen/* Copyright (c) 2010-2012 Dovecot authors, see the included COPYING file */
afc77c5375cdb8f2bf0ab6280d9229ac27c933c6Timo Sirainen * Generic URI parsing.
afc77c5375cdb8f2bf0ab6280d9229ac27c933c6Timo Sirainen * [URI-GEN] RFC3986 Appendix A:
afc77c5375cdb8f2bf0ab6280d9229ac27c933c6Timo Sirainen * host = IP-literal / IPv4address / reg-name
afc77c5375cdb8f2bf0ab6280d9229ac27c933c6Timo Sirainen * port = *DIGIT
afc77c5375cdb8f2bf0ab6280d9229ac27c933c6Timo Sirainen * reg-name = *( unreserved / pct-encoded / sub-delims )
afc77c5375cdb8f2bf0ab6280d9229ac27c933c6Timo Sirainen * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
afc77c5375cdb8f2bf0ab6280d9229ac27c933c6Timo Sirainen * pct-encoded = "%" HEXDIG HEXDIG
afc77c5375cdb8f2bf0ab6280d9229ac27c933c6Timo Sirainen * sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
afc77c5375cdb8f2bf0ab6280d9229ac27c933c6Timo Sirainen * / "*" / "+" / "," / ";" / "="
afc77c5375cdb8f2bf0ab6280d9229ac27c933c6Timo Sirainen * IP-literal = "[" ( IPv6address / IPvFuture ) "]"
afc77c5375cdb8f2bf0ab6280d9229ac27c933c6Timo Sirainen * IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
afc77c5375cdb8f2bf0ab6280d9229ac27c933c6Timo Sirainen * IPv6address = 6( h16 ":" ) ls32
afc77c5375cdb8f2bf0ab6280d9229ac27c933c6Timo Sirainen * / "::" 5( h16 ":" ) ls32
afc77c5375cdb8f2bf0ab6280d9229ac27c933c6Timo Sirainen * / [ h16 ] "::" 4( h16 ":" ) ls32
afc77c5375cdb8f2bf0ab6280d9229ac27c933c6Timo Sirainen * / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
afc77c5375cdb8f2bf0ab6280d9229ac27c933c6Timo Sirainen * / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
afc77c5375cdb8f2bf0ab6280d9229ac27c933c6Timo Sirainen * / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
afc77c5375cdb8f2bf0ab6280d9229ac27c933c6Timo Sirainen * / [ *4( h16 ":" ) h16 ] "::" ls32
afc77c5375cdb8f2bf0ab6280d9229ac27c933c6Timo Sirainen * / [ *5( h16 ":" ) h16 ] "::" h16
afc77c5375cdb8f2bf0ab6280d9229ac27c933c6Timo Sirainen * / [ *6( h16 ":" ) h16 ] "::"
afc77c5375cdb8f2bf0ab6280d9229ac27c933c6Timo Sirainen * h16 = 1*4HEXDIG
afc77c5375cdb8f2bf0ab6280d9229ac27c933c6Timo Sirainen * ls32 = ( h16 ":" h16 ) / IPv4address
84a5175b9768da401404635c9b606264585739bdTimo Sirainen * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
84a5175b9768da401404635c9b606264585739bdTimo Sirainen * dec-octet = DIGIT ; 0-9
afc77c5375cdb8f2bf0ab6280d9229ac27c933c6Timo Sirainen * / %x31-39 DIGIT ; 10-99
afc77c5375cdb8f2bf0ab6280d9229ac27c933c6Timo Sirainen * / "1" 2DIGIT ; 100-199
afc77c5375cdb8f2bf0ab6280d9229ac27c933c6Timo Sirainen * / "2" %x30-34 DIGIT ; 200-249
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen * / "25" %x30-35 ; 250-255
5f4e547bb810403e8cfb19a49d8fe34713507ffdTimo Sirainen/* Character lookup table
5f4e547bb810403e8cfb19a49d8fe34713507ffdTimo Sirainen * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" [bit0]
5f4e547bb810403e8cfb19a49d8fe34713507ffdTimo Sirainen * sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
5f4e547bb810403e8cfb19a49d8fe34713507ffdTimo Sirainen * / "*" / "+" / "," / ";" / "=" [bit1]
5f4e547bb810403e8cfb19a49d8fe34713507ffdTimo Sirainen * gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" [bit2]
d2cf6522779802d0edeab7dcf960ffea2f2e1828Timo Sirainen * pchar = unreserved / sub-delims / ":" / "@" [bit0|bit1|bit3]
d2cf6522779802d0edeab7dcf960ffea2f2e1828Timo Sirainen * 'pfchar' = unreserved / sub-delims / ":" / "@" / "/"
d2cf6522779802d0edeab7dcf960ffea2f2e1828Timo Sirainen * [bit0|bit1|bit3|bit5]
d2cf6522779802d0edeab7dcf960ffea2f2e1828Timo Sirainen * 'uchar' = unreserved / sub-delims / ":" [bit0|bit1|bit4]
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen * 'qchar' = pchar / "/" / "?" [bit0|bit1|bit3|bit5|bit6]
d2cf6522779802d0edeab7dcf960ffea2f2e1828Timo Sirainen#define CHAR_MASK_PFCHAR ((1<<0)|(1<<1)|(1<<3)|(1<<5))
d2cf6522779802d0edeab7dcf960ffea2f2e1828Timo Sirainen#define CHAR_MASK_QCHAR ((1<<0)|(1<<1)|(1<<3)|(1<<5)|(1<<6))
d2cf6522779802d0edeab7dcf960ffea2f2e1828Timo Sirainenstatic unsigned const char _uri_char_lookup[256] = {
d2cf6522779802d0edeab7dcf960ffea2f2e1828Timo Sirainen 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 00
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 10
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen 0, 2, 0, 4, 2, 0, 2, 2, 2, 2, 2, 2, 2, 1, 1, 36, // 20
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 28, 2, 0, 2, 0, 68, // 30
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen 12, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 40
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 0, 4, 0, 1, // 50
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 60
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, // 70
fcb5f4cd72b413a5356a8db55e679403c6a1adb5Timo Sirainenstatic inline int _decode_hex_digit(const unsigned char digit)
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen case '0': case '1': case '2': case '3': case '4':
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen case '5': case '6': case '7': case '8': case '9':
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainenuri_parse_pct_encoded(struct uri_parser *parser, const unsigned char **p,
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen const unsigned char *pend, unsigned char *ch_r)
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen if (**p == 0 || *(*p+1) == 0 || (pend != NULL && *p+1 >= pend)) {
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen parser->error = "Unexpected URI boundary after '%'";
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen "Expecting hex digit after '%%', but found '%c'", **p);
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen "Expecting hex digit after '%%%c', but found '%c'", *((*p)-1), **p);
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen "Percent encoding is not allowed to encode NUL character";
afc77c5375cdb8f2bf0ab6280d9229ac27c933c6Timo Sirainenuri_parse_unreserved_char(struct uri_parser *parser, unsigned char *ch_r)
84a5175b9768da401404635c9b606264585739bdTimo Sirainen if (uri_parse_pct_encoded(parser, &parser->cur,
f2686912e0156c04296d6dc306f39d61089a1363Timo Sirainen if ((_uri_char_lookup[*parser->cur] & CHAR_MASK_UNRESERVED) != 0) {
afc77c5375cdb8f2bf0ab6280d9229ac27c933c6Timo Sirainenint uri_parse_unreserved(struct uri_parser *parser, string_t *part)
afc77c5375cdb8f2bf0ab6280d9229ac27c933c6Timo Sirainen unsigned char ch = 0;
afc77c5375cdb8f2bf0ab6280d9229ac27c933c6Timo Sirainen if ((ret = uri_parse_unreserved_char(parser, &ch)) < 0)
afc77c5375cdb8f2bf0ab6280d9229ac27c933c6Timo Sirainenbool uri_data_decode(struct uri_parser *parser, const char *data,
afc77c5375cdb8f2bf0ab6280d9229ac27c933c6Timo Sirainen const unsigned char *p = (const unsigned char *)data;
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen const unsigned char *pend = (const unsigned char *)until;
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen /* NULL means unlimited; solely rely on '\0' */
afc77c5375cdb8f2bf0ab6280d9229ac27c933c6Timo Sirainen unsigned char ch;
afc77c5375cdb8f2bf0ab6280d9229ac27c933c6Timo Sirainen if (*p == '%') {
84a5175b9768da401404635c9b606264585739bdTimo Sirainen if (uri_parse_pct_encoded(parser, &p, NULL, &ch) <= 0)
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainenint uri_cut_scheme(const char **uri_p, const char **scheme_r)
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen const char *p = *uri_p;
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen * scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen while (len < URI_MAX_SCHEME_NAME_LEN && *p != '\0') {
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen if (!i_isalnum(*p) && *p != '+' && *p != '-' && *p != '.')
b13d76faf0c82162c29050382cd7f4a808294622Timo Sirainen if (*p != ':')
0d6ae58916bee3452c91d9d81be72227761ec33dTimo Sirainenint uri_parse_scheme(struct uri_parser *parser, const char **scheme_r)
60d1fdf2c17fd0c7020234590dbd73da81c3ce8fTimo Sirainen const char *p;
e130bb802c8bfb6c6cc44e5c8bc098b4fa5af789Timo Sirainenuri_parse_dec_octet(struct uri_parser *parser, string_t *literal,
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen * dec-octet = DIGIT ; 0-9
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen * / %x31-39 DIGIT ; 10-99
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen * / "1" 2DIGIT ; 100-199
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen * / "2" %x30-34 DIGIT ; 200-249
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen * / "25" %x30-35 ; 250-255
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen while (parser->cur < parser->end && i_isdigit(*parser->cur)) {
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainenuri_parse_ipv4address(struct uri_parser *parser, string_t *literal,
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
return ret;
int len = 0;
int ret;
if (ret > 0) {
len++;
len++;
#ifdef HAVE_IPV6
const char *address;
int ret;
const unsigned char *preserve;
int ret;
#ifdef HAVE_IPV6
return ret;
return ret;
int count = 0;
count++;
if (count > 0) {
int ret;
if (ret == 0) {
return ret;
return ret;
unsigned int count;
int ret;
relative = 0;
if (ret > 0) {
if (count > 0) {
} else if ( relative > 0 ) {
relative++;
const unsigned char *p = (const unsigned char *)data;
const char *data)
const char *data)
const char *data)
const char *data)
const char *data)