http-url.c revision 1e653c7406ec0b062c0bacfdf2e7568a3f860500
90adcaa0a00eba29b7fbd50ca66be11c8d086d6aTimo Sirainen/* Copyright (c) 2013 Dovecot authors, see the included COPYING file */
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen * HTTP URL parser
a0b0d629931773c17a236f6214adbe0e13b9b3fdTimo Sirainenstatic bool http_url_parse_authority(struct http_url_parser *url_parser)
a0b0d629931773c17a236f6214adbe0e13b9b3fdTimo Sirainen struct uri_parser *parser = &url_parser->parser;
f1901fd21906911f7be075c965ac882f6a87b4c3Timo Sirainen if ((ret = uri_parse_authority(parser, &auth)) < 0)
4b41116563110d00330896a568eff1078c382827Timo Sirainen const char *p;
992a13add4eea0810e4db0f042a595dddf85536aTimo Sirainen if ((url_parser->flags & HTTP_URL_ALLOW_USERINFO_PART) == 0) {
fe6c1556d3529a6376d4cbb3766c34aebde0de99Timo Sirainen /* http://tools.ietf.org/html/draft-ietf-httpbis-p1-messaging-20
fe6c1556d3529a6376d4cbb3766c34aebde0de99Timo Sirainen Section 2.8.1:
fe6c1556d3529a6376d4cbb3766c34aebde0de99Timo Sirainen {...} Senders MUST NOT include a userinfo subcomponent (and its "@"
fe6c1556d3529a6376d4cbb3766c34aebde0de99Timo Sirainen delimiter) when transmitting an "http" URI in a message. Recipients
fe6c1556d3529a6376d4cbb3766c34aebde0de99Timo Sirainen of HTTP messages that contain a URI reference SHOULD parse for the
fe6c1556d3529a6376d4cbb3766c34aebde0de99Timo Sirainen existence of userinfo and treat its presence as an error, likely
fe6c1556d3529a6376d4cbb3766c34aebde0de99Timo Sirainen indicating that the deprecated subcomponent is being used to
fe6c1556d3529a6376d4cbb3766c34aebde0de99Timo Sirainen obscure the authority for the sake of phishing attacks.
fe6c1556d3529a6376d4cbb3766c34aebde0de99Timo Sirainen parser->error = "HTTP URL does not allow `userinfo@' part";
fe6c1556d3529a6376d4cbb3766c34aebde0de99Timo Sirainen if (!uri_data_decode(parser, auth.enc_userinfo, NULL, &user))
d30da25fb6be1f1c667d93767c9194000194b618Timo Sirainen if (!uri_data_decode(parser, auth.enc_userinfo, p, &user))
fe6c1556d3529a6376d4cbb3766c34aebde0de99Timo Sirainen if (!uri_data_decode(parser, p+1, NULL, &password))
fe6c1556d3529a6376d4cbb3766c34aebde0de99Timo Sirainen url->host_name = p_strdup(parser->pool, auth.host_literal);
fe6c1556d3529a6376d4cbb3766c34aebde0de99Timo Sirainen url->password = p_strdup(parser->pool, password);
6060b7c8edf8fce73470d0df6a2479b69b01c537Timo Sirainenstatic bool http_url_parse_authority_form(struct http_url_parser *url_parser)
fe6c1556d3529a6376d4cbb3766c34aebde0de99Timo Sirainen struct uri_parser *parser = &url_parser->parser;
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen url_parser->req_format = HTTP_REQUEST_TARGET_FORMAT_AUTHORITY;
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainenstatic bool http_url_do_parse(struct http_url_parser *url_parser)
b5e6f6f27c1461f0f9f202615eeb738a645188c3Timo Sirainen struct uri_parser *parser = &url_parser->parser;
8000c86be02008b74acc71fa422444dc432e2c01Timo Sirainen struct http_url *url = url_parser->url, *base = url_parser->base;
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen const char *const *path;
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen bool relative = TRUE, have_scheme = FALSE, have_authority = FALSE,
b5e6f6f27c1461f0f9f202615eeb738a645188c3Timo Sirainen http://tools.ietf.org/html/draft-ietf-httpbis-p1-messaging-23
345212e8f61ebf14ff4f80df26df9e655eb5121eTimo Sirainen http-URI = "http://" authority path-abempty [ "?" query ]
345212e8f61ebf14ff4f80df26df9e655eb5121eTimo Sirainen [ "#" fragment ]
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen https-URI = "https://" authority path-abempty [ "?" query ]
345212e8f61ebf14ff4f80df26df9e655eb5121eTimo Sirainen [ "#" fragment ]
8000c86be02008b74acc71fa422444dc432e2c01Timo Sirainen partial-URI = relative-part [ "?" query ]
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen request-target = origin-form / absolute-form / authority-form /
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen asterisk-form
90adcaa0a00eba29b7fbd50ca66be11c8d086d6aTimo Sirainen origin-form = absolute-path [ "?" query ]
90adcaa0a00eba29b7fbd50ca66be11c8d086d6aTimo Sirainen absolute-form = absolute-URI
90adcaa0a00eba29b7fbd50ca66be11c8d086d6aTimo Sirainen authority-form = authority
90adcaa0a00eba29b7fbd50ca66be11c8d086d6aTimo Sirainen asterisk-form = "*"
90adcaa0a00eba29b7fbd50ca66be11c8d086d6aTimo Sirainen ; Not parsed here
90adcaa0a00eba29b7fbd50ca66be11c8d086d6aTimo Sirainen absolute-path = 1*( "/" segment )
90adcaa0a00eba29b7fbd50ca66be11c8d086d6aTimo Sirainen Appendix A: (implemented in uri-util.h)
90adcaa0a00eba29b7fbd50ca66be11c8d086d6aTimo Sirainen absolute-URI = scheme ":" hier-part [ "?" query ]
90adcaa0a00eba29b7fbd50ca66be11c8d086d6aTimo Sirainen hier-part = "//" authority path-abempty
90adcaa0a00eba29b7fbd50ca66be11c8d086d6aTimo Sirainen / path-absolute
90adcaa0a00eba29b7fbd50ca66be11c8d086d6aTimo Sirainen / path-rootless
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen relative-part = "//" authority path-abempty
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen / path-absolute
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen / path-noscheme
b5e6f6f27c1461f0f9f202615eeb738a645188c3Timo Sirainen authority = [ userinfo "@" ] host [ ":" port ]
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen path-abempty = *( "/" segment )
a0b0d629931773c17a236f6214adbe0e13b9b3fdTimo Sirainen path-absolute = "/" [ segment-nz *( "/" segment ) ]
a0b0d629931773c17a236f6214adbe0e13b9b3fdTimo Sirainen path-noscheme = segment-nz-nc *( "/" segment )
4b41116563110d00330896a568eff1078c382827Timo Sirainen path-rootless = segment-nz *( "/" segment )
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen path-empty = 0<pchar>
b5e6f6f27c1461f0f9f202615eeb738a645188c3Timo Sirainen segment = *pchar
a0b0d629931773c17a236f6214adbe0e13b9b3fdTimo Sirainen segment-nz = 1*pchar
a0b0d629931773c17a236f6214adbe0e13b9b3fdTimo Sirainen segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
a0b0d629931773c17a236f6214adbe0e13b9b3fdTimo Sirainen ; non-zero-length segment without any colon ":"
a0b0d629931773c17a236f6214adbe0e13b9b3fdTimo Sirainen query = *( pchar / "/" / "?" )
a0b0d629931773c17a236f6214adbe0e13b9b3fdTimo Sirainen fragment = *( pchar / "/" / "?" )
a0b0d629931773c17a236f6214adbe0e13b9b3fdTimo Sirainen /* "http:" / "https:" */
a0b0d629931773c17a236f6214adbe0e13b9b3fdTimo Sirainen if ((url_parser->flags & HTTP_URL_PARSE_SCHEME_EXTERNAL) == 0) {
a0b0d629931773c17a236f6214adbe0e13b9b3fdTimo Sirainen if ((ret = uri_parse_scheme(parser, &scheme)) < 0)
a0b0d629931773c17a236f6214adbe0e13b9b3fdTimo Sirainen else if (ret > 0) {
d67fde1a8ebc1d85704c5986d8f93aae97eccef3Timo Sirainen /* valid as non-HTTP scheme, but also try to parse as authority */
d67fde1a8ebc1d85704c5986d8f93aae97eccef3Timo Sirainen if (!http_url_parse_authority_form(url_parser)) {
d67fde1a8ebc1d85704c5986d8f93aae97eccef3Timo Sirainen url_parser->url = NULL; /* indicate non-http-url */
b5e6f6f27c1461f0f9f202615eeb738a645188c3Timo Sirainen url_parser->req_format = HTTP_REQUEST_TARGET_FORMAT_ABSOLUTE;
d67fde1a8ebc1d85704c5986d8f93aae97eccef3Timo Sirainen /* "//" authority ; or
d67fde1a8ebc1d85704c5986d8f93aae97eccef3Timo Sirainen * ["//"] authority ; when parsing a request target
d67fde1a8ebc1d85704c5986d8f93aae97eccef3Timo Sirainen if (parser->cur < parser->end && parser->cur[0] == '/') {
d67fde1a8ebc1d85704c5986d8f93aae97eccef3Timo Sirainen if (parser->cur+1 < parser->end && parser->cur[1] == '/') {
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen /* start of absolute-path */
b5e6f6f27c1461f0f9f202615eeb738a645188c3Timo Sirainen } else if (url_parser->request_target && !have_scheme) {
b2ecd50bb98c44816cb07c17aa17fae2b425f941Timo Sirainen if (!http_url_parse_authority_form(url_parser)) {
db7c9201c88e3d9bee10485194ee5b0c67249916Timo Sirainen /* not non-HTTP scheme and invalid as authority-form */
18565c69efcd7db003dbf27cf625ed822e889fb1Timo Sirainen parser->error = "Absolute HTTP URL requires `//' after `http:'";
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen /* path-abempty / path-absolute / path-noscheme / path-empty */
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen if ((ret = uri_parse_path(parser, &path_relative, &path)) < 0)
fdc557286bc9f92c5f3bb49096ff6e2bcec0ea79Timo Sirainen /* Relative URLs are only valid when we have a base URL */
a12399903f415a7e14c2816cffa2f7a09dcbb097Timo Sirainen parser->error = "Relative HTTP URL not allowed";
16c89b1260c9d07c01c83a9219424d3727069b2eTimo Sirainen url->host_name = p_strdup_empty(parser->pool, base->host_name);
d67fde1a8ebc1d85704c5986d8f93aae97eccef3Timo Sirainen url->user = p_strdup_empty(parser->pool, base->user);
87460b08cb97b31cde640d4975a6aa2c1d0e7226Timo Sirainen url->password = p_strdup_empty(parser->pool, base->password);
16c89b1260c9d07c01c83a9219424d3727069b2eTimo Sirainen /* Resolve path */
if (p >= pbegin) {
pend = p;
if (p > pbegin) p--;
return FALSE;
path++;
return FALSE;
if (ret > 0) {
return FALSE;
return FALSE;
if (ret > 0) {
return FALSE;
return FALSE;
return FALSE;
if (have_scheme)
return TRUE;
const char **error_r)
i_unreached();