http-url.c revision 1e653c7406ec0b062c0bacfdf2e7568a3f860500
/* Copyright (c) 2013 Dovecot authors, see the included COPYING file */
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen#include "lib.h"
90adcaa0a00eba29b7fbd50ca66be11c8d086d6aTimo Sirainen#include "str.h"
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen#include "strfuncs.h"
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen#include "net.h"
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen#include "uri-util.h"
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen
f1901fd21906911f7be075c965ac882f6a87b4c3Timo Sirainen#include "http-url.h"
63a61b7a739ae0f3f520215137d9c50f94d0f34fTimo Sirainen#include "http-request.h"
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen
/*
 * HTTP URL parser
 */
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainenstruct http_url_parser {
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen struct uri_parser parser;
a0b0d629931773c17a236f6214adbe0e13b9b3fdTimo Sirainen
a0b0d629931773c17a236f6214adbe0e13b9b3fdTimo Sirainen enum http_url_parse_flags flags;
a0b0d629931773c17a236f6214adbe0e13b9b3fdTimo Sirainen
a0b0d629931773c17a236f6214adbe0e13b9b3fdTimo Sirainen struct http_url *url;
a0b0d629931773c17a236f6214adbe0e13b9b3fdTimo Sirainen struct http_url *base;
a0b0d629931773c17a236f6214adbe0e13b9b3fdTimo Sirainen
a0b0d629931773c17a236f6214adbe0e13b9b3fdTimo Sirainen enum http_request_target_format req_format;
a0b0d629931773c17a236f6214adbe0e13b9b3fdTimo Sirainen
a0b0d629931773c17a236f6214adbe0e13b9b3fdTimo Sirainen unsigned int relative:1;
a0b0d629931773c17a236f6214adbe0e13b9b3fdTimo Sirainen unsigned int request_target:1;
a0b0d629931773c17a236f6214adbe0e13b9b3fdTimo Sirainen};
a0b0d629931773c17a236f6214adbe0e13b9b3fdTimo Sirainen
a0b0d629931773c17a236f6214adbe0e13b9b3fdTimo Sirainenstatic bool http_url_parse_authority(struct http_url_parser *url_parser)
a0b0d629931773c17a236f6214adbe0e13b9b3fdTimo Sirainen{
a0b0d629931773c17a236f6214adbe0e13b9b3fdTimo Sirainen struct uri_parser *parser = &url_parser->parser;
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen struct http_url *url = url_parser->url;
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen struct uri_authority auth;
af1f4b17a92ca7b2661737e65c7849df289d3070Timo Sirainen const char *user = NULL, *password = NULL;
af1f4b17a92ca7b2661737e65c7849df289d3070Timo Sirainen int ret;
b5e6f6f27c1461f0f9f202615eeb738a645188c3Timo Sirainen
f1901fd21906911f7be075c965ac882f6a87b4c3Timo Sirainen if ((ret = uri_parse_authority(parser, &auth)) < 0)
fe6c1556d3529a6376d4cbb3766c34aebde0de99Timo Sirainen return FALSE;
6ef7e31619edfaa17ed044b45861d106a86191efTimo Sirainen if (ret > 0) {
01cbf4ac5d44137ab434791be7f838d98d0fcf3bTimo Sirainen if (auth.enc_userinfo != NULL) {
4b41116563110d00330896a568eff1078c382827Timo Sirainen const char *p;
4b41116563110d00330896a568eff1078c382827Timo Sirainen
992a13add4eea0810e4db0f042a595dddf85536aTimo Sirainen if ((url_parser->flags & HTTP_URL_ALLOW_USERINFO_PART) == 0) {
fe6c1556d3529a6376d4cbb3766c34aebde0de99Timo Sirainen /* http://tools.ietf.org/html/draft-ietf-httpbis-p1-messaging-20
fe6c1556d3529a6376d4cbb3766c34aebde0de99Timo Sirainen
fe6c1556d3529a6376d4cbb3766c34aebde0de99Timo Sirainen Section 2.8.1:
fe6c1556d3529a6376d4cbb3766c34aebde0de99Timo Sirainen
fe6c1556d3529a6376d4cbb3766c34aebde0de99Timo Sirainen {...} Senders MUST NOT include a userinfo subcomponent (and its "@"
fe6c1556d3529a6376d4cbb3766c34aebde0de99Timo Sirainen delimiter) when transmitting an "http" URI in a message. Recipients
fe6c1556d3529a6376d4cbb3766c34aebde0de99Timo Sirainen of HTTP messages that contain a URI reference SHOULD parse for the
fe6c1556d3529a6376d4cbb3766c34aebde0de99Timo Sirainen existence of userinfo and treat its presence as an error, likely
fe6c1556d3529a6376d4cbb3766c34aebde0de99Timo Sirainen indicating that the deprecated subcomponent is being used to
fe6c1556d3529a6376d4cbb3766c34aebde0de99Timo Sirainen obscure the authority for the sake of phishing attacks.
fe6c1556d3529a6376d4cbb3766c34aebde0de99Timo Sirainen */
fe6c1556d3529a6376d4cbb3766c34aebde0de99Timo Sirainen parser->error = "HTTP URL does not allow `userinfo@' part";
fe6c1556d3529a6376d4cbb3766c34aebde0de99Timo Sirainen return FALSE;
fe6c1556d3529a6376d4cbb3766c34aebde0de99Timo Sirainen }
fe6c1556d3529a6376d4cbb3766c34aebde0de99Timo Sirainen
fe6c1556d3529a6376d4cbb3766c34aebde0de99Timo Sirainen p = strchr(auth.enc_userinfo, ':');
fe6c1556d3529a6376d4cbb3766c34aebde0de99Timo Sirainen if (p == NULL) {
fe6c1556d3529a6376d4cbb3766c34aebde0de99Timo Sirainen if (!uri_data_decode(parser, auth.enc_userinfo, NULL, &user))
fe6c1556d3529a6376d4cbb3766c34aebde0de99Timo Sirainen return FALSE;
fe6c1556d3529a6376d4cbb3766c34aebde0de99Timo Sirainen } else {
d30da25fb6be1f1c667d93767c9194000194b618Timo Sirainen if (!uri_data_decode(parser, auth.enc_userinfo, p, &user))
d30da25fb6be1f1c667d93767c9194000194b618Timo Sirainen return FALSE;
fe6c1556d3529a6376d4cbb3766c34aebde0de99Timo Sirainen if (!uri_data_decode(parser, p+1, NULL, &password))
597dce34068d603fb759b4dff404b34049213e51Timo Sirainen return FALSE;
63a61b7a739ae0f3f520215137d9c50f94d0f34fTimo Sirainen }
fe6c1556d3529a6376d4cbb3766c34aebde0de99Timo Sirainen }
fe6c1556d3529a6376d4cbb3766c34aebde0de99Timo Sirainen }
fe6c1556d3529a6376d4cbb3766c34aebde0de99Timo Sirainen if (url != NULL) {
fe6c1556d3529a6376d4cbb3766c34aebde0de99Timo Sirainen url->host_name = p_strdup(parser->pool, auth.host_literal);
fe6c1556d3529a6376d4cbb3766c34aebde0de99Timo Sirainen url->host_ip = auth.host_ip;
fe6c1556d3529a6376d4cbb3766c34aebde0de99Timo Sirainen url->have_host_ip = auth.have_host_ip;
fe6c1556d3529a6376d4cbb3766c34aebde0de99Timo Sirainen url->port = auth.port;
4b41116563110d00330896a568eff1078c382827Timo Sirainen url->have_port = auth.have_port;
fe6c1556d3529a6376d4cbb3766c34aebde0de99Timo Sirainen url->user = p_strdup(parser->pool, user);
fe6c1556d3529a6376d4cbb3766c34aebde0de99Timo Sirainen url->password = p_strdup(parser->pool, password);
f1901fd21906911f7be075c965ac882f6a87b4c3Timo Sirainen }
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen return TRUE;
f1901fd21906911f7be075c965ac882f6a87b4c3Timo Sirainen}
fe6c1556d3529a6376d4cbb3766c34aebde0de99Timo Sirainen
6060b7c8edf8fce73470d0df6a2479b69b01c537Timo Sirainenstatic bool http_url_parse_authority_form(struct http_url_parser *url_parser)
7f773564b94e6054a40d3785cb63c29f1e4d4deeTimo Sirainen{
fe6c1556d3529a6376d4cbb3766c34aebde0de99Timo Sirainen struct uri_parser *parser = &url_parser->parser;
ccc895c0358108d2304239063e940b7d75f364abTimo Sirainen
fe6c1556d3529a6376d4cbb3766c34aebde0de99Timo Sirainen if (!http_url_parse_authority(url_parser))
fe6c1556d3529a6376d4cbb3766c34aebde0de99Timo Sirainen return FALSE;
fe6c1556d3529a6376d4cbb3766c34aebde0de99Timo Sirainen if (parser->cur != parser->end)
4b41116563110d00330896a568eff1078c382827Timo Sirainen return FALSE;
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen url_parser->req_format = HTTP_REQUEST_TARGET_FORMAT_AUTHORITY;
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen return TRUE;
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen}
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainenstatic bool http_url_do_parse(struct http_url_parser *url_parser)
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen{
b5e6f6f27c1461f0f9f202615eeb738a645188c3Timo Sirainen struct uri_parser *parser = &url_parser->parser;
8000c86be02008b74acc71fa422444dc432e2c01Timo Sirainen struct http_url *url = url_parser->url, *base = url_parser->base;
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen const char *const *path;
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen bool relative = TRUE, have_scheme = FALSE, have_authority = FALSE,
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen have_path = FALSE;
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen int path_relative;
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen const char *part;
4b41116563110d00330896a568eff1078c382827Timo Sirainen int ret;
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen /*
b5e6f6f27c1461f0f9f202615eeb738a645188c3Timo Sirainen http://tools.ietf.org/html/draft-ietf-httpbis-p1-messaging-23
345212e8f61ebf14ff4f80df26df9e655eb5121eTimo Sirainen Appendix C:
345212e8f61ebf14ff4f80df26df9e655eb5121eTimo Sirainen
345212e8f61ebf14ff4f80df26df9e655eb5121eTimo Sirainen http-URI = "http://" authority path-abempty [ "?" query ]
345212e8f61ebf14ff4f80df26df9e655eb5121eTimo Sirainen [ "#" fragment ]
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen https-URI = "https://" authority path-abempty [ "?" query ]
345212e8f61ebf14ff4f80df26df9e655eb5121eTimo Sirainen [ "#" fragment ]
8000c86be02008b74acc71fa422444dc432e2c01Timo Sirainen partial-URI = relative-part [ "?" query ]
8000c86be02008b74acc71fa422444dc432e2c01Timo Sirainen
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen request-target = origin-form / absolute-form / authority-form /
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen asterisk-form
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen
90adcaa0a00eba29b7fbd50ca66be11c8d086d6aTimo Sirainen origin-form = absolute-path [ "?" query ]
90adcaa0a00eba29b7fbd50ca66be11c8d086d6aTimo Sirainen absolute-form = absolute-URI
90adcaa0a00eba29b7fbd50ca66be11c8d086d6aTimo Sirainen authority-form = authority
90adcaa0a00eba29b7fbd50ca66be11c8d086d6aTimo Sirainen asterisk-form = "*"
90adcaa0a00eba29b7fbd50ca66be11c8d086d6aTimo Sirainen ; Not parsed here
90adcaa0a00eba29b7fbd50ca66be11c8d086d6aTimo Sirainen
90adcaa0a00eba29b7fbd50ca66be11c8d086d6aTimo Sirainen absolute-path = 1*( "/" segment )
90adcaa0a00eba29b7fbd50ca66be11c8d086d6aTimo Sirainen
90adcaa0a00eba29b7fbd50ca66be11c8d086d6aTimo Sirainen http://tools.ietf.org/html/rfc3986
90adcaa0a00eba29b7fbd50ca66be11c8d086d6aTimo Sirainen Appendix A: (implemented in uri-util.h)
90adcaa0a00eba29b7fbd50ca66be11c8d086d6aTimo Sirainen
90adcaa0a00eba29b7fbd50ca66be11c8d086d6aTimo Sirainen absolute-URI = scheme ":" hier-part [ "?" query ]
90adcaa0a00eba29b7fbd50ca66be11c8d086d6aTimo Sirainen
90adcaa0a00eba29b7fbd50ca66be11c8d086d6aTimo Sirainen hier-part = "//" authority path-abempty
90adcaa0a00eba29b7fbd50ca66be11c8d086d6aTimo Sirainen / path-absolute
90adcaa0a00eba29b7fbd50ca66be11c8d086d6aTimo Sirainen / path-rootless
90adcaa0a00eba29b7fbd50ca66be11c8d086d6aTimo Sirainen / path-empty
90adcaa0a00eba29b7fbd50ca66be11c8d086d6aTimo Sirainen
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen relative-part = "//" authority path-abempty
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen / path-absolute
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen / path-noscheme
a0b0d629931773c17a236f6214adbe0e13b9b3fdTimo Sirainen / path-empty
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen
b5e6f6f27c1461f0f9f202615eeb738a645188c3Timo Sirainen authority = [ userinfo "@" ] host [ ":" port ]
a0b0d629931773c17a236f6214adbe0e13b9b3fdTimo Sirainen
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen path-abempty = *( "/" segment )
a0b0d629931773c17a236f6214adbe0e13b9b3fdTimo Sirainen path-absolute = "/" [ segment-nz *( "/" segment ) ]
a0b0d629931773c17a236f6214adbe0e13b9b3fdTimo Sirainen path-noscheme = segment-nz-nc *( "/" segment )
4b41116563110d00330896a568eff1078c382827Timo Sirainen path-rootless = segment-nz *( "/" segment )
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen path-empty = 0<pchar>
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen
b5e6f6f27c1461f0f9f202615eeb738a645188c3Timo Sirainen segment = *pchar
a0b0d629931773c17a236f6214adbe0e13b9b3fdTimo Sirainen segment-nz = 1*pchar
a0b0d629931773c17a236f6214adbe0e13b9b3fdTimo Sirainen segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
a0b0d629931773c17a236f6214adbe0e13b9b3fdTimo Sirainen ; non-zero-length segment without any colon ":"
a0b0d629931773c17a236f6214adbe0e13b9b3fdTimo Sirainen
a0b0d629931773c17a236f6214adbe0e13b9b3fdTimo Sirainen query = *( pchar / "/" / "?" )
a0b0d629931773c17a236f6214adbe0e13b9b3fdTimo Sirainen fragment = *( pchar / "/" / "?" )
a0b0d629931773c17a236f6214adbe0e13b9b3fdTimo Sirainen */
a0b0d629931773c17a236f6214adbe0e13b9b3fdTimo Sirainen
a0b0d629931773c17a236f6214adbe0e13b9b3fdTimo Sirainen /* "http:" / "https:" */
a0b0d629931773c17a236f6214adbe0e13b9b3fdTimo Sirainen if ((url_parser->flags & HTTP_URL_PARSE_SCHEME_EXTERNAL) == 0) {
a0b0d629931773c17a236f6214adbe0e13b9b3fdTimo Sirainen const char *scheme;
a0b0d629931773c17a236f6214adbe0e13b9b3fdTimo Sirainen
a0b0d629931773c17a236f6214adbe0e13b9b3fdTimo Sirainen if ((ret = uri_parse_scheme(parser, &scheme)) < 0)
a0b0d629931773c17a236f6214adbe0e13b9b3fdTimo Sirainen return FALSE;
a0b0d629931773c17a236f6214adbe0e13b9b3fdTimo Sirainen else if (ret > 0) {
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen if (strcasecmp(scheme, "https") == 0) {
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen if (url != NULL)
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen url->have_ssl = TRUE;
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen } else if (strcasecmp(scheme, "http") != 0) {
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen if (url_parser->request_target) {
d67fde1a8ebc1d85704c5986d8f93aae97eccef3Timo Sirainen /* valid as non-HTTP scheme, but also try to parse as authority */
d67fde1a8ebc1d85704c5986d8f93aae97eccef3Timo Sirainen parser->cur = parser->begin;
d67fde1a8ebc1d85704c5986d8f93aae97eccef3Timo Sirainen if (!http_url_parse_authority_form(url_parser)) {
d67fde1a8ebc1d85704c5986d8f93aae97eccef3Timo Sirainen url_parser->url = NULL; /* indicate non-http-url */
b5e6f6f27c1461f0f9f202615eeb738a645188c3Timo Sirainen url_parser->req_format = HTTP_REQUEST_TARGET_FORMAT_ABSOLUTE;
d67fde1a8ebc1d85704c5986d8f93aae97eccef3Timo Sirainen }
d67fde1a8ebc1d85704c5986d8f93aae97eccef3Timo Sirainen return TRUE;
d67fde1a8ebc1d85704c5986d8f93aae97eccef3Timo Sirainen }
4b41116563110d00330896a568eff1078c382827Timo Sirainen parser->error = "Not an HTTP URL";
a1808be0774cbcb28fec45341aabf803ec44bae5Timo Sirainen return FALSE;
a1808be0774cbcb28fec45341aabf803ec44bae5Timo Sirainen }
d67fde1a8ebc1d85704c5986d8f93aae97eccef3Timo Sirainen relative = FALSE;
b5e6f6f27c1461f0f9f202615eeb738a645188c3Timo Sirainen have_scheme = TRUE;
d67fde1a8ebc1d85704c5986d8f93aae97eccef3Timo Sirainen }
d67fde1a8ebc1d85704c5986d8f93aae97eccef3Timo Sirainen } else {
c74ea62a27878910e3ca1614ca055d7e2b3b00d5Timo Sirainen relative = FALSE;
c74ea62a27878910e3ca1614ca055d7e2b3b00d5Timo Sirainen have_scheme = TRUE;
d67fde1a8ebc1d85704c5986d8f93aae97eccef3Timo Sirainen }
d67fde1a8ebc1d85704c5986d8f93aae97eccef3Timo Sirainen
d67fde1a8ebc1d85704c5986d8f93aae97eccef3Timo Sirainen /* "//" authority ; or
d67fde1a8ebc1d85704c5986d8f93aae97eccef3Timo Sirainen * ["//"] authority ; when parsing a request target
d67fde1a8ebc1d85704c5986d8f93aae97eccef3Timo Sirainen */
d67fde1a8ebc1d85704c5986d8f93aae97eccef3Timo Sirainen if (parser->cur < parser->end && parser->cur[0] == '/') {
d67fde1a8ebc1d85704c5986d8f93aae97eccef3Timo Sirainen if (parser->cur+1 < parser->end && parser->cur[1] == '/') {
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen parser->cur += 2;
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen relative = FALSE;
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen have_authority = TRUE;
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen } else {
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen /* start of absolute-path */
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen }
b5e6f6f27c1461f0f9f202615eeb738a645188c3Timo Sirainen } else if (url_parser->request_target && !have_scheme) {
b2ecd50bb98c44816cb07c17aa17fae2b425f941Timo Sirainen if (!http_url_parse_authority_form(url_parser)) {
db7c9201c88e3d9bee10485194ee5b0c67249916Timo Sirainen /* not non-HTTP scheme and invalid as authority-form */
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen parser->error = "Request target is invalid";
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen return FALSE;
4b41116563110d00330896a568eff1078c382827Timo Sirainen }
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen return TRUE;
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen }
b5e6f6f27c1461f0f9f202615eeb738a645188c3Timo Sirainen
db7c9201c88e3d9bee10485194ee5b0c67249916Timo Sirainen if (have_scheme && !have_authority) {
18565c69efcd7db003dbf27cf625ed822e889fb1Timo Sirainen parser->error = "Absolute HTTP URL requires `//' after `http:'";
18565c69efcd7db003dbf27cf625ed822e889fb1Timo Sirainen return FALSE;
18565c69efcd7db003dbf27cf625ed822e889fb1Timo Sirainen }
185ed0142fbbfb86e7a98519e7c6f11ec00723cdTimo Sirainen
59151b71059df1190acd75d8717ed04a7920c862Timo Sirainen if (have_authority) {
59151b71059df1190acd75d8717ed04a7920c862Timo Sirainen if (!http_url_parse_authority(url_parser))
59151b71059df1190acd75d8717ed04a7920c862Timo Sirainen return FALSE;
d5cebe7f98e63d4e2822863ef2faa4971e8b3a5dTimo Sirainen }
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen /* path-abempty / path-absolute / path-noscheme / path-empty */
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen if ((ret = uri_parse_path(parser, &path_relative, &path)) < 0)
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen return FALSE;
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen
fdc557286bc9f92c5f3bb49096ff6e2bcec0ea79Timo Sirainen /* Relative URLs are only valid when we have a base URL */
fdc557286bc9f92c5f3bb49096ff6e2bcec0ea79Timo Sirainen if (relative) {
fdc557286bc9f92c5f3bb49096ff6e2bcec0ea79Timo Sirainen if (base == NULL) {
a12399903f415a7e14c2816cffa2f7a09dcbb097Timo Sirainen parser->error = "Relative HTTP URL not allowed";
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen return FALSE;
16c89b1260c9d07c01c83a9219424d3727069b2eTimo Sirainen } else if (!have_authority && url != NULL) {
16c89b1260c9d07c01c83a9219424d3727069b2eTimo Sirainen url->host_name = p_strdup_empty(parser->pool, base->host_name);
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen url->host_ip = base->host_ip;
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen url->have_host_ip = base->have_host_ip;
90adcaa0a00eba29b7fbd50ca66be11c8d086d6aTimo Sirainen url->port = base->port;
90adcaa0a00eba29b7fbd50ca66be11c8d086d6aTimo Sirainen url->have_port = base->have_port;
d67fde1a8ebc1d85704c5986d8f93aae97eccef3Timo Sirainen url->have_ssl = base->have_ssl;
d67fde1a8ebc1d85704c5986d8f93aae97eccef3Timo Sirainen url->user = p_strdup_empty(parser->pool, base->user);
87460b08cb97b31cde640d4975a6aa2c1d0e7226Timo Sirainen url->password = p_strdup_empty(parser->pool, base->password);
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen }
87460b08cb97b31cde640d4975a6aa2c1d0e7226Timo Sirainen
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen url_parser->relative = TRUE;
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen }
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen
16c89b1260c9d07c01c83a9219424d3727069b2eTimo Sirainen /* Resolve path */
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen if (ret > 0) {
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen string_t *fullpath = NULL;
have_path = TRUE;
if (url != NULL)
fullpath = t_str_new(256);
if (relative && path_relative > 0 && base->path != NULL) {
const char *pbegin = base->path;
const char *pend = base->path + strlen(base->path);
const char *p = pend - 1;
i_assert(*pbegin == '/');
/* discard trailing segments of base path based on how many effective
leading '..' segments were found in the relative path.
*/
while (path_relative > 0 && p > pbegin) {
while (p > pbegin && *p != '/') p--;
if (p >= pbegin) {
pend = p;
path_relative--;
}
if (p > pbegin) p--;
}
if (url != NULL && pend > pbegin)
str_append_n(fullpath, pbegin, pend-pbegin);
}
/* append relative path */
while (*path != NULL) {
if (!uri_data_decode(parser, *path, NULL, &part))
return FALSE;
if (url != NULL) {
str_append_c(fullpath, '/');
str_append(fullpath, part);
}
path++;
}
if (url != NULL)
url->path = p_strdup(parser->pool, str_c(fullpath));
} else if (relative && url != NULL) {
url->path = p_strdup(parser->pool, base->path);
}
/* [ "?" query ] */
if ((ret = uri_parse_query(parser, &part)) < 0)
return FALSE;
if (ret > 0) {
if (!uri_data_decode(parser, part, NULL, NULL)) // check only
return FALSE;
if (url != NULL)
url->enc_query = p_strdup(parser->pool, part);
} else if (relative && !have_path && url != NULL) {
url->enc_query = p_strdup(parser->pool, base->enc_query);
}
/* [ "#" fragment ] */
if ((ret = uri_parse_fragment(parser, &part)) < 0)
return FALSE;
if (ret > 0) {
if ((url_parser->flags & HTTP_URL_ALLOW_FRAGMENT_PART) == 0) {
parser->error = "URL fragment not allowed for HTTP URL in this context";
return FALSE;
}
if (!uri_data_decode(parser, part, NULL, NULL)) // check only
return FALSE;
if (url != NULL)
url->enc_fragment = p_strdup(parser->pool, part);
} else if (relative && !have_path && url != NULL) {
url->enc_fragment = p_strdup(parser->pool, base->enc_fragment);
}
if (parser->cur != parser->end) {
parser->error = "HTTP URL contains invalid character";
return FALSE;
}
if (have_scheme)
url_parser->req_format = HTTP_REQUEST_TARGET_FORMAT_ABSOLUTE;
return TRUE;
}
/* Public API */
/* Parse `url' as an HTTP URL into a new struct http_url allocated from
   `pool'.

   url     - URL text to parse
   base    - base URL for resolving relative references, or NULL to reject
             relative URLs; must be NULL when HTTP_URL_PARSE_SCHEME_EXTERNAL
             is set in flags
   flags   - HTTP_URL_* parse flags
   url_r   - receives the parsed URL on success
   error_r - receives the parser's error message on failure

   Returns 0 on success, -1 on failure. */
int http_url_parse(const char *url, struct http_url *base,
	enum http_url_parse_flags flags, pool_t pool,
	struct http_url **url_r, const char **error_r)
{
	struct http_url_parser ctx;

	/* base != NULL indicates whether relative URLs are allowed. However,
	   certain flags may also dictate whether relative URLs are
	   allowed/required. */
	i_assert((flags & HTTP_URL_PARSE_SCHEME_EXTERNAL) == 0 || base == NULL);

	memset(&ctx, '\0', sizeof(ctx));
	uri_parser_init(&ctx.parser, pool, url);

	ctx.flags = flags;
	ctx.base = base;
	ctx.url = p_new(pool, struct http_url, 1);

	if (http_url_do_parse(&ctx)) {
		*url_r = ctx.url;
		return 0;
	}

	*error_r = ctx.parser.error;
	return -1;
}
/* Parse an HTTP request target, using the Host header value as the base
   authority.

   request_target - request target text from the request line
   host_header    - value of the Host header; must be a bare authority
                    (no userinfo, no trailing data)
   pool           - pool that the resulting URL is allocated from
   target         - receives the parsed URL and the detected target format;
                    target->url is NULL when the target is an absolute URL
                    with a non-HTTP scheme
   error_r        - receives an error message on failure

   Returns 0 on success, -1 on failure. *error_r is set on every failure
   path, including an invalid Host header. */
int http_url_request_target_parse(const char *request_target,
	const char *host_header, pool_t pool, struct http_request_target *target,
	const char **error_r)
{
	struct http_url_parser url_parser;
	struct uri_parser *parser;
	struct uri_authority host;
	struct http_url base;

	/* first validate the Host header as a plain authority */
	memset(&url_parser, '\0', sizeof(url_parser));
	parser = &url_parser.parser;
	uri_parser_init(parser, pool, host_header);

	if (uri_parse_authority(parser, &host) <= 0) {
		/* a zero return may leave parser->error unset; never hand NULL
		   to the %s conversion */
		if (parser->error == NULL) {
			*error_r = "Invalid Host header";
		} else {
			*error_r = t_strdup_printf("Invalid Host header: %s",
						   parser->error);
		}
		return -1;
	}

	if (parser->cur != parser->end || host.enc_userinfo != NULL) {
		*error_r = "Invalid Host header: Contains invalid character";
		return -1;
	}

	/* asterisk-form: the URL consists of just the Host header authority */
	if (request_target[0] == '*' && request_target[1] == '\0') {
		struct http_url *url = p_new(pool, struct http_url, 1);

		url->host_name = p_strdup(pool, host.host_literal);
		url->host_ip = host.host_ip;
		url->port = host.port;
		url->have_host_ip = host.have_host_ip;
		url->have_port = host.have_port;
		target->url = url;
		target->format = HTTP_REQUEST_TARGET_FORMAT_ASTERISK;
		return 0;
	}

	/* the Host header authority serves as base for origin-form targets */
	memset(&base, 0, sizeof(base));
	base.host_name = host.host_literal;
	base.host_ip = host.host_ip;
	base.port = host.port;
	base.have_host_ip = host.have_host_ip;
	base.have_port = host.have_port;

	/* restart the parser on the request target itself */
	memset(parser, '\0', sizeof(*parser));
	uri_parser_init(parser, pool, request_target);

	url_parser.url = p_new(pool, struct http_url, 1);
	url_parser.request_target = TRUE;
	url_parser.req_format = HTTP_REQUEST_TARGET_FORMAT_ORIGIN;
	url_parser.base = &base;
	url_parser.flags = 0;

	if (!http_url_do_parse(&url_parser)) {
		*error_r = url_parser.parser.error;
		return -1;
	}

	target->url = url_parser.url;
	target->format = url_parser.req_format;
	return 0;
}
/*
* HTTP URL construction
*/
/* Append the path and, when present, the "?query" of `url' to `urlstr'.
   A missing or empty path is written as "/". */
static void http_url_add_target(string_t *urlstr, const struct http_url *url)
{
	const char *path = url->path;
	const char *query = url->enc_query;

	if (path != NULL && *path != '\0') {
		uri_append_path_data(urlstr, "", path);
	} else {
		/* Older syntax of RFC 2616 requires this slash at all times for
		   an absolute URL
		 */
		str_append_c(urlstr, '/');
	}

	/* query (pre-encoded) */
	if (query == NULL)
		return;
	str_append_c(urlstr, '?');
	str_append(urlstr, query);
}
/* Build the absolute URL string for `url':
   scheme "://" host [":" port] path ["?" query] ["#" fragment].

   The scheme is "https" when url->have_ssl is set and "http" otherwise.
   The URL must have either a host name or a host IP. User and password are
   not included in the output.

   Returns the contents of a string created with t_str_new(). */
const char *http_url_create(const struct http_url *url)
{
	string_t *urlstr = t_str_new(512);

	/* scheme */
	if (url->have_ssl)
		uri_append_scheme(urlstr, "https");
	else
		uri_append_scheme(urlstr, "http");
	str_append(urlstr, "//");

	/* host:port */
	if (url->host_name != NULL) {
		/* assume IPv6 literal if starts with '['; avoid encoding */
		if (*url->host_name == '[')
			str_append(urlstr, url->host_name);
		else
			uri_append_host_name(urlstr, url->host_name);
	} else if (url->have_host_ip) {
		uri_append_host_ip(urlstr, &url->host_ip);
	} else
		i_unreached();
	if (url->have_port)
		uri_append_port(urlstr, url->port);

	/* path and query */
	http_url_add_target(urlstr, url);

	/* fragment */
	if (url->enc_fragment != NULL) {
		str_append_c(urlstr, '#');
		str_append(urlstr, url->enc_fragment);
	}

	return str_c(urlstr);
}
/* Build only the target part (path plus optional "?query") of `url'.
   Returns the contents of a string created with t_str_new(). */
const char *http_url_create_target(const struct http_url *url)
{
	string_t *target = t_str_new(256);

	http_url_add_target(target, url);

	return str_c(target);
}
/* Append `data' to `out' as URI query data, passing "&;/?=+" to
   uri_append_query_data() as the extra characters to escape. */
void http_url_escape_param(string_t *out, const char *data)
{
	static const char param_delims[] = "&;/?=+";

	uri_append_query_data(out, param_delims, data);
}