/* Copyright (c) 2010-2012 Dovecot authors, see the included COPYING file */

#include "lib.h"
#include "array.h"
#include "str.h"
#include "net.h"
#include "uri-util.h"

#include <ctype.h>

/*
 * Generic URI parsing.
 *
 * [URI-GEN] RFC3986 Appendix A:
 *
 *   host          = IP-literal / IPv4address / reg-name
 *   port          = *DIGIT
 *   reg-name      = *( unreserved / pct-encoded / sub-delims )
 *   unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 *   pct-encoded   = "%" HEXDIG HEXDIG
 *   sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                 / "*" / "+" / "," / ";" / "="
 *   IP-literal    = "[" ( IPv6address / IPvFuture ) "]"
 *   IPvFuture     = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
 *   IPv6address   =                            6( h16 ":" ) ls32
 *                 /                       "::" 5( h16 ":" ) ls32
 *                 / [               h16 ] "::" 4( h16 ":" ) ls32
 *                 / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
 *                 / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
 *                 / [ *3( h16 ":" ) h16 ] "::"    h16 ":"   ls32
 *                 / [ *4( h16 ":" ) h16 ] "::"              ls32
 *                 / [ *5( h16 ":" ) h16 ] "::"              h16
 *                 / [ *6( h16 ":" ) h16 ] "::"
 *   h16           = 1*4HEXDIG
 *   ls32          = ( h16 ":" h16 ) / IPv4address
 *   IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
 *   dec-octet     = DIGIT                 ; 0-9
 *                 / %x31-39 DIGIT         ; 10-99
 *                 / "1" 2DIGIT            ; 100-199
 *                 / "2" %x30-34 DIGIT     ; 200-249
 *                 / "25" %x30-35          ; 250-255
 */

/* Upper bound used by uri_cut_scheme() on the number of scheme characters it
   scans before it expects the terminating ':'. */
#define URI_MAX_SCHEME_NAME_LEN 64

/* Character lookup table
 *
 * unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"     [bit0]
 * sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *               / "*" / "+" / "," / ";" / "="               [bit1]
 * gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"   [bit2]
 * pchar         = unreserved / sub-delims / ":" / "@"       [bit0|bit1|bit3]
 * 'pfchar'      = unreserved / sub-delims / ":" / "@" / "/"
 *                                                           [bit0|bit1|bit3|bit5]
 * 'uchar'       = unreserved / sub-delims / ":"             [bit0|bit1|bit4]
 * 'qchar'       = pchar / "/" / "?"               [bit0|bit1|bit3|bit5|bit6]
 *
 */

/* Bit masks selecting character classes in _uri_char_lookup[]. A character
   belongs to a class when its table entry has at least one of the class's
   mask bits set. */
#define CHAR_MASK_UNRESERVED (1<<0)
#define CHAR_MASK_SUB_DELIMS (1<<1)
#define CHAR_MASK_PCHAR ((1<<0)|(1<<1)|(1<<3))
#define CHAR_MASK_PFCHAR ((1<<0)|(1<<1)|(1<<3)|(1<<5))
#define CHAR_MASK_UCHAR ((1<<0)|(1<<1)|(1<<4))
#define CHAR_MASK_QCHAR ((1<<0)|(1<<1)|(1<<3)|(1<<5)|(1<<6))

/* Per-character class bits, indexed by byte value. Only the first 128 entries
   are listed; the remaining entries (0x80..0xff) are implicitly zero, i.e.
   8-bit characters belong to no class. */
static const unsigned char _uri_char_lookup[256] = {
	 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 00
	 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 10
	 0,  2,  0,  4,  2,  0,  2,  2,  2,  2,  2,  2,  2,  1,  1, 36,  // 20
	 1,  1,  1,  1,  1,  1,  1,  1,  1,  1, 28,  2,  0,  2,  0, 68,  // 30
	12,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 40
	 1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  4,  0,  4,  0,  1,  // 50
	 0,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 60
	 1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  0,  0,  0,  1,  0,  // 70
};

/* Converts a single hexadecimal digit character (either case) to its numeric
   value 0..15. Returns -1 when the character is not a hex digit. */
static inline int _decode_hex_digit(const unsigned char digit)
{
	if (digit >= '0' && digit <= '9')
		return digit - '0';
	if (digit >= 'a' && digit <= 'f')
		return digit - 'a' + 0x0a;
	if (digit >= 'A' && digit <= 'F')
		return digit - 'A' + 0x0A;
	return -1;
}

59151b71059df1190acd75d8717ed04a7920c862Timo Sirainenstatic int ATTR_NULL(3)
ecc81625167ed96c04c02aa190a1ea5baa65b474Timo Sirainenuri_parse_pct_encoded(struct uri_parser *parser, const unsigned char **p,
c0435c854a0e7246373b9752d163095cc4fbe985Timo Sirainen const unsigned char *pend, unsigned char *ch_r)
556f95092c3bc850517d5ab2bb502024a55645f1Timo Sirainen{
556f95092c3bc850517d5ab2bb502024a55645f1Timo Sirainen int value;
ecc81625167ed96c04c02aa190a1ea5baa65b474Timo Sirainen
ecc81625167ed96c04c02aa190a1ea5baa65b474Timo Sirainen if (**p == 0 || *(*p+1) == 0 || (pend != NULL && *p+1 >= pend)) {
2767104d81e97a109f0aa9758792bfa1da325a97Timo Sirainen parser->error = "Unexpected URI boundary after '%'";
c0435c854a0e7246373b9752d163095cc4fbe985Timo Sirainen return -1;
2767104d81e97a109f0aa9758792bfa1da325a97Timo Sirainen }
10ff47d5d6146995e16da00d36eca7d162064a7bTimo Sirainen
683eebe490bbe5caec246c535a10ea9f93f5c330Timo Sirainen if ((value = _decode_hex_digit(**p)) < 0) {
683eebe490bbe5caec246c535a10ea9f93f5c330Timo Sirainen parser->error = t_strdup_printf(
c0435c854a0e7246373b9752d163095cc4fbe985Timo Sirainen "Expecting hex digit after '%%', but found '%c'", **p);
5238111c460098d9cc8cc22527026138a278b9a4Timo Sirainen return -1;
5238111c460098d9cc8cc22527026138a278b9a4Timo Sirainen }
6ef7e31619edfaa17ed044b45861d106a86191efTimo Sirainen
2767104d81e97a109f0aa9758792bfa1da325a97Timo Sirainen *ch_r = (value & 0x0f) << 4;
68a4946b12583b88fa802e52ebee45cd96056772Timo Sirainen *p += 1;
de954ff15b495be13007a8aca2c09fd1d356a283Timo Sirainen
de954ff15b495be13007a8aca2c09fd1d356a283Timo Sirainen if ((value = _decode_hex_digit(**p)) < 0) {
ecc81625167ed96c04c02aa190a1ea5baa65b474Timo Sirainen parser->error = t_strdup_printf(
ecc81625167ed96c04c02aa190a1ea5baa65b474Timo Sirainen "Expecting hex digit after '%%%c', but found '%c'", *((*p)-1), **p);
ecc81625167ed96c04c02aa190a1ea5baa65b474Timo Sirainen return -1;
c0435c854a0e7246373b9752d163095cc4fbe985Timo Sirainen }
2767104d81e97a109f0aa9758792bfa1da325a97Timo Sirainen
c0435c854a0e7246373b9752d163095cc4fbe985Timo Sirainen *ch_r |= (value & 0x0f);
2767104d81e97a109f0aa9758792bfa1da325a97Timo Sirainen *p += 1;
0ce5f96804e81cb0f857e7df32c0272f1eed9377Timo Sirainen
c0435c854a0e7246373b9752d163095cc4fbe985Timo Sirainen if (*ch_r == '\0') {
9de176ef7f3d28ff486c2a8805110b84389e4f19Timo Sirainen parser->error =
ceb43cc04edb94445fab8f914bc4da6d740403d1Timo Sirainen "Percent encoding is not allowed to encode NUL character";
9de176ef7f3d28ff486c2a8805110b84389e4f19Timo Sirainen return -1;
9de176ef7f3d28ff486c2a8805110b84389e4f19Timo Sirainen }
9de176ef7f3d28ff486c2a8805110b84389e4f19Timo Sirainen return 1;
e2a88d59c0d47d63ce1ad5b1fd95e487124a3fd4Timo Sirainen}
e2a88d59c0d47d63ce1ad5b1fd95e487124a3fd4Timo Sirainen
e2a88d59c0d47d63ce1ad5b1fd95e487124a3fd4Timo Sirainenstatic int
e2a88d59c0d47d63ce1ad5b1fd95e487124a3fd4Timo Sirainenuri_parse_unreserved_char(struct uri_parser *parser, unsigned char *ch_r)
e2a88d59c0d47d63ce1ad5b1fd95e487124a3fd4Timo Sirainen{
9de176ef7f3d28ff486c2a8805110b84389e4f19Timo Sirainen if (*parser->cur == '%') {
9de176ef7f3d28ff486c2a8805110b84389e4f19Timo Sirainen parser->cur++;
9de176ef7f3d28ff486c2a8805110b84389e4f19Timo Sirainen if (uri_parse_pct_encoded(parser, &parser->cur,
e2a88d59c0d47d63ce1ad5b1fd95e487124a3fd4Timo Sirainen parser->end, ch_r) <= 0)
e2a88d59c0d47d63ce1ad5b1fd95e487124a3fd4Timo Sirainen return -1;
e2a88d59c0d47d63ce1ad5b1fd95e487124a3fd4Timo Sirainen return 1;
e2a88d59c0d47d63ce1ad5b1fd95e487124a3fd4Timo Sirainen }
e2a88d59c0d47d63ce1ad5b1fd95e487124a3fd4Timo Sirainen
e2a88d59c0d47d63ce1ad5b1fd95e487124a3fd4Timo Sirainen if ((*parser->cur & 0x80) != 0)
e2a88d59c0d47d63ce1ad5b1fd95e487124a3fd4Timo Sirainen return 0;
e2a88d59c0d47d63ce1ad5b1fd95e487124a3fd4Timo Sirainen
e2a88d59c0d47d63ce1ad5b1fd95e487124a3fd4Timo Sirainen if ((_uri_char_lookup[*parser->cur] & CHAR_MASK_UNRESERVED) != 0) {
6adf683655750bcb809275cd65dc75fd12214198Timo Sirainen *ch_r = *parser->cur;
6adf683655750bcb809275cd65dc75fd12214198Timo Sirainen parser->cur++;
6adf683655750bcb809275cd65dc75fd12214198Timo Sirainen return 1;
6adf683655750bcb809275cd65dc75fd12214198Timo Sirainen }
03f5c621d06d6b6d77a145196c9633a7aa64dc78Timo Sirainen return 0;
6adf683655750bcb809275cd65dc75fd12214198Timo Sirainen}
6adf683655750bcb809275cd65dc75fd12214198Timo Sirainen
6adf683655750bcb809275cd65dc75fd12214198Timo Sirainenint uri_parse_unreserved(struct uri_parser *parser, string_t *part)
6adf683655750bcb809275cd65dc75fd12214198Timo Sirainen{
6adf683655750bcb809275cd65dc75fd12214198Timo Sirainen int len = 0;
6adf683655750bcb809275cd65dc75fd12214198Timo Sirainen
6adf683655750bcb809275cd65dc75fd12214198Timo Sirainen while (parser->cur < parser->end) {
6adf683655750bcb809275cd65dc75fd12214198Timo Sirainen int ret;
d7e72877b7a5085c3addf9729d0bfbe1b5357853Timo Sirainen unsigned char ch = 0;
03f5c621d06d6b6d77a145196c9633a7aa64dc78Timo Sirainen
6adf683655750bcb809275cd65dc75fd12214198Timo Sirainen if ((ret = uri_parse_unreserved_char(parser, &ch)) < 0)
6adf683655750bcb809275cd65dc75fd12214198Timo Sirainen return -1;
6adf683655750bcb809275cd65dc75fd12214198Timo Sirainen
ecc81625167ed96c04c02aa190a1ea5baa65b474Timo Sirainen if (ret == 0)
8cdb3234fe3c77e477c7a0e6934678f58fc54d4dTimo Sirainen break;
8cdb3234fe3c77e477c7a0e6934678f58fc54d4dTimo Sirainen
8cdb3234fe3c77e477c7a0e6934678f58fc54d4dTimo Sirainen if (part != NULL)
8cdb3234fe3c77e477c7a0e6934678f58fc54d4dTimo Sirainen str_append_c(part, ch);
71da447014454c84828d9dface77219875554d7dTimo Sirainen len++;
71da447014454c84828d9dface77219875554d7dTimo Sirainen }
71da447014454c84828d9dface77219875554d7dTimo Sirainen
71da447014454c84828d9dface77219875554d7dTimo Sirainen return len > 0 ? 1 : 0;
ecc81625167ed96c04c02aa190a1ea5baa65b474Timo Sirainen}
/* Percent-decodes the string starting at data.

   Decoding stops at the first '\0' or, when until is non-NULL, at until,
   whichever comes first. On success TRUE is returned and, if decoded_r is
   non-NULL, *decoded_r is set to the decoded string ("" for empty input).
   FALSE is returned when a percent-encoding is malformed; the reason is then
   left in parser->error by uri_parse_pct_encoded(). */
bool uri_data_decode(struct uri_parser *parser, const char *data,
		     const char *until, const char **decoded_r)
{
	const unsigned char *p = (const unsigned char *)data;
	/* NULL means unlimited; then solely rely on '\0' */
	const unsigned char *pend = (const unsigned char *)until;
	string_t *decoded;

	if ((pend != NULL && p >= pend) || *p == '\0') {
		if (decoded_r != NULL)
			*decoded_r = "";
		return TRUE;
	}

	decoded = uri_parser_get_tmpbuf(parser, 256);
	while ((pend == NULL || p < pend) && *p != '\0') {
		unsigned char ch;

		if (*p != '%') {
			str_append_c(decoded, *p);
			p++;
			continue;
		}

		p++;
		/* Hand the limit down, so that an escape cut off by `until`
		   is reported as an error instead of being decoded from
		   bytes that lie beyond the limit. */
		if (uri_parse_pct_encoded(parser, &p, pend, &ch) <= 0)
			return FALSE;
		str_append_c(decoded, ch);
	}

	if (decoded_r != NULL)
		*decoded_r = t_strdup(str_c(decoded));
	return TRUE;
}
/* Splits the leading "scheme:" off the URI string in *uri_p.

   On success 0 is returned, *scheme_r is set to a copy of the scheme name
   (without the ':') and *uri_p is advanced to the character following the
   ':'. Returns -1, leaving both untouched, when the string does not start
   with a valid scheme followed by ':'. */
int uri_cut_scheme(const char **uri_p, const char **scheme_r)
{
	const char *uri = *uri_p;
	const char *cur = uri;
	size_t len;

	/* RFC 3986:
	 *   scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
	 */
	if (!i_isalpha(*cur))
		return -1;
	cur++;

	/* the first character is already counted */
	for (len = 1; len < URI_MAX_SCHEME_NAME_LEN; len++) {
		if (*cur == '\0')
			break;
		if (!i_isalnum(*cur) && *cur != '+' && *cur != '-' && *cur != '.')
			break;
		cur++;
	}

	if (*cur != ':')
		return -1;

	*scheme_r = t_strdup_until(uri, cur);
	*uri_p = cur + 1;
	return 0;
}
/* Parses a "scheme:" prefix at the parser's current position.

   Returns 1 and advances the parser past the ':' when a scheme was found
   (its name is stored via scheme_r by uri_cut_scheme()); returns 0, without
   moving the parser, when the input is exhausted or does not start with a
   scheme. */
int uri_parse_scheme(struct uri_parser *parser, const char **scheme_r)
{
	const char *pos = (const char *)parser->cur;

	if (parser->cur >= parser->end)
		return 0;
	if (uri_cut_scheme(&pos, scheme_r) < 0)
		return 0;

	parser->cur = (const unsigned char *)pos;
	return 1;
}
/* Parses a run of decimal digits as one octet of an IPv4 address.

   Each consumed digit is also appended to literal when it is non-NULL.
   Returns 1 and stores the value in *octet_r when at least one digit was
   read, 0 when there is no digit at the current position, and -1 when the
   value exceeds 255. */
static int
uri_parse_dec_octet(struct uri_parser *parser, string_t *literal,
		    uint8_t *octet_r)
{
	/* Accumulate in a wider type: checking for wrap-around on a uint8_t
	   misses values such as 300, which wraps to 44 (> 30). */
	unsigned int octet = 0;
	int count = 0;

	/* RFC 3986:
	 *
	 * dec-octet     = DIGIT                 ; 0-9
	 *               / %x31-39 DIGIT         ; 10-99
	 *               / "1" 2DIGIT            ; 100-199
	 *               / "2" %x30-34 DIGIT     ; 200-249
	 *               / "25" %x30-35          ; 250-255
	 */

	while (parser->cur < parser->end && i_isdigit(*parser->cur)) {
		octet = octet * 10 + (unsigned int)(parser->cur[0] - '0');
		if (octet > 255)
			return -1;

		if (literal != NULL)
			str_append_c(literal, *parser->cur);

		parser->cur++;
		count++;
	}

	if (count > 0) {
		*octet_r = (uint8_t)octet;
		return 1;
	}
	return 0;
}
/* Parses a dotted-quad IPv4 address at the parser's current position.

   The consumed text is appended to literal when it is non-NULL and the
   address is stored (in network byte order) in *ip4_r when it is non-NULL.
   Returns 1 on success, 0 when the input does not start with a digit, and
   -1 when the input starts like an address but is not a complete, valid
   one. The parser position is not restored on failure; the caller does
   that. */
static int
uri_parse_ipv4address(struct uri_parser *parser, string_t *literal,
		      struct in_addr *ip4_r)
{
	uint8_t octet;
	uint32_t ip = 0;
	int ret;
	int i;

	/* RFC 3986:
	 *
	 * IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
	 */

	if ((ret = uri_parse_dec_octet(parser, literal, &octet)) <= 0)
		return ret;
	ip = octet;

	for (i = 0; i < 3; i++) {
		/* All four octets are mandatory: running out of input here
		   means this is not an IPv4 address (e.g. "10" or "1.2"). */
		if (parser->cur >= parser->end || *parser->cur != '.')
			return -1;

		if (literal != NULL)
			str_append_c(literal, '.');
		parser->cur++;

		if ((ret = uri_parse_dec_octet(parser, literal, &octet)) <= 0)
			return -1;
		ip = (ip << 8) + octet;
	}

	if (ip4_r != NULL)
		ip4_r->s_addr = htonl(ip);
	return 1;
}
/* Parses a registered host name at the parser's current position.

   Decoded characters are appended to reg_name when it is non-NULL. Returns
   1 when at least one character was consumed, 0 when none was, and -1 when
   a percent-encoding is malformed. */
static int uri_parse_reg_name(struct uri_parser *parser, string_t *reg_name)
{
	int count = 0;

	/* RFC 3986:
	 *
	 * reg-name      = *( unreserved / pct-encoded / sub-delims )
	 */

	for (; parser->cur < parser->end; count++) {
		unsigned char ch;
		int ret = uri_parse_unreserved_char(parser, &ch);

		if (ret < 0)
			return -1;

		if (ret == 0) {
			/* not unreserved / pct-encoded; try sub-delims */
			ch = *parser->cur;
			if ((ch & 0x80) != 0 ||
			    (_uri_char_lookup[ch] & CHAR_MASK_SUB_DELIMS) == 0)
				break;
			parser->cur++;
		}

		if (reg_name != NULL)
			str_append_c(reg_name, ch);
	}

	return count > 0 ? 1 : 0;
}
#ifdef HAVE_IPV6
/* Parses a bracketed IP literal; parser->cur must point at the opening '['.

   The whole literal, brackets included, is appended to literal when it is
   non-NULL, and the parser is advanced past the closing ']'. The address
   between the brackets is converted with inet_pton() into *ip6_r. Returns 1
   on success; returns -1 with parser->error set when the ']' is missing,
   the address is empty, it is an IPvFuture address, or it is not a valid
   IPv6 address. */
static int
uri_parse_ip_literal(struct uri_parser *parser, string_t *literal,
		     struct in6_addr *ip6_r)
{
	const unsigned char *bracket = parser->cur + 1;
	const char *address;

	/* IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
	 * IPvFuture     = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
	 * IPv6address   = ; Syntax not relevant: parsed using inet_pton()
	 */

	/* "[" already verified; find the matching "]" */
	while (bracket < parser->end && *bracket != ']')
		bracket++;

	if (bracket >= parser->end) {
		parser->error = "Expecting ']' at end of IP-literal";
		return -1;
	}

	if (literal != NULL)
		str_append_n(literal, parser->cur, bracket-parser->cur+1);
	address = t_strdup_until(parser->cur+1, bracket);
	parser->cur = bracket + 1;

	switch (*address) {
	case '\0':
		parser->error = "Empty IPv6 host address";
		return -1;
	case 'v':
		parser->error = t_strdup_printf(
			"Future IP host address '%s' not supported", address);
		return -1;
	default:
		break;
	}

	if (inet_pton(AF_INET6, address, ip6_r) <= 0) {
		parser->error = t_strdup_printf(
			"Invalid IPv6 host address '%s'", address);
		return -1;
	}
	return 1;
}
#endif
/* Parses the host component at the parser's current position, trying an IP
   literal, then an IPv4 address, then a registered name.

   When auth is non-NULL, the host text is stored in auth->host_literal and,
   for IP hosts, the address in auth->host_ip with auth->have_host_ip set.
   Returns 1 when a host was parsed, 0 when the host is empty, and -1 on
   error. */
static int uri_parse_host(struct uri_parser *parser, struct uri_authority *auth)
{
	const unsigned char *start;
	struct in_addr ip4;
	struct in6_addr ip6;
	string_t *literal;
	int ret;

	/* RFC 3986:
	 *
	 * host          = IP-literal / IPv4address / reg-name
	 */

	literal = uri_parser_get_tmpbuf(parser, 256);

	/* IP-literal / */
	if (parser->cur < parser->end && *parser->cur == '[') {
#ifdef HAVE_IPV6
		if (uri_parse_ip_literal(parser, literal, &ip6) <= 0)
			return -1;

		if (auth != NULL) {
			auth->host_literal = t_strdup(str_c(literal));
			auth->host_ip.family = AF_INET6;
			auth->host_ip.u.ip6 = ip6;
			auth->have_host_ip = TRUE;
		}
		return 1;
#else
		parser->error = "IPv6 host address is not supported";
		return -1;
#endif
	}

	/* IPv4address /
	 *
	 * When this does not parse, rewind and retry the same text as a
	 * reg-name.
	 */
	start = parser->cur;
	ret = uri_parse_ipv4address(parser, literal, &ip4);
	if (ret > 0) {
		if (auth != NULL) {
			auth->host_literal = t_strdup(str_c(literal));
			auth->host_ip.family = AF_INET;
			auth->host_ip.u.ip4 = ip4;
			auth->have_host_ip = TRUE;
		}
		return ret;
	}
	parser->cur = start;
	str_truncate(literal, 0);

	/* reg-name */
	ret = uri_parse_reg_name(parser, literal);
	if (ret > 0 && auth != NULL) {
		auth->host_literal = t_strdup(str_c(literal));
		auth->have_host_ip = FALSE;
	}
	return ret;
}
/* Parses a decimal port number at the parser's current position.

   Returns 1 when at least one digit was read (the value is then stored in
   auth->port and auth->have_port is set, if auth is non-NULL), 0 when there
   is no digit, and -1 with parser->error set when the number does not fit
   in in_port_t. */
static int uri_parse_port(struct uri_parser *parser, struct uri_authority *auth)
{
	/* Accumulate in a wider type: checking for wrap-around on in_port_t
	   itself misses values such as 99999, which wraps to 34463 (> 9999). */
	unsigned int port = 0;
	int count = 0;

	/* RFC 3986:
	 *
	 * port        = *DIGIT
	 */

	while (parser->cur < parser->end && i_isdigit(*parser->cur)) {
		port = port * 10 + (unsigned int)(parser->cur[0] - '0');
		if (port > (in_port_t)-1) {
			parser->error = "Port number is too high";
			return -1;
		}
		parser->cur++;
		count++;
	}

	if (count > 0) {
		if (auth != NULL) {
			auth->port = (in_port_t)port;
			auth->have_port = TRUE;
		}
		return 1;
	}
	return 0;
}
/* Parses "//" authority at the parser's current position.

   Returns 0, without consuming anything, when the input does not start with
   "//" followed by at least one more character. Otherwise auth (if non-NULL)
   is cleared and filled with the still-encoded userinfo, the host and the
   port, and 1 is returned. Returns -1 on error; a missing host is reported
   through parser->error. */
int uri_parse_authority(struct uri_parser *parser, struct uri_authority *auth)
{
	const unsigned char *scan;
	int ret;

	/* hier-part     = "//" authority {...}
	 * relative-part = "//" authority {...}
	 * authority     = [ userinfo "@" ] host [ ":" port ]
	 */

	/* The leading "//" is treated as part of the authority */
	if ((parser->end - parser->cur) <= 2)
		return 0;
	if (parser->cur[0] != '/' || parser->cur[1] != '/')
		return 0;
	parser->cur += 2;

	if (auth != NULL)
		memset(auth, 0, sizeof(*auth));

	/* Look ahead for a [userinfo "@"] component: skip everything that may
	   appear in userinfo and see whether the scan stops at '@'. */
	scan = parser->cur;
	while (scan < parser->end) {
		/* 8bit characters are refused */
		if ((*scan & 0x80) != 0)
			break;
		/* stop at the first delimiter */
		if (*scan != '%' &&
		    (_uri_char_lookup[*scan] & CHAR_MASK_UCHAR) == 0)
			break;
		scan++;
	}

	if (scan < parser->end && *scan == '@') {
		if (auth != NULL)
			auth->enc_userinfo = t_strdup_until(parser->cur, scan);
		parser->cur = scan+1;
	}

	/* host */
	ret = uri_parse_host(parser, auth);
	if (ret == 0) {
		parser->error = "Missing 'host' component";
		return -1;
	}
	if (ret < 0)
		return ret;

	/* [":" port] */
	if (parser->cur >= parser->end || *parser->cur != ':')
		return 1;
	parser->cur++;

	ret = uri_parse_port(parser, auth);
	return ret < 0 ? ret : 1;
}
/* Consumes one path segment, i.e. a run of pchar characters and '%', at the
   parser's current position. Percent-encodings are neither validated nor
   decoded here.

   Returns 0 when the segment is empty. Otherwise returns 1, advances the
   parser past the segment and, if segment_r is non-NULL, stores a copy of
   the raw segment text in *segment_r. */
int uri_parse_path_segment(struct uri_parser *parser, const char **segment_r)
{
	const unsigned char *p;

	for (p = parser->cur; p < parser->end; p++) {
		if (*p == '%')
			continue;
		if ((*p & 0x80) != 0 ||
		    (_uri_char_lookup[*p] & CHAR_MASK_PCHAR) == 0)
			break;
	}

	if (p == parser->cur)
		return 0;

	if (segment_r != NULL)
		*segment_r = t_strdup_until(parser->cur, p);
	parser->cur = p;
	return 1;
}
/* Parses a '/'-separated path at the parser's current position, removing
   "." and ".." segments along the way.

   Returns 0 when nothing was consumed and -1 when a segment fails to parse.
   Otherwise returns 1, stores the NULL-terminated list of remaining segments
   in *path_r and sets *relative_r: 0 for a path with a leading '/', else 1
   plus the number of ".." segments that had no preceding segment left to
   remove. */
int uri_parse_path(struct uri_parser *parser,
		   int *relative_r, const char *const **path_r)
{
	const unsigned char *start = parser->cur;
	ARRAY_TYPE(const_string) segments;
	const char *segment = NULL;
	unsigned int count;
	int relative = 1;
	int ret;

	t_array_init(&segments, 16);

	/* a leading '/' makes the path absolute */
	if (parser->cur < parser->end && *parser->cur == '/') {
		parser->cur++;
		relative = 0;
	}

	/* first segment */
	ret = uri_parse_path_segment(parser, &segment);
	if (ret < 0)
		return -1;

	for (;;) {
		if (ret <= 0) {
			/* empty segment */
			segment = "";
		} else if (segment[0] == '.' && segment[1] == '\0') {
			/* '.' -> dropped */
			segment = NULL;
		} else if (segment[0] == '.' && segment[1] == '.' &&
			   segment[2] == '\0') {
			/* '..' -> dropped, together with the preceding
			   segment when there is one */
			segment = NULL;
			count = array_count(&segments);
			if (count > 0)
				array_delete(&segments, count-1, 1);
			else if (relative > 0)
				relative++;
		}

		if (segment != NULL)
			array_append(&segments, &segment, 1);

		if (parser->cur >= parser->end || *parser->cur != '/')
			break;
		parser->cur++;

		/* next segment */
		ret = uri_parse_path_segment(parser, &segment);
		if (ret < 0)
			return -1;
	}

	if (parser->cur == start) {
		/* the URI has no path part */
		return 0;
	}

	/* a trailing '..' or '.' leaves an empty final segment */
	if (segment == NULL) {
		segment = "";
		array_append(&segments, &segment, 1);
	}

	array_append_zero(&segments);
	*path_r = array_get(&segments, &count);
	*relative_r = relative;
	return 1;
}
/* Parses a "?query" component at the parser's current position.
   Percent-encodings are neither validated nor decoded here.

   Returns 0 when the current character is not '?'. Otherwise returns 1,
   advances the parser past the query and, if query_r is non-NULL, stores a
   copy of the raw query text (without the '?') in *query_r. */
int uri_parse_query(struct uri_parser *parser, const char **query_r)
{
	const unsigned char *p = parser->cur;

	/* RFC 3986:
	 *
	 * URI           = { ... } [ "?" query ] { ... }
	 * query         = *( pchar / "/" / "?" )
	 * pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
	 */
	if (p >= parser->end || *p != '?')
		return 0;

	for (p++; p < parser->end; p++) {
		if (*p == '%')
			continue;
		if ((*p & 0x80) != 0 ||
		    (_uri_char_lookup[*p] & CHAR_MASK_QCHAR) == 0)
			break;
	}

	if (query_r != NULL)
		*query_r = t_strdup_until(parser->cur+1, p);
	parser->cur = p;
	return 1;
}
/* Parses a "#fragment" component at the parser's current position.
   Percent-encodings are neither validated nor decoded here.

   Returns 0 when the current character is not '#'. Otherwise returns 1,
   advances the parser past the fragment and, if fragment_r is non-NULL,
   stores a copy of the raw fragment text (without the '#') in
   *fragment_r. */
int uri_parse_fragment(struct uri_parser *parser, const char **fragment_r)
{
	const unsigned char *p = parser->cur;

	/* RFC 3986:
	 *
	 * URI           = { ... } [ "#" fragment ]
	 * fragment      = *( pchar / "/" / "?" )
	 * pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
	 */
	if (p >= parser->end || *p != '#')
		return 0;

	for (p++; p < parser->end; p++) {
		if (*p == '%')
			continue;
		if ((*p & 0x80) != 0 ||
		    (_uri_char_lookup[*p] & CHAR_MASK_QCHAR) == 0)
			break;
	}

	if (fragment_r != NULL)
		*fragment_r = t_strdup_until(parser->cur+1, p);
	parser->cur = p;
	return 1;
}
/* Prepares parser for parsing the NUL-terminated string data: the begin and
   current positions are set to its start, the end position to its
   terminator, and the error and temporary buffer are cleared. pool is only
   stored in the parser here. */
void uri_parser_init(struct uri_parser *parser, pool_t pool, const char *data)
{
	unsigned char *start = (unsigned char *)data;

	parser->pool = pool;
	parser->error = NULL;
	parser->tmpbuf = NULL;

	parser->begin = start;
	parser->cur = start;
	parser->end = start + strlen(data);
}
/* Returns the parser's scratch string, emptied. It is created with
   t_str_new(size) on first use; on later calls the existing buffer is
   truncated and returned, and size is ignored. */
string_t *uri_parser_get_tmpbuf(struct uri_parser *parser, size_t size)
{
	if (parser->tmpbuf != NULL) {
		str_truncate(parser->tmpbuf, 0);
		return parser->tmpbuf;
	}

	parser->tmpbuf = t_str_new(size);
	return parser->tmpbuf;
}
/*
* Generic URI construction
*/
/* Appends data to out, percent-encoding every byte that may not appear
   literally.

   A byte is encoded when it has its high bit set, when esc_table marks it
   with none of the bits in esc_mask, or when it occurs in esc_extra.
   esc_extra may be NULL or "" when there are no additional characters to
   escape. */
static void
uri_data_encode(string_t *out, const unsigned char esc_table[256],
		unsigned char esc_mask, const char *esc_extra, const char *data)
{
	const unsigned char *p;

	for (p = (const unsigned char *)data; *p != '\0'; p++) {
		if ((*p & 0x80) != 0 || (esc_table[*p] & esc_mask) == 0 ||
		    (esc_extra != NULL &&
		     strchr(esc_extra, (char)*p) != NULL)) {
			/* always two hex digits: "%0a", never "% a" */
			str_printfa(out, "%%%02x", *p);
		} else {
			str_append_c(out, *p);
		}
	}
}
/* Appends scheme followed by ':' to out. The scheme is appended as given,
   without any escaping. */
void uri_append_scheme(string_t *out, const char *scheme)
{
str_append(out, scheme);
str_append_c(out, ':');
}
/* Appends data to out through uri_data_encode(), using the 'uchar' class
   (CHAR_MASK_UCHAR) as the set of characters that stay unescaped. The
   characters in esc are escaped in addition. */
void uri_append_user_data(string_t *out, const char *esc,
const char *data)
{
uri_data_encode(out, _uri_char_lookup, CHAR_MASK_UCHAR, esc, data);
}
/* Appends the encoded userinfo followed by '@' to out. No extra characters
   are requested for escaping. */
void uri_append_userinfo(string_t *out, const char *userinfo)
{
uri_append_user_data(out, "", userinfo);
str_append_c(out, '@');
}
/* Appends the host name to out through uri_data_encode(); only characters in
   the 'unreserved' and 'sub-delims' classes stay unescaped. */
void uri_append_host_name(string_t *out, const char *name)
{
uri_data_encode(out, _uri_char_lookup,
CHAR_MASK_UNRESERVED | CHAR_MASK_SUB_DELIMS, "", name);
}
/* Appends the textual form of host_ip to out: an AF_INET address as is, an
   AF_INET6 address enclosed in '[' and ']'. Any other address family, or an
   address that net_ip2addr() cannot convert, trips an assertion. */
void uri_append_host_ip(string_t *out, const struct ip_addr *host_ip)
{
	const char *addr = net_ip2addr(host_ip);

	i_assert(addr != NULL);

	if (host_ip->family != AF_INET) {
		i_assert(host_ip->family == AF_INET6);
		str_append_c(out, '[');
		str_append(out, addr);
		str_append_c(out, ']');
		return;
	}

	str_append(out, addr);
}
/* Appends ':' followed by the decimal port number to out. */
void uri_append_port(string_t *out, in_port_t port)
{
str_printfa(out, ":%u", port);
}
/* Appends data to out through uri_data_encode(), using the 'pchar' class
   (CHAR_MASK_PCHAR) as the set of characters that stay unescaped; '/' is not
   part of that class. The characters in esc are escaped in addition. */
void uri_append_path_segment_data(string_t *out, const char *esc,
const char *data)
{
uri_data_encode(out, _uri_char_lookup, CHAR_MASK_PCHAR, esc, data);
}
/* Appends '/' followed by the encoded path segment to out. An empty segment
   yields just the '/'.

   The segment is encoded with the single-segment character class, so that a
   '/' contained in it is escaped instead of silently splitting the segment
   in two. */
void uri_append_path_segment(string_t *out, const char *segment)
{
	str_append_c(out, '/');
	if (*segment != '\0')
		uri_append_path_segment_data(out, "", segment);
}
/* Appends data to out through uri_data_encode(), using the 'pfchar' class
   (CHAR_MASK_PFCHAR: pchar plus '/') as the set of characters that stay
   unescaped. The characters in esc are escaped in addition. */
void uri_append_path_data(string_t *out, const char *esc,
const char *data)
{
uri_data_encode(out, _uri_char_lookup, CHAR_MASK_PFCHAR, esc, data);
}
/* Appends '/' followed by the encoded path to out. An empty path yields just
   the '/'. */
void uri_append_path(string_t *out, const char *path)
{
	str_append_c(out, '/');
	if (*path == '\0')
		return;
	uri_append_path_data(out, "", path);
}
/* Appends data to out through uri_data_encode(), using the 'qchar' class
   (CHAR_MASK_QCHAR) as the set of characters that stay unescaped. The
   characters in esc are escaped in addition. */
void uri_append_query_data(string_t *out, const char *esc,
const char *data)
{
uri_data_encode(out, _uri_char_lookup, CHAR_MASK_QCHAR, esc, data);
}
/* Appends '?' followed by the encoded query to out. An empty query yields
   just the '?'. */
void uri_append_query(string_t *out, const char *query)
{
	str_append_c(out, '?');
	if (*query == '\0')
		return;
	uri_append_query_data(out, "", query);
}
/* Appends data to out through uri_data_encode(), using the 'qchar' class
   (CHAR_MASK_QCHAR) as the set of characters that stay unescaped. The
   characters in esc are escaped in addition. */
void uri_append_fragment_data(string_t *out, const char *esc,
const char *data)
{
uri_data_encode(out, _uri_char_lookup, CHAR_MASK_QCHAR, esc, data);
}
/* Appends '#' followed by the encoded fragment to out. An empty fragment
   yields just the '#'. */
void uri_append_fragment(string_t *out, const char *fragment)
{
	str_append_c(out, '#');
	if (*fragment == '\0')
		return;
	uri_append_fragment_data(out, "", fragment);
}