/* Copyright (c) 2010-2016 Dovecot authors, see the included COPYING file */

7384b4e78eaab44693c985192276e31322155e32Stephan Bosch#include "lib.h"
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch#include "array.h"
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch#include "str.h"
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch#include "net.h"
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch#include "uri-util.h"
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch#include <ctype.h>
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
/* [URI-GEN] RFC3986 Appendix A:

   URI           = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
   absolute-URI  = scheme ":" hier-part [ "?" query ]
   scheme        = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )

   URI-reference = URI / relative-ref
   relative-ref  = relative-part [ "?" query ] [ "#" fragment ]

   relative-part = "//" authority path-abempty
                 / path-absolute
                 / path-noscheme
                 / path-empty
   hier-part     = "//" authority path-abempty
                 / path-absolute
                 / path-rootless
                 / path-empty

   authority     = [ userinfo "@" ] host [ ":" port ]
   userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
   host          = IP-literal / IPv4address / reg-name
   port          = *DIGIT

   IP-literal    = "[" ( IPv6address / IPvFuture ) "]"
   IPvFuture     = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
   IPv6address   =                            6( h16 ":" ) ls32
                 /                       "::" 5( h16 ":" ) ls32
                 / [               h16 ] "::" 4( h16 ":" ) ls32
                 / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
                 / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
                 / [ *3( h16 ":" ) h16 ] "::"    h16 ":"   ls32
                 / [ *4( h16 ":" ) h16 ] "::"              ls32
                 / [ *5( h16 ":" ) h16 ] "::"              h16
                 / [ *6( h16 ":" ) h16 ] "::"
   h16           = 1*4HEXDIG
   ls32          = ( h16 ":" h16 ) / IPv4address
   IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
   dec-octet     = DIGIT                 ; 0-9
                 / %x31-39 DIGIT         ; 10-99
                 / "1" 2DIGIT            ; 100-199
                 / "2" %x30-34 DIGIT     ; 200-249
                 / "25" %x30-35          ; 250-255
   reg-name      = *( unreserved / pct-encoded / sub-delims )

   path          = path-abempty    ; begins with "/" or is empty
                 / path-absolute   ; begins with "/" but not "//"
                 / path-noscheme   ; begins with a non-colon segment
                 / path-rootless   ; begins with a segment
                 / path-empty      ; zero characters
   path-abempty  = *( "/" segment )
   path-absolute = "/" [ segment-nz *( "/" segment ) ]
   path-noscheme = segment-nz-nc *( "/" segment )
   path-rootless = segment-nz *( "/" segment )
   path-empty    = 0<pchar>

   segment       = *pchar
   segment-nz    = 1*pchar
   segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
                 ; non-zero-length segment without any colon ":"
   pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"

   query         = *( pchar / "/" / "?" )
   fragment      = *( pchar / "/" / "?" )

   pct-encoded   = "%" HEXDIG HEXDIG
   unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
   reserved      = gen-delims / sub-delims
   gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
   sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
                 / "*" / "+" / "," / ";" / "="
 */

/* Upper bound on the number of scheme characters that uri_parse_scheme()
   scans before it expects the terminating ':' */
#define URI_MAX_SCHEME_NAME_LEN 64

/* Character lookup table
 *
 * unreserved  = ALPHA / DIGIT / "-" / "." / "_" / "~"      [bit0]
 * sub-delims  = "!" / "$" / "&" / "'" / "(" / ")"
 *             / "*" / "+" / "," / ";" / "="                [bit1]
 * gen-delims  = ":" / "/" / "?" / "#" / "[" / "]" / "@"    [bit2]
 * pchar       = unreserved / sub-delims / ":" / "@"        [bit0|bit1|bit3]
 * 'pfchar'    = unreserved / sub-delims / ":" / "@" / "/"
 *                                                          [bit0|bit1|bit3|bit5]
 * 'uchar'     = unreserved / sub-delims / ":"              [bit0|bit1|bit4]
 * 'qchar'     = pchar / "/" / "?"                          [bit0|bit1|bit3|bit5|bit6]
 *
 */

/* Bit masks selecting a character class in _uri_char_lookup[]: a character
   belongs to a class when its table entry has at least one of the class'
   bits set. */
#define CHAR_MASK_UNRESERVED (1<<0)
#define CHAR_MASK_SUB_DELIMS (1<<1)
#define CHAR_MASK_PCHAR ((1<<0)|(1<<1)|(1<<3))
#define CHAR_MASK_PFCHAR ((1<<0)|(1<<1)|(1<<3)|(1<<5))
#define CHAR_MASK_UCHAR ((1<<0)|(1<<1)|(1<<4))
#define CHAR_MASK_QCHAR ((1<<0)|(1<<1)|(1<<3)|(1<<5)|(1<<6))

/* Class bits for each octet value. Only the 7-bit range is listed; the
   remaining 128 entries are implicitly zero, so 8-bit characters belong to
   no class. */
static const unsigned char _uri_char_lookup[256] = {
	 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 00
	 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 10
	 0,  2,  0,  4,  2,  0,  2,  2,  2,  2,  2,  2,  2,  1,  1, 36,  // 20
	 1,  1,  1,  1,  1,  1,  1,  1,  1,  1, 28,  2,  0,  2,  0, 68,  // 30
	12,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 40
	 1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  4,  0,  4,  0,  1,  // 50
	 0,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 60
	 1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  0,  0,  0,  1,  0,  // 70
};

/* Returns the numeric value (0..15) of the hexadecimal digit character
   `digit' (either letter case), or -1 when it is not a hexadecimal digit. */
static inline int _decode_hex_digit(const unsigned char digit)
{
	if (digit >= '0' && digit <= '9')
		return digit - '0';
	if (digit >= 'a' && digit <= 'f')
		return digit - 'a' + 0x0a;
	if (digit >= 'A' && digit <= 'F')
		return digit - 'A' + 0x0A;
	return -1;
}

7384b4e78eaab44693c985192276e31322155e32Stephan Boschstatic int
7384b4e78eaab44693c985192276e31322155e32Stephan Boschuri_parse_pct_encoded_data(struct uri_parser *parser,
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch const unsigned char **p, const unsigned char *pend,
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch unsigned char *ch_r) ATTR_NULL(3)
aacf2a69acc59e9382578d6f4e030788abc79706Timo Sirainen{
aacf2a69acc59e9382578d6f4e030788abc79706Timo Sirainen int value;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch if (**p != '%' || (pend != NULL && *p >= pend))
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch return 0;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch *p += 1;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
415e16c3dc185578695b7d88e561a52de6c8b1b1Timo Sirainen if (**p == 0 || *(*p+1) == 0 || (pend != NULL && *p+1 >= pend)) {
415e16c3dc185578695b7d88e561a52de6c8b1b1Timo Sirainen parser->error = "Unexpected URI boundary after '%'";
415e16c3dc185578695b7d88e561a52de6c8b1b1Timo Sirainen return -1;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch }
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch if ((value = _decode_hex_digit(**p)) < 0) {
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch parser->error = p_strdup_printf(parser->pool,
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch "Expecting hex digit after '%%', but found '%c'", **p);
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch return -1;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch }
636d0f43138468f8efe685a681326b123f660e49Timo Sirainen
fc94140acba51adafedafbc8491a3223a51db7a8Stephan Bosch *ch_r = (value & 0x0f) << 4;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch *p += 1;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch if ((value = _decode_hex_digit(**p)) < 0) {
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch parser->error = p_strdup_printf(parser->pool,
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch "Expecting hex digit after '%%%c', but found '%c'", *((*p)-1), **p);
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch return -1;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch }
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch *ch_r |= (value & 0x0f);
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch *p += 1;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch if (*ch_r == '\0') {
fc94140acba51adafedafbc8491a3223a51db7a8Stephan Bosch parser->error =
fc94140acba51adafedafbc8491a3223a51db7a8Stephan Bosch "Percent encoding is not allowed to encode NUL character";
fc94140acba51adafedafbc8491a3223a51db7a8Stephan Bosch return -1;
fc94140acba51adafedafbc8491a3223a51db7a8Stephan Bosch }
636d0f43138468f8efe685a681326b123f660e49Timo Sirainen return 1;
636d0f43138468f8efe685a681326b123f660e49Timo Sirainen}
636d0f43138468f8efe685a681326b123f660e49Timo Sirainen
7384b4e78eaab44693c985192276e31322155e32Stephan Boschint uri_parse_pct_encoded(struct uri_parser *parser,
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch unsigned char *ch_r)
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch{
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch return uri_parse_pct_encoded_data
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch (parser, &parser->cur, parser->end, ch_r);
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch}
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
7384b4e78eaab44693c985192276e31322155e32Stephan Boschstatic int
fb1be3de0159d6a10e916ad992e2bc53be64c6d5Timo Sirainenuri_parse_unreserved_char(struct uri_parser *parser, unsigned char *ch_r)
fb1be3de0159d6a10e916ad992e2bc53be64c6d5Timo Sirainen{
fb1be3de0159d6a10e916ad992e2bc53be64c6d5Timo Sirainen if ((*parser->cur & 0x80) != 0)
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch return 0;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
1a9a35a6b307f8d5b25345af55e40a99162b4072Timo Sirainen if ((_uri_char_lookup[*parser->cur] & CHAR_MASK_UNRESERVED) != 0) {
1a9a35a6b307f8d5b25345af55e40a99162b4072Timo Sirainen *ch_r = *parser->cur;
1a9a35a6b307f8d5b25345af55e40a99162b4072Timo Sirainen parser->cur++;
1a9a35a6b307f8d5b25345af55e40a99162b4072Timo Sirainen return 1;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch }
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch return 0;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch}
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
7384b4e78eaab44693c985192276e31322155e32Stephan Boschint uri_parse_unreserved(struct uri_parser *parser, string_t *part)
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch{
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch int len = 0;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch while (parser->cur < parser->end) {
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch int ret;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch unsigned char ch = 0;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch if ((ret = uri_parse_unreserved_char(parser, &ch)) < 0)
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch return -1;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch if (ret == 0)
17cd0e0963f2fb0e66d49703e8cd0bda1b842468Timo Sirainen break;
17cd0e0963f2fb0e66d49703e8cd0bda1b842468Timo Sirainen
17cd0e0963f2fb0e66d49703e8cd0bda1b842468Timo Sirainen if (part != NULL)
17cd0e0963f2fb0e66d49703e8cd0bda1b842468Timo Sirainen str_append_c(part, ch);
17cd0e0963f2fb0e66d49703e8cd0bda1b842468Timo Sirainen len++;
415e16c3dc185578695b7d88e561a52de6c8b1b1Timo Sirainen }
415e16c3dc185578695b7d88e561a52de6c8b1b1Timo Sirainen
415e16c3dc185578695b7d88e561a52de6c8b1b1Timo Sirainen return len > 0 ? 1 : 0;
ba1c847d0af4afe4787ed470d0c818e948e184e2Timo Sirainen}
415e16c3dc185578695b7d88e561a52de6c8b1b1Timo Sirainen
415e16c3dc185578695b7d88e561a52de6c8b1b1Timo Sirainenint uri_parse_unreserved_pct(struct uri_parser *parser, string_t *part)
415e16c3dc185578695b7d88e561a52de6c8b1b1Timo Sirainen{
415e16c3dc185578695b7d88e561a52de6c8b1b1Timo Sirainen int len = 0;
415e16c3dc185578695b7d88e561a52de6c8b1b1Timo Sirainen
415e16c3dc185578695b7d88e561a52de6c8b1b1Timo Sirainen while (parser->cur < parser->end) {
56d1345c43bbd28c36b7faa85e4163bd9e874290Timo Sirainen int ret;
415e16c3dc185578695b7d88e561a52de6c8b1b1Timo Sirainen unsigned char ch = 0;
415e16c3dc185578695b7d88e561a52de6c8b1b1Timo Sirainen
415e16c3dc185578695b7d88e561a52de6c8b1b1Timo Sirainen if ((ret=uri_parse_pct_encoded(parser, &ch)) < 0)
35e962a9186b4e9b2001628c1d7b55c24b33ce84Timo Sirainen return -1;
35e962a9186b4e9b2001628c1d7b55c24b33ce84Timo Sirainen else if (ret == 0 &&
35e962a9186b4e9b2001628c1d7b55c24b33ce84Timo Sirainen (ret=uri_parse_unreserved_char(parser, &ch)) < 0)
d47b9f1bd7274c7b2d9049c2e1718d1cf89cc572Timo Sirainen return -1;
d47b9f1bd7274c7b2d9049c2e1718d1cf89cc572Timo Sirainen if (ret == 0)
415e16c3dc185578695b7d88e561a52de6c8b1b1Timo Sirainen break;
ba1c847d0af4afe4787ed470d0c818e948e184e2Timo Sirainen
ba1c847d0af4afe4787ed470d0c818e948e184e2Timo Sirainen if (part != NULL)
ba1c847d0af4afe4787ed470d0c818e948e184e2Timo Sirainen str_append_c(part, ch);
415e16c3dc185578695b7d88e561a52de6c8b1b1Timo Sirainen len++;
415e16c3dc185578695b7d88e561a52de6c8b1b1Timo Sirainen }
415e16c3dc185578695b7d88e561a52de6c8b1b1Timo Sirainen
415e16c3dc185578695b7d88e561a52de6c8b1b1Timo Sirainen return len > 0 ? 1 : 0;
}
/* Decodes the percent-encoded octets in `data'.

   Decoding stops at `until' (exclusive) or at the first NUL character,
   whichever comes first; a NULL `until' means that only the NUL terminator
   delimits the data. When decoded_r is not NULL, it receives the decoded
   string: "" for empty input, otherwise a copy allocated from parser->pool.

   Returns TRUE on success and FALSE when an invalid percent encoding is
   encountered (parser->error is set by the decoder). */
bool uri_data_decode(struct uri_parser *parser, const char *data,
		     const char *until, const char **decoded_r)
{
	const unsigned char *p = (const unsigned char *)data;
	const unsigned char *pend = (const unsigned char *)until;
	string_t *decoded;
	int ret;

	if (pend == NULL) {
		/* NULL means unlimited; solely rely on '\0' */
		pend = (const unsigned char *)(size_t)-1;
	}

	if (p >= pend || *p == '\0') {
		if (decoded_r != NULL)
			*decoded_r = "";
		return TRUE;
	}

	decoded = uri_parser_get_tmpbuf(parser, 256);
	while (p < pend && *p != '\0') {
		unsigned char ch;

		/* hand the caller's real boundary to the decoder (NULL when
		   unlimited), so that an escape sequence straddling `until'
		   is rejected rather than decoded using characters that lie
		   outside the requested range */
		if ((ret=uri_parse_pct_encoded_data
			(parser, &p, (const unsigned char *)until, &ch)) != 0) {
			if (ret < 0)
				return FALSE;
			str_append_c(decoded, ch);
		} else {
			str_append_c(decoded, *p);
			p++;
		}
	}

	if (decoded_r != NULL)
		*decoded_r = p_strdup(parser->pool, str_c(decoded));
	return TRUE;
}
/* Parses a URI scheme including its terminating ':' at the current
   position.

   Returns 0 (nothing consumed) when the data does not start with a letter,
   1 when a scheme was parsed (parser->cur then points just past the ':' and
   *scheme_r, when not NULL, is set to a copy of the scheme name without the
   ':'), and -1 with parser->error set when the scheme characters are not
   followed by ':'. At most URI_MAX_SCHEME_NAME_LEN scheme characters are
   scanned. */
int uri_parse_scheme(struct uri_parser *parser, const char **scheme_r)
{
	const unsigned char *start = parser->cur;
	size_t n;

	/* RFC 3986:
	 *   scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
	 */
	if (parser->cur >= parser->end || !i_isalpha(*parser->cur))
		return 0;
	parser->cur++;

	for (n = 1; n < URI_MAX_SCHEME_NAME_LEN; n++) {
		unsigned char c;

		if (parser->cur >= parser->end)
			break;
		c = *parser->cur;
		if (!i_isalnum(c) && c != '+' && c != '-' && c != '.')
			break;
		parser->cur++;
	}

	if (parser->cur < parser->end && *parser->cur == ':') {
		if (scheme_r != NULL)
			*scheme_r = t_strndup(start, parser->cur - start);
		parser->cur++;
		return 1;
	}

	parser->error = "Invalid URI scheme";
	return -1;
}
/* Splits the scheme off the URI string *uri_p. On success returns 0,
   advances *uri_p to the first character after the "scheme:" prefix and
   hands the scheme name to uri_parse_scheme() through scheme_r. Returns -1,
   leaving *uri_p untouched, when the string does not start with a valid
   scheme. */
int uri_cut_scheme(const char **uri_p, const char **scheme_r)
{
	struct uri_parser parser;
	int ret;

	uri_parser_init(&parser, NULL, *uri_p);
	ret = uri_parse_scheme(&parser, scheme_r);
	if (ret > 0) {
		*uri_p = (const char *)parser.cur;
		return 0;
	}
	return -1;
}
/* Parses a run of decimal digits as one IPv4 octet.

   Each accepted digit is appended to `literal' (when not NULL). Returns 1
   with the value in *octet_r when at least one digit was consumed, 0 when
   there is no digit at the current position, and -1 as soon as the
   accumulated value exceeds 255 (the offending digit is neither consumed
   nor appended). */
static int
uri_parse_dec_octet(struct uri_parser *parser, string_t *literal,
		    uint8_t *octet_r) ATTR_NULL(2)
{
	const unsigned char *start = parser->cur;
	unsigned int value = 0;

	/* RFC 3986:
	 *
	 * dec-octet     = DIGIT                 ; 0-9
	 *               / %x31-39 DIGIT         ; 10-99
	 *               / "1" 2DIGIT            ; 100-199
	 *               / "2" %x30-34 DIGIT     ; 200-249
	 *               / "25" %x30-35          ; 250-255
	 */
	for (; parser->cur < parser->end; parser->cur++) {
		if (!i_isdigit(*parser->cur))
			break;
		value = value * 10 + (parser->cur[0] - '0');
		if (value > 255)
			return -1;
		if (literal != NULL)
			str_append_c(literal, *parser->cur);
	}

	if (parser->cur == start)
		return 0;
	*octet_r = value;
	return 1;
}
/* Parses a dotted-quad IPv4 address at the current position.

   The textual form is appended to `literal' (when not NULL) as it is
   parsed. Returns 1 with the address stored in network byte order in
   *ip4_r (when not NULL) if exactly four octets separated by '.' were
   parsed, 0 when the data does not start with a digit, and -1 when the
   data starts with digits but is not a complete, valid IPv4 address. On
   failure parser->cur and `literal' may have been modified; the caller is
   expected to restore them if it wants to retry with another syntax. */
static int
uri_parse_ipv4address(struct uri_parser *parser, string_t *literal,
		      struct in_addr *ip4_r) ATTR_NULL(2,3)
{
	uint8_t octet;
	uint32_t ip = 0;
	int ret;
	int i;

	/* RFC 3986:
	 *
	 * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
	 */

	if ((ret = uri_parse_dec_octet(parser, literal, &octet)) <= 0)
		return ret;
	ip = octet;

	for (i = 0; i < 3; i++) {
		/* all three remaining octets are mandatory: running out of
		   data here means that this is not an IPv4 address (e.g.
		   "1" or "10.0" at the very end of the input) */
		if (parser->cur >= parser->end || *parser->cur != '.')
			return -1;

		if (literal != NULL)
			str_append_c(literal, '.');
		parser->cur++;

		if ((ret = uri_parse_dec_octet(parser, literal, &octet)) <= 0)
			return -1;
		ip = (ip << 8) + octet;
	}

	if (ip4_r != NULL)
		ip4_r->s_addr = htonl(ip);
	return 1;
}
/* Parses a generic registered name at the current position.

   Consumes characters for as long as they are percent-encoded octets,
   'unreserved' characters or 'sub-delims'. The result is appended to
   `reg_name' (when not NULL), with percent-encoded octets in decoded form.
   Returns 0 when parsing stopped normally (possibly without consuming
   anything) and -1 on an invalid percent encoding. */
static int
uri_parse_reg_name(struct uri_parser *parser,
		   string_t *reg_name) ATTR_NULL(2)
{
	/* RFC 3986:
	 *
	 * reg-name      = *( unreserved / pct-encoded / sub-delims )
	 */

	while (parser->cur < parser->end) {
		unsigned char ch;
		int ret;

		/* pct-encoded */
		ret = uri_parse_pct_encoded(parser, &ch);
		if (ret < 0)
			return -1;

		if (ret == 0) {
			/* unreserved */
			ret = uri_parse_unreserved_char(parser, &ch);
			if (ret < 0)
				return -1;
		}

		if (ret == 0) {
			/* sub-delims */
			ch = *parser->cur;
			if ((ch & 0x80) != 0 ||
			    (_uri_char_lookup[ch] & CHAR_MASK_SUB_DELIMS) == 0)
				break;
			parser->cur++;
		}

		if (reg_name != NULL)
			str_append_c(reg_name, ch);
	}
	return 0;
}
/* Parses a DNS-style host name (dot-separated labels) at the current
   position.

   The decoded name is appended to `host_name' when it is not NULL; a single
   trailing '.' is removed from it. Returns 1 when a host name was parsed,
   0 when nothing was consumed, and -1 on an invalid percent encoding or an
   invalid label (parser->error is set for a label that does not end in an
   alphanumeric character). */
static int uri_do_parse_host_name_dns(struct uri_parser *parser,
	string_t *host_name) ATTR_NULL(2)
{
	const unsigned char *first, *part;
	int ret;

	/* RFC 3986, Section 3.2.2:

	   A registered name intended for lookup in the DNS uses the syntax
	   defined in Section 3.5 of [RFC1034] and Section 2.1 of [RFC1123].
	   Such a name consists of a sequence of domain labels separated by ".",
	   each domain label starting and ending with an alphanumeric character
	   and possibly also containing "-" characters.  The rightmost domain
	   label of a fully qualified domain name in DNS may be followed by a
	   single "." and should be if it is necessary to distinguish between
	   the complete domain name and some local domain.

	   RFC 2396, Section 3.2.2 (old URI specification):

	   hostname      = *( domainlabel "." ) toplabel [ "." ]
	   domainlabel   = alphanum | alphanum *( alphanum | "-" ) alphanum
	   toplabel      = alpha | alpha *( alphanum | "-" ) alphanum

	   The description in RFC 3986 is more liberal, so:

	   hostname      = *( domainlabel "." ) domainlabel [ "." ]
	   domainlabel   = alphanum | alphanum *( alphanum | "-" ) alphanum

	   We also support percent encoding in spirit of the generic reg-name,
	   even though this should explicitly not be used according to the RFC.
	   It is, however, not strictly forbidden (unlike older RFC), so we
	   support it.
	 */

	/* `part' marks the start of the literal (non-encoded) input that has
	   not been copied to host_name yet */
	first = part = parser->cur;
	for (;;) {
		const unsigned char *offset;
		unsigned char ch, pch;

		/* alphanum */
		offset = parser->cur;
		/* check the boundary before looking at the data */
		if (parser->cur >= parser->end)
			break;
		if ((ret=uri_parse_pct_encoded(parser, &ch)) < 0) {
			return -1;
		} else if (ret > 0) {
			if (!i_isalnum(ch))
				return -1;
			if (host_name != NULL)
				str_append_c(host_name, ch);
			part = parser->cur;
		} else {
			ch = *parser->cur;
			if (!i_isalnum(ch))
				break;
			parser->cur++;
		}
		/* remember the (decoded) first character of the label, so that
		   a label consisting of a single percent-encoded character
		   passes the "ends in alphanum" check below */
		pch = ch;

		if (parser->cur < parser->end) {
			/* *( alphanum | "-" ) alphanum */
			do {
				offset = parser->cur;

				if ((ret=uri_parse_pct_encoded(parser, &ch)) < 0) {
					return -1;
				} else if (ret > 0) {
					if (!i_isalnum(ch) && ch != '-') {
						if (ch == '.') {
							/* percent-encoded label
							   separator: copy the
							   label preceding it and
							   skip the raw escape
							   sequence, so that it
							   does not end up in
							   the host name */
							if (host_name != NULL &&
							    offset > part) {
								str_append_n(host_name,
									part, offset - part);
							}
							part = parser->cur;
						}
						/* NOTE(review): any other
						   percent-encoded character is
						   consumed here and copied in raw
						   form below, as before; confirm
						   whether it should be rejected */
						break;
					}
					if (host_name != NULL) {
						if (offset > part)
							str_append_n(host_name, part, offset - part);
						str_append_c(host_name, ch);
					}
					part = parser->cur;
				} else {
					ch = *parser->cur;
					if (!i_isalnum(ch) && ch != '-')
						break;
					parser->cur++;
				}
				pch = ch;
			} while (parser->cur < parser->end);

			/* the label must not end in '-' */
			if (!i_isalnum(pch)) {
				parser->error = "Invalid domain label in hostname";
				return -1;
			}
		}

		/* copy the pending literal part of this label */
		if (host_name != NULL && parser->cur > part)
			str_append_n(host_name, part, parser->cur - part);

		/* "." */
		if (parser->cur >= parser->end || ch != '.')
			break;
		if (host_name != NULL)
			str_append_c(host_name, '.');
		/* a literal '.' is still unconsumed; an encoded one was
		   already consumed by the percent decoder */
		if (parser->cur == offset)
			parser->cur++;
		part = parser->cur;
	}

	if (parser->cur == first)
		return 0;

	/* remove trailing '.' */
	if (host_name != NULL) {
		const char *name = str_c(host_name);

		i_assert(str_len(host_name) > 0);
		if (name[str_len(host_name)-1] == '.')
			str_truncate(host_name, str_len(host_name)-1);
	}
	return 1;
}
/* Parses a DNS-style host name at the current position.

   Returns 1 when a host name was parsed, 0 when nothing was consumed and
   -1 on error. On success *host_name_r (when not NULL) points to the
   decoded name held in the parser's temporary buffer. host_name_r may be
   NULL to only validate and skip the host name. */
int uri_parse_host_name_dns(struct uri_parser *parser,
			    const char **host_name_r)
{
	string_t *host_name = NULL;
	int ret;

	if (host_name_r != NULL)
		host_name = uri_parser_get_tmpbuf(parser, 256);

	if ((ret=uri_do_parse_host_name_dns(parser, host_name)) <= 0)
		return ret;

	/* no buffer exists and there is nowhere to store the result when
	   the caller did not ask for the name */
	if (host_name_r != NULL)
		*host_name_r = str_c(host_name);
	return 1;
}
/* Parses a bracketed IP literal; the caller has verified that parser->cur
   points at the opening '['.

   The complete literal including the brackets is appended to `literal'
   (when not NULL) and parser->cur is moved past the closing ']'. Returns 1
   with the address in *ip6_r (when not NULL) for a valid IPv6 address, and
   -1 with parser->error set when the closing ']' is missing, the address is
   empty, it uses the IPvFuture syntax, or inet_pton() rejects it. */
static int
uri_parse_ip_literal(struct uri_parser *parser, string_t *literal,
		     struct in6_addr *ip6_r) ATTR_NULL(2,3)
{
	const unsigned char *close_br;
	const char *address;
	struct in6_addr ip6;

	/* IP-literal  = "[" ( IPv6address / IPvFuture  ) "]"
	 * IPvFuture   = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
	 * IPv6address = ; Syntax not relevant: parsed using inet_pton()
	 */

	/* "[" already verified */

	/* Scan for end of address */
	close_br = parser->cur + 1;
	while (close_br < parser->end && *close_br != ']')
		close_br++;

	if (close_br >= parser->end) {
		parser->error = "Expecting ']' at end of IP-literal";
		return -1;
	}

	if (literal != NULL) {
		str_append_n(literal, parser->cur,
			     close_br - parser->cur + 1);
	}
	address = t_strdup_until(parser->cur + 1, close_br);
	parser->cur = close_br + 1;

	if (*address == '\0') {
		parser->error = "Empty IPv6 host address";
		return -1;
	}
	if (*address == 'v') {
		parser->error = p_strdup_printf(parser->pool,
			"Future IP host address '%s' not supported", address);
		return -1;
	}
	if (inet_pton(AF_INET6, address, &ip6) <= 0) {
		parser->error = p_strdup_printf(parser->pool,
			"Invalid IPv6 host address '%s'", address);
		return -1;
	}

	if (ip6_r != NULL)
		*ip6_r = ip6;
	return 1;
}
/* Parses the host part of an authority at the current position.

   `host' (when not NULL) is cleared first and then filled in: host->name
   always receives a pool-allocated copy of the textual host, and host->ip
   is set when the host is an IP literal or an IPv4 address. With dns_name
   set, a non-IP host is parsed as a DNS host name, otherwise as a generic
   reg-name.

   Returns 1 when the host is an IP address, 0 when it was parsed as a name
   (which may be empty), and -1 on error. */
int uri_parse_host(struct uri_parser *parser,
		   struct uri_host *host, bool dns_name) ATTR_NULL(2)
{
	const unsigned char *start;
	struct in_addr ip4;
	struct in6_addr ip6;
	string_t *buf;
	int ret;

	/* RFC 3986:
	 *
	 * host          = IP-literal / IPv4address / reg-name
	 */

	if (host != NULL)
		memset(host, 0, sizeof(*host));

	buf = uri_parser_get_tmpbuf(parser, 256);

	/* IP-literal / */
	if (parser->cur < parser->end && *parser->cur == '[') {
		if (uri_parse_ip_literal(parser, buf, &ip6) <= 0)
			return -1;

		if (host != NULL) {
			host->name = p_strdup(parser->pool, str_c(buf));
			host->ip.family = AF_INET6;
			host->ip.u.ip6 = ip6;
		}
		return 1;
	}

	/* IPv4address /
	 *
	 * If it fails to parse, we try to parse it as a reg-name
	 */
	start = parser->cur;
	ret = uri_parse_ipv4address(parser, buf, &ip4);
	if (ret > 0) {
		if (host != NULL) {
			host->name = p_strdup(parser->pool, str_c(buf));
			host->ip.family = AF_INET;
			host->ip.u.ip4 = ip4;
		}
		return ret;
	}

	/* undo whatever the IPv4 attempt consumed and collected */
	parser->cur = start;
	str_truncate(buf, 0);

	/* reg-name */
	if (dns_name)
		ret = uri_do_parse_host_name_dns(parser, buf);
	else
		ret = uri_parse_reg_name(parser, buf);
	if (ret < 0)
		return -1;

	if (host != NULL)
		host->name = p_strdup(parser->pool, str_c(buf));
	return 0;
}
/* Parses a decimal port number at the current position.

   Returns 0 when there are no digits, 1 when a valid port was parsed (it is
   stored in auth->port when auth is not NULL), and -1 with parser->error
   set when net_str2port() rejects the digit string. The digits are consumed
   in either of the latter two cases. */
static int
uri_parse_port(struct uri_parser *parser,
	       struct uri_authority *auth) ATTR_NULL(2)
{
	const unsigned char *first = parser->cur;
	const unsigned char *p = first;
	in_port_t port;

	/* RFC 3986:
	 *
	 * port = *DIGIT
	 */

	while (p < parser->end && i_isdigit(*p))
		p++;
	parser->cur = p;

	if (p == first)
		return 0;

	if (net_str2port(t_strdup_until(first, p), &port) < 0) {
		parser->error = "Invalid port number";
		return -1;
	}

	if (auth != NULL)
		auth->port = port;
	return 1;
}
/* Parses an authority component at the current position.

   `auth' (when not NULL) is cleared and then filled in with the still
   encoded userinfo, the host and the port. dns_name is passed on to
   uri_parse_host(). Returns 1 on success and -1 on error; parser->error is
   set here when the host or port is followed by a character that cannot
   end it. */
int uri_parse_authority(struct uri_parser *parser,
			struct uri_authority *auth, bool dns_name)
{
	const unsigned char *at;
	int ret;

	/*
	 * authority     = [ userinfo "@" ] host [ ":" port ]
	 */

	if (auth != NULL)
		memset(auth, 0, sizeof(*auth));

	/* Scan ahead to check whether there is a [userinfo "@"] uri component:
	   skip everything that may occur in userinfo; 8bit characters and
	   any other delimiter end the scan */
	at = parser->cur;
	while (at < parser->end && (*at & 0x80) == 0 &&
	       (*at == '%' || (_uri_char_lookup[*at] & CHAR_MASK_UCHAR) != 0))
		at++;

	/* Extract userinfo */
	if (at < parser->end && *at == '@') {
		if (auth != NULL) {
			auth->enc_userinfo =
				p_strdup_until(parser->pool, parser->cur, at);
		}
		parser->cur = at + 1;
	}

	/* host */
	if (uri_parse_host(parser,
			   (auth == NULL ? NULL : &auth->host), dns_name) < 0)
		return -1;
	if (parser->cur == parser->end)
		return 1;

	if (*parser->cur != ':' && *parser->cur != '/' &&
	    *parser->cur != '?' && *parser->cur != '#') {
		parser->error = "Invalid host identifier";
		return -1;
	}

	/* [":" port] */
	if (*parser->cur != ':')
		return 1;
	parser->cur++;

	ret = uri_parse_port(parser, auth);
	if (ret < 0)
		return ret;
	if (parser->cur == parser->end)
		return 1;

	if (*parser->cur != '/' && *parser->cur != '?' &&
	    *parser->cur != '#') {
		parser->error = "Invalid host port";
		return -1;
	}
	return 1;
}
/* Parses "//" followed by an authority. Returns 0 without consuming
   anything unless more than two bytes remain and the first two are "//";
   otherwise skips the "//" and returns the result of
   uri_parse_authority(). */
int uri_parse_slashslash_authority(struct uri_parser *parser,
				   struct uri_authority *auth, bool dns_name)
{
	/* "//" authority */
	if ((parser->end - parser->cur) > 2 &&
	    parser->cur[0] == '/' && parser->cur[1] == '/') {
		parser->cur += 2;
		return uri_parse_authority(parser, auth, dns_name);
	}
	return 0;
}
/* Parses one path segment (a run of pchar characters and '%') at the
   current position.

   Returns 1 when a non-empty segment was consumed (*segment_r, when not
   NULL, receives a pool-allocated copy in still encoded form), 0 when the
   segment is empty, and -1 with parser->error set when the segment is
   followed by anything other than '/', '?', '#' or the end of the data.
   Percent-encoded sequences are skipped over here, not validated. */
int uri_parse_path_segment(struct uri_parser *parser, const char **segment_r)
{
	const unsigned char *seg_end;

	for (seg_end = parser->cur; seg_end < parser->end; seg_end++) {
		unsigned char c = *seg_end;

		if (c == '%')
			continue;
		if ((c & 0x80) != 0 ||
		    (_uri_char_lookup[c] & CHAR_MASK_PCHAR) == 0)
			break;
	}

	if (seg_end < parser->end && *seg_end != '/' &&
	    *seg_end != '?' && *seg_end != '#') {
		parser->error =
			"Path component contains invalid character";
		return -1;
	}

	if (seg_end == parser->cur)
		return 0;

	if (segment_r != NULL) {
		*segment_r = p_strdup_until(parser->pool,
					    parser->cur, seg_end);
	}
	parser->cur = seg_end;
	return 1;
}
/* Parses the path component at the current position, splitting it into
   segments at '/' and resolving "." and ".." segments while doing so.

   relative_r (when not NULL) is set to 0 when the path starts with '/';
   otherwise it is 1 plus the number of ".." segments that could not be
   resolved against an earlier segment. path_r (when not NULL) is set to
   NULL for an empty path and otherwise to a NULL-terminated array of the
   remaining segments, which is created in parser->pool.

   Returns 1 when a path was parsed, 0 when the path is empty (nothing was
   consumed) and -1 on error. */
int uri_parse_path(struct uri_parser *parser,
		   int *relative_r, const char *const **path_r)
{
	const unsigned char *pbegin = parser->cur;
	ARRAY_TYPE(const_string) segments;
	const char *segment = NULL;
	unsigned int count;
	int relative = 1;
	int ret;

	/* `count' tracks the number of collected segments, also when no
	   array is being built because path_r is NULL */
	count = 0;
	if (path_r != NULL)
		p_array_init(&segments, parser->pool, 16);
	else
		memset(&segments, 0, sizeof(segments));

	/* check for a leading '/' and indicate absolute path
	   when it is present
	 */
	if (parser->cur < parser->end && *parser->cur == '/') {
		parser->cur++;
		relative = 0;
	}

	/* parse first segment */
	if ((ret = uri_parse_path_segment(parser, &segment)) < 0)
		return -1;

	for (;;) {
		if (ret > 0) {
			/* strip dot segments */
			if (segment[0] == '.') {
				if (segment[1] == '.') {
					if (segment[2] == '\0') {
						/* '..' -> skip and... */
						segment = NULL;

						/* ... pop last segment (if any) */
						if (count > 0) {
							if (path_r != NULL) {
								i_assert(count == array_count(&segments));
								array_delete(&segments, count-1, 1);
							}
							count--;
						} else if ( relative > 0 ) {
							/* nothing left to pop in a
							   relative path: count the
							   unresolved '..' */
							relative++;
						}
					}
				} else if (segment[1] == '\0') {
					/* '.' -> skip */
					segment = NULL;
				}
			}
		} else {
			/* empty segment */
			segment = "";
		}

		/* segment is NULL here when it was a dot segment that got
		   stripped above */
		if (segment != NULL) {
			if (path_r != NULL)
				array_append(&segments, &segment, 1);
			count++;
		}

		if (parser->cur >= parser->end || *parser->cur != '/')
			break;
		parser->cur++;

		/* parse next path segment */
		if ((ret = uri_parse_path_segment(parser, &segment)) < 0)
			return -1;
	}

	if (relative_r != NULL)
		*relative_r = relative;
	if (path_r != NULL)
		*path_r = NULL;

	if (parser->cur == pbegin) {
		/* path part of URI is empty */
		return 0;
	}

	if (path_r != NULL) {
		/* special treatment for a trailing '..' or '.': the path
		   ends with an empty segment instead */
		if (segment == NULL) {
			segment = "";
			array_append(&segments, &segment, 1);
		}
		/* NULL-terminate the segment list */
		array_append_zero(&segments);
		*path_r = array_get(&segments, &count);
	}
	if (parser->cur < parser->end &&
	    *parser->cur != '?' && *parser->cur != '#') {
		parser->error = "Path component contains invalid character";
		return -1;
	}
	return 1;
}
/* Parse an optional query component ("?" query) at the current position.

   RFC 3986:

     URI           = { ... } [ "?" query ] { ... }
     query         = *( pchar / "/" / "?" )
     pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"

   Returns 0 when the current character is not '?', 1 when a query was
   parsed (the parser is advanced past it) and -1 when the query is
   followed by anything other than '#' or the end of input. query_r
   (optional) receives a copy of the text after the '?', allocated from
   parser->pool; the text is copied as-is, without decoding. */
int uri_parse_query(struct uri_parser *parser, const char **query_r)
{
	const unsigned char *const start = parser->cur;
	const unsigned char *pos;

	if (start >= parser->end || *start != '?')
		return 0;

	for (pos = start + 1; pos < parser->end; pos++) {
		/* '%' is accepted here; the hex digits that should follow
		   are not verified by this loop */
		if (*pos == '%')
			continue;
		if ((*pos & 0x80) != 0 ||
		    (_uri_char_lookup[*pos] & CHAR_MASK_QCHAR) == 0)
			break;
	}

	/* only a fragment may follow the query */
	if (pos < parser->end && *pos != '#') {
		parser->error = "Query component contains invalid character";
		return -1;
	}

	if (query_r != NULL)
		*query_r = p_strdup_until(parser->pool, start + 1, pos);
	parser->cur = pos;
	return 1;
}
/* Parse an optional fragment component ("#" fragment) at the current
   position.

   RFC 3986:

     URI           = { ... } [ "#" fragment ]
     fragment      = *( pchar / "/" / "?" )
     pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"

   Returns 0 when the current character is not '#', 1 when a fragment
   was parsed (the parser is advanced to the end of it) and -1 when the
   fragment does not extend to the end of the input. fragment_r
   (optional) receives a copy of the text after the '#', allocated from
   parser->pool; the text is copied as-is, without decoding. */
int uri_parse_fragment(struct uri_parser *parser, const char **fragment_r)
{
	const unsigned char *const start = parser->cur;
	const unsigned char *pos;

	if (start >= parser->end || *start != '#')
		return 0;

	pos = start + 1;
	while (pos < parser->end) {
		/* '%' is accepted without checking what follows it */
		if (*pos != '%') {
			if ((*pos & 0x80) != 0)
				break;
			if ((_uri_char_lookup[*pos] & CHAR_MASK_QCHAR) == 0)
				break;
		}
		pos++;
	}

	/* the fragment is the last component; nothing may follow it */
	if (pos < parser->end) {
		parser->error = "Fragment component contains invalid character";
		return -1;
	}

	if (fragment_r != NULL)
		*fragment_r = p_strdup_until(parser->pool, start + 1, pos);
	parser->cur = pos;
	return 1;
}
/* Set up the parser to read `size` bytes starting at `data`. The parse
   position is placed at the start of the data, and both the error
   message and the temporary buffer are cleared. `pool` is stored in the
   parser for later use. */
void uri_parser_init_data(struct uri_parser *parser,
			  pool_t pool, const unsigned char *data, size_t size)
{
	parser->pool = pool;

	/* input boundaries and current position */
	parser->begin = data;
	parser->cur = data;
	parser->end = data + size;

	/* no error and no temporary buffer yet */
	parser->tmpbuf = NULL;
	parser->error = NULL;
}
/* Set up the parser to read the NUL-terminated string `uri`. This is a
   convenience wrapper around uri_parser_init_data() that uses the string
   length as the data size. */
void uri_parser_init(struct uri_parser *parser,
		     pool_t pool, const char *uri)
{
	const unsigned char *data = (const unsigned char *)uri;
	size_t size = strlen(uri);

	uri_parser_init_data(parser, pool, data, size);
}
/* Return the parser's temporary string buffer, emptied for reuse. The
   buffer is created from parser->pool with the given initial size on
   first use; on later calls the existing buffer is truncated to zero
   length and `size` is not used. */
string_t *uri_parser_get_tmpbuf(struct uri_parser *parser, size_t size)
{
	if (parser->tmpbuf != NULL) {
		/* reuse the existing buffer */
		str_truncate(parser->tmpbuf, 0);
		return parser->tmpbuf;
	}

	parser->tmpbuf = str_new(parser->pool, size);
	return parser->tmpbuf;
}
/*
* Generic URI manipulation
*/
/* Copy the host `src` into `dest`, duplicating the host name into
   `pool`. When `src` has no name but does carry an IP address, the
   textual form of that address is used as the name of the copy. */
void uri_host_copy(pool_t pool, struct uri_host *dest,
		   const struct uri_host *src)
{
	const char *name;

	if (src->name != NULL || src->ip.family == 0) {
		name = src->name;
	} else {
		/* no name given: derive one from the IP address */
		name = net_ip2addr(&src->ip);
		i_assert(*name != '\0');
	}

	*dest = *src;
	dest->name = p_strdup(pool, name);
}
/*
* Generic URI construction
*/
/* Append `data` to `out`, percent-encoding every byte that may not
   appear literally.

   A byte is written as "%xx" (two lowercase hex digits) when:
    - its high bit is set, or
    - esc_table[byte] has none of the bits in esc_mask set, or
    - it occurs in esc_extra.

   esc_extra may be NULL or "" when there are no additional characters
   to escape. All other bytes are copied unchanged, in runs. */
void uri_data_encode(string_t *out,
		     const unsigned char esc_table[256],
		     unsigned char esc_mask, const char *esc_extra,
		     const char *data)
{
	const unsigned char *pbegin, *p;

	/* pbegin marks the start of the pending run of literal bytes */
	pbegin = p = (const unsigned char *)data;
	while (*p != '\0') {
		if ((*p & 0x80) != 0 || (esc_table[*p] & esc_mask) == 0 ||
		    (esc_extra != NULL &&
		     strchr(esc_extra, (char)*p) != NULL)) {
			/* flush the literal run before the escaped byte */
			if ((p - pbegin) > 0)
				str_append_n(out, pbegin, p - pbegin);
			str_printfa(out, "%%%02x", *p);
			p++;
			pbegin = p;
		} else {
			p++;
		}
	}
	/* flush whatever literal bytes remain */
	if ((p - pbegin) > 0)
		str_append_n(out, pbegin, p - pbegin);
}
/* Append the scheme followed by the ':' delimiter. The scheme is
   written as given, without any encoding. */
void uri_append_scheme(string_t *out, const char *scheme)
{
	str_printfa(out, "%s:", scheme);
}
/* Append `data` percent-encoded for use inside the userinfo part. The
   characters listed in `esc` are escaped in addition to those not
   allowed by CHAR_MASK_UCHAR. */
void uri_append_user_data(string_t *out, const char *esc,
			  const char *data)
{
	const unsigned char allowed = CHAR_MASK_UCHAR;

	uri_data_encode(out, _uri_char_lookup, allowed, esc, data);
}
/* Append the percent-encoded userinfo followed by the '@' delimiter. */
void uri_append_userinfo(string_t *out, const char *userinfo)
{
	/* same encoding as uri_append_user_data() with no extra escapes */
	uri_data_encode(out, _uri_char_lookup, CHAR_MASK_UCHAR, "", userinfo);
	str_append_c(out, '@');
}
/* Append a host name, percent-encoding everything that is neither an
   unreserved character nor a sub-delimiter. */
void uri_append_host_name(string_t *out, const char *name)
{
	const unsigned char allowed =
		CHAR_MASK_UNRESERVED | CHAR_MASK_SUB_DELIMS;

	uri_data_encode(out, _uri_char_lookup, allowed, "", name);
}
/* Append an IP address as a URI host. An IPv4 address is written in its
   plain textual form; an IPv6 address is enclosed in '[' and ']'. The
   address family must be set and must be AF_INET or AF_INET6. */
void uri_append_host_ip(string_t *out, const struct ip_addr *host_ip)
{
	const char *addr_text = net_ip2addr(host_ip);

	i_assert(host_ip->family != 0);

	if (host_ip->family != AF_INET) {
		/* IPv6 literal: wrap the address in brackets */
		i_assert(host_ip->family == AF_INET6);
		str_append_c(out, '[');
		str_append(out, addr_text);
		str_append_c(out, ']');
	} else {
		str_append(out, addr_text);
	}
}
/* Append the host part of a URI. The host name is preferred when it is
   set; otherwise the host's IP address is written. */
void uri_append_host(string_t *out, const struct uri_host *host)
{
	if (host->name == NULL) {
		uri_append_host_ip(out, &host->ip);
		return;
	}

	if (*host->name == '[') {
		/* assumed to be an IPv6 literal: copy it without encoding */
		str_append(out, host->name);
	} else {
		uri_append_host_name(out, host->name);
	}
}
/* Append ":<port>" to the URI. Nothing is written for port 0. */
void uri_append_port(string_t *out, in_port_t port)
{
	if (port == 0)
		return;
	str_printfa(out, ":%u", port);
}
/* Append `data` percent-encoded for use inside a single path segment.
   The characters listed in `esc` are escaped in addition to those not
   allowed by CHAR_MASK_PCHAR. */
void uri_append_path_segment_data(string_t *out, const char *esc,
				  const char *data)
{
	const unsigned char allowed = CHAR_MASK_PCHAR;

	uri_data_encode(out, _uri_char_lookup, allowed, esc, data);
}
/* Append a '/' followed by one percent-encoded path segment. An empty
   segment produces just the '/'.

   The segment is encoded with the path segment rules (CHAR_MASK_PCHAR)
   rather than the whole-path rules, so characters that are only valid
   in a complete path are escaped instead of being copied through.
   NOTE(review): per RFC 3986, pchar excludes '/', so a '/' inside
   `segment` is expected to come out percent-encoded - confirm against
   the _uri_char_lookup table. */
void uri_append_path_segment(string_t *out, const char *segment)
{
	str_append_c(out, '/');
	if (*segment != '\0')
		uri_append_path_segment_data(out, "", segment);
}
/* Append `data` percent-encoded for use inside a path. The characters
   listed in `esc` are escaped in addition to those not allowed by
   CHAR_MASK_PFCHAR. */
void uri_append_path_data(string_t *out, const char *esc,
			  const char *data)
{
	const unsigned char allowed = CHAR_MASK_PFCHAR;

	uri_data_encode(out, _uri_char_lookup, allowed, esc, data);
}
/* Append a '/' followed by the percent-encoded path. An empty path
   produces just the '/'. */
void uri_append_path(string_t *out, const char *path)
{
	str_append_c(out, '/');
	if (*path == '\0')
		return;
	uri_append_path_data(out, "", path);
}
/* Append `data` percent-encoded for use inside a query. The characters
   listed in `esc` are escaped in addition to those not allowed by
   CHAR_MASK_QCHAR. */
void uri_append_query_data(string_t *out, const char *esc,
			   const char *data)
{
	const unsigned char allowed = CHAR_MASK_QCHAR;

	uri_data_encode(out, _uri_char_lookup, allowed, esc, data);
}
/* Append the '?' delimiter followed by the percent-encoded query. An
   empty query produces just the '?'. */
void uri_append_query(string_t *out, const char *query)
{
	str_append_c(out, '?');
	if (*query == '\0')
		return;
	uri_append_query_data(out, "", query);
}
/* Append `data` percent-encoded for use inside a fragment. The
   characters listed in `esc` are escaped in addition to those not
   allowed by CHAR_MASK_QCHAR (the same mask that is used for queries). */
void uri_append_fragment_data(string_t *out, const char *esc,
			      const char *data)
{
	const unsigned char allowed = CHAR_MASK_QCHAR;

	uri_data_encode(out, _uri_char_lookup, allowed, esc, data);
}
/* Append the '#' delimiter followed by the percent-encoded fragment. An
   empty fragment produces just the '#'. */
void uri_append_fragment(string_t *out, const char *fragment)
{
	str_append_c(out, '#');
	if (*fragment == '\0')
		return;
	uri_append_fragment_data(out, "", fragment);
}