uri-util.c revision 639bb36b12b9f9bb54c8bb1be50eac623622f8a0
/* Copyright (c) 2010-2012 Dovecot authors, see the included COPYING file */
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen#include "lib.h"
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen#include "array.h"
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen#include "str.h"
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen#include "network.h"
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen#include "uri-util.h"
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen#include <ctype.h>
4fdf60e22b5340fe35f083b9ea5eb3fd1cf2e742Timo Sirainen
/*
 * Generic URI parsing.
 *
 * [URI-GEN] RFC3986 Appendix A:
 *
 * host          = IP-literal / IPv4address / reg-name
 * port          = *DIGIT
 * reg-name      = *( unreserved / pct-encoded / sub-delims )
 * unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 * pct-encoded   = "%" HEXDIG HEXDIG
 * sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *               / "*" / "+" / "," / ";" / "="
 * IP-literal    = "[" ( IPv6address / IPvFuture ) "]"
 * IPvFuture     = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
 * IPv6address   =                            6( h16 ":" ) ls32
 *               /                       "::" 5( h16 ":" ) ls32
 *               / [               h16 ] "::" 4( h16 ":" ) ls32
 *               / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
 *               / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
 *               / [ *3( h16 ":" ) h16 ] "::"    h16 ":"   ls32
 *               / [ *4( h16 ":" ) h16 ] "::"              ls32
 *               / [ *5( h16 ":" ) h16 ] "::"              h16
 *               / [ *6( h16 ":" ) h16 ] "::"
 * h16           = 1*4HEXDIG
 * ls32          = ( h16 ":" h16 ) / IPv4address
 * IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
 * dec-octet     = DIGIT                 ; 0-9
 *               / %x31-39 DIGIT         ; 10-99
 *               / "1" 2DIGIT            ; 100-199
 *               / "2" %x30-34 DIGIT     ; 200-249
 *               / "25" %x30-35          ; 250-255
 */
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen#define URI_MAX_SCHEME_NAME_LEN 64
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen
/* Character lookup table
 *
 * unreserved  = ALPHA / DIGIT / "-" / "." / "_" / "~"     [bit0]
 * sub-delims  = "!" / "$" / "&" / "'" / "(" / ")"
 *             / "*" / "+" / "," / ";" / "="               [bit1]
 * gen-delims  = ":" / "/" / "?" / "#" / "[" / "]" / "@"   [bit2]
 * pchar       = unreserved / sub-delims / ":" / "@"       [bit0|bit1|bit3]
 * 'uchar'     = unreserved / sub-delims / ":"             [bit0|bit1|bit4]
 * 'fchar'     = pchar / "/" / "?"                         [bit0|bit1|bit3|bit5]
 *
 */
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen
/* Character class bitmask, indexed by byte value.
 *
 *   0x01 (bit0)  unreserved
 *   0x02 (bit1)  sub-delims
 *   0x04 (bit2)  gen-delims
 *   0x08 (bit3)  ":" and "@"  (pchar   = bit0|bit1|bit3)
 *   0x10 (bit4)  ":"          ('uchar' = bit0|bit1|bit4)
 *   0x20 (bit5)  "/" and "?"  ('fchar' = bit0|bit1|bit3|bit5)
 *
 * Only the first 128 entries are listed; the remaining entries (0x80-0xff)
 * are implicitly zero-initialized and therefore belong to no class.
 */
static const unsigned char _uri_char_lookup[256] = {
	 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, /* 00 */
	 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, /* 10 */
	 0,  2,  0,  4,  2,  0,  2,  2,  2,  2,  2,  2,  2,  1,  1, 36, /* 20 */
	 1,  1,  1,  1,  1,  1,  1,  1,  1,  1, 28,  2,  0,  2,  0, 36, /* 30 */
	12,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, /* 40 */
	 1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  4,  0,  4,  0,  1, /* 50 */
	 0,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, /* 60 */
	 1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  0,  0,  0,  1,  0, /* 70 */
};
a94936bafd127680184da114c6a177b37ff656e5Timo Sirainen
/* Convert a single hexadecimal digit character to its numeric value.
 *
 * Accepts '0'-'9', 'a'-'f' and 'A'-'F'. Returns the value (0..15), or -1
 * when the character is not a hexadecimal digit.
 */
static inline int _decode_hex_digit(const unsigned char digit)
{
	if (digit >= '0' && digit <= '9')
		return digit - '0';
	if (digit >= 'a' && digit <= 'f')
		return digit - 'a' + 0x0a;
	if (digit >= 'A' && digit <= 'F')
		return digit - 'A' + 0x0A;
	return -1;
}
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen
4fdf60e22b5340fe35f083b9ea5eb3fd1cf2e742Timo Sirainenstatic int
4fdf60e22b5340fe35f083b9ea5eb3fd1cf2e742Timo Sirainenuri_parse_pct_encoded(struct uri_parser *parser, const unsigned char **p,
4fdf60e22b5340fe35f083b9ea5eb3fd1cf2e742Timo Sirainen const unsigned char *pend, unsigned char *ch_r)
4fdf60e22b5340fe35f083b9ea5eb3fd1cf2e742Timo Sirainen{
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen int value;
fe363b433b8038a69b55169da9dca27892ad7d18Timo Sirainen
4fdf60e22b5340fe35f083b9ea5eb3fd1cf2e742Timo Sirainen if (**p == 0 || *(*p+1) == 0 || (pend != NULL && *p+1 >= pend)) {
4fdf60e22b5340fe35f083b9ea5eb3fd1cf2e742Timo Sirainen parser->error = "Unexpected URI boundary after '%'";
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen return -1;
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen }
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen
252db51b6c0a605163326b3ea5d09e9936ca3b29Timo Sirainen if ((value = _decode_hex_digit(**p)) < 0) {
a94936bafd127680184da114c6a177b37ff656e5Timo Sirainen parser->error = t_strdup_printf(
a94936bafd127680184da114c6a177b37ff656e5Timo Sirainen "Expecting hex digit after '%%', but found '%c'", **p);
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen return -1;
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen }
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen
74f810327aca91b3375d3fc963bce8076785b1cbTimo Sirainen *ch_r = (value & 0x0f) << 4;
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen *p += 1;
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen
a94936bafd127680184da114c6a177b37ff656e5Timo Sirainen if ((value = _decode_hex_digit(**p)) < 0) {
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen parser->error = t_strdup_printf(
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen "Expecting hex digit after '%%%c', but found '%c'", *((*p)-1), **p);
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen return -1;
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen }
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen *ch_r |= (value & 0x0f);
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen *p += 1;
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen if (*ch_r == '\0') {
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen parser->error =
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen "Percent encoding is not allowed to encode NUL character";
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen return -1;
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen }
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen return 1;
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen}
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainenstatic int
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainenuri_parse_unreserved_char(struct uri_parser *parser, unsigned char *ch_r)
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen{
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen if (*parser->cur == '%') {
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen parser->cur++;
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen if (uri_parse_pct_encoded(parser, &parser->cur,
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen parser->end, ch_r) <= 0)
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen return -1;
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen return 1;
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen }
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen if ((*parser->cur & 0x80) != 0)
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen return 0;
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen
4fdf60e22b5340fe35f083b9ea5eb3fd1cf2e742Timo Sirainen if (_uri_char_lookup[*parser->cur] & 0x01) {
4fdf60e22b5340fe35f083b9ea5eb3fd1cf2e742Timo Sirainen *ch_r = *parser->cur;
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen parser->cur++;
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen return 1;
4fdf60e22b5340fe35f083b9ea5eb3fd1cf2e742Timo Sirainen }
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen return 0;
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen}
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainenint uri_parse_unreserved(struct uri_parser *parser, string_t *part)
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen{
43d32cbe60fdaef2699d99f1ca259053e9350411Timo Sirainen int len = 0;
a94936bafd127680184da114c6a177b37ff656e5Timo Sirainen
a94936bafd127680184da114c6a177b37ff656e5Timo Sirainen while (parser->cur < parser->end) {
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen int ret;
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen unsigned char ch = 0;
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen if ((ret = uri_parse_unreserved_char(parser, &ch)) < 0)
a94936bafd127680184da114c6a177b37ff656e5Timo Sirainen return -1;
a94936bafd127680184da114c6a177b37ff656e5Timo Sirainen
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen if (ret == 0)
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen break;
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen if (part != NULL)
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen str_append_c(part, ch);
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen len++;
a94936bafd127680184da114c6a177b37ff656e5Timo Sirainen }
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen return len > 0 ? 1 : 0;
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen}
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainenbool uri_data_decode(struct uri_parser *parser, const char *data,
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen const char *until, const char **decoded_r)
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen{
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen const unsigned char *p = (const unsigned char *)data;
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen const unsigned char *pend = (const unsigned char *)until;
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen string_t *decoded;
74f810327aca91b3375d3fc963bce8076785b1cbTimo Sirainen
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen if (pend == NULL) {
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen /* NULL means unlimited; solely rely on '\0' */
fe363b433b8038a69b55169da9dca27892ad7d18Timo Sirainen pend = (const unsigned char *)(size_t)-1;
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen }
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen if (p >= pend || *p == '\0') {
fe363b433b8038a69b55169da9dca27892ad7d18Timo Sirainen if (decoded_r != NULL)
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen *decoded_r = "";
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen return TRUE;
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen }
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen decoded = uri_parser_get_tmpbuf(parser, 256);
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen while (p < pend && *p != '\0') {
fe363b433b8038a69b55169da9dca27892ad7d18Timo Sirainen unsigned char ch;
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen if (*p == '%') {
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen p++;
fe363b433b8038a69b55169da9dca27892ad7d18Timo Sirainen if (uri_parse_pct_encoded(parser, &p, NULL, &ch) <= 0)
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen return FALSE;
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen
a94936bafd127680184da114c6a177b37ff656e5Timo Sirainen str_append_c(decoded, ch);
a94936bafd127680184da114c6a177b37ff656e5Timo Sirainen } else {
a94936bafd127680184da114c6a177b37ff656e5Timo Sirainen str_append_c(decoded, *p);
a94936bafd127680184da114c6a177b37ff656e5Timo Sirainen p++;
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen }
a94936bafd127680184da114c6a177b37ff656e5Timo Sirainen }
a94936bafd127680184da114c6a177b37ff656e5Timo Sirainen
a94936bafd127680184da114c6a177b37ff656e5Timo Sirainen if (decoded_r != NULL)
a94936bafd127680184da114c6a177b37ff656e5Timo Sirainen *decoded_r = t_strdup(str_c(decoded));
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen return TRUE;
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen}
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainenint uri_cut_scheme(const char **uri_p, const char **scheme_r)
252db51b6c0a605163326b3ea5d09e9936ca3b29Timo Sirainen{
252db51b6c0a605163326b3ea5d09e9936ca3b29Timo Sirainen const char *p = *uri_p;
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen size_t len = 1;
/* RFC 3968:
* scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
*/
if (!i_isalpha(*p))
return -1;
p++;
while (len < URI_MAX_SCHEME_NAME_LEN && *p != '\0') {
if (!i_isalnum(*p) && *p != '+' && *p != '-' && *p != '.')
break;
p++;
len++;
}
if (*p != ':')
return -1;
*scheme_r = t_strdup_until(*uri_p, p);
*uri_p = p + 1;
return 0;
}
/* Parse the scheme at the parser's current position.
 *
 * Returns 0 when no input is left, -1 when uri_cut_scheme() rejects the
 * input, and 1 on success, in which case the parser is advanced past the ':'
 * and *scheme_r holds the scheme name.
 */
int uri_parse_scheme(struct uri_parser *parser, const char **scheme_r)
{
	const char *pos = (const char *)parser->cur;

	if (parser->cur >= parser->end)
		return 0;

	if (uri_cut_scheme(&pos, scheme_r) < 0)
		return -1;

	parser->cur = (const unsigned char *)pos;
	return 1;
}
/* Parse a decimal octet (0-255) at the parser's current position.
 *
 * Consumed digits are appended to literal when it is not NULL.
 *
 * Returns 1 and stores the value in *octet_r when at least one digit was
 * parsed, 0 when there is no digit at the current position, and -1 when the
 * value exceeds 255. On -1 the digits read so far remain consumed (and
 * appended to literal); restoring the position is left to the caller.
 */
static int
uri_parse_dec_octet(struct uri_parser *parser, string_t *literal,
		    uint8_t *octet_r)
{
	/* Accumulate in a type wider than the result: a wrapped uint8_t can
	   end up larger than its previous value (e.g. "300" wraps to 44), so
	   overflow cannot be detected by comparing against the previous value. */
	unsigned int octet = 0;
	int count = 0;

	/* RFC 3986:
	 *
	 * dec-octet     = DIGIT                 ; 0-9
	 *               / %x31-39 DIGIT         ; 10-99
	 *               / "1" 2DIGIT            ; 100-199
	 *               / "2" %x30-34 DIGIT     ; 200-249
	 *               / "25" %x30-35          ; 250-255
	 */

	while (parser->cur < parser->end && i_isdigit(*parser->cur)) {
		octet = octet * 10 + (unsigned int)(parser->cur[0] - '0');
		if (octet > 255)
			return -1;

		if (literal != NULL)
			str_append_c(literal, *parser->cur);

		parser->cur++;
		count++;
	}

	if (count == 0)
		return 0;

	*octet_r = (uint8_t)octet;
	return 1;
}
/* Parse a dotted-quad IPv4 address at the parser's current position.
 *
 * Consumed characters are appended to literal when it is not NULL.
 *
 * Returns 1 when exactly four dot-separated octets were parsed; the address
 * is then stored in network byte order in *ip4_r (when not NULL). Returns the
 * result of the first octet parse (0 or -1) when the input does not start
 * with a valid octet, and -1 when the address is incomplete or malformed. On
 * failure the parser position and literal may have been modified; restoring
 * them is left to the caller.
 */
static int
uri_parse_ipv4address(struct uri_parser *parser, string_t *literal,
		      struct in_addr *ip4_r)
{
	uint8_t octet;
	uint32_t ip = 0;
	int ret;
	int i;

	/* RFC 3986:
	 *
	 * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
	 */

	if ((ret = uri_parse_dec_octet(parser, literal, &octet)) <= 0)
		return ret;
	ip = octet;

	for (i = 0; i < 3; i++) {
		/* All three remaining octets are mandatory: running out of
		   input here means this is not an IPv4 address */
		if (parser->cur >= parser->end || *parser->cur != '.')
			return -1;

		if (literal != NULL)
			str_append_c(literal, '.');
		parser->cur++;

		if (uri_parse_dec_octet(parser, literal, &octet) <= 0)
			return -1;
		ip = (ip << 8) + octet;
	}

	if (ip4_r != NULL)
		ip4_r->s_addr = htonl(ip);
	return 1;
}
/* Parse a registered name at the parser's current position.
 *
 * RFC 3986:
 *
 * reg-name = *( unreserved / pct-encoded / sub-delims )
 *
 * Every accepted character is appended to reg_name (percent-encoded triplets
 * in decoded form) when reg_name is not NULL.
 *
 * Returns 1 when at least one character was consumed, 0 when none was, and -1
 * when an invalid percent-encoded triplet is encountered.
 */
static int uri_parse_reg_name(struct uri_parser *parser, string_t *reg_name)
{
	int count = 0;

	while (parser->cur < parser->end) {
		unsigned char ch;
		int ret = uri_parse_unreserved_char(parser, &ch);

		if (ret < 0)
			return -1;

		if (ret == 0) {
			/* not unreserved / pct-encoded; try sub-delims (bit1) */
			ch = *parser->cur;
			if ((ch & 0x80) != 0 ||
			    (_uri_char_lookup[ch] & 0x02) == 0)
				break;
			parser->cur++;
		}

		if (reg_name != NULL)
			str_append_c(reg_name, ch);
		count++;
	}

	return count > 0 ? 1 : 0;
}
#ifdef HAVE_IPV6
/* Parse a bracketed IP literal at the parser's current position.
 *
 * IP-literal  = "[" ( IPv6address / IPvFuture  ) "]"
 * IPvFuture   = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
 * IPv6address = ; Syntax not relevant: parsed using inet_pton()
 *
 * The caller must already have verified that parser->cur points at '['.
 *
 * The literal including both brackets is appended to literal when it is not
 * NULL. Returns 1 with the address stored in *ip6_r on success. Returns -1
 * with parser->error set when the closing ']' is missing, the address is
 * empty, it is an IPvFuture address, or inet_pton() rejects it.
 */
static int
uri_parse_ip_literal(struct uri_parser *parser, string_t *literal,
		     struct in6_addr *ip6_r)
{
	const unsigned char *p;
	const char *address;

	/* Scan for end of address */
	for (p = parser->cur+1; p < parser->end; p++) {
		if (*p == ']')
			break;
	}

	if (p >= parser->end) {
		parser->error = "Expecting ']' at end of IP-literal";
		return -1;
	}

	/* Copy "[" ... "]" only; p points at the closing bracket */
	if (literal != NULL)
		str_append_n(literal, parser->cur, p-parser->cur+1);
	address = t_strdup_until(parser->cur+1, p);
	parser->cur = p + 1;

	if (*address == '\0') {
		parser->error = "Empty IPv6 host address";
		return -1;
	}
	if (*address == 'v') {
		parser->error = t_strdup_printf(
			"Future IP host address '%s' not supported", address);
		return -1;
	}
	if (inet_pton(AF_INET6, address, ip6_r) <= 0) {
		parser->error = t_strdup_printf(
			"Invalid IPv6 host address '%s'", address);
		return -1;
	}
	return 1;
}
#endif
/* Parse the host component at the parser's current position.
 *
 * RFC 3986:
 *
 * host = IP-literal / IPv4address / reg-name
 *
 * The alternatives are tried in that order. When auth is not NULL, the host
 * literal is stored in it together with the parsed IP address, if any.
 *
 * Returns 1 when a host was parsed, 0 when there is no host at the current
 * position, and -1 on error.
 */
static int uri_parse_host(struct uri_parser *parser, struct uri_authority *auth)
{
	const unsigned char *start;
	struct in_addr ip4;
	struct in6_addr ip6;
	string_t *literal;
	int ret;

	literal = uri_parser_get_tmpbuf(parser, 256);

	/* IP-literal */
	if (parser->cur < parser->end && *parser->cur == '[') {
#ifdef HAVE_IPV6
		if (uri_parse_ip_literal(parser, literal, &ip6) <= 0)
			return -1;

		if (auth != NULL) {
			auth->host_literal = t_strdup(str_c(literal));
			auth->host_ip.family = AF_INET6;
			auth->host_ip.u.ip6 = ip6;
			auth->have_host_ip = TRUE;
		}
		return 1;
#else
		parser->error = "IPv6 host address is not supported";
		return -1;
#endif
	}

	/* IPv4address; remember where we started, so that a failed attempt
	   can be retried as a reg-name */
	start = parser->cur;
	ret = uri_parse_ipv4address(parser, literal, &ip4);
	if (ret > 0) {
		if (auth != NULL) {
			auth->host_literal = t_strdup(str_c(literal));
			auth->host_ip.family = AF_INET;
			auth->host_ip.u.ip4 = ip4;
			auth->have_host_ip = TRUE;
		}
		return ret;
	}
	parser->cur = start;
	str_truncate(literal, 0);

	/* reg-name */
	ret = uri_parse_reg_name(parser, literal);
	if (ret > 0 && auth != NULL) {
		auth->host_literal = t_strdup(str_c(literal));
		auth->have_host_ip = FALSE;
	}
	return ret;
}
/* Parse the port number at the parser's current position.
 *
 * RFC 3986:
 *
 * port = *DIGIT
 *
 * Returns 1 when at least one digit was parsed; the port is then stored in
 * auth (when not NULL). Returns 0 when there is no digit at the current
 * position, and -1 with parser->error set when the value does not fit in
 * in_port_t.
 */
static int uri_parse_port(struct uri_parser *parser, struct uri_authority *auth)
{
	/* Accumulate in a type wider than in_port_t: a wrapped in_port_t can
	   end up larger than its previous value (e.g. "99999"), so overflow
	   cannot be detected by comparing against the previous value. */
	const unsigned long port_max = (in_port_t)-1;
	unsigned long port = 0;
	int count = 0;

	while (parser->cur < parser->end && i_isdigit(*parser->cur)) {
		port = port * 10 + (unsigned long)(parser->cur[0] - '0');
		if (port > port_max) {
			parser->error = "Port number is too high";
			return -1;
		}
		parser->cur++;
		count++;
	}

	if (count == 0)
		return 0;

	if (auth != NULL) {
		auth->port = (in_port_t)port;
		auth->have_port = TRUE;
	}
	return 1;
}
/* Parse the authority component, including its leading "//".
 *
 * hier-part     = "//" authority {...}
 * relative-part = "//" authority {...}
 * authority     = [ userinfo "@" ] host [ ":" port ]
 *
 * When auth is not NULL it is zeroed once the "//" has been consumed and then
 * filled with the parsed components; the userinfo is stored in its encoded
 * form.
 *
 * Returns 0 (consuming nothing) when the remaining input is not "//" followed
 * by at least one more byte, 1 on success, and a negative value with
 * parser->error set on failure.
 */
int uri_parse_authority(struct uri_parser *parser, struct uri_authority *auth)
{
	const unsigned char *scan;
	int ret;

	/* Parse "//" as part of authority */
	if ((parser->end - parser->cur) <= 2)
		return 0;
	if (parser->cur[0] != '/' || parser->cur[1] != '/')
		return 0;
	parser->cur += 2;

	if (auth != NULL)
		memset(auth, 0, sizeof(*auth));

	/* Scan ahead to find out whether there is a [ userinfo "@" ] part:
	   stop at the first 8bit character or at the first character that is
	   neither '%' nor an 'uchar' (mask 0x13 = bit0|bit1|bit4) */
	scan = parser->cur;
	while (scan < parser->end) {
		if ((*scan & 0x80) != 0)
			break;
		if (*scan != '%' && (_uri_char_lookup[*scan] & 0x13) == 0)
			break;
		scan++;
	}

	/* Extract userinfo */
	if (scan < parser->end && *scan == '@') {
		if (auth != NULL)
			auth->enc_userinfo = t_strdup_until(parser->cur, scan);
		parser->cur = scan+1;
	}

	/* host */
	ret = uri_parse_host(parser, auth);
	if (ret == 0) {
		parser->error = "Missing 'host' component";
		return -1;
	}
	if (ret < 0)
		return ret;

	/* [ ":" port ] */
	if (parser->cur >= parser->end || *parser->cur != ':')
		return 1;
	parser->cur++;

	ret = uri_parse_port(parser, auth);
	return ret < 0 ? ret : 1;
}
/* Parse a single path segment at the parser's current position.
 *
 * The segment consists of 'pchar' characters (mask 0x0B = bit0|bit1|bit3)
 * and '%' characters. The '%' is accepted as is; the hex digits following it
 * are not validated here and the segment is returned in encoded form.
 *
 * Returns 0 when the segment is empty (nothing consumed). Otherwise returns 1,
 * advances the parser past the segment and stores a copy of it in *segment_r
 * (when not NULL).
 */
int uri_parse_path_segment(struct uri_parser *parser, const char **segment_r)
{
	const unsigned char *seg_end;

	for (seg_end = parser->cur; seg_end < parser->end; seg_end++) {
		if (*seg_end == '%')
			continue;
		if ((*seg_end & 0x80) != 0 ||
		    (_uri_char_lookup[*seg_end] & 0x0B) == 0)
			break;
	}

	if (seg_end == parser->cur)
		return 0;

	if (segment_r != NULL)
		*segment_r = t_strdup_until(parser->cur, seg_end);
	parser->cur = seg_end;
	return 1;
}
/* Parse a path at the parser's current position and strip its dot segments.
 *
 * The path is split at '/' into segments, which stay in encoded form. A "."
 * segment is dropped and a ".." segment removes the segment before it. Only
 * segments that are exactly "." or ".." are handled this way; names that
 * merely start with a dot (".hidden", "..foo") are ordinary segments. Empty
 * segments are kept as "".
 *
 * *relative_r is set to 0 when the path starts with '/'. Otherwise it is set
 * to 1 plus the number of ".." segments that had no preceding segment left to
 * remove.
 *
 * *path_r receives the segment array, followed by one extra element added
 * with array_append_space() (presumably a NULL terminator - confirm against
 * the array API).
 *
 * Returns 1 on success, -1 when parsing a segment fails.
 */
int uri_parse_path(struct uri_parser *parser,
		   int *relative_r, const char *const **path_r)
{
	ARRAY_TYPE(const_string) segments;
	const char *segment;
	unsigned int count;
	int relative = 1;
	int ret;

	t_array_init(&segments, 16);

	if (parser->cur < parser->end && *parser->cur == '/') {
		parser->cur++;
		relative = 0;
	}

	if ((ret = uri_parse_path_segment(parser, &segment)) < 0)
		return -1;

	for (;;) {
		if (ret == 0) {
			/* empty segment */
			segment = "";
			array_append(&segments, &segment, 1);
		} else if (segment[0] == '.' && segment[1] == '\0') {
			/* '.' -> skip */
		} else if (segment[0] == '.' && segment[1] == '.' &&
			   segment[2] == '\0') {
			/* '..' -> pop last segment (if any) */
			count = array_count(&segments);
			if (count > 0) {
				array_delete(&segments, count-1, 1);
			} else if (relative > 0) {
				relative++;
			}
		} else {
			array_append(&segments, &segment, 1);
		}

		if (parser->cur >= parser->end || *parser->cur != '/')
			break;
		parser->cur++;

		if ((ret = uri_parse_path_segment(parser, &segment)) < 0)
			return -1;
	}

	(void)array_append_space(&segments);
	*path_r = array_get(&segments, &count);
	*relative_r = relative;
	return 1;
}
/* Parse the query component at the parser's current position.
 *
 * RFC 3986:
 *
 * URI   = { ... } [ "?" query ] { ... }
 * query = *( pchar / "/" / "?" )
 * pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
 *
 * A '%' is accepted as is; the hex digits following it are not validated
 * here and the query is returned in encoded form.
 *
 * Returns 0 when the current character is not '?' (nothing consumed).
 * Otherwise returns 1, advances the parser past the query and stores a copy
 * of it, without the leading '?', in *query_r (when not NULL).
 */
int uri_parse_query(struct uri_parser *parser, const char **query_r)
{
	const unsigned char *scan = parser->cur;

	if (scan >= parser->end || *scan != '?')
		return 0;

	/* mask 0x2B = bit0|bit1|bit3|bit5 ('fchar') */
	for (scan++; scan < parser->end; scan++) {
		if (*scan == '%')
			continue;
		if ((*scan & 0x80) != 0 ||
		    (_uri_char_lookup[*scan] & 0x2B) == 0)
			break;
	}

	if (query_r != NULL)
		*query_r = t_strdup_until(parser->cur+1, scan);
	parser->cur = scan;
	return 1;
}
/* Parse the fragment component at the parser's current position.
 *
 * RFC 3986:
 *
 * URI      = { ... } [ "#" fragment ]
 * fragment = *( pchar / "/" / "?" )
 * pchar    = unreserved / pct-encoded / sub-delims / ":" / "@"
 *
 * A '%' is accepted as is; the hex digits following it are not validated
 * here and the fragment is returned in encoded form.
 *
 * Returns 0 when the current character is not '#' (nothing consumed).
 * Otherwise returns 1, advances the parser past the fragment and stores a
 * copy of it, without the leading '#', in *fragment_r (when not NULL).
 */
int uri_parse_fragment(struct uri_parser *parser, const char **fragment_r)
{
	const unsigned char *scan = parser->cur;

	if (scan >= parser->end || *scan != '#')
		return 0;

	/* mask 0x2B = bit0|bit1|bit3|bit5 ('fchar') */
	for (scan++; scan < parser->end; scan++) {
		if (*scan == '%')
			continue;
		if ((*scan & 0x80) != 0 ||
		    (_uri_char_lookup[*scan] & 0x2B) == 0)
			break;
	}

	if (fragment_r != NULL)
		*fragment_r = t_strdup_until(parser->cur+1, scan);
	parser->cur = scan;
	return 1;
}
/* Initialize parser for parsing the NUL-terminated string data.
 *
 * The begin and cur positions are set to the start of data and end to its
 * NUL terminator. The error and the temporary buffer are reset to NULL, and
 * pool is stored in the parser.
 */
void uri_parser_init(struct uri_parser *parser, pool_t pool, const char *data)
{
	unsigned char *start = (unsigned char *)data;

	parser->pool = pool;
	parser->cur = start;
	parser->begin = parser->cur;
	parser->end = start + strlen(data);
	parser->error = NULL;
	parser->tmpbuf = NULL;
}
/* Return the parser's temporary string buffer, emptied.
 *
 * The buffer is created with t_str_new(size) on first use; afterwards the
 * same buffer is truncated to zero length and returned again, so size only
 * has an effect on the first call.
 */
string_t *uri_parser_get_tmpbuf(struct uri_parser *parser, size_t size)
{
	if (parser->tmpbuf != NULL) {
		str_truncate(parser->tmpbuf, 0);
		return parser->tmpbuf;
	}

	parser->tmpbuf = t_str_new(size);
	return parser->tmpbuf;
}