#ifndef URI_UTIL_H
#define URI_UTIL_H
#include "net.h"
/*
* Generic URI parsing.
*/
/* Flags that adjust URI parsing/validation. They are passed as the
'flags' argument of uri_parse_absolute_generic(), uri_check_data() and
uri_check(). Each flag is defined with BIT(n), so flags can presumably
be combined with bitwise OR. */
enum uri_parse_flags {
/* Scheme part 'scheme:' is already parsed externally. */
URI_PARSE_SCHEME_EXTERNAL = BIT(0),
/* Allow '#fragment' part in URI */
URI_PARSE_ALLOW_FRAGMENT_PART = BIT(1),
};
/* The 'host' part of a URI, as filled in by uri_parse_host(). */
struct uri_host {
/* host name text */
const char *name;
/* IP address of the host.
NOTE(review): presumably only meaningful when the host was given as
an IP address literal - confirm against the parser. */
struct ip_addr ip;
};
/* The 'authority' part of a URI (userinfo, host and port), as filled in
by uri_parse_authority() and its variants. */
struct uri_authority {
/* encoded userinfo part; e.g. "user:pass" */
const char *enc_userinfo;
/* host part of the authority */
struct uri_host host;
in_port_t port; /* 0 means no port specified */
};
/* URI parser state shared by the uri_parse_*() functions. It is
initialized with uri_parser_init() or uri_parser_init_data(). */
struct uri_parser {
/* pool from which parsed results are allocated */
pool_t pool;
/* error message; the parsing functions write their errors to the
parser object */
const char *error;
/* NOTE(review): presumably the start of the input, the current parse
position and the end of the input, respectively - confirm */
const unsigned char *begin, *cur, *end;
/* temporary buffer; see uri_parser_get_tmpbuf() */
string_t *tmpbuf;
/* NOTE(review): presumably allows a percent-encoded NUL ("%00") to be
decoded without error - confirm against the implementation */
bool allow_pct_nul:1;
};
/* Parse one instance of percent encoding. Returns 1 for success,
0 if none is present at the current parser position, and -1 in
case of error. The decoded character is returned in ch_r upon
success. */
int uri_parse_pct_encoded(struct uri_parser *parser,
unsigned char *ch_r);
/* Parse characters as long as these comply with the 'unreserved'
syntax. Returns 1 if characters were found, 0 if none were found,
and -1 if there was an error.
NOTE(review): 'part' presumably receives the matched characters -
confirm against the implementation. */
int uri_parse_unreserved(struct uri_parser *parser, string_t *part);
/* The same as uri_parse_unreserved(), but the allowed characters are
extended to 'unreserved / pct-encoded', meaning that percent encoding
is allowed. */
int uri_parse_unreserved_pct(struct uri_parser *parser, string_t *part);
/* Decode percent-encoded data from the 'data' parameter, up until the
'until' parameter. If the latter is NULL, data is decoded up until the
'\0' character. The decoded data is allocated on the parser pool and
returned in decoded_r. Any errors are written to the parser object.
NOTE(review): the bool return value presumably signals success or
failure - confirm against the implementation. */
bool uri_data_decode(struct uri_parser *parser, const char *data,
const char *until, const char **decoded_r) ATTR_NULL(3);
/* Cut the 'scheme ":"' part from the URI. The uri_p pointer is updated to
point just past the ":". Returns 0 on success and -1 on error. The
result is returned in the scheme_r parameter, which can be NULL to use
this function for merely checking the presence of a valid scheme. */
int uri_cut_scheme(const char **uri_p, const char **scheme_r)
ATTR_NULL(2);
/* Parse the URI 'scheme ":"' part. Returns 1 if successful, 0 if the first
character is not valid for a scheme, and -1 in case of error. The
result parameter scheme_r can be NULL to use this function for merely
checking the presence of a valid scheme. */
int uri_parse_scheme(struct uri_parser *parser, const char **scheme_r)
ATTR_NULL(2);
/* Parse the URI 'reg-name' syntax. Returns 1 if successful, 0 if the first
character is not valid for a host name, and -1 in case of error. The
result parameter reg_name_r can be NULL to use this function for merely
checking the presence of a valid host name. The result is allocated from
the data stack.
NOTE(review): most other parse functions in this header allocate their
result from the parser pool; confirm that the data stack is really
intended here.
*/
int uri_parse_reg_name(struct uri_parser *parser,
const char **reg_name_r) ATTR_NULL(2);
/* Parse the URI 'reg-name' part as an Internet host name, which is a
sequence of domain name labels separated by '.', as defined in
Section 3.5 of RFC 1034 and Section 2.1 of RFC 1123. Returns 1 if
successful, 0 if the first character is not valid for a host name,
and -1 in case of error. The result parameter host_name_r can be NULL
to use this function for merely checking the presence of a valid host
name. The result is allocated from the data stack.
NOTE(review): most other parse functions in this header allocate their
result from the parser pool; confirm that the data stack is really
intended here.
*/
int uri_parse_host_name(struct uri_parser *parser,
const char **host_name_r) ATTR_NULL(2);
/* Parse the URI 'host' syntax, which is either an IP address literal or
an Internet host name, as defined in Section 3.5 of RFC 1034 and
Section 2.1 of RFC 1123. An IP address literal is always allowed.
Returns 1 if successful, 0 if the first character is not valid for a
host name, and -1 in case of error. The provided host struct is filled
in with the parsed data, all allocated from the parser pool. The host
parameter can be NULL to use this function for merely checking for
valid 'host' syntax.
*/
int uri_parse_host(struct uri_parser *parser,
struct uri_host *host) ATTR_NULL(2);
/* Parse the URI 'authority' syntax. Returns 1 if successful, 0 if the
first character is not valid for the 'authority' syntax, and -1 in case
of error. The provided uri_authority struct is filled in with the parsed
data, all allocated from the parser pool. The auth parameter can be
NULL to use this function for merely checking for valid 'authority'
syntax.
*/
int uri_parse_authority(struct uri_parser *parser,
struct uri_authority *auth) ATTR_NULL(2);
/* Identical to uri_parse_authority(), except that this function parses
'"//" authority' (i.e. the authority including its leading "//"),
rather than 'authority'.
*/
int uri_parse_slashslash_authority(struct uri_parser *parser,
struct uri_authority *auth) ATTR_NULL(2);
/* Identical to uri_parse_authority(), except that this function parses
the registered name ('reg-name' syntax) as an Internet host name, as
defined in Section 3.5 of RFC 1034 and Section 2.1 of RFC 1123.
*/
int uri_parse_host_authority(struct uri_parser *parser,
struct uri_authority *auth) ATTR_NULL(2);
/* Identical to uri_parse_slashslash_authority(), except that this
function parses the registered name ('reg-name' syntax) as an Internet
host name, as defined in Section 3.5 of RFC 1034 and Section 2.1 of
RFC 1123.
*/
int uri_parse_slashslash_host_authority(struct uri_parser *parser,
struct uri_authority *auth) ATTR_NULL(2);
/* Parse the URI 'segment' syntax. Returns 1 if successful, 0 if the first
character is not valid for the 'segment' syntax, and -1 in case of
error. The result is allocated from the parser pool. Percent encoding is
not decoded in the result. The result parameter segment_r can be NULL to
use this function for merely checking for valid 'segment' syntax.
*/
int uri_parse_path_segment(struct uri_parser *parser,
const char **segment_r) ATTR_NULL(2);
/* Parse the URI 'path' syntax. This also resolves '..' and '.' segments in
the path. If the path is relative, the relative_r parameter indicates
how many segments the base path must be moved towards root (as caused by
leading '..' segments). Returns 1 if successful, 0 if the first character
is not valid for the 'segment' syntax, and -1 in case of error. The result
is a NULL-terminated string list allocated from the parser pool. Percent
encoding is not decoded in the result. The result parameter path_r can be
NULL to use this function for merely checking for valid 'path' syntax.
The relative_r parameter is annotated ATTR_NULL as well, so it may
presumably also be NULL when the caller does not need it.
*/
int uri_parse_path(struct uri_parser *parser, int *relative_r,
const char *const **path_r) ATTR_NULL(2,3);
/* Parse the URI 'query' syntax. Returns 1 if successful, 0 if the first
character is not valid for the 'query' syntax, and -1 in case of
error. The result is allocated from the parser pool. Percent encoding is
not decoded in the result. The result parameter query_r can be NULL to
use this function for merely checking for valid 'query' syntax.
*/
int uri_parse_query(struct uri_parser *parser,
const char **query_r) ATTR_NULL(2);
/* Parse the URI 'fragment' syntax. Returns 1 if successful, 0 if the first
character is not valid for the 'fragment' syntax, and -1 in case of
error. The result is allocated from the parser pool. Percent encoding is
not decoded in the result. The result parameter fragment_r can be NULL to
use this function for merely checking for valid 'fragment' syntax.
*/
int uri_parse_fragment(struct uri_parser *parser,
const char **fragment_r) ATTR_NULL(2);
/* Initialize the URI parser with the provided data, which is given as a
pointer plus an explicit size in bytes. */
void uri_parser_init_data(struct uri_parser *parser,
pool_t pool, const unsigned char *data, size_t size);
/* Initialize the URI parser with the provided '\0'-terminated string. */
void uri_parser_init(struct uri_parser *parser,
pool_t pool, const char *uri);
/* Returns the temporary buffer associated with this parser. Can be used
for higher-level parsing activities.
NOTE(review): 'size' is presumably the initial size requested for the
buffer - confirm against the implementation. */
string_t *uri_parser_get_tmpbuf(struct uri_parser *parser,
size_t size);
/* Parse a generic (RFC3986) absolute URI for validity. The flags
parameter takes enum uri_parse_flags values that adjust what is accepted.
Returns 0 if valid and -1 otherwise. Note that some URI formats like
"sip", "aix" and "aaa" violate RFC3986 and will currently fail with
this function.
*/
int uri_parse_absolute_generic(struct uri_parser *parser,
enum uri_parse_flags flags);
/*
* Generic URI manipulation
*/
/* Copy the uri_host struct from src to dest, allocating the copied data
on the provided pool. */
void uri_host_copy(pool_t pool, struct uri_host *dest,
const struct uri_host *src);
/*
* Generic URI validation
*/
/* Check whether the provided data (pointer plus size) is a valid absolute
RFC3986 URI. Returns 0 if valid and -1 otherwise.
NOTE(review): error_r presumably receives an error message when the
check fails - confirm against the implementation. */
int uri_check_data(const unsigned char *data, size_t size,
enum uri_parse_flags flags, const char **error_r);
/* Check whether the provided '\0'-terminated string is a valid absolute
RFC3986 URI. The flags parameter takes enum uri_parse_flags values, as
for uri_check_data(). Returns 0 if valid and -1 otherwise.
NOTE(review): error_r presumably receives an error message when the
check fails - confirm against the implementation. */
int uri_check(const char *uri, enum uri_parse_flags flags,
const char **error_r);
/*
* Generic URI construction
*/
/* Encodes the '\0'-terminated data using the percent encoding. The
esc_table is a 256 byte lookup table. If none of the esc_mask bits are
set at the character's position in the esc_table, a character needs
to be encoded. Also, when esc_extra contains a character, it needs to
be encoded. All other characters are copied verbatim to the out buffer.
The esc_extra parameter is annotated ATTR_NULL, so it may presumably be
NULL when there are no extra characters to encode.
*/
void uri_data_encode(string_t *out,
const unsigned char esc_table[256],
unsigned char esc_mask, const char *esc_extra,
const char *data) ATTR_NULL(4);
/* Append the provided URI scheme to the out buffer. */
void uri_append_scheme(string_t *out, const char *scheme);
/* Append partial user data (i.e. some part of what comes before '@') to
the out buffer. No '@' is produced. Characters are percent-encoded when
necessary. Characters in esc are always percent-encoded, even when these
are valid 'userinfo' characters. The esc parameter is annotated
ATTR_NULL, so it may presumably be NULL when no extra characters need
encoding. */
void uri_append_user_data(string_t *out,
const char *esc, const char *data) ATTR_NULL(2);
/* Append userinfo followed by '@' to the out buffer. Characters in
userinfo are percent-encoded when necessary. */
void uri_append_userinfo(string_t *out, const char *userinfo);
/* Append the host name to the out buffer. Characters are percent-encoded
when necessary. */
void uri_append_host_name(string_t *out, const char *name);
/* Append the host IP address to the out buffer. */
void uri_append_host_ip(string_t *out, const struct ip_addr *host_ip);
/* Encode the URI host struct and append it to the out buffer. */
void uri_append_host(string_t *out, const struct uri_host *host);
/* Append the port to the out buffer. */
void uri_append_port(string_t *out, in_port_t port);
/* Append partial path segment data to the out buffer. No '/' is produced.
Characters are percent-encoded when necessary. Characters in esc are
always percent-encoded, even when these are valid 'segment' characters.
The esc parameter is annotated ATTR_NULL, so it may presumably be NULL
when no extra characters need encoding.
*/
void uri_append_path_segment_data(string_t *out,
const char *esc, const char *data) ATTR_NULL(2);
/* Append a full path segment to the out buffer. A leading '/' is
produced. Characters are percent-encoded when necessary. */
void uri_append_path_segment(string_t *out, const char *segment);
/* Append partial path data to the out buffer. The data may include '/',
which is not encoded. Characters are percent-encoded when necessary.
Characters in esc are always percent-encoded, even when these are
valid 'path' characters. The esc parameter is annotated ATTR_NULL, so
it may presumably be NULL when no extra characters need encoding. */
void uri_append_path_data(string_t *out,
const char *esc, const char *data) ATTR_NULL(2);
/* Append a full path to the out buffer. A leading '/' is produced. The
data may include more '/' characters, which are not encoded. Characters
are percent-encoded when necessary.
*/
void uri_append_path(string_t *out, const char *path);
/* Append partial query data to the out buffer. No leading '?' is
produced. Characters are percent-encoded when necessary. Characters
in esc are always percent-encoded, even when these are valid 'query'
characters. The esc parameter is annotated ATTR_NULL, so it may
presumably be NULL when no extra characters need encoding. */
void uri_append_query_data(string_t *out,
const char *esc, const char *data) ATTR_NULL(2);
/* Append a full URI query part to the out buffer. A leading '?' is
produced. Characters are percent-encoded when necessary. */
void uri_append_query(string_t *out, const char *query);
/* Append partial fragment data to the out buffer. No leading '#' is
produced. Characters are percent-encoded when necessary. Characters
in esc are always percent-encoded, even when these are valid
'fragment' characters. The esc parameter is annotated ATTR_NULL, so it
may presumably be NULL when no extra characters need encoding. */
void uri_append_fragment_data(string_t *out,
const char *esc, const char *data) ATTR_NULL(2);
/* Append a full URI fragment part to the out buffer. A leading '#' is
produced. Characters are percent-encoded when necessary. */
void uri_append_fragment(string_t *out, const char *fragment);
#endif