util_uri.c revision 7e79e8fd53348f9fc6e8009a4a2522425ab6f08f
842ae4bd224140319ae7feec1872b93dfd491143fielding/* ====================================================================
842ae4bd224140319ae7feec1872b93dfd491143fielding * The Apache Software License, Version 1.1
842ae4bd224140319ae7feec1872b93dfd491143fielding * Copyright (c) 2000 The Apache Software Foundation. All rights
842ae4bd224140319ae7feec1872b93dfd491143fielding * reserved.
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * Redistribution and use in source and binary forms, with or without
ce9621257ef9e54c1bbe5ad8a5f445a1f211c2dcnd * modification, are permitted provided that the following conditions
ce9621257ef9e54c1bbe5ad8a5f445a1f211c2dcnd * 1. Redistributions of source code must retain the above copyright
ce9621257ef9e54c1bbe5ad8a5f445a1f211c2dcnd * notice, this list of conditions and the following disclaimer.
ce9621257ef9e54c1bbe5ad8a5f445a1f211c2dcnd * 2. Redistributions in binary form must reproduce the above copyright
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * notice, this list of conditions and the following disclaimer in
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * the documentation and/or other materials provided with the
9d129b55f5a43abf43865c6b0eb6dd19bc22aba8ianh * distribution.
9d129b55f5a43abf43865c6b0eb6dd19bc22aba8ianh * 3. The end-user documentation included with the redistribution,
9d129b55f5a43abf43865c6b0eb6dd19bc22aba8ianh * if any, must include the following acknowledgment:
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * "This product includes software developed by the
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * Apache Software Foundation (http://www.apache.org/)."
1b21d7b3d97def358b2e923655edeb16613a1c31gstein * Alternately, this acknowledgment may appear in the software itself,
1b21d7b3d97def358b2e923655edeb16613a1c31gstein * if and wherever such third-party acknowledgments normally appear.
1b21d7b3d97def358b2e923655edeb16613a1c31gstein * 4. The names "Apache" and "Apache Software Foundation" must
1b21d7b3d97def358b2e923655edeb16613a1c31gstein * not be used to endorse or promote products derived from this
1b21d7b3d97def358b2e923655edeb16613a1c31gstein * software without prior written permission. For written
1b21d7b3d97def358b2e923655edeb16613a1c31gstein * permission, please contact apache@apache.org.
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * 5. Products derived from this software may not be called "Apache",
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * nor may "Apache" appear in their name, without prior written
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * permission of the Apache Software Foundation.
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
51af95bb51b5084e883bad250b2afa2838e9ceebfielding * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
d4f1d9c1ff112a8ab9bee31f196973761329b236rbb * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
7fae9cc4639013f3c04c085547256c68814aee8ftrawick * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
7fae9cc4639013f3c04c085547256c68814aee8ftrawick * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
7fae9cc4639013f3c04c085547256c68814aee8ftrawick * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
7fae9cc4639013f3c04c085547256c68814aee8ftrawick * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
7184de27ec1d62a83c41cdeac0953ca9fd661e8csf * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
7184de27ec1d62a83c41cdeac0953ca9fd661e8csf * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
7184de27ec1d62a83c41cdeac0953ca9fd661e8csf * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
36ef8f77bffe75d1aa327882be1b5bdbe2ff567asf * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * SUCH DAMAGE.
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * ====================================================================
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * This software consists of voluntary contributions made by many
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * individuals on behalf of the Apache Software Foundation. For more
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * information on the Apache Software Foundation, please see
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * Portions of this software are based upon public domain software
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * originally written at the National Center for Supercomputing Applications,
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * University of Illinois, Urbana-Champaign.
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * util_uri.c: URI related utility things
785be1b6298010956622771c870ab3cd8ca57a2faaron/* Some WWW schemes and their default ports; this is basically /etc/services */
785be1b6298010956622771c870ab3cd8ca57a2faaron/* This will become global when the protocol abstraction comes */
785be1b6298010956622771c870ab3cd8ca57a2faaron/* As the schemes are searched by a linear search, */
785be1b6298010956622771c870ab3cd8ca57a2faaron/* they are sorted by their expected frequency */
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fieldingAPI_EXPORT(unsigned short) ap_default_port_for_scheme(const char *scheme_str)
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding for (scheme = schemes; scheme->name != NULL; ++scheme)
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fieldingAPI_EXPORT(unsigned short) ap_default_port_for_request(const request_rec *r)
e08076ca56e6cb68b30846b9e9339061058aae6dpoirier/* Create a copy of a "struct hostent" record; it was presumably returned
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * from a call to gethostbyname() and lives in static storage.
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * By creating a copy we can tuck it away for later use.
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fieldingAPI_EXPORT(struct hostent *) ap_pduphostent(ap_context_t *p, const struct hostent *hp)
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding int i = 0, j = 0;
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding /* Count number of alias entries */
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding /* Count number of in_addr entries */
45acd673a68181802b112e97e84fa3813ddd3ec1stoddard /* Allocate hostent structure, alias ptrs, addr ptrs, addrs */
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding newent = (struct hostent *) ap_palloc(p, sizeof(*hp));
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding aliases = (char **) ap_palloc(p, (j+1) * sizeof(char*));
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding ptrs = (char **) ap_palloc(p, (i+1) * sizeof(char*));
e08076ca56e6cb68b30846b9e9339061058aae6dpoirier addrs = (struct in_addr *) ap_palloc(p, (i+1) * sizeof(struct in_addr));
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding /* Copy Alias Names: */
6b38fca3ec543a0f72efd5683e91a0b30fc752d1trawick /* Copy address entries */
785be1b6298010956622771c870ab3cd8ca57a2faaron/* pgethostbyname(): resolve hostname, if successful return an ALLOCATED
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * COPY OF the hostent structure, intended to be stored and used later.
3c290fd0361d6d9d84d97725eaf299456bddd6cfsf * (gethostbyname() uses static storage that would be overwritten on each call)
6b38fca3ec543a0f72efd5683e91a0b30fc752d1trawickAPI_EXPORT(struct hostent *) ap_pgethostbyname(ap_context_t *p, const char *hostname)
e160b861b50a3a8dcc013b8cd3ef849fe777e52fgregames return (hp == NULL) ? NULL : ap_pduphostent(p, hp);
e160b861b50a3a8dcc013b8cd3ef849fe777e52fgregames/* Unparse a uri_components structure to a URI string.
e160b861b50a3a8dcc013b8cd3ef849fe777e52fgregames * Optionally suppress the password for security reasons.
6b38fca3ec543a0f72efd5683e91a0b30fc752d1trawickAPI_EXPORT(char *) ap_unparse_uri_components(ap_context_t *p, const uri_components *uptr, unsigned flags)
560f6ac786d611b858b2bad932713d9e971f0716trawick /* If suppressing the site part, omit both user name & scheme://hostname */
6b38fca3ec543a0f72efd5683e91a0b30fc752d1trawick /* Construct a "user:password@" string, honoring the passed UNP_ flags: */
7bf77d70b6830636bc36e6b76a228c301be23ff7brianp (uptr->user && !(flags & UNP_OMITUSER)) ? uptr->user : "",
6b38fca3ec543a0f72efd5683e91a0b30fc752d1trawick (uptr->password && !(flags & UNP_OMITPASSWORD)) ? ":" : "",
6b38fca3ec543a0f72efd5683e91a0b30fc752d1trawick ? ((flags & UNP_REVEALPASSWORD) ? uptr->password : "XXXXXXXX")
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding /* Construct scheme://site string */
785be1b6298010956622771c870ab3cd8ca57a2faaron uptr->port == ap_default_port_for_scheme(uptr->scheme));
97c78987224dcd037076d393aad1867c26b2c8cftrawick /* Should we suppress all path info? */
97c78987224dcd037076d393aad1867c26b2c8cftrawick /* Append path, query and fragment strings: */
97c78987224dcd037076d393aad1867c26b2c8cftrawick (uptr->query && !(flags & UNP_OMITQUERY)) ? "?" : "",
97c78987224dcd037076d393aad1867c26b2c8cftrawick (uptr->query && !(flags & UNP_OMITQUERY)) ? uptr->query : "",
97c78987224dcd037076d393aad1867c26b2c8cftrawick (uptr->fragment && !(flags & UNP_OMITQUERY)) ? "#" : NULL,
97c78987224dcd037076d393aad1867c26b2c8cftrawick (uptr->fragment && !(flags & UNP_OMITQUERY)) ? uptr->fragment : NULL,
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding/* The regex version of parse_uri_components has the advantage that it is
785be1b6298010956622771c870ab3cd8ca57a2faaron * relatively easy to understand and extend. But it has the disadvantage
785be1b6298010956622771c870ab3cd8ca57a2faaron * that the regexes are complex enough that regex libraries really
785be1b6298010956622771c870ab3cd8ca57a2faaron * don't do a great job with them performance-wise.
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * The default is a hand coded scanner that is two orders of magnitude faster.
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding const char *re_str;
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding /* This is a modified version of the regex that appeared in
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * draft-fielding-uri-syntax-01. It doesn't allow the uri to contain a
785be1b6298010956622771c870ab3cd8ca57a2faaron * scheme but no hostinfo or vice versa.
785be1b6298010956622771c870ab3cd8ca57a2faaron * draft-fielding-uri-syntax-01.txt, section 4.4 tells us:
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * Although the BNF defines what is allowed in each component, it is
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * ambiguous in terms of differentiating between a site component and
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * a path component that begins with two slash characters.
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * RFC2068 disambiguates this for the Request-URI, which may only ever be
c5d006b2861d49c61bcf79316163e30611c6fd08trawick * the "abs_path" portion of the URI. So a request "GET //foo/bar
c5d006b2861d49c61bcf79316163e30611c6fd08trawick * HTTP/1.1" is really referring to the path //foo/bar, not the host foo,
c5d006b2861d49c61bcf79316163e30611c6fd08trawick * path /bar. Nowhere in RFC2068 is it possible to have a scheme but no
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * hostinfo or a hostinfo but no scheme. (Unless you're proxying a
7697b1b7376a532163c621e050b70c90dcb15d66covener * protocol other than HTTP, but this parsing engine probably won't work
5bfaaf573bacb45c1cf290ce85ecc676587e8a64jim * for other protocols.)
5bfaaf573bacb45c1cf290ce85ecc676587e8a64jim * 12 3 4 5 6 7 8 */
5bfaaf573bacb45c1cf290ce85ecc676587e8a64jim re_str = "^(([^:/?#]+)://([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?$";
5bfaaf573bacb45c1cf290ce85ecc676587e8a64jim /* ^scheme--^ ^site---^ ^path--^ ^query^ ^frag */
7697b1b7376a532163c621e050b70c90dcb15d66covener if ((ret = regcomp(&re_uri, re_str, REG_EXTENDED)) != 0) {
7697b1b7376a532163c621e050b70c90dcb15d66covener /* Make a readable error message */
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding ap_log_error(APLOG_MARK, APLOG_NOERRNO|APLOG_ERR, NULL,
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding "Internal error: regcomp(\"%s\") returned non-zero (%s) - "
f4b96a996afbc46872f57ad1450e6ee1c8f13707jorton "possibly due to broken regex lib! "
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding "Did you define WANTHSREGEX=yes?",
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding /* This is a sub-RE which will break down the hostinfo part,
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * i.e., user, password, hostname and port.
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * $ 12 3 4 5 6 7 */
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding re_str = "^(([^:]*)(:(.*))?@)?([^@:]*)(:([0-9]*))?$";
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding /* ^^user^ :pw ^host^ ^:[port]^ */
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding if ((ret = regcomp(&re_hostpart, re_str, REG_EXTENDED)) != 0) {
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding /* Make a readable error message */
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding ret = regerror(ret, &re_hostpart, line, sizeof line);
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding ap_log_error(APLOG_MARK, APLOG_NOERRNO|APLOG_ERR, NULL,
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding "Internal error: regcomp(\"%s\") returned non-zero (%s) - "
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding "possibly due to broken regex lib! "
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding "Did you define WANTHSREGEX=yes?",
785be1b6298010956622771c870ab3cd8ca57a2faaron/* parse_uri_components():
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * Parse a given URI, fill in all supplied fields of a uri_components
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * structure. This eliminates the necessity of extracting host, port,
1ccd992d37d62c8cb2056126f2234f64ec189bfddougm * path, query info repeatedly in the modules.
785be1b6298010956622771c870ab3cd8ca57a2faaron * Side effects:
785be1b6298010956622771c870ab3cd8ca57a2faaron * - fills in fields of uri_components *uptr
785be1b6298010956622771c870ab3cd8ca57a2faaron * - none on any of the r->* fields
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fieldingAPI_EXPORT(int) ap_parse_uri_components(ap_context_t *p, const char *uri, uri_components *uptr)
785be1b6298010956622771c870ab3cd8ca57a2faaron regmatch_t match[10]; /* This must have at least as many elements
785be1b6298010956622771c870ab3cd8ca57a2faaron * as there are braces in the re_strings */
785be1b6298010956622771c870ab3cd8ca57a2faaron /* Initialize the structure. parse_uri() and parse_uri_components()
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * can be called more than once per request.
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding ret = ap_regexec(&re_uri, uri, re_uri.re_nsub + 1, match, 0);
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding if (ret != 0) {
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding ap_log_error(APLOG_MARK, APLOG_NOERRNO|APLOG_ERR, NULL,
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding "ap_regexec() could not parse uri (\"%s\")",
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding uptr->scheme = ap_pstrndup (p, uri+match[2].rm_so, match[2].rm_eo - match[2].rm_so);
a7ed9c525f9460187f327cea953bf90ecf1bdc51gstein /* empty hostinfo is valid, that's why we test $1 but use $3 */
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding uptr->hostinfo = ap_pstrndup (p, uri+match[3].rm_so, match[3].rm_eo - match[3].rm_so);
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding uptr->path = ap_pstrndup (p, uri+match[4].rm_so, match[4].rm_eo - match[4].rm_so);
17f3ba69f65182426ad4e568bb2d6f192ccd2ed5trawick /* empty query string is valid, that's why we test $5 but use $6 */
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding uptr->query = ap_pstrndup (p, uri+match[6].rm_so, match[6].rm_eo - match[6].rm_so);
17f3ba69f65182426ad4e568bb2d6f192ccd2ed5trawick /* empty fragment is valid, test $7 use $8 */
17f3ba69f65182426ad4e568bb2d6f192ccd2ed5trawick uptr->fragment = ap_pstrndup (p, uri+match[8].rm_so, match[8].rm_eo - match[8].rm_so);
17f3ba69f65182426ad4e568bb2d6f192ccd2ed5trawick /* Parse the hostinfo part to extract user, password, host, and port */
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding ret = ap_regexec(&re_hostpart, uptr->hostinfo, re_hostpart.re_nsub + 1, match, 0);
1ccd992d37d62c8cb2056126f2234f64ec189bfddougm if (ret != 0) {
785be1b6298010956622771c870ab3cd8ca57a2faaron ap_log_error(APLOG_MARK, APLOG_NOERRNO|APLOG_ERR, NULL,
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding "ap_regexec() could not parse (\"%s\") as host part",
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding /* $ 12 3 4 5 6 7 */
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding /* "^(([^:]*)(:(.*))?@)?([^@:]*)(:([0-9]*))?$" */
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding /* ^^user^ :pw ^host^ ^:[port]^ */
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding /* empty user is valid, that's why we test $1 but use $2 */
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding uptr->user = ap_pstrndup (p, uptr->hostinfo+match[2].rm_so, match[2].rm_eo - match[2].rm_so);
785be1b6298010956622771c870ab3cd8ca57a2faaron /* empty password is valid, test $3 but use $4 */
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding uptr->password = ap_pstrndup (p, uptr->hostinfo+match[4].rm_so, match[4].rm_eo - match[4].rm_so);
1ccd992d37d62c8cb2056126f2234f64ec189bfddougm /* empty hostname is valid, and implied by the existence of hostinfo */
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding uptr->hostname = ap_pstrndup (p, uptr->hostinfo+match[5].rm_so, match[5].rm_eo - match[5].rm_so);
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding /* Note that the port string can be empty.
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * If it is, we use the default port associated with the scheme
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding uptr->port_str = ap_pstrndup (p, uptr->hostinfo+match[7].rm_so, match[7].rm_eo - match[7].rm_so);
17f3ba69f65182426ad4e568bb2d6f192ccd2ed5trawick /* Invalid characters after ':' found */
27c5ebb7d411a214f5b6b55a881086ce086d3dd3covener uptr->port = uptr->scheme ? ap_default_port_for_scheme(uptr->scheme) : DEFAULT_HTTP_PORT;
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding/* Here is the hand-optimized parse_uri_components(). There are some wild
27c5ebb7d411a214f5b6b55a881086ce086d3dd3covener * tricks we could pull in assembly language that we don't pull here... like we
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * can do word-at-time scans for delimiter characters using the same technique
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * that fast memchr()s use. But that would be way non-portable. -djg
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding/* We have a lookup table (uri_delims) that we can index by character and it tells us if the
27c5ebb7d411a214f5b6b55a881086ce086d3dd3covener * character is one of the interesting delimiters. Note that we even get
27c5ebb7d411a214f5b6b55a881086ce086d3dd3covener * compares for NUL for free -- it's just another delimiter.
27c5ebb7d411a214f5b6b55a881086ce086d3dd3covener/* the uri_delims.h file is autogenerated by gen_uri_delims.c */
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding/* it works like this:
27c5ebb7d411a214f5b6b55a881086ce086d3dd3covener if ((uri_delims[ch] & NOTEND_foobar) == 0) {
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding then we're not at a delimiter for foobar
7c6f514f2ef9b98f58b8f8a5f534eb78a75f29f2jorton/* Note that we optimize the scheme scanning here, we cheat and let the
7c6f514f2ef9b98f58b8f8a5f534eb78a75f29f2jorton * compiler know that it doesn't have to do the & masking.
7c6f514f2ef9b98f58b8f8a5f534eb78a75f29f2jorton#define NOTEND_HOSTINFO (T_SLASH | T_QUESTION | T_HASH | T_NUL)
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding /* nothing to do */
7c6f514f2ef9b98f58b8f8a5f534eb78a75f29f2jorton/* parse_uri_components():
7c6f514f2ef9b98f58b8f8a5f534eb78a75f29f2jorton * Parse a given URI, fill in all supplied fields of a uri_components
785be1b6298010956622771c870ab3cd8ca57a2faaron * structure. This eliminates the necessity of extracting host, port,
785be1b6298010956622771c870ab3cd8ca57a2faaron * path, query info repeatedly in the modules.
b88f887ed5554d9050d97f9a56a89ae62bdbd906fanf * Side effects:
b88f887ed5554d9050d97f9a56a89ae62bdbd906fanf * - fills in fields of uri_components *uptr
785be1b6298010956622771c870ab3cd8ca57a2faaron * - none on any of the r->* fields
b88f887ed5554d9050d97f9a56a89ae62bdbd906fanfAPI_EXPORT(int) ap_parse_uri_components(ap_context_t *p, const char *uri, uri_components *uptr)
b88f887ed5554d9050d97f9a56a89ae62bdbd906fanf const char *s;
b88f887ed5554d9050d97f9a56a89ae62bdbd906fanf const char *s1;
785be1b6298010956622771c870ab3cd8ca57a2faaron const char *hostinfo;
b88f887ed5554d9050d97f9a56a89ae62bdbd906fanf /* Initialize the structure. parse_uri() and parse_uri_components()
066877f1a045103acfdd376d48cdd473c33f409bdougm * can be called more than once per request.
785be1b6298010956622771c870ab3cd8ca57a2faaron /* We assume the processor has a branch predictor like most --
785be1b6298010956622771c870ab3cd8ca57a2faaron * it assumes forward branches are untaken and backwards are taken. That's
785be1b6298010956622771c870ab3cd8ca57a2faaron * the reason for the gotos. -djg
785be1b6298010956622771c870ab3cd8ca57a2faaron /* we expect uri to point to first character of path ... remember
785be1b6298010956622771c870ab3cd8ca57a2faaron * that the path could be empty -- http://foobar?query for example
b6d9e9d6421b9cebfc74f9c1a870b8b85473f1c1poirier while ((uri_delims[*(unsigned char *)s] & NOTEND_PATH) == 0) {
b6d9e9d6421b9cebfc74f9c1a870b8b85473f1c1poirier if (s != uri) {
b6d9e9d6421b9cebfc74f9c1a870b8b85473f1c1poirier if (*s == 0) {
b6d9e9d6421b9cebfc74f9c1a870b8b85473f1c1poirier if (*s == '?') {
b88f887ed5554d9050d97f9a56a89ae62bdbd906fanf /* otherwise it's a fragment */
b88f887ed5554d9050d97f9a56a89ae62bdbd906fanf /* find the scheme: */
7697b1b7376a532163c621e050b70c90dcb15d66covener while ((uri_delims[*(unsigned char *)s] & NOTEND_SCHEME) == 0) {
785be1b6298010956622771c870ab3cd8ca57a2faaron /* scheme must be non-empty and followed by :// */
785be1b6298010956622771c870ab3cd8ca57a2faaron if (s == uri || s[0] != ':' || s[1] != '/' || s[2] != '/') {
b88f887ed5554d9050d97f9a56a89ae62bdbd906fanf while ((uri_delims[*(unsigned char *)s] & NOTEND_HOSTINFO) == 0) {
7697b1b7376a532163c621e050b70c90dcb15d66covener uri = s; /* whatever follows hostinfo is start of uri */
b88f887ed5554d9050d97f9a56a89ae62bdbd906fanf uptr->hostinfo = ap_pstrndup(p, hostinfo, uri - hostinfo);
7697b1b7376a532163c621e050b70c90dcb15d66covener /* If there's a username:password@host:port, the @ we want is the last @...
7697b1b7376a532163c621e050b70c90dcb15d66covener * too bad there's no memrchr()... For the C purists, note that hostinfo
b88f887ed5554d9050d97f9a56a89ae62bdbd906fanf * is definitely not the first character of the original uri so therefore
7697b1b7376a532163c621e050b70c90dcb15d66covener * &hostinfo[-1] < &hostinfo[0] ... and this loop is valid C.
7697b1b7376a532163c621e050b70c90dcb15d66covener /* again we want the common case to be fall through */
7697b1b7376a532163c621e050b70c90dcb15d66covener /* We expect hostinfo to point to the first character of
5bfaaf573bacb45c1cf290ce85ecc676587e8a64jim * the hostname. If there's a port it is the first colon.
5bfaaf573bacb45c1cf290ce85ecc676587e8a64jim if (s == NULL) {
7697b1b7376a532163c621e050b70c90dcb15d66covener /* we expect the common case to have no port */
7697b1b7376a532163c621e050b70c90dcb15d66covener uptr->hostname = ap_pstrndup(p, hostinfo, uri - hostinfo);
7697b1b7376a532163c621e050b70c90dcb15d66covener uptr->hostname = ap_pstrndup(p, hostinfo, s - hostinfo);
7697b1b7376a532163c621e050b70c90dcb15d66covener if (uri != s) {
7697b1b7376a532163c621e050b70c90dcb15d66covener /* Invalid characters after ':' found */
7697b1b7376a532163c621e050b70c90dcb15d66covener uptr->port = ap_default_port_for_scheme(uptr->scheme);
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding /* first colon delimits username:password */
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding uptr->user = ap_pstrndup(p, hostinfo, s1 - hostinfo);
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding uptr->user = ap_pstrndup(p, hostinfo, s - hostinfo);
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding/* Special case for CONNECT parsing: it comes with the hostinfo part only */
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding/* See the INTERNET-DRAFT document "Tunneling SSL Through a WWW Proxy"
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * currently at http://www.mcom.com/newsref/std/tunneling_ssl.html
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * for the format of the "CONNECT host:port HTTP/1.0" request
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fieldingAPI_EXPORT(int) ap_parse_hostinfo_components(ap_context_t *p, const char *hostinfo, uri_components *uptr)
f4b96a996afbc46872f57ad1450e6ee1c8f13707jorton const char *s;
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding /* Initialize the structure. parse_uri() and parse_uri_components()
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * can be called more than once per request.
785be1b6298010956622771c870ab3cd8ca57a2faaron /* We expect hostinfo to point to the first character of
900127764fb985c340ee4979cac97146a330c694trawick * the hostname. There must be a port, separated by a colon
785be1b6298010956622771c870ab3cd8ca57a2faaron if (s == NULL) {
9ea14ade0d235bec11e6c221b888a6630a0be849covener uptr->hostname = ap_pstrndup(p, hostinfo, s - hostinfo);
9ea14ade0d235bec11e6c221b888a6630a0be849covener if (*s != '\0') {
7697b1b7376a532163c621e050b70c90dcb15d66covener /* Invalid characters after ':' found */