util_uri.c revision 7e79e8fd53348f9fc6e8009a4a2522425ab6f08f
842ae4bd224140319ae7feec1872b93dfd491143fielding/* ====================================================================
842ae4bd224140319ae7feec1872b93dfd491143fielding * The Apache Software License, Version 1.1
842ae4bd224140319ae7feec1872b93dfd491143fielding *
842ae4bd224140319ae7feec1872b93dfd491143fielding * Copyright (c) 2000 The Apache Software Foundation. All rights
842ae4bd224140319ae7feec1872b93dfd491143fielding * reserved.
842ae4bd224140319ae7feec1872b93dfd491143fielding *
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * Redistribution and use in source and binary forms, with or without
ce9621257ef9e54c1bbe5ad8a5f445a1f211c2dcnd * modification, are permitted provided that the following conditions
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * are met:
ce9621257ef9e54c1bbe5ad8a5f445a1f211c2dcnd *
ce9621257ef9e54c1bbe5ad8a5f445a1f211c2dcnd * 1. Redistributions of source code must retain the above copyright
ce9621257ef9e54c1bbe5ad8a5f445a1f211c2dcnd * notice, this list of conditions and the following disclaimer.
ce9621257ef9e54c1bbe5ad8a5f445a1f211c2dcnd *
ce9621257ef9e54c1bbe5ad8a5f445a1f211c2dcnd * 2. Redistributions in binary form must reproduce the above copyright
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * notice, this list of conditions and the following disclaimer in
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * the documentation and/or other materials provided with the
9d129b55f5a43abf43865c6b0eb6dd19bc22aba8ianh * distribution.
e8f95a682820a599fe41b22977010636be5c2717jim *
9d129b55f5a43abf43865c6b0eb6dd19bc22aba8ianh * 3. The end-user documentation included with the redistribution,
9d129b55f5a43abf43865c6b0eb6dd19bc22aba8ianh * if any, must include the following acknowledgment:
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * "This product includes software developed by the
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * Apache Software Foundation (http://www.apache.org/)."
1b21d7b3d97def358b2e923655edeb16613a1c31gstein * Alternately, this acknowledgment may appear in the software itself,
1b21d7b3d97def358b2e923655edeb16613a1c31gstein * if and wherever such third-party acknowledgments normally appear.
1b21d7b3d97def358b2e923655edeb16613a1c31gstein *
1b21d7b3d97def358b2e923655edeb16613a1c31gstein * 4. The names "Apache" and "Apache Software Foundation" must
1b21d7b3d97def358b2e923655edeb16613a1c31gstein * not be used to endorse or promote products derived from this
1b21d7b3d97def358b2e923655edeb16613a1c31gstein * software without prior written permission. For written
1b21d7b3d97def358b2e923655edeb16613a1c31gstein * permission, please contact apache@apache.org.
2d71630471d1c23f0137309e3c3957c633ecbfd6rbb *
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * 5. Products derived from this software may not be called "Apache",
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * nor may "Apache" appear in their name, without prior written
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * permission of the Apache Software Foundation.
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding *
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
51af95bb51b5084e883bad250b2afa2838e9ceebfielding * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
d4f1d9c1ff112a8ab9bee31f196973761329b236rbb * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
7fae9cc4639013f3c04c085547256c68814aee8ftrawick * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
7fae9cc4639013f3c04c085547256c68814aee8ftrawick * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
7fae9cc4639013f3c04c085547256c68814aee8ftrawick * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
7fae9cc4639013f3c04c085547256c68814aee8ftrawick * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
7184de27ec1d62a83c41cdeac0953ca9fd661e8csf * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
7184de27ec1d62a83c41cdeac0953ca9fd661e8csf * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
7184de27ec1d62a83c41cdeac0953ca9fd661e8csf * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
36ef8f77bffe75d1aa327882be1b5bdbe2ff567asf * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * SUCH DAMAGE.
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * ====================================================================
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding *
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * This software consists of voluntary contributions made by many
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * individuals on behalf of the Apache Software Foundation. For more
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * information on the Apache Software Foundation, please see
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * <http://www.apache.org/>.
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding *
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * Portions of this software are based upon public domain software
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * originally written at the National Center for Supercomputing Applications,
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * University of Illinois, Urbana-Champaign.
785be1b6298010956622771c870ab3cd8ca57a2faaron */
785be1b6298010956622771c870ab3cd8ca57a2faaron
785be1b6298010956622771c870ab3cd8ca57a2faaron/*
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * util_uri.c: URI related utility things
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding *
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding */
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding#include "ap_config.h"
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding#include "httpd.h"
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding#include "http_log.h"
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding#include "util_uri.h"
785be1b6298010956622771c870ab3cd8ca57a2faaron#include <string.h>
785be1b6298010956622771c870ab3cd8ca57a2faaron
785be1b6298010956622771c870ab3cd8ca57a2faaron/* Some WWW schemes and their default ports; this is basically /etc/services */
785be1b6298010956622771c870ab3cd8ca57a2faaron/* This will become global when the protocol abstraction comes */
785be1b6298010956622771c870ab3cd8ca57a2faaron/* As the schemes are searched by a linear search, */
785be1b6298010956622771c870ab3cd8ca57a2faaron/* they are sorted by their expected frequency */
5bfaaf573bacb45c1cf290ce85ecc676587e8a64jimstatic schemes_t schemes[] =
7697b1b7376a532163c621e050b70c90dcb15d66covener{
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding {"http", DEFAULT_HTTP_PORT},
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding {"ftp", DEFAULT_FTP_PORT},
f4b96a996afbc46872f57ad1450e6ee1c8f13707jorton {"https", DEFAULT_HTTPS_PORT},
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding {"gopher", DEFAULT_GOPHER_PORT},
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding {"wais", DEFAULT_WAIS_PORT},
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding {"nntp", DEFAULT_NNTP_PORT},
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding {"snews", DEFAULT_SNEWS_PORT},
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding {"prospero", DEFAULT_PROSPERO_PORT},
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding { NULL, 0xFFFF } /* unknown port */
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding};
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fieldingAPI_EXPORT(unsigned short) ap_default_port_for_scheme(const char *scheme_str)
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding{
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding schemes_t *scheme;
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding for (scheme = schemes; scheme->name != NULL; ++scheme)
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding if (strcasecmp(scheme_str, scheme->name) == 0)
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding return scheme->default_port;
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding return 0;
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding}
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fieldingAPI_EXPORT(unsigned short) ap_default_port_for_request(const request_rec *r)
e08076ca56e6cb68b30846b9e9339061058aae6dpoirier{
e08076ca56e6cb68b30846b9e9339061058aae6dpoirier return (r->parsed_uri.scheme)
e08076ca56e6cb68b30846b9e9339061058aae6dpoirier ? ap_default_port_for_scheme(r->parsed_uri.scheme)
e08076ca56e6cb68b30846b9e9339061058aae6dpoirier : 0;
e08076ca56e6cb68b30846b9e9339061058aae6dpoirier}
e08076ca56e6cb68b30846b9e9339061058aae6dpoirier
e08076ca56e6cb68b30846b9e9339061058aae6dpoirier/* Create a copy of a "struct hostent" record; it was presumably returned
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * from a call to gethostbyname() and lives in static storage.
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * By creating a copy we can tuck it away for later use.
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding */
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fieldingAPI_EXPORT(struct hostent *) ap_pduphostent(ap_context_t *p, const struct hostent *hp)
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding{
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding struct hostent *newent;
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding char **ptrs;
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding char **aliases;
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding struct in_addr *addrs;
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding int i = 0, j = 0;
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding if (hp == NULL)
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding return NULL;
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding /* Count number of alias entries */
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding if (hp->h_aliases != NULL)
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding for (; hp->h_aliases[j] != NULL; ++j)
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding continue;
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding /* Count number of in_addr entries */
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding if (hp->h_addr_list != NULL)
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding for (; hp->h_addr_list[i] != NULL; ++i)
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding continue;
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding
45acd673a68181802b112e97e84fa3813ddd3ec1stoddard /* Allocate hostent structure, alias ptrs, addr ptrs, addrs */
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding newent = (struct hostent *) ap_palloc(p, sizeof(*hp));
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding aliases = (char **) ap_palloc(p, (j+1) * sizeof(char*));
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding ptrs = (char **) ap_palloc(p, (i+1) * sizeof(char*));
e08076ca56e6cb68b30846b9e9339061058aae6dpoirier addrs = (struct in_addr *) ap_palloc(p, (i+1) * sizeof(struct in_addr));
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding *newent = *hp;
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding newent->h_name = ap_pstrdup(p, hp->h_name);
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding newent->h_aliases = aliases;
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding newent->h_addr_list = (char**) ptrs;
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding /* Copy Alias Names: */
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding for (j = 0; hp->h_aliases[j] != NULL; ++j) {
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding aliases[j] = ap_pstrdup(p, hp->h_aliases[j]);
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding }
1ccd992d37d62c8cb2056126f2234f64ec189bfddougm aliases[j] = NULL;
e8f95a682820a599fe41b22977010636be5c2717jim
6b38fca3ec543a0f72efd5683e91a0b30fc752d1trawick /* Copy address entries */
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding for (i = 0; hp->h_addr_list[i] != NULL; ++i) {
8a261a9f7d18d1e862d63f68e93f288d3e1f0d94trawick ptrs[i] = (char*) &addrs[i];
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding addrs[i] = *(struct in_addr *) hp->h_addr_list[i];
6b38fca3ec543a0f72efd5683e91a0b30fc752d1trawick }
6b38fca3ec543a0f72efd5683e91a0b30fc752d1trawick ptrs[i] = NULL;
58fd79b56eb624bf011772994e9761d3c2e228c1orlikowski
6b38fca3ec543a0f72efd5683e91a0b30fc752d1trawick return newent;
8a261a9f7d18d1e862d63f68e93f288d3e1f0d94trawick}
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding
6b38fca3ec543a0f72efd5683e91a0b30fc752d1trawick
785be1b6298010956622771c870ab3cd8ca57a2faaron/* pgethostbyname(): resolve hostname, if successful return an ALLOCATED
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * COPY OF the hostent structure, intended to be stored and used later.
3c290fd0361d6d9d84d97725eaf299456bddd6cfsf * (gethostbyname() uses static storage that would be overwritten on each call)
3c290fd0361d6d9d84d97725eaf299456bddd6cfsf */
6b38fca3ec543a0f72efd5683e91a0b30fc752d1trawickAPI_EXPORT(struct hostent *) ap_pgethostbyname(ap_context_t *p, const char *hostname)
e160b861b50a3a8dcc013b8cd3ef849fe777e52fgregames{
e160b861b50a3a8dcc013b8cd3ef849fe777e52fgregames struct hostent *hp = gethostbyname(hostname);
e160b861b50a3a8dcc013b8cd3ef849fe777e52fgregames return (hp == NULL) ? NULL : ap_pduphostent(p, hp);
e160b861b50a3a8dcc013b8cd3ef849fe777e52fgregames}
e160b861b50a3a8dcc013b8cd3ef849fe777e52fgregames
e160b861b50a3a8dcc013b8cd3ef849fe777e52fgregames
e160b861b50a3a8dcc013b8cd3ef849fe777e52fgregames/* Unparse a uri_components structure to an URI string.
e160b861b50a3a8dcc013b8cd3ef849fe777e52fgregames * Optionally suppress the password for security reasons.
e160b861b50a3a8dcc013b8cd3ef849fe777e52fgregames */
6b38fca3ec543a0f72efd5683e91a0b30fc752d1trawickAPI_EXPORT(char *) ap_unparse_uri_components(ap_context_t *p, const uri_components *uptr, unsigned flags)
6b38fca3ec543a0f72efd5683e91a0b30fc752d1trawick{
560f6ac786d611b858b2bad932713d9e971f0716trawick char *ret = "";
560f6ac786d611b858b2bad932713d9e971f0716trawick
560f6ac786d611b858b2bad932713d9e971f0716trawick /* If suppressing the site part, omit both user name & scheme://hostname */
6b38fca3ec543a0f72efd5683e91a0b30fc752d1trawick if (!(flags & UNP_OMITSITEPART)) {
6b38fca3ec543a0f72efd5683e91a0b30fc752d1trawick
6b38fca3ec543a0f72efd5683e91a0b30fc752d1trawick /* Construct a "user:password@" string, honoring the passed UNP_ flags: */
7bf77d70b6830636bc36e6b76a228c301be23ff7brianp if (uptr->user||uptr->password)
7bf77d70b6830636bc36e6b76a228c301be23ff7brianp ret = ap_pstrcat (p,
7bf77d70b6830636bc36e6b76a228c301be23ff7brianp (uptr->user && !(flags & UNP_OMITUSER)) ? uptr->user : "",
6b38fca3ec543a0f72efd5683e91a0b30fc752d1trawick (uptr->password && !(flags & UNP_OMITPASSWORD)) ? ":" : "",
6b38fca3ec543a0f72efd5683e91a0b30fc752d1trawick (uptr->password && !(flags & UNP_OMITPASSWORD))
6b38fca3ec543a0f72efd5683e91a0b30fc752d1trawick ? ((flags & UNP_REVEALPASSWORD) ? uptr->password : "XXXXXXXX")
6b38fca3ec543a0f72efd5683e91a0b30fc752d1trawick : "",
6b38fca3ec543a0f72efd5683e91a0b30fc752d1trawick "@", NULL);
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding /* Construct scheme://site string */
7697b1b7376a532163c621e050b70c90dcb15d66covener if (uptr->hostname) {
7c6f514f2ef9b98f58b8f8a5f534eb78a75f29f2jorton int is_default_port;
de00ec7378227d05be63ecd2053ebbb01b940023jorton
7c6f514f2ef9b98f58b8f8a5f534eb78a75f29f2jorton is_default_port =
de00ec7378227d05be63ecd2053ebbb01b940023jorton (uptr->port_str == NULL ||
de00ec7378227d05be63ecd2053ebbb01b940023jorton uptr->port == 0 ||
785be1b6298010956622771c870ab3cd8ca57a2faaron uptr->port == ap_default_port_for_scheme(uptr->scheme));
785be1b6298010956622771c870ab3cd8ca57a2faaron
066877f1a045103acfdd376d48cdd473c33f409bdougm ret = ap_pstrcat (p,
8a261a9f7d18d1e862d63f68e93f288d3e1f0d94trawick uptr->scheme, "://", ret,
185aa71728867671e105178b4c66fbc22b65ae26sf uptr->hostname ? uptr->hostname : "",
f3aa436e29aa30e29695a18b7f469dd66b39b7e4jorton is_default_port ? "" : ":",
74b39333600dee3260355ad3a06e36ef6c61c8f1dreid is_default_port ? "" : uptr->port_str,
74b39333600dee3260355ad3a06e36ef6c61c8f1dreid NULL);
74b39333600dee3260355ad3a06e36ef6c61c8f1dreid }
74b39333600dee3260355ad3a06e36ef6c61c8f1dreid }
97c78987224dcd037076d393aad1867c26b2c8cftrawick
97c78987224dcd037076d393aad1867c26b2c8cftrawick /* Should we suppress all path info? */
97c78987224dcd037076d393aad1867c26b2c8cftrawick if (!(flags & UNP_OMITPATHINFO)) {
97c78987224dcd037076d393aad1867c26b2c8cftrawick /* Append path, query and fragment strings: */
97c78987224dcd037076d393aad1867c26b2c8cftrawick ret = ap_pstrcat (p,
97c78987224dcd037076d393aad1867c26b2c8cftrawick ret,
97c78987224dcd037076d393aad1867c26b2c8cftrawick uptr->path ? uptr->path : "",
97c78987224dcd037076d393aad1867c26b2c8cftrawick (uptr->query && !(flags & UNP_OMITQUERY)) ? "?" : "",
97c78987224dcd037076d393aad1867c26b2c8cftrawick (uptr->query && !(flags & UNP_OMITQUERY)) ? uptr->query : "",
97c78987224dcd037076d393aad1867c26b2c8cftrawick (uptr->fragment && !(flags & UNP_OMITQUERY)) ? "#" : NULL,
97c78987224dcd037076d393aad1867c26b2c8cftrawick (uptr->fragment && !(flags & UNP_OMITQUERY)) ? uptr->fragment : NULL,
97c78987224dcd037076d393aad1867c26b2c8cftrawick NULL);
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding }
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding return ret;
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding}
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding/* The regex version of parse_uri_components has the advantage that it is
785be1b6298010956622771c870ab3cd8ca57a2faaron * relatively easy to understand and extend. But it has the disadvantage
785be1b6298010956622771c870ab3cd8ca57a2faaron * that the regexes are complex enough that regex libraries really
785be1b6298010956622771c870ab3cd8ca57a2faaron * don't do a great job with them performancewise.
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding *
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * The default is a hand coded scanner that is two orders of magnitude
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * faster.
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding */
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding#ifdef UTIL_URI_REGEX
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fieldingstatic regex_t re_uri;
785be1b6298010956622771c870ab3cd8ca57a2faaronstatic regex_t re_hostpart;
785be1b6298010956622771c870ab3cd8ca57a2faaron
785be1b6298010956622771c870ab3cd8ca57a2faaronvoid ap_util_uri_init(void)
785be1b6298010956622771c870ab3cd8ca57a2faaron{
785be1b6298010956622771c870ab3cd8ca57a2faaron int ret;
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding const char *re_str;
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding /* This is a modified version of the regex that appeared in
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * draft-fielding-uri-syntax-01. It doesnt allow the uri to contain a
785be1b6298010956622771c870ab3cd8ca57a2faaron * scheme but no hostinfo or vice versa.
785be1b6298010956622771c870ab3cd8ca57a2faaron *
785be1b6298010956622771c870ab3cd8ca57a2faaron * draft-fielding-uri-syntax-01.txt, section 4.4 tells us:
785be1b6298010956622771c870ab3cd8ca57a2faaron *
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * Although the BNF defines what is allowed in each component, it is
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * ambiguous in terms of differentiating between a site component and
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * a path component that begins with two slash characters.
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding *
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * RFC2068 disambiguates this for the Request-URI, which may only ever be
c5d006b2861d49c61bcf79316163e30611c6fd08trawick * the "abs_path" portion of the URI. So a request "GET //foo/bar
c5d006b2861d49c61bcf79316163e30611c6fd08trawick * HTTP/1.1" is really referring to the path //foo/bar, not the host foo,
c5d006b2861d49c61bcf79316163e30611c6fd08trawick * path /bar. Nowhere in RFC2068 is it possible to have a scheme but no
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * hostinfo or a hostinfo but no scheme. (Unless you're proxying a
7697b1b7376a532163c621e050b70c90dcb15d66covener * protocol other than HTTP, but this parsing engine probably won't work
5bfaaf573bacb45c1cf290ce85ecc676587e8a64jim * for other protocols.)
185aa71728867671e105178b4c66fbc22b65ae26sf *
5bfaaf573bacb45c1cf290ce85ecc676587e8a64jim * 12 3 4 5 6 7 8 */
5bfaaf573bacb45c1cf290ce85ecc676587e8a64jim re_str = "^(([^:/?#]+)://([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?$";
5bfaaf573bacb45c1cf290ce85ecc676587e8a64jim /* ^scheme--^ ^site---^ ^path--^ ^query^ ^frag */
7697b1b7376a532163c621e050b70c90dcb15d66covener if ((ret = regcomp(&re_uri, re_str, REG_EXTENDED)) != 0) {
e08076ca56e6cb68b30846b9e9339061058aae6dpoirier char line[1024];
e08076ca56e6cb68b30846b9e9339061058aae6dpoirier
7697b1b7376a532163c621e050b70c90dcb15d66covener /* Make a readable error message */
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding ret = regerror(ret, &re_uri, line, sizeof line);
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding ap_log_error(APLOG_MARK, APLOG_NOERRNO|APLOG_ERR, NULL,
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding "Internal error: regcomp(\"%s\") returned non-zero (%s) - "
f4b96a996afbc46872f57ad1450e6ee1c8f13707jorton "possibly due to broken regex lib! "
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding "Did you define WANTHSREGEX=yes?",
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding re_str, line);
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding exit(1);
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding }
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding /* This is a sub-RE which will break down the hostinfo part,
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * i.e., user, password, hostname and port.
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * $ 12 3 4 5 6 7 */
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding re_str = "^(([^:]*)(:(.*))?@)?([^@:]*)(:([0-9]*))?$";
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding /* ^^user^ :pw ^host^ ^:[port]^ */
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding if ((ret = regcomp(&re_hostpart, re_str, REG_EXTENDED)) != 0) {
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding char line[1024];
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding /* Make a readable error message */
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding ret = regerror(ret, &re_hostpart, line, sizeof line);
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding ap_log_error(APLOG_MARK, APLOG_NOERRNO|APLOG_ERR, NULL,
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding "Internal error: regcomp(\"%s\") returned non-zero (%s) - "
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding "possibly due to broken regex lib! "
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding "Did you define WANTHSREGEX=yes?",
785be1b6298010956622771c870ab3cd8ca57a2faaron re_str, line);
785be1b6298010956622771c870ab3cd8ca57a2faaron
785be1b6298010956622771c870ab3cd8ca57a2faaron exit(1);
785be1b6298010956622771c870ab3cd8ca57a2faaron }
785be1b6298010956622771c870ab3cd8ca57a2faaron}
785be1b6298010956622771c870ab3cd8ca57a2faaron
785be1b6298010956622771c870ab3cd8ca57a2faaron
785be1b6298010956622771c870ab3cd8ca57a2faaron/* parse_uri_components():
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * Parse a given URI, fill in all supplied fields of a uri_components
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * structure. This eliminates the necessity of extracting host, port,
1ccd992d37d62c8cb2056126f2234f64ec189bfddougm * path, query info repeatedly in the modules.
785be1b6298010956622771c870ab3cd8ca57a2faaron * Side effects:
785be1b6298010956622771c870ab3cd8ca57a2faaron * - fills in fields of uri_components *uptr
785be1b6298010956622771c870ab3cd8ca57a2faaron * - none on any of the r->* fields
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding */
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fieldingAPI_EXPORT(int) ap_parse_uri_components(ap_context_t *p, const char *uri, uri_components *uptr)
785be1b6298010956622771c870ab3cd8ca57a2faaron{
785be1b6298010956622771c870ab3cd8ca57a2faaron int ret;
785be1b6298010956622771c870ab3cd8ca57a2faaron regmatch_t match[10]; /* This must have at least as much elements
785be1b6298010956622771c870ab3cd8ca57a2faaron * as there are braces in the re_strings */
785be1b6298010956622771c870ab3cd8ca57a2faaron
785be1b6298010956622771c870ab3cd8ca57a2faaron ap_assert (uptr != NULL);
785be1b6298010956622771c870ab3cd8ca57a2faaron
785be1b6298010956622771c870ab3cd8ca57a2faaron /* Initialize the structure. parse_uri() and parse_uri_components()
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * can be called more than once per request.
1ccd992d37d62c8cb2056126f2234f64ec189bfddougm */
785be1b6298010956622771c870ab3cd8ca57a2faaron memset (uptr, '\0', sizeof(*uptr));
2d399cd7535887fceaa9f8f116eb98ce68ddd602trawick uptr->is_initialized = 1;
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding ret = ap_regexec(&re_uri, uri, re_uri.re_nsub + 1, match, 0);
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding if (ret != 0) {
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding ap_log_error(APLOG_MARK, APLOG_NOERRNO|APLOG_ERR, NULL,
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding "ap_regexec() could not parse uri (\"%s\")",
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding uri);
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding return HTTP_BAD_REQUEST;
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding }
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding if (match[2].rm_so != match[2].rm_eo)
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding uptr->scheme = ap_pstrndup (p, uri+match[2].rm_so, match[2].rm_eo - match[2].rm_so);
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding
a7ed9c525f9460187f327cea953bf90ecf1bdc51gstein /* empty hostinfo is valid, that's why we test $1 but use $3 */
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding if (match[1].rm_so != match[1].rm_eo)
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding uptr->hostinfo = ap_pstrndup (p, uri+match[3].rm_so, match[3].rm_eo - match[3].rm_so);
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding if (match[4].rm_so != match[4].rm_eo)
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding uptr->path = ap_pstrndup (p, uri+match[4].rm_so, match[4].rm_eo - match[4].rm_so);
a7ed9c525f9460187f327cea953bf90ecf1bdc51gstein
17f3ba69f65182426ad4e568bb2d6f192ccd2ed5trawick /* empty query string is valid, that's why we test $5 but use $6 */
17f3ba69f65182426ad4e568bb2d6f192ccd2ed5trawick if (match[5].rm_so != match[5].rm_eo)
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding uptr->query = ap_pstrndup (p, uri+match[6].rm_so, match[6].rm_eo - match[6].rm_so);
17f3ba69f65182426ad4e568bb2d6f192ccd2ed5trawick
17f3ba69f65182426ad4e568bb2d6f192ccd2ed5trawick /* empty fragment is valid, test $7 use $8 */
17f3ba69f65182426ad4e568bb2d6f192ccd2ed5trawick if (match[7].rm_so != match[7].rm_eo)
17f3ba69f65182426ad4e568bb2d6f192ccd2ed5trawick uptr->fragment = ap_pstrndup (p, uri+match[8].rm_so, match[8].rm_eo - match[8].rm_so);
17f3ba69f65182426ad4e568bb2d6f192ccd2ed5trawick
17f3ba69f65182426ad4e568bb2d6f192ccd2ed5trawick if (uptr->hostinfo) {
17f3ba69f65182426ad4e568bb2d6f192ccd2ed5trawick /* Parse the hostinfo part to extract user, password, host, and port */
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding ret = ap_regexec(&re_hostpart, uptr->hostinfo, re_hostpart.re_nsub + 1, match, 0);
1ccd992d37d62c8cb2056126f2234f64ec189bfddougm if (ret != 0) {
785be1b6298010956622771c870ab3cd8ca57a2faaron ap_log_error(APLOG_MARK, APLOG_NOERRNO|APLOG_ERR, NULL,
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding "ap_regexec() could not parse (\"%s\") as host part",
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding uptr->hostinfo);
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding
1ccd992d37d62c8cb2056126f2234f64ec189bfddougm return HTTP_BAD_REQUEST;
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding }
7697b1b7376a532163c621e050b70c90dcb15d66covener
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding /* $ 12 3 4 5 6 7 */
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding /* "^(([^:]*)(:(.*))?@)?([^@:]*)(:([0-9]*))?$" */
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding /* ^^user^ :pw ^host^ ^:[port]^ */
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding /* empty user is valid, that's why we test $1 but use $2 */
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding if (match[1].rm_so != match[1].rm_eo)
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding uptr->user = ap_pstrndup (p, uptr->hostinfo+match[2].rm_so, match[2].rm_eo - match[2].rm_so);
785be1b6298010956622771c870ab3cd8ca57a2faaron
785be1b6298010956622771c870ab3cd8ca57a2faaron /* empty password is valid, test $3 but use $4 */
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding if (match[3].rm_so != match[3].rm_eo)
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding uptr->password = ap_pstrndup (p, uptr->hostinfo+match[4].rm_so, match[4].rm_eo - match[4].rm_so);
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding
1ccd992d37d62c8cb2056126f2234f64ec189bfddougm /* empty hostname is valid, and implied by the existence of hostinfo */
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding uptr->hostname = ap_pstrndup (p, uptr->hostinfo+match[5].rm_so, match[5].rm_eo - match[5].rm_so);
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding if (match[6].rm_so != match[6].rm_eo) {
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding /* Note that the port string can be empty.
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * If it is, we use the default port associated with the scheme
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding */
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding uptr->port_str = ap_pstrndup (p, uptr->hostinfo+match[7].rm_so, match[7].rm_eo - match[7].rm_so);
a7ed9c525f9460187f327cea953bf90ecf1bdc51gstein if (uptr->port_str[0] != '\0') {
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding char *endstr;
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding int port;
27c5ebb7d411a214f5b6b55a881086ce086d3dd3covener
27c5ebb7d411a214f5b6b55a881086ce086d3dd3covener port = strtol(uptr->port_str, &endstr, 10);
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding uptr->port = port;
f4b96a996afbc46872f57ad1450e6ee1c8f13707jorton if (*endstr != '\0') {
17f3ba69f65182426ad4e568bb2d6f192ccd2ed5trawick /* Invalid characters after ':' found */
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding return HTTP_BAD_REQUEST;
785be1b6298010956622771c870ab3cd8ca57a2faaron }
17f3ba69f65182426ad4e568bb2d6f192ccd2ed5trawick }
17f3ba69f65182426ad4e568bb2d6f192ccd2ed5trawick else {
27c5ebb7d411a214f5b6b55a881086ce086d3dd3covener uptr->port = uptr->scheme ? ap_default_port_for_scheme(uptr->scheme) : DEFAULT_HTTP_PORT;
8fd7c5046d164fb0959222497e5925dfc6a52ff3trawick }
17f3ba69f65182426ad4e568bb2d6f192ccd2ed5trawick }
17f3ba69f65182426ad4e568bb2d6f192ccd2ed5trawick }
e8f95a682820a599fe41b22977010636be5c2717jim
27c5ebb7d411a214f5b6b55a881086ce086d3dd3covener if (ret == 0)
27c5ebb7d411a214f5b6b55a881086ce086d3dd3covener ret = HTTP_OK;
27c5ebb7d411a214f5b6b55a881086ce086d3dd3covener return ret;
5bfaaf573bacb45c1cf290ce85ecc676587e8a64jim}
27c5ebb7d411a214f5b6b55a881086ce086d3dd3covener#else
27c5ebb7d411a214f5b6b55a881086ce086d3dd3covener
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding/* Here is the hand-optimized parse_uri_components(). There are some wild
27c5ebb7d411a214f5b6b55a881086ce086d3dd3covener * tricks we could pull in assembly language that we don't pull here... like we
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * can do word-at-time scans for delimiter characters using the same technique
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * that fast memchr()s use. But that would be way non-portable. -djg
8f8ec0957334f50b7ac11359f90490ee467258eedreid */
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding
/* We have a lookup table (uri_delims) that we can index by character and it
 * tells us if the character is one of the interesting delimiters. Note that
 * we even get compares for NUL for free -- it's just another delimiter.
 */
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding
/* Delimiter class bits: one bit per "interesting" character. */
#define T_COLON    (1 << 0)    /* ':'  */
#define T_SLASH    (1 << 1)    /* '/'  */
#define T_QUESTION (1 << 2)    /* '?'  */
#define T_HASH     (1 << 3)    /* '#'  */
#define T_NUL      (1 << 7)    /* '\0' */
27c5ebb7d411a214f5b6b55a881086ce086d3dd3covener
27c5ebb7d411a214f5b6b55a881086ce086d3dd3covener/* the uri_delims.h file is autogenerated by gen_uri_delims.c */
27c5ebb7d411a214f5b6b55a881086ce086d3dd3covener#include "uri_delims.h"
5bfaaf573bacb45c1cf290ce85ecc676587e8a64jim
/* it works like this:
    if ((uri_delims[ch] & NOTEND_foobar) == 0) {
        then we're not at a delimiter for foobar
    }
*/
7c6f514f2ef9b98f58b8f8a5f534eb78a75f29f2jorton
/* Terminator masks.  NOTEND_xxx is the set of delimiter bits that end
 * component xxx; a scanner keeps advancing while
 * (uri_delims[ch] & NOTEND_xxx) == 0.
 *
 * The scheme mask is all ones: any delimiter at all ends the scheme, and
 * writing the mask as 0xff lets the compiler know that it doesn't have to
 * do the & masking (presumably because the table entries are single
 * bytes -- uri_delims.h is generated elsewhere, confirm there).
 */
#define NOTEND_PATH     (T_QUESTION | T_HASH | T_NUL)
#define NOTEND_HOSTINFO (T_SLASH | NOTEND_PATH)
#define NOTEND_SCHEME   (0xff)
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding
/* ap_util_uri_init():
 * Initialization hook for the URI utilities.  This variant of the parser
 * takes its delimiter table from the included uri_delims.h, so the hook
 * has no work to perform.
 */
void ap_util_uri_init(void)
{
    /* intentionally empty */
    return;
}
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding
7c6f514f2ef9b98f58b8f8a5f534eb78a75f29f2jorton/* parse_uri_components():
7c6f514f2ef9b98f58b8f8a5f534eb78a75f29f2jorton * Parse a given URI, fill in all supplied fields of a uri_components
785be1b6298010956622771c870ab3cd8ca57a2faaron * structure. This eliminates the necessity of extracting host, port,
785be1b6298010956622771c870ab3cd8ca57a2faaron * path, query info repeatedly in the modules.
b88f887ed5554d9050d97f9a56a89ae62bdbd906fanf * Side effects:
b88f887ed5554d9050d97f9a56a89ae62bdbd906fanf * - fills in fields of uri_components *uptr
785be1b6298010956622771c870ab3cd8ca57a2faaron * - none on any of the r->* fields
b88f887ed5554d9050d97f9a56a89ae62bdbd906fanf */
b88f887ed5554d9050d97f9a56a89ae62bdbd906fanfAPI_EXPORT(int) ap_parse_uri_components(ap_context_t *p, const char *uri, uri_components *uptr)
785be1b6298010956622771c870ab3cd8ca57a2faaron{
b88f887ed5554d9050d97f9a56a89ae62bdbd906fanf const char *s;
b88f887ed5554d9050d97f9a56a89ae62bdbd906fanf const char *s1;
785be1b6298010956622771c870ab3cd8ca57a2faaron const char *hostinfo;
785be1b6298010956622771c870ab3cd8ca57a2faaron char *endstr;
785be1b6298010956622771c870ab3cd8ca57a2faaron int port;
785be1b6298010956622771c870ab3cd8ca57a2faaron
b88f887ed5554d9050d97f9a56a89ae62bdbd906fanf /* Initialize the structure. parse_uri() and parse_uri_components()
066877f1a045103acfdd376d48cdd473c33f409bdougm * can be called more than once per request.
785be1b6298010956622771c870ab3cd8ca57a2faaron */
785be1b6298010956622771c870ab3cd8ca57a2faaron memset (uptr, '\0', sizeof(*uptr));
785be1b6298010956622771c870ab3cd8ca57a2faaron uptr->is_initialized = 1;
b88f887ed5554d9050d97f9a56a89ae62bdbd906fanf
785be1b6298010956622771c870ab3cd8ca57a2faaron /* We assume the processor has a branch predictor like most --
785be1b6298010956622771c870ab3cd8ca57a2faaron * it assumes forward branches are untaken and backwards are taken. That's
785be1b6298010956622771c870ab3cd8ca57a2faaron * the reason for the gotos. -djg
785be1b6298010956622771c870ab3cd8ca57a2faaron */
785be1b6298010956622771c870ab3cd8ca57a2faaron if (uri[0] == '/') {
785be1b6298010956622771c870ab3cd8ca57a2faarondeal_with_path:
785be1b6298010956622771c870ab3cd8ca57a2faaron /* we expect uri to point to first character of path ... remember
785be1b6298010956622771c870ab3cd8ca57a2faaron * that the path could be empty -- http://foobar?query for example
785be1b6298010956622771c870ab3cd8ca57a2faaron */
b6d9e9d6421b9cebfc74f9c1a870b8b85473f1c1poirier s = uri;
b6d9e9d6421b9cebfc74f9c1a870b8b85473f1c1poirier while ((uri_delims[*(unsigned char *)s] & NOTEND_PATH) == 0) {
b6d9e9d6421b9cebfc74f9c1a870b8b85473f1c1poirier ++s;
b6d9e9d6421b9cebfc74f9c1a870b8b85473f1c1poirier }
b6d9e9d6421b9cebfc74f9c1a870b8b85473f1c1poirier if (s != uri) {
b6d9e9d6421b9cebfc74f9c1a870b8b85473f1c1poirier uptr->path = ap_pstrndup(p, uri, s - uri);
b6d9e9d6421b9cebfc74f9c1a870b8b85473f1c1poirier }
b6d9e9d6421b9cebfc74f9c1a870b8b85473f1c1poirier if (*s == 0) {
b6d9e9d6421b9cebfc74f9c1a870b8b85473f1c1poirier return HTTP_OK;
b6d9e9d6421b9cebfc74f9c1a870b8b85473f1c1poirier }
b6d9e9d6421b9cebfc74f9c1a870b8b85473f1c1poirier if (*s == '?') {
b6d9e9d6421b9cebfc74f9c1a870b8b85473f1c1poirier ++s;
b6d9e9d6421b9cebfc74f9c1a870b8b85473f1c1poirier s1 = strchr(s, '#');
b6d9e9d6421b9cebfc74f9c1a870b8b85473f1c1poirier if (s1) {
b6d9e9d6421b9cebfc74f9c1a870b8b85473f1c1poirier uptr->fragment = ap_pstrdup(p, s1 + 1);
b6d9e9d6421b9cebfc74f9c1a870b8b85473f1c1poirier uptr->query = ap_pstrndup(p, s, s1 - s);
b6d9e9d6421b9cebfc74f9c1a870b8b85473f1c1poirier }
b6d9e9d6421b9cebfc74f9c1a870b8b85473f1c1poirier else {
b6d9e9d6421b9cebfc74f9c1a870b8b85473f1c1poirier uptr->query = ap_pstrdup(p, s);
b6d9e9d6421b9cebfc74f9c1a870b8b85473f1c1poirier }
b88f887ed5554d9050d97f9a56a89ae62bdbd906fanf return HTTP_OK;
b88f887ed5554d9050d97f9a56a89ae62bdbd906fanf }
b88f887ed5554d9050d97f9a56a89ae62bdbd906fanf /* otherwise it's a fragment */
b88f887ed5554d9050d97f9a56a89ae62bdbd906fanf uptr->fragment = ap_pstrdup(p, s + 1);
b88f887ed5554d9050d97f9a56a89ae62bdbd906fanf return HTTP_OK;
b88f887ed5554d9050d97f9a56a89ae62bdbd906fanf }
b88f887ed5554d9050d97f9a56a89ae62bdbd906fanf
b88f887ed5554d9050d97f9a56a89ae62bdbd906fanf /* find the scheme: */
066877f1a045103acfdd376d48cdd473c33f409bdougm s = uri;
7697b1b7376a532163c621e050b70c90dcb15d66covener while ((uri_delims[*(unsigned char *)s] & NOTEND_SCHEME) == 0) {
7697b1b7376a532163c621e050b70c90dcb15d66covener ++s;
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding }
785be1b6298010956622771c870ab3cd8ca57a2faaron /* scheme must be non-empty and followed by :// */
785be1b6298010956622771c870ab3cd8ca57a2faaron if (s == uri || s[0] != ':' || s[1] != '/' || s[2] != '/') {
785be1b6298010956622771c870ab3cd8ca57a2faaron goto deal_with_path; /* backwards predicted taken! */
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding }
7697b1b7376a532163c621e050b70c90dcb15d66covener
7697b1b7376a532163c621e050b70c90dcb15d66covener uptr->scheme = ap_pstrndup(p, uri, s - uri);
7697b1b7376a532163c621e050b70c90dcb15d66covener s += 3;
7697b1b7376a532163c621e050b70c90dcb15d66covener hostinfo = s;
b88f887ed5554d9050d97f9a56a89ae62bdbd906fanf while ((uri_delims[*(unsigned char *)s] & NOTEND_HOSTINFO) == 0) {
b88f887ed5554d9050d97f9a56a89ae62bdbd906fanf ++s;
b88f887ed5554d9050d97f9a56a89ae62bdbd906fanf }
7697b1b7376a532163c621e050b70c90dcb15d66covener uri = s; /* whatever follows hostinfo is start of uri */
b88f887ed5554d9050d97f9a56a89ae62bdbd906fanf uptr->hostinfo = ap_pstrndup(p, hostinfo, uri - hostinfo);
7697b1b7376a532163c621e050b70c90dcb15d66covener
7697b1b7376a532163c621e050b70c90dcb15d66covener /* If there's a username:password@host:port, the @ we want is the last @...
7697b1b7376a532163c621e050b70c90dcb15d66covener * too bad there's no memrchr()... For the C purists, note that hostinfo
b88f887ed5554d9050d97f9a56a89ae62bdbd906fanf * is definately not the first character of the original uri so therefore
7697b1b7376a532163c621e050b70c90dcb15d66covener * &hostinfo[-1] < &hostinfo[0] ... and this loop is valid C.
785be1b6298010956622771c870ab3cd8ca57a2faaron */
785be1b6298010956622771c870ab3cd8ca57a2faaron do {
b88f887ed5554d9050d97f9a56a89ae62bdbd906fanf --s;
b88f887ed5554d9050d97f9a56a89ae62bdbd906fanf } while (s >= hostinfo && *s != '@');
7697b1b7376a532163c621e050b70c90dcb15d66covener if (s < hostinfo) {
7697b1b7376a532163c621e050b70c90dcb15d66covener /* again we want the common case to be fall through */
7697b1b7376a532163c621e050b70c90dcb15d66covenerdeal_with_host:
7697b1b7376a532163c621e050b70c90dcb15d66covener /* We expect hostinfo to point to the first character of
5bfaaf573bacb45c1cf290ce85ecc676587e8a64jim * the hostname. If there's a port it is the first colon.
7697b1b7376a532163c621e050b70c90dcb15d66covener */
5bfaaf573bacb45c1cf290ce85ecc676587e8a64jim s = memchr(hostinfo, ':', uri - hostinfo);
5bfaaf573bacb45c1cf290ce85ecc676587e8a64jim if (s == NULL) {
7697b1b7376a532163c621e050b70c90dcb15d66covener /* we expect the common case to have no port */
7697b1b7376a532163c621e050b70c90dcb15d66covener uptr->hostname = ap_pstrndup(p, hostinfo, uri - hostinfo);
7697b1b7376a532163c621e050b70c90dcb15d66covener goto deal_with_path;
7697b1b7376a532163c621e050b70c90dcb15d66covener }
7697b1b7376a532163c621e050b70c90dcb15d66covener uptr->hostname = ap_pstrndup(p, hostinfo, s - hostinfo);
5bfaaf573bacb45c1cf290ce85ecc676587e8a64jim ++s;
7697b1b7376a532163c621e050b70c90dcb15d66covener uptr->port_str = ap_pstrndup(p, s, uri - s);
7697b1b7376a532163c621e050b70c90dcb15d66covener if (uri != s) {
7697b1b7376a532163c621e050b70c90dcb15d66covener port = strtol(uptr->port_str, &endstr, 10);
7697b1b7376a532163c621e050b70c90dcb15d66covener uptr->port = port;
7697b1b7376a532163c621e050b70c90dcb15d66covener if (*endstr == '\0') {
7697b1b7376a532163c621e050b70c90dcb15d66covener goto deal_with_path;
7697b1b7376a532163c621e050b70c90dcb15d66covener }
7697b1b7376a532163c621e050b70c90dcb15d66covener /* Invalid characters after ':' found */
5bfaaf573bacb45c1cf290ce85ecc676587e8a64jim return HTTP_BAD_REQUEST;
7697b1b7376a532163c621e050b70c90dcb15d66covener }
7697b1b7376a532163c621e050b70c90dcb15d66covener uptr->port = ap_default_port_for_scheme(uptr->scheme);
7697b1b7376a532163c621e050b70c90dcb15d66covener goto deal_with_path;
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding }
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding /* first colon delimits username:password */
ebc18d48bea83ee5ed7a1b4e30007e5192539829wrowe s1 = memchr(hostinfo, ':', s - hostinfo);
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding if (s1) {
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding uptr->user = ap_pstrndup(p, hostinfo, s1 - hostinfo);
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding ++s1;
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding uptr->password = ap_pstrndup(p, s1, s - s1);
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding }
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding else {
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding uptr->user = ap_pstrndup(p, hostinfo, s - hostinfo);
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding }
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding hostinfo = s + 1;
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding goto deal_with_host;
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding}
785be1b6298010956622771c870ab3cd8ca57a2faaron
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding/* Special case for CONNECT parsing: it comes with the hostinfo part only */
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding/* See the INTERNET-DRAFT document "Tunneling SSL Through a WWW Proxy"
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * currently at http://www.mcom.com/newsref/std/tunneling_ssl.html
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * for the format of the "CONNECT host:port HTTP/1.0" request
785be1b6298010956622771c870ab3cd8ca57a2faaron */
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fieldingAPI_EXPORT(int) ap_parse_hostinfo_components(ap_context_t *p, const char *hostinfo, uri_components *uptr)
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding{
f4b96a996afbc46872f57ad1450e6ee1c8f13707jorton const char *s;
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding char *endstr;
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding /* Initialize the structure. parse_uri() and parse_uri_components()
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * can be called more than once per request.
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding */
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding memset (uptr, '\0', sizeof(*uptr));
9ea14ade0d235bec11e6c221b888a6630a0be849covener uptr->is_initialized = 1;
785be1b6298010956622771c870ab3cd8ca57a2faaron uptr->hostinfo = ap_pstrdup(p, hostinfo);
785be1b6298010956622771c870ab3cd8ca57a2faaron
785be1b6298010956622771c870ab3cd8ca57a2faaron /* We expect hostinfo to point to the first character of
900127764fb985c340ee4979cac97146a330c694trawick * the hostname. There must be a port, separated by a colon
9ea14ade0d235bec11e6c221b888a6630a0be849covener */
7697b1b7376a532163c621e050b70c90dcb15d66covener s = strchr(hostinfo, ':');
785be1b6298010956622771c870ab3cd8ca57a2faaron if (s == NULL) {
9ea14ade0d235bec11e6c221b888a6630a0be849covener return HTTP_BAD_REQUEST;
9ea14ade0d235bec11e6c221b888a6630a0be849covener }
9ea14ade0d235bec11e6c221b888a6630a0be849covener uptr->hostname = ap_pstrndup(p, hostinfo, s - hostinfo);
9ea14ade0d235bec11e6c221b888a6630a0be849covener ++s;
9ea14ade0d235bec11e6c221b888a6630a0be849covener uptr->port_str = ap_pstrdup(p, s);
9ea14ade0d235bec11e6c221b888a6630a0be849covener if (*s != '\0') {
9ea14ade0d235bec11e6c221b888a6630a0be849covener uptr->port = strtol(uptr->port_str, &endstr, 10);
5bfaaf573bacb45c1cf290ce85ecc676587e8a64jim if (*endstr == '\0') {
7697b1b7376a532163c621e050b70c90dcb15d66covener return HTTP_OK;
7697b1b7376a532163c621e050b70c90dcb15d66covener }
7697b1b7376a532163c621e050b70c90dcb15d66covener /* Invalid characters after ':' found */
785be1b6298010956622771c870ab3cd8ca57a2faaron }
785be1b6298010956622771c870ab3cd8ca57a2faaron return HTTP_BAD_REQUEST;
785be1b6298010956622771c870ab3cd8ca57a2faaron}
9ea14ade0d235bec11e6c221b888a6630a0be849covener#endif
785be1b6298010956622771c870ab3cd8ca57a2faaron