mod_proxy_http.c revision 0ab36076c862846bb33365b35614ae9eb2f13535
402N/A/* Copyright 1999-2005 The Apache Software Foundation or its licensors, as 402N/A * Licensed under the Apache License, Version 2.0 (the "License"); 402N/A * you may not use this file except in compliance with the License. 402N/A * You may obtain a copy of the License at 402N/A * Unless required by applicable law or agreed to in writing, software 402N/A * distributed under the License is distributed on an "AS IS" BASIS, 402N/A * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 402N/A * See the License for the specific language governing permissions and 402N/A * limitations under the License. 402N/A/* HTTP routines for Apache proxy */ 402N/A * Canonicalise http-like URLs. 402N/A * scheme is the scheme for the URL 402N/A * url is the URL starting with the first '/' 402N/A * def_port is the default port for this scheme. /* ap_port_of_scheme() */ "proxy: HTTP: canonicalising URL %s",
url);
* We break the URL into host, port, path, search "error parsing URL %s: %s",
/* now parse path/search args, according to rfc1738 */ /* N.B. if this isn't a true proxy request, then the URL _path_ * has already been decoded. True proxy requests have r->uri * == r->unparsed_uri, and no others have that property. /* Clear all connection-based headers from the incoming headers table */ const char te_hdr[] =
"Transfer-Encoding: chunked" CRLF;
/* add empty line at the end of the headers */ "proxy: pass request data failed to %pI (%s)",
char chunk_hdr[
20];
/* must be here due to transient bucket. */ /* If this brigade contains EOS, either stop or remove it. */ /* As a shortcut, if this brigade is simply an EOS bucket, * don't send anything down the filter chain. /* We can't pass this EOS to the output_filters. */ * Append the end-of-chunk CRLF /* we never sent the header brigade, so go ahead and /* we never sent the header brigade because there was no request body; * send it now without T-E /* input brigade still has an EOS which we can't pass to the output_filters. */ /* If this brigade contains EOS, either stop or remove it. */ /* As a shortcut, if this brigade is simply an EOS bucket, * don't send anything down the filter chain. /* We can't pass this EOS to the output_filters. */ /* we never sent the header brigade, so go ahead and /* we never sent the header brigade since there was no request * body; send it now, and only specify C-L if client specified /* need to flush any pending data */ b =
input_brigade;
/* empty now; pass_brigade() will add flush */ /* If this brigade contains EOS, either stop or remove it. */ /* As a shortcut, if this brigade is simply an EOS bucket, * don't send anything down the filter chain. /* We can't pass this EOS to the output_filters. */ /* can't spool any more in memory; write latest brigade to disk; * what we read into memory before reaching our threshold will * remain there; we just write this and any subsequent data to disk "proxy: search for temporary directory failed");
"proxy: creation of temporary file in directory %s failed",
"proxy: write to temporary file %s failed",
/* For platforms where the size of the file may be larger than * that which can be stored in a single bucket (where the * length field is an apr_size_t), split it into several int cl_zero;
/* client sent "Content-Length: 0", which we forward on to server */ /* send CL or use chunked encoding? * . CL is the most friendly to the origin server since it is the * . CL stinks if we don't know the length since we have to buffer * the data in memory or on disk until we get the entire data * special cases to check for: * . if we're using HTTP/1.0 to origin server, then we must send CL * . if client sent C-L and there are no input resource filters, the * the body size can't change so we send the same CL and stream the * . if client used chunked or proxy-sendchunks is set, we'll use * we have to compute content length by reading the entire request * body; if request body is not small, we'll spool the remaining input * special envvars to override the normal decision: * use chunked encoding; not compatible with force-proxy-request-1.0 * spool the request body to compute C-L * . proxy-sendunchangedcl * use C-L from client and spool the request body * Send the HTTP/1.1 request to the remote server /* strip connection listed hop-by-hop headers from the request */ /* even though in theory a connection: close coming from the client * should not affect the connection to the server, it's unlikely * that subsequent client requests will hit this thread/process, so * we cancel server keepalive if the client does. /* sub-requests never use keepalives */ /* By default, we can not send chunks. That means we must buffer * the entire request before sending it along to ensure we have * the correct Content-Length attached. A special case is when * the client specifies Content-Length and there are no filters * which muck with the request body. In that situation, we don't * have to buffer the entire request and can still send the * Content-Length. Another special case is when the client * specifies a C-L of 0. Pass that through as well. 
/* don't want to use r->hostname, as the incoming header might have a "proxy: no HTTP 0.9 request (with no host line) " "on incoming request and preserve host set " "forcing hostname to be %s for uri %s",
/* Block all outgoing Via: headers */ /* If USE_CANONICAL_NAME_OFF was configured for the proxy virtual host, * then the server name returned by ap_get_server_name() is the * origin server name (which does not make much sense with Via: headers) * so we use the proxy vhost's name instead. /* Create a "Via:" request header entry and merge it */ /* Generate outgoing Via: header with/without server comment: */ /* X-Forwarded-*: handling * These request headers are only really useful when the mod_proxy * is used in a reverse proxy configuration, so that useful info * about the client can be passed through the reverse proxy and on * to the backend server, which may require the information to * In a forward proxy situation, these options are a potential * privacy violation, as information about clients behind the proxy * is revealed to arbitrary servers out there on the internet. * The HTTP/1.1 Via: header is designed for passing client * information through proxies to a server, and should be used in * a forward proxy configuration instead of X-Forwarded-*. See the * ProxyVia option for details. /* Add X-Forwarded-For: so that the upstream has a chance to * determine where the original request came from. /* Add X-Forwarded-Host: so that upstream knows what the * original request hostname was. /* Add X-Forwarded-Server: so that upstream knows what the * name of this proxy server is (if there are more than one) * XXX: This duplicates Via: - do we strictly need it? /* send request headers */ /* Clear out hop-by-hop request headers not to send * RFC2616 13.5.1 says we should strip these headers /* We'll add appropriate Content-Length later, if appropriate. /* XXX: @@@ FIXME: "Proxy-Authorization" should *only* be * suppressed if THIS server requested the authentication, * not when a frontend proxy requested it! * The solution to this problem is probably to strip out * the Proxy-Authorization header in the authorisation * code itself, not here. 
This saves us having to signal * somehow whether this request was authenticated or not. /* send the request data, if any. */ "proxy: pass request data failed to %pI (%s)",
= {
"Date",
"Expires",
"Last-Modified",
NULL } ;
* Note: pread_len is the length of the response that we've mistakenly * read (assuming that we don't consider that an error via * ProxyBadHeader StartBody). This depends on buffer actually being * local storage to the calling code in order for pread_len to make * any sense at all, since we depend on buffer still containing * what was read by ap_getline() upon return. * Read header lines until we get the empty separator line, a read error, * the connection closes (EOF), or we timeout. /* We may encounter invalid headers, usually from buggy * MS IIS servers, so we need to determine just how to handle * them. We can either ignore them, assume that they mark the * start-of-body (eg: a missing CRLF) or (the default) mark * the headers as totally bogus and return a 500. The sole * exception is an extra "HTTP/1.0 200, OK" line sprinkled * in between the usual MIME headers, which is a favorite /* XXX: The mask check is buggy if we ever see an HTTP/1.10 */ /* Nope, it wasn't even an extra HTTP header. Give up. */ /* if we've already started loading headers_out, then * return what we've accumulated so far, in the hopes * that they are useful; also note that we likely pre-read * the first line of the response. "proxy: Starting body due to bogus non-header in headers " "proxy: No HTTP headers " /* this is the psc->badopt == bad_ignore case */ "proxy: Ignoring bogus HTTP header " /* XXX: RFC2068 defines only SP and HT as whitespace, this test is * wrong... and so are many others probably. ++
value;
/* Skip to start of value */ /* should strip trailing whitespace as well */ /* make sure we add so as not to destroy duplicated headers * Modify headers requiring canonicalisation and/or affected * by ProxyPassReverse and family with process_proxy_header /* the header was too long; at the least we should skip extra data */ /* soak up the extra data */ if (
len == 0)
/* time to exit the larger loop as well */ /* Get response from the remote server, and pass it up the /* In case anyone needs to know, this is a fake request that is really a /* handle one potential stray CRLF */ "proxy: error reading status line from remote " "Error reading from remote server");
/* XXX: Is this a real header length sent from the remote? */ /* Is it an HTTP/1 response? * This is buggy if we ever see an HTTP/1.10 /* If not an HTTP/1 message or * if the status line was > 8192 bytes apr_pstrcat(p,
"Corrupt status line returned by remote " /* 2616 requires the space in Status-Line; the origin * server may have sent one but ap_rgetline_core will /* N.B. for HTTP/1.0 clients, we have to fold line-wrapped headers */ /* Also, take care with headers with multiple occurrences. */ /* First, tuck away all already existing cookies */ /* shove the headers direct into r->headers_out */ r->
server,
"proxy: bad HTTP/%d.%d header " * ap_send_error relies on a headers_out to be present. We * are in a bad position here, so force everything we send out * to have nothing to do with the incoming packet /* Now, add in the just read cookies */ /* and now load 'em all in */ /* strip connection listed hop-by-hop headers from response */ /* handle Via header in response */ /* If USE_CANONICAL_NAME_OFF was configured for the proxy virtual host, * then the server name returned by ap_get_server_name() is the * origin server name (which does not make much sense with Via: headers) * so we use the proxy vhost's name instead. /* create a "Via:" response header entry and merge it */ /* cancel keepalive if HTTP/1.0 or less */ /* an http/0.9 response */ "proxy: HTTP: received interim %d response",
/* Moved the fixups of Date headers and those affected by const char *
wa =
"WWW-Authenticate";
"proxy: origin server sent 401 without WWW-Authenticate header");
* Is it an HTTP/0.9 response or did we maybe preread the 1st line of * the response? If so, load the extra data. These are 2 mutually * exclusive possibilities, that just happen to require very * At this point in response processing of a 0.9 response, * we don't know yet whether data is binary or not. * mod_charset_lite will get control later on, so it cannot * decide on the conversion of this buffer full of data. * However, chances are that we are not really talking to an * HTTP/0.9 server, but to some different protocol, therefore * the best guess IMHO is to always treat the buffer as "text/x": /* send body - but only if a body is expected */ /* We need to copy the output headers and treat them as input * headers as well. BUT, we need to do this before we remove * TE, so that they are preserved accordingly for * ap_http_filter to know where to end. "proxy: start body send");
* if we are overriding the errors, we can't put the content * of the page into the brigade /* read the body, pass it to the output filters */ /* ap_get_brigade will return success with an empty brigade * for a non-blocking read which would block: */ /* flush to the client and switch to blocking mode */ "proxy: error reading response");
/* next time try a non-blocking read */ r->
server,
"proxy (PID %d): readbytes: %#x",
/* found the last brigade? */ /* if this is the last brigade, cleanup the * backend connection first to prevent the * backend server from hanging around waiting * for a slow client to eat these bytes /* signal that we must leave */ /* try send what we read */ /* Ack! Phbtt! Die! User aborted! */ /* make sure we always clean up after ourselves */ /* the code above this checks for 'OK' which is what the hook expects */ /* clear r->status for override error, otherwise ErrorDocument * thinks that this is a recursive error, and doesn't find the /* Discard body, if one is expected */ /* If there are no KeepAlives, or if the connection has been signalled * to close, close the socket and clean up /* if the connection is < HTTP/1.1, or Connection: close, * we close the socket, otherwise we leave it open for KeepAlive support * This handles http:// URLs, and other URLs using a remote proxy over http * If proxyhost is NULL, then contact the server directly, otherwise * Note that if a proxy is used, then URLs other than http: can be accessed, * also, if we have trouble which is clearly specific to the proxy, then * we return DECLINED so that we can try another proxy. (Or the direct /* Note: Memory pool allocation. * A downstream keepalive connection is always connected to the existence * (or not) of an upstream keepalive connection. If this is not done then * load balancing against multiple backend servers breaks (one backend * server ends up taking 100% of the load), and the risk is run of * downstream keepalive connections being kept open unnecessarily. This * keeps webservers busy and ties up resources. * As a result, we allocate all sockets out of the upstream connection * pool, and when we want to reuse a socket, we check first whether the * connection ID of the current upstream connection is the same as that * of the connection when the socket was opened. if (u ==
NULL || u[
1] !=
'/' || u[
2] !=
'/' || u[
3] ==
'\0')
/* scheme is lowercase */ "proxy: HTTPS: declining URL %s" " (mod_ssl not configured?)",
url);
"proxy: HTTP: declining URL %s",
url);
return DECLINED;
/* only interested in HTTP, or FTP via proxy */ "proxy: HTTP: serving URL %s",
url);
/* only use stored info for top-level pages. Sub requests don't share /* create space for state information */ /* Step One: Determine Who To Connect To */ /* Step Two: Make the Connection */ /* Step Three: Create conn_rec */ /* Step Four: Send the Request */ /* Step Five: Receive the Response */ NULL,
/* create per-directory config structure */ NULL,
/* merge per-directory config structures */ NULL,
/* create per-server config structure */ NULL,
/* merge per-server config structures */ NULL,
/* command apr_table_t */