mod_charset_lite.c revision 3fde4c273ea649d1320ec9c51e7d096cd9340a94
2469N/A/* ==================================================================== 2469N/A * The Apache Software License, Version 1.1 2469N/A * Copyright (c) 2000-2001 The Apache Software Foundation. All rights 2469N/A * Redistribution and use in source and binary forms, with or without 2469N/A * modification, are permitted provided that the following conditions 2469N/A * 1. Redistributions of source code must retain the above copyright 2469N/A * notice, this list of conditions and the following disclaimer. 2469N/A * 2. Redistributions in binary form must reproduce the above copyright 2469N/A * notice, this list of conditions and the following disclaimer in 2469N/A * the documentation and/or other materials provided with the 2469N/A * 3. The end-user documentation included with the redistribution, 2469N/A * if any, must include the following acknowledgment: 2469N/A * "This product includes software developed by the 2469N/A * Alternately, this acknowledgment may appear in the software itself, * if and wherever such third-party acknowledgments normally appear. * 4. The names "Apache" and "Apache Software Foundation" must * not be used to endorse or promote products derived from this * software without prior written permission. For written * permission, please contact apache@apache.org. * 5. Products derived from this software may not be called "Apache", * nor may "Apache" appear in their name, without prior written * permission of the Apache Software Foundation. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * ==================================================================== * This software consists of voluntary contributions made by many * individuals on behalf of the Apache Software Foundation. For more * information on the Apache Software Foundation, please see * Portions of this software are based upon public domain software * originally written at the National Center for Supercomputing Applications, * University of Illinois, Urbana-Champaign. * simple hokey charset recoding configuration module * See mod_ebcdic and mod_charset for more thought-out examples. This * one is just so Jeff can learn how a module works and experiment with * basic character set recoding configuration. * !!!This is an extremely cheap ripoff of mod_charset.c from Russian Apache!!! * space left in the translation buffer #
define FATTEST_CHAR 8 /* we don't handle chars wider than this that straddle/* extended error status codes; this is used in addition to an apr_status_t to * track errors in the translation filter EES_INIT = 0,
/* no error info yet; value must be 0 for easy init */ EES_LIMIT,
/* built-in restriction encountered */ /* registered name of the output translation filter */ /* registered name of input translation filter */ /** debug level; -1 means uninitialized, 0 means no debug */ /** module does ap_add_*_filter()? */ /* charset_filter_ctx_t is created for each filter instance; because the same * filter code is used for translating in both directions, we need this context * data to tell the filter which translation handle to use; it also can hold a * character which was split between buckets char buf[
FATTEST_CHAR];
/* we want to be able to build a complete char here */ int ran;
/* has filter instance run before? */ int noop;
/* should we pass brigades through unchanged? */ char *
tmp;
/* buffer for input filtering */ /* charset_req_t is available via r->request_config if any translation is /* debug level definitions */ #
define DBGLVL_FLOW 4 /* enough messages to see what happens on#
define DBGLVL_PMC 2 /* messages about possible misconfiguration */ /* If it is defined in the current container, use it. Otherwise, use the one * from the enclosing container. /* CharsetSourceEnc charset /* CharsetDefault charset /* CharsetOptions optionflag... "Invalid CharsetOptions option: ",
/* find_code_page() is a fixup hook that decides if translation should be * enabled; if so, it sets up request data for use by the filter registration * hook so that it knows what to do "uri: %s file: %s method: %d " "imt: %s flags: %s%s%s %s->%s",
r->
main ?
"S" :
"",
/* S if subrequest */ r->
prev ?
"R" :
"",
/* R if redirect */ r->
proxyreq ?
"P" :
"",
/* P if proxy */ /* If we don't have a full directory configuration, bail out. "incomplete configuration: src %s, dst %s",
/* catch proxy requests */ /* mod_rewrite indicators */ /* If mime type isn't text or message, bail out. /* XXX When we handle translation of the request body, watch out here as * 1.3 allowed additional mime types: multipart and /* On an EBCDIC machine, be willing to translate mod_autoindex- * generated output. Otherwise, it doesn't look too cool. * XXX This isn't a perfect fix because this doesn't trigger us * to convert from the charset of the source code to ASCII. The * general solution seems to be to allow a generator to set an * indicator in the r specifying that the body is coded in the * implementation character set (i.e., the charset of the source * code). This would get several different types of documents * translated properly: mod_autoindex output, mod_status output, * mod_info output, hard-coded error documents, etc. "mime type is %s; no translation selected",
"charset_source: %s charset_default: %s",
/* Get storage for the request data and the output filter context. * We rarely need the input filter context, so allocate that separately. "can't open translation %s->%s",
/* Set up input translation. Note: A request body can be included * with the OPTIONS method, but for now we don't set up translation "can't open translation %s->%s",
/* xlate_insert_filter() is a filter hook which decides whether or not * to insert a translation filter for the current request. /* Hey... don't be so quick to use reqinfo->dc here; reqinfo may be NULL */ "xlate output filter not added implicitly because %s",
"no output configuration available" :
"another module added the filter");
"xlate input filter not added implicitly because %s",
"no input configuration available" :
"another module added the filter");
/* stuff that sucks that I know of: * why create an eos bucket when we see it come down the stream? just send the one * passed as input... news flash: this will be fixed when xlate_out_filter() starts * using the more generic xlate_brigade() * we don't handle characters that straddle more than two buckets; an error /* send_downstream() is passed the translated data; it puts it in a single- * bucket brigade and passes the brigade to the next filter /* Keep adding bytes from the input string to the saved string until we * 1) finish the input char * or 3) run out of bytes to add * straddling more than two buckets msg =
"xlate filter - a built-in restriction was encountered";
msg =
"xlate filter - an input character was invalid";
msg =
"xlate filter - bucket read routine failed";
strcpy(
msgbuf,
"xlate filter - incomplete char at end of input - ");
msg =
"xlate filter - an error occurred in a lower filter";
msg =
"xlate filter - returning error";
/* chk_filter_chain() is called once per filter instance; it tries to * determine if the current filter instance should be disabled because * its translation is incompatible with the translation of an existing * instance of the translate filter * configured filter chain for the request: * INCLUDES XLATEOUT(8859-1->UTS-16) * configured filter chain for the subrequest: * XLATEOUT(8859-1->UTS-16) * When the subrequest is processed, the filter chain will be * XLATEOUT(8859-1->UTS-16) XLATEOUT(8859-1->UTS-16) * This makes no sense, so the instance of XLATEOUT added for the * subrequest will be noop-ed. * configured filter chain for the request: * INCLUDES XLATEOUT(8859-1->UTS-16) * configured filter chain for the subrequest: * XLATEOUT(IBM-1047->8859-1) * When the subrequest is processed, the filter chain will be * XLATEOUT(IBM-1047->8859-1) XLATEOUT(8859-1->UTS-16) * This makes sense, so the instance of XLATEOUT added for the * subrequest will be left alone and it will translate from /* walk the filter chain; see if it makes sense for our filter to /* incompatible translation * if our filter instance is incompatible with an instance * already in place, noop our instance * . We are only willing to noop our own instance. * . It is possible to noop another instance which has not * yet run, but this is not currently implemented. * Hopefully it will not be needed. * . It is not possible to noop an instance which has "translation %s%s%s; existing " f->r->
uri ?
"uri" :
"file",
"chk_filter_chain() - can't disable " "translation %s%s%s; existing " /* xlate_brigade() is used to filter request and response bodies * we'll stop when one of the following occurs: * . we run out of buckets * . we run out of space in the output buffer * buffer: storage to hold the translated characters * buffer_size: size of buffer * (and a few more uninteresting parms) * return value: APR_SUCCESS or some error code * bb: we've removed any buckets representing the * translated characters; the eos bucket, if * present, will be left in the brigade * buffer: filled in with translated characters * buffer_size: updated with the bytes remaining * hit_eos: did we hit an EOS bucket? if (!
bucket_avail) {
/* no bytes left to process in the current bucket... */ /* We've got data, so translate it. */ /* Rats... we need to finish a partial character from the previous * Strangely, finish_partial_char() increments the input buffer * pointer but does not increment the output buffer pointer. /* We need to save the final byte(s) for next time; we can't * convert it until we look at the next bucket. /* bad input byte or partial char too big to store */ /* if any data remains in the current bucket, split there */ /* Leave the eos bucket in the brigade for reporting to /* Oops... we have a partial char from the previous bucket * that won't be completed because there's no more data. /* xlate_out_filter() handles (almost) arbitrary conversions from one charset * translation is determined in the fixup hook (find_code_page), which is * where the filter's context data is set up... the context data gives us /* this is SetOutputFilter path; grab the preallocated context, * if any; note that if we decided not to do anything in an earlier * handler, we won't even have a reqinfo * in the filter chain; we can't have two * instances using the same context if (!
ctx) {
/* no idea how to translate; don't do anything */ "charset_source: %s charset_default: %s",
if (!
ctx->
ran) {
/* filter never ran before */ if (!
cur_len) {
/* no bytes left to process in the current bucket... */ cur_len = -
1;
/* XXX yuck, but that tells us to send * eos down; when we minimize our bb construction /* Oops... we have a partial char from the previous bucket * that won't be completed because there's no more data. /* Try to fill up our tmp buffer with translated data. */ if (
cur_len) {
/* maybe we just hit the end of a pipe (len = 0) ? */ /* Rats... we need to finish a partial character from the previous /* Update input ptr and len after consuming some bytes */ /* We need to save the final byte(s) for next time; we can't * convert it until we look at the next bucket. /* bad input byte or partial char too big to store */ /* It is time to flush, as there is not enough space left in the * current output buffer to bother with converting more data. if (
space_avail <
sizeof(
tmp)) {
/* gotta write out what we converted */ /* this is SetInputFilter path; grab the preallocated context, * if any; note that if we decided not to do anything in an earlier * handler, we won't even have a reqinfo * in the filter chain; we can't have two * instances using the same context if (!
ctx) {
/* no idea how to translate; don't do anything */ "charset_source: %s charset_default: %s",
if (!
ctx->
ran) {
/* filter never ran before */ /* move anything leftover into our context for next time; * we don't currently "set aside" since the data came from * down below, but I suspect that for long-term we need to /* make sure we insert at the head, because there may be * an eos bucket already there, and the eos bucket should /* XXX need to get some more data... what if the last brigade * we got had only the first byte of a multibyte char? we need * to grab more data from the network instead of returning an "source (html,cgi,ssi) file charset"),
"name of default charset"),
"valid options: ImplicitAdd, NoImplicitAdd, DebugLevel=n"),