mod_include.c revision d4f351074a8f7af5e41aa0a70410816436608e3d
0N/A/* ==================================================================== 594N/A * The Apache Software License, Version 1.1 0N/A * Copyright (c) 2000-2001 The Apache Software Foundation. All rights 0N/A * Redistribution and use in source and binary forms, with or without 0N/A * modification, are permitted provided that the following conditions 0N/A * 1. Redistributions of source code must retain the above copyright 0N/A * notice, this list of conditions and the following disclaimer. 0N/A * 2. Redistributions in binary form must reproduce the above copyright 0N/A * notice, this list of conditions and the following disclaimer in 0N/A * the documentation and/or other materials provided with the 0N/A * 3. The end-user documentation included with the redistribution, 0N/A * if any, must include the following acknowledgment: 0N/A * "This product includes software developed by the 0N/A * Alternately, this acknowledgment may appear in the software itself, 0N/A * if and wherever such third-party acknowledgments normally appear. 0N/A * 4. The names "Apache" and "Apache Software Foundation" must 0N/A * not be used to endorse or promote products derived from this 0N/A * software without prior written permission. For written 0N/A * permission, please contact apache@apache.org. 0N/A * 5. Products derived from this software may not be called "Apache", 0N/A * nor may "Apache" appear in their name, without prior written 0N/A * permission of the Apache Software Foundation. 0N/A * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED 0N/A * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 0N/A * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 0N/A * DISCLAIMED. 
IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR 0N/A * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 0N/A * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 0N/A * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 0N/A * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 0N/A * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 0N/A * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 0N/A * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 0N/A * ==================================================================== 0N/A * This software consists of voluntary contributions made by many 0N/A * individuals on behalf of the Apache Software Foundation. For more 0N/A * information on the Apache Software Foundation, please see 0N/A * Portions of this software are based upon public domain software 0N/A * originally written at the National Center for Supercomputing Applications, 0N/A * University of Illinois, Urbana-Champaign. 0N/A * Original by Rob McCool; substantial fixups by David Robinson; 0N/A * incorporated into the Apache module framework by rst. 0N/A/***************************************************************** 0N/A * XBITHACK. Sigh... NB it's configurable per-directory; the compile-time 0N/A * option only changes the default. 0N/A/* ------------------------ Environment function -------------------------- */ 0N/A/* Sentinel value to store in subprocess_env for items that 0N/A/* XXX: could use ap_table_overlap here */ 0N/A/* --------------------------- Parser functions --------------------------- */ 0N/A/* This is an implementation of the BNDM search algorithm. 0N/A * Fast and Flexible String Matching by Combining Bit-parallelism and 0N/A * Suffix Automata (2001) 0N/A * Gonzalo Navarro, Mathieu Raffinot 0N/A * Initial code submitted by Sascha Schumann. 0N/A unsigned int T[
256];
0N/A/* This is the pattern matcher that holds the STARTING_SEQUENCE bndm_t 0N/A/* Precompile the bndm_t data structure. */ 0N/A for (x =
1; n <
ne; x <<=
1)
0N/A t->T[(
unsigned char) *n++] |= x;
0N/A/* Implements the BNDM search algorithm (as described above). 0N/A * n - the pattern to search for 0N/A * nl - length of the pattern to search for 0N/A * h - the string to look in 0N/A * hl - length of the string to look in 0N/A * t - precompiled bndm structure against the pattern 0N/A * Returns the count of character that is the first match or hl if no 0N/A unsigned int *T, x, d;
0N/A pi = h -
1;
/* pi: p initial */ 0N/A p =
pi +
nl;
/* compare window right to left. point to the first char */ 0N/A d = (d >>
1) & T[(
unsigned char) *p--];
0N/A/* We've now found a start sequence tag... */ 0N/A /* We want to split the bucket at the '<'. */ 0N/A /* Split the bucket with the start of the tag in it */ 0N/A /* If it was a one bucket match */ 0N/A/* This function returns either a pointer to the split bucket containing the 0N/A * first byte of the BEGINNING_SEQUENCE (after finding a complete match) or it 0N/A * returns NULL if no match found. if (
len == 0) {
/* end of pipe? */ /* Set our buffer to use. */ /* The last bucket had a left over partial match that we need to /* Consider the case where we have <!-- at the end of the bucket. */ /* The reason for this, is that we need to make sure * that we catch cases like <<!--#. This makes the * second check after the original check fails. * If parse_pos was already 0 then we already checked this. /* gonna start over parsing the directive next time through */ if (
len == 0) {
/* end of pipe? */ /* We want to split the bucket at the '>'. The * end of the END_SEQUENCE is in the current bucket. * The beginning might be in a previous bucket. /* The reason for this, is that we need to make sure * that we catch cases like --->. This makes the * second check after the original check fails. * If parse_pos was already 0 then we already checked /* This function culls through the buckets that have been set aside in the * ssi_tag_brigade and copies just the directive part of the SSI tag (none * of the start and end delimiter bytes are copied). /* If the tag length is longer than the tmp buffer, allocate space. */ }
/* Else, just use the temp buffer. */ /* Prime the pump. Start at the beginning of the tag... */ /* Adjust the pointer to start at the tag within the bucket... */ /* Loop through the buckets from the tag_start_bucket until before * the tail_start_bucket copying the contents into the buffer. /* Adjust the count to stop at the beginning of the tail. */ * decodes a string containing html entities or numeric character references. * 's' is overwritten with the decoded string. * If 's' is syntactically incorrect, then the following fixups will be made: * unknown entities will be left undecoded; * references to unused numeric characters will be deleted. * In particular, &#00; will not be decoded, but will be deleted. /* maximum length of any ISO-LATIN-1 HTML entity name. */ /* The following is a shrinking transformation, therefore safe. */ "amp\046ETH\320eth\360",
/* 3 */ "quot\042Auml\304Euml\313Iuml\317Ouml\326Uuml\334auml\344euml\353\ iuml\357ouml\366uuml\374yuml\377",
/* 4 */ "Acirc\302Aring\305AElig\306Ecirc\312Icirc\316Ocirc\324Ucirc\333\ THORN\336szlig\337acirc\342aring\345aelig\346ecirc\352icirc\356ocirc\364\ ucirc\373thorn\376",
/* 5 */ "Agrave\300Aacute\301Atilde\303Ccedil\307Egrave\310Eacute\311\ Igrave\314Iacute\315Ntilde\321Ograve\322Oacute\323Otilde\325Oslash\330\ Ugrave\331Uacute\332Yacute\335agrave\340aacute\341atilde\343ccedil\347\ egrave\350eacute\351igrave\354iacute\355ntilde\361ograve\362oacute\363\ otilde\365oslash\370ugrave\371uacute\372yacute\375" /* 6 */ for (; *s !=
'\0'; s++, p++) {
for (i =
1; s[i] !=
';' && s[i] !=
'\0'; i++) {
if (s[i] ==
'\0') {
/* treat as normal data */ if (j < i ||
val <=
8 || (
val >=
11 &&
val <=
31) ||
p--;
/* no data to output */ * Extract the next tag name and value. * If there are no more tags, set the tag name to NULL. * The tag value is html decoded if dodecode is non-zero. * The tag value may be NULL if there is no tag value.. * [WS]<Tag>[WS]=[WS]['|"]<Value>['|"|WS] *
tag = c;
/* First non-whitespace character (could be NULL). */ while ((*c !=
'\0') && (*c !=
'=') && (!
apr_isspace(*c))) {
*c =
apr_tolower(*c);
/* find end of tag, lowercasing as we go... */ if ((*c ==
'\0') || (**
tag ==
'=')) {
if ((**
tag ==
'\0') || (**
tag ==
'=')) {
return;
/* We have found the end of the buffer. */ }
/* We might have a tag, but definitely no value. */ *c++ =
'\0';
/* Overwrite the '=' with a terminating byte after tag. */ else {
/* Try skipping WS to find the '='. */ *c++ =
'\0';
/* Terminate the tag... */ /* There needs to be an equal sign if there's a value. */ return;
/* There apparently was no value. */ c++;
/* Skip the equals sign. */ if (*c ==
'"' || *c ==
'\'') {
/* Allow quoted values for space inclusion. */ term = *c++;
/* NOTE: This does not pass the quotes on return. */ if (*c ==
'\\') {
/* Accept \" and \' as valid char in string. */ if (*c ==
term) {
/* Overwrite the "\" during the embedded */ shift_val++;
/* escape sequence of '\"' or "\'". Shift */ }
/* bytes from here to next delimiter. */ *(c-
shift_val) =
'\0';
/* Overwrites delimiter (term or WS) with NULL. */ * Do variable substitution on strings /* leave room for nul terminator */ while ((
ch = *
in++) !=
'\0') {
/* guess that the expansion won't happen */ 0, r,
"Missing '}' on variable \"%s\"",
/* what a pain, too bad there's no table_getn where you can * pass a non-nul terminated string */ /* no expansion to be done */ /* zero-length variable name causes just the $ to be /* --------------------------- Action handlers ---------------------------- */ /* ensure that path is relative, and does not contain ".." elements * essentially ensure that it does not match the regex: * XXX: Simply replace with apr_filepath_merge /* If the name is canonical this is redundant * but in security, redundancy is worthwhile. * Does OS2 belong here (accepts ... for ..)? /* XXX: Port to apr_filepath_merge * be safe; only files in this directory or below allowed error_fmt =
"unable to include \"%s\" in parsed file %s";
error_fmt =
"unable to include potential exec \"%s\" " /* try to avoid recursive includes. We do this by walking * up the r->main list of subrequests, and at each level * walking back through any internal redirects. At each * step, we compare the filenames and the URIs. * The filename comparison catches a recursive include * with an ever-changing URL, eg. * "$REQUEST_URI/$QUERY_STRING?$QUERY_STRING/x"--> * which, although they would eventually be caught because * we have a limit on the length of files, etc., can * The URI comparison catches the case where the filename * is changed while processing the request, so the * current name is never the same as any previous one. * This can happen with "DocumentRoot /foo" when you * request "/" on the server and it includes "/". * This only applies to modules such as mod_dir that * (somewhat improperly) mess with r->filename outside * of a filename translation phase. /* See the Kludge in send_parsed_file for why */ /* Basically, it puts a bread crumb in here, then looks */ /* for the crumb later to see if its been here. */ /* let's not clutter the log on a busy server */ "unable to include \"%s\" in parsed file %s";
/* destroy the sub request if it's not a nested include "unknown parameter \"%s\" to tag include in %s",
"unknown value \"%s\" to parameter \"encoding\" of " "unknown parameter \"%s\" in tag echo of %s",
/* error and tf must point to a string with room for at * least MAX_STRING_LEN characters return 0;
/* Reached the end of the string. */ return 1;
/* tags must have values. */ "unknown parameter \"%s\" to tag config in %s",
/* XXX: Port to apr_filepath_merge * be safe; only files in this directory or below allowed /* note: it is okay to pass NULL for the "next filter" since we never attempt to "run" this sub request. */ error_fmt =
"unable to get information about \"%s\" " error_fmt =
"unable to lookup information about \"%s\" " /* note: it is okay to pass NULL for the "next filter" since we never attempt to "run" this sub request. */ "unable to get information about \"%s\" " "unknown parameter \"%s\" to tag %s in %s",
for (x = 0; x < l; x++) {
if (x && (!((l - x) %
3))) {
"unable to compile pattern \"%s\"",
rexp);
/* there is an implicit assumption here that string is at most MAX_STRING_LEN-1 /* Skip leading white space */ /* We should only be here if we are in a string */ * Yes I know that goto's are BAD. But, c doesn't allow me to * exit a loop from a switch statement. Yes, I could use a flag, * I used the ++string throughout this section so that string * ends up pointing to the next token and I can just return it /* If qs is still set, I have an unmatched ' */ * Hey I still know that goto's are BAD. I don't think that I've ever * used two in the same project, let alone the same file before. But, * I absolutely want to make sure that I clean up the memory in all * cases. And, without rewriting this completely, the easiest way * is to just branch to the return code which cleans it up. /* there is an implicit assumption here that expr is at most MAX_STRING_LEN-1 "Invalid expression \"%s\" in file %s",
sizeof (
" Token: and/or\n"));
"Invalid expression \"%s\" in file %s",
"Invalid expression \"%s\" in file %s",
sizeof(
" Token: not\n"));
"Invalid expression \"%s\" in file %s",
"Invalid expression \"%s\" in file %s",
"Invalid expression \"%s\" in file %s",
sizeof (
" Token: rbrace\n"));
"Unmatched ')' in \"%s\" in file %s",
sizeof (
" Token: lbrace\n"));
"Invalid expression \"%s\" in file %s",
/* Evaluate Parse Tree */ sizeof (
" Evaluate string\n"));
sizeof(
" Evaluate and/or\n"));
"Invalid expression \"%s\" in file %s",
sizeof (
" Evaluate eq/ne\n"));
"Invalid expression \"%s\" in file %s",
"Invalid rexp \"%s\" in file %s",
" Re Compare (%s) with /%s/\n",
" Compare (%s) with (%s)\n",
"Invalid expression \"%s\" in file %s",
" Compare (%s) with (%s)\n",
"Unmatched '(' in \"%s\" in file %s",
"Unmatched ')' in \"%s\" in file %s",
/*-------------------------------------------------------------------------*/ /* XXX overlaying the static string pointed to by cond_txt isn't cool */ char *
cond_txt =
"**** X conditional_status=\"0\"\n"; \
if (
d_buf[0] !=
'\0') { \
/*-------------------------------------------------------------------------*/ /* pjr - These seem to allow expr="fred" expr="joe" where joe overwrites fred. */ "missing expr in if statement: %s",
"unknown parameter \"%s\" to tag if in %s",
tag,
"missing expr in elif statement: %s",
"unknown parameter \"%s\" to tag if in %s",
tag,
"else directive does not take tags in %s", r->
filename);
"endif directive does not take tags in %s", r->
filename);
"variable must precede value in set directive in %s",
"Invalid tag for set directive in %s", r->
filename);
"printenv directive does not take tags in %s",
/* -------------------------- The main function --------------------------- */ if (r->
args) {
/* add QUERY stuff to env cause it ain't yet */ /* State to check for the STARTING_SEQUENCE. */ /* The few bytes stored in the ssi_tag_brigade turned out not to * be a tag after all. This can only happen if the starting * tag actually spans brigades. This should be very rare. /* If I am inside a conditional (if, elif, else) that is false * then I need to throw away anything contained in it. /* Adjust the current bucket position based on what was found... */ /* Send the large chunk of pre-tag bytes... */ /* There was no possible SSI tag in the * remainder of this brigade... */ /* State to check for the ENDING_SEQUENCE. */ /* If some of the tag has already been set aside then set * aside remainder of tag. Now the full tag is in * If none has yet been set aside, then leave it all where it * In any event after this the entire set of tag buckets will * be in one place or another. /* remainder of this brigade... */ /* State to process the directive... */ /* By now the full tag (all buckets) should either be set aside into * ssi_tag_brigade or contained within the current bb. All tag * processing from here on can assume that. /* At this point, everything between ctx->head_start_bucket and * ctx->tail_start_bucket is an SSI * directive, we just have to deal with it now. "mod_include: error copying directive in %s",
/* DO CLEANUP HERE!!!!! */ /* Can't destroy the tag buckets until I'm done processing * because the combined_tag might just be pointing to * the contents of a single bucket! /* Retrieve the handler function to be called for this directive * from the functions registered in the hash table. * Need to lower case the directive for proper matching. Also need * to have it NULL terminated (and include the NULL in the length) * for proper hash matching. if ((
rv != 0) && (
rv !=
1)) {
"unknown directive \"%s\" in parsed doc %s",
/* This chunk of code starts at the first bucket in the chain * of tag buckets (assuming that by this point the bucket for * the STARTING_SEQUENCE has been split) and loops through to * the end of the tag buckets freeing them all. * Remember that some part of this may have been set aside * into the ssi_tag_brigade and the remainder (possibly as * little as one byte) will be in the current brigade. * The value of dptr should have been set during the * PARSE_TAIL state to the first bucket after the * The value of content_head may have been set during processing * of the directive. If so, the content was inserted in front * of the dptr bucket. The inserted buckets should not be thrown * away here, but they should also not be parsed later. /* Don't reset the flags or the nesting level!!! */ /* If I am in the middle of parsing an SSI tag then I need to set aside * the pertinent trailing buckets and pass on the initial part of the * brigade. The pertinent parts of the next brigades will be added to * these set aside buckets to form the whole tag and will be processed * once the whole tag has been found. /* Inside a false conditional (if, elif, else), so toss it all... */ /* Otherwise pass it along... * No SSI tags in this brigade... */ "Invalid mod_include state during file %s", r->
filename);
else {
/* Entire brigade is middle chunk of SSI tag... */ else {
/* End of brigade contains part of SSI tag... */ /* Set aside tag, pass pre-tag... */ return "XBitHack must be set to Off, On, or Full";
/* Assure the platform supports Group protections */ /* Kludge --- for nested includes, we want to keep the subprocess * environment of the base document (for compatibility); that means * torquing our own last_modified date as well so that the * LAST_MODIFIED variable gets reset to the proper value if the * nested document resets <!--#config timefmt-->. * We also insist that the memory for this subrequest not be * destroyed, that's dealt with in handle_include(). /* we're not a nested include, so we create an initial /* XXX: this is bogus, at some point we're going to do a subrequest, * and when we do it we're going to be subjecting code that doesn't * expect to be signal-ready to SIGALRM. There is no clean way to * fix this, except to put alarm support into BUFF. -djg /* Always unset the content-length. There is no way to know if * the content will be modified at some point by send_parsed_content. * It is very possible for us to not find any content in the first * 9k of the file, but still have to modify the content of the file. * If we are going to pass the file through send_parsed_content, then * the content-length should just be unset. * We don't know if we are going to be including a file or executing * a program which may change the Last-Modified header or make the * content completely dynamic. Therefore, we can't support these /* signify that the sub request should not be killed */ /* compile the pattern used by find_start_sequence */ * Module definition and configuration data structs... "a strftime(3) formatted string"),
/* OS/2 doesn't currently support the xbithack. This is being worked on. */ /* We always return declined, because the default handler will actually * serve the file. All we have to do is add the filter. NULL,
/* dir merger --- default is to override */ NULL,
/* server config */ NULL,
/* merge server config */