/*
** Licensed to the Apache Software Foundation (ASF) under one or more
** contributor license agreements. See the NOTICE file distributed with
** this work for additional information regarding copyright ownership.
** The ASF licenses this file to You under the Apache License, Version 2.0
** (the "License"); you may not use this file except in compliance with
** the License. You may obtain a copy of the License at
**
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/
#include "apreq_util.h"
#include "apreq_error.h"
#include "apr_time.h"
#include "apr_strings.h"
#include "apr_lib.h"
#include <assert.h>
#define MIN(a,b) ( (a) < (b) ? (a) : (b) )
#define MAX(a,b) ( (a) > (b) ? (a) : (b) )
/*
 * Used for specifying file sizes.
 *
 * Parses the integer at the start of s and scales it by an optional
 * one-letter suffix that may follow it (whitespace between the number
 * and the suffix is skipped):
 *   K or k  -> multiplied by 1024
 *   M or m  -> multiplied by 1024*1024
 *   G or g  -> multiplied by 1024*1024*1024
 * Any other character after the number leaves the value unscaled.
 *
 * Returns 0 when s is NULL.  Neither the parse nor the scaling is
 * checked for overflow here.
 */
{
apr_int64_t n = 0;
char *p;
if (s == NULL)
return 0;
/* Base 0 is handed to apr_strtoi64; p receives the end-of-number pointer. */
n = apr_strtoi64(s, &p, 0);
/* Defensive guard: without an end pointer there is no suffix to inspect. */
if (p == NULL)
return n;
while (apr_isspace(*p))
++p;
switch (*p) {
case 'G': /* fall thru */
case 'g': return n * 1024*1024*1024;
case 'M': /* fall thru */
case 'm': return n * 1024*1024;
case 'K': /* fall thru */
case 'k': return n * 1024;
}
/* No recognised suffix: the plain number is the result. */
return n;
}
/*
 * Converts date offsets (e.g. "+3M") to seconds.
 *
 * The integer at the start of s is multiplied according to a one-letter
 * unit that may follow it (whitespace in between is skipped):
 *   Y or y  -> years, counted as 365 days
 *   M       -> months, counted as 30 days (upper case only)
 *   D or d  -> days
 *   H or h  -> hours
 *   m       -> minutes (lower case only)
 *   s, or any other character -> the number is returned unchanged
 *
 * Returns 0 when s is NULL.
 */
{
apr_int64_t n = 0;
char *p;
if (s == NULL)
return 0;
n = apr_strtoi64(s, &p, 0); /* XXX: what about overflow? */
/* Defensive guard: without an end pointer there is no unit to inspect. */
if (p == NULL)
return n;
while (apr_isspace(*p))
++p;
switch (*p) {
case 'Y': /* fall thru */
case 'y': return n * 60*60*24*365;
/* 'M' and 'm' are deliberately distinct: months vs. minutes. */
case 'M': return n * 60*60*24*30;
case 'D': /* fall thru */
case 'd': return n * 60*60*24;
case 'H': /* fall thru */
case 'h': return n * 60*60;
case 'm': return n * 60;
case 's': /* fall thru */
default:
return n;
}
/* should never get here: every path through the switch returns */
return -1;
}
const apreq_match_t type)
{
/* done if matches up to capacity of buffer */
break;
}
--len;
++hay;
}
}
{
register unsigned char digit;
#if !APR_CHARSET_EBCDIC
digit *= 16;
#else /*APR_CHARSET_EBCDIC*/
xstr[0]='0';
#endif /*APR_CHARSET_EBCDIC*/
return (digit);
}
/* Unicode notes: "bmp" refers to the 16-bit
* Unicode Basic Multilingual Plane. Here we're
* restricting our unicode internals to 16-bit
* codepoints, to keep the code as simple as possible.
* This should be sufficient for apreq itself, since
* we really only need to validate RFC3986-encoded utf8.
*/
/*
 * Converts Windows cp1252 to Unicode.
 *
 * Bytes outside 0x80 - 0x9F are returned unchanged.  Inside that range
 * the switch below supplies the replacement code point; the five values
 * that have no case (0x81, 0x8D, 0x8F, 0x90, 0x9D) also come back
 * unchanged.
 */
static APR_INLINE
{
/* We only need to deal with iso-8859-1 control chars
* in the 0x80 - 0x9F range.
*/
/* (c & 0xE0) == 0x80 holds exactly for 0x80..0x9F */
if ((c & 0xE0) != 0x80)
return c;
switch (c) {
case 0x80: return 0x20AC;
case 0x82: return 0x201A;
case 0x83: return 0x192;
case 0x84: return 0x201E;
case 0x85: return 0x2026;
case 0x86: return 0x2020;
case 0x87: return 0x2021;
case 0x88: return 0x2C6;
case 0x89: return 0x2030;
case 0x8A: return 0x160;
case 0x8B: return 0x2039;
case 0x8C: return 0x152;
case 0x8E: return 0x17D;
case 0x91: return 0x2018;
case 0x92: return 0x2019;
case 0x93: return 0x201C;
case 0x94: return 0x201D;
case 0x95: return 0x2022;
case 0x96: return 0x2013;
case 0x97: return 0x2014;
case 0x98: return 0x2DC;
case 0x99: return 0x2122;
case 0x9A: return 0x161;
case 0x9B: return 0x203A;
case 0x9C: return 0x153;
case 0x9E: return 0x17E;
case 0x9F: return 0x178;
}
/* no table entry: pass the byte value through as-is */
return c;
}
/*
 * Converts cp1252 to utf8.
 *
 * Each source byte is mapped through cp1252_to_bmp() and written to
 * dest as a one-, two- or three-byte UTF-8 sequence, followed by a
 * terminating 0.  Because a single source byte can expand to three
 * output bytes, dest must have room for that plus the terminator.
 *
 * Returns the number of bytes written to dest, not counting the
 * terminating 0.
 */
{
const unsigned char *s = (unsigned const char *)src;
unsigned char *d = (unsigned char *)dest;
apr_uint16_t c;
/* NOTE(review): `end` is not declared in the lines shown here;
 * presumably it points one past the last source byte -- confirm. */
while (s < end) {
c = cp1252_to_bmp(*s++);
if (c < 0x80) {
/* 7-bit value: copied through as a single byte */
*d++ = c;
}
else if (c < 0x800) {
/* up to 11 bits: 110xxxxx 10xxxxxx */
*d++ = 0xC0 | (c >> 6);
*d++ = 0x80 | (c & 0x3F);
}
else {
/* up to 16 bits: 1110xxxx 10xxxxxx 10xxxxxx */
*d++ = 0xE0 | (c >> 12);
*d++ = 0x80 | ((c >> 6) & 0x3F);
*d++ = 0x80 | (c & 0x3F);
}
}
*d = 0;
return d - (unsigned char *)dest;
}
/**
* Valid utf8 bit patterns: (true utf8 must satisfy a minimality condition)
*
* 0aaaaaaa
* 110bbbba 10aaaaaa minimality mask: 0x1E
* 1110cccc 10cbbbba 10aaaaaa 0x0F || 0x20
* 11110ddd 10ddcccc 10cbbbba 10aaaaaa 0x07 || 0x30
* 111110ee 10eeeddd 10ddcccc 10cbbbba 10aaaaaa 0x03 || 0x38
* 1111110f 10ffffee 10eeeddd 10ddcccc 10cbbbba 10aaaaaa 0x01 || 0x3C
*
* Charset divination heuristics:
* 1) presume ascii; if not, then
* 2) presume utf8; if not, then
* 3) presume latin1; unless there are control chars, in which case
* 4) punt to cp1252.
*
* Note: in downgrading from 2 to 3, we need to be careful
* about earlier control characters presumed to be valid utf8.
*/
{
register const unsigned char *s = (const unsigned char *)src;
for (; s < end; ++s) {
if (trail) {
mask = 0;
--trail;
if ((*s & 0xE0) == 0x80) {
saw_cntrl = 1;
}
}
else {
trail = 0;
if (saw_cntrl)
return APREQ_CHARSET_CP1252;
}
}
else if (*s < 0x80) {
/* do nothing */
}
else if (*s < 0xA0) {
return APREQ_CHARSET_CP1252;
}
else if (*s < 0xC0) {
if (saw_cntrl)
return APREQ_CHARSET_CP1252;
}
else if (rv == APREQ_CHARSET_LATIN1) {
/* do nothing */
}
/* utf8 cases */
else if (*s < 0xE0) {
if (*s & 0x1E) {
trail = 1;
mask = 0;
}
else if (saw_cntrl)
return APREQ_CHARSET_CP1252;
else
}
else if (*s < 0xF0) {
trail = 2;
}
else if (*s < 0xF8) {
trail = 3;
}
else if (*s < 0xFC) {
trail = 4;
}
else if (*s < 0xFE) {
trail = 5;
}
else {
}
}
}
{
#if !APR_CHARSET_EBCDIC
digit *= 16;
digit *= 16;
digit *= 16;
#else /*APR_CHARSET_EBCDIC*/
xstr[0]='0';
#endif /*APR_CHARSET_EBCDIC*/
return (digit);
}
{
register const char *s = src;
register unsigned char *d = (unsigned char *)dest;
for (; s < end; ++d, ++s) {
switch (*s) {
case '+':
*d = ' ';
break;
case '%':
*d = hex2_to_char(s + 1);
s += 2;
}
{
if (c < 0x80) {
*d = c;
}
else if (c < 0x800) {
*d++ = 0xC0 | (c >> 6);
*d = 0x80 | (c & 0x3F);
}
else {
*d++ = 0xE0 | (c >> 12);
*d++ = 0x80 | ((c >> 6) & 0x3F);
*d = 0x80 | (c & 0x3F);
}
s += 5;
}
else {
if (s + 5 < end
&& s[1] != 'u' && s[1] != 'U'))
{
*d = 0;
return APREQ_ERROR_BADSEQ;
}
d[end - s] = 0;
return APR_INCOMPLETE;
}
break;
default:
if (*s > 0) {
*d = *s;
}
else {
*d = 0;
return APREQ_ERROR_BADCHAR;
}
}
}
*d = 0;
return APR_SUCCESS;
}
const char *s, apr_size_t slen)
{
if (s == (const char *)d) { /* optimize for src = dest case */
for ( ; d < end; ++d) {
if (*d == '%' || *d == '+')
break;
else if (*d == 0) {
*dlen = (const char *)d - s;
return APREQ_ERROR_BADCHAR;
}
}
len = (const char *)d - s;
s = (const char *)d;
}
}
{
int n = 0;
*dlen = 0;
while (n < nelts) {
case APR_SUCCESS:
d += len;
++n;
continue;
case APR_INCOMPLETE:
d += len;
if (++n == nelts) {
return status;
}
v[n].iov_base = d;
continue;
default:
return status;
}
}
return status;
}
const apr_size_t slen)
{
char *d = dest;
const unsigned char *s = (const unsigned char *)src;
unsigned char c;
c = *s;
if ( c < 0x80 && (apr_isalnum(c)
|| c == '-' || c == '.'
|| c == '_' || c == '~') )
*d++ = c;
else if ( c == ' ' )
*d++ = '+';
else {
c = apr_xlate_conv_byte(ap_hdrs_to_ascii, (unsigned char)c);
#endif
*d++ = '%';
*d++ = c2x_table[c >> 4];
*d++ = c2x_table[c & 0xf];
}
}
*d = 0;
return d - dest;
}
{
apr_size_t i;
int backslash = 0;
if (p[i] == '\\')
else if (p[i] == 0 || (p[i] == '"' && !backslash))
return 0;
else
backslash = 0;
}
return !backslash;
}
return 0;
}
const apr_size_t slen)
{
/* looks like src is already quoted */
return slen;
}
else
}
const apr_size_t slen)
{
/*
 * Writes a double-quoted copy of src into dest and returns the number
 * of bytes written, not counting the terminating 0.
 *
 * - A zero-length source produces an empty string (no quotes) and 0.
 * - A 0 byte in the source is written as the two characters \ and 0.
 * - A backslash or double quote is written with a backslash in front.
 * - Every other byte is copied unchanged.
 *
 * In the worst case each source byte becomes two output bytes, plus the
 * two surrounding quotes and the terminator; dest must be sized for that.
 */
char *d = dest;
const char *s = src;
if (slen == 0) {
*d = 0;
return 0;
}
*d++ = '"';
/* NOTE(review): `last` is not declared in the lines shown here;
 * presumably it points at the final source byte -- confirm. */
while (s <= last) {
switch (*s) {
case 0:
*d++ = '\\';
*d++ = '0';
s++;
break;
case '\\':
case '"':
*d++ = '\\';
/* intentional fall through: emit the escaped character itself */
default:
*d++ = *s++;
}
}
*d++ = '"';
*d = 0;
return d - dest;
}
const char *sep,
const apr_array_header_t *arr,
{
char *rv;
char *d;
int j;
if (n == 0)
return apr_pstrdup(p, "");
for (j=0, len=0; j < n; ++j)
/* Allocated the required space */
switch (mode) {
case APREQ_JOIN_ENCODE:
break;
case APREQ_JOIN_QUOTE:
break;
case APREQ_JOIN_AS_IS:
case APREQ_JOIN_DECODE:
/* nothing special required, just here to keep noisy compilers happy */
break;
}
/* Pass two --- copy the argument strings into the result space */
d = rv;
switch (mode) {
case APREQ_JOIN_ENCODE:
for (j = 1; j < n; ++j) {
d += slen;
}
break;
case APREQ_JOIN_DECODE:
return NULL;
else
d += len;
for (j = 1; j < n; ++j) {
d += slen;
return NULL;
else
d += len;
}
break;
case APREQ_JOIN_QUOTE:
for (j = 1; j < n; ++j) {
d += slen;
}
break;
case APREQ_JOIN_AS_IS:
d += a[0]->dlen;
for (j = 1; j < n ; ++j) {
d += slen;
d += a[j]->dlen;
}
break;
}
*d = 0;
return rv;
}
/*
* This is intentionally not apr_file_writev().
* Note: this is iterative, not recursive.
*/
{
int n;
apr_status_t s;
*bytes_written = 0;
while (1) {
/* try to write */
*bytes_written += len;
if (s != APR_SUCCESS)
return s;
/* see how far we've come */
n = 0;
#ifdef SOLARIS2
# ifdef __GNUC__
/*
* iovec.iov_len is a long here
* which causes a comparison between
* signed(long) and unsigned(apr_size_t)
*
*/
# else
/*
* Sun C however defines this as size_t which is unsigned
*
*/
# endif /* !__GNUC__ */
#else
/*
* Hopefully everything else does this
* (this was the default for years)
*/
#endif
if (n == *nelts) {
/* nothing left to write, report success */
*nelts = 0;
return APR_SUCCESS;
}
/* incomplete write: must shift v */
if (n > 0) {
/* we're satisfied for now if we can remove one iovec from
the "v" array */
(*nelts) -= n;
return APR_SUCCESS;
}
/* we're still in the first iovec - check for endless loop,
and then try again */
if (len == 0)
return APREQ_ERROR_GENERAL;
}
}
/*
 * Payload for the temp-file cleanup discussed in the comment further
 * down.  NOTE(review): only the members visible here are documented.
 */
struct cleanup_data {
/* presumably the path of the temp file the cleanup acts on -- confirm */
const char *fname;
};
{
}
/*
* The reason we need the above cleanup is that on Windows, APR_DELONCLOSE
* forces applications to open the file with FILE_SHARE_DELETE
* set, which is, unfortunately, a property that is preserved
* across NTFS "hard" links. This breaks apps that link() the temp
* file to a permanent location, and subsequently expect to open it
* before the original tempfile is closed+deleted. In fact, even
* Apache::Upload does this, so it is a common enough event that the
* apreq_file_cleanup workaround is necessary.
*/
const char *path)
{
char *tmpl;
if (rc != APR_SUCCESS)
return rc;
}
if (rc != APR_SUCCESS)
return rc;
/* cleanups are LIFO, so this one will run just after
the cleanup set by mktemp */
/* NO APR_DELONCLOSE! see comment above */
if (rc == APR_SUCCESS) {
}
else {
}
return rc;
}
/*
 * is_2616_token() follows the token definition in section 2.2 of
 * RFC 2616: a character counts as a token character unless it is one
 * of the listed separators or a control character.
 *
 * The separators are tested before the control-character check because
 * the typical argument is expected NOT to be a token character.
 *
 * Returns 1 for a token character, 0 otherwise.
 */
static APR_INLINE
unsigned is_2616_token(const char c) {
    unsigned is_separator;

    switch (c) {
    /* Separators we actually expect to run into. */
    case ' ':
    case ';':
    case ',':
    case '"':
    case '\t':
    /* Remaining separators, listed for completeness. */
    case '?':
    case '=':
    case '@':
    case ':':
    case '\\':
    case '/':
    case '(':
    case ')':
    case '<':
    case '>':
    case '{':
    case '}':
    case '[':
    case ']':
        is_separator = 1;
        break;
    default:
        is_separator = 0;
        break;
    }

    if (is_separator)
        return 0;

    /* Not a separator: only control characters are still excluded. */
    return apr_iscntrl(c) ? 0 : 1;
}
apreq_header_attribute(const char *hdr,
{
const char *key, *v;
/* Must ensure first char isn't '=', so we can safely backstep. */
while (*hdr == '=')
++hdr;
v = key + 1;
--key;
--key;
while (apr_isspace(*v))
++v;
if (*v == '"') {
++v;
*val = v;
switch (*v) {
case '"':
break;
case 0:
return APREQ_ERROR_BADSEQ;
case '\\':
if (v[1] != 0)
++v;
default:
++v;
goto look_for_end_quote;
}
}
else {
*val = v;
switch (*v) {
case 0:
case ' ':
case ';':
case ',':
case '\t':
case '\r':
case '\n':
break;
default:
++v;
goto look_for_terminator;
}
}
return APR_SUCCESS;
}
hdr = v;
}
return APREQ_ERROR_NOATTR;
}
static
{
}
static
{
}
static
{
}
static
{
a->type = &apr_bucket_type_file;
return rv;
}
static
{
(*c)->type = &apr_bucket_type_file;
return rv;
}
};
{
if (BUCKET_IS_SPOOL(last))
return NULL;
}
const char *temp_dir,
{
apr_status_t s;
apr_bucket_file *f;
if (APR_BUCKET_IS_EOS(last_out))
return APR_EOF;
if (s != APR_SUCCESS)
return s;
/* This cast, when out_len = -1, is intentional */
if (s != APR_SUCCESS)
return s;
/* This cast, when in_len = -1, is intentional */
return APR_SUCCESS;
}
}
if (!BUCKET_IS_SPOOL(last_out)) {
if (s != APR_SUCCESS)
return s;
if (s != APR_SUCCESS)
return s;
}
else {
/* Need to seek here, just in case our spool bucket
* was read from between apreq_brigade_concat calls.
*/
if (s != APR_SUCCESS)
return s;
}
return APR_SUCCESS;
if (APR_BUCKET_IS_EOS(last_in))
if (s == APR_SUCCESS) {
/* We have to deal with the possibility that the new
* data may be too large to be represented by a single
* temp_file bucket.
*/
apr_bucket *e;
apr_bucket_copy(last_out, &e);
e->length = 0;
/* Copying makes the bucket types exactly the
* opposite of what we need here.
*/
e->type = &spool_bucket_type;
last_out = e;
}
if (APR_BUCKET_IS_EOS(last_in))
}
else if (APR_BUCKET_IS_EOS(last_in))
return s;
}
{
struct iovec v[APREQ_DEFAULT_NELTS];
apr_status_t s;
int n = 0;
*wlen = 0;
if (s != APR_SUCCESS)
return s;
}
e = APR_BUCKET_NEXT(e))
{
if (n == APREQ_DEFAULT_NELTS) {
s = apreq_fwritev(f, v, &n, &len);
if (s != APR_SUCCESS)
return s;
}
}
s = apr_bucket_read(e, (const char **)&(v[n].iov_base),
&len, APR_BLOCK_READ);
if (s != APR_SUCCESS)
return s;
}
while (n > 0) {
s = apreq_fwritev(f, v, &n, &len);
if (s != APR_SUCCESS)
return s;
}
}
return APR_SUCCESS;
}