mod_xml2enc.c revision d29745cf3b7bc83a2b98a70d42ec5c18bc04c208
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq/* Copyright (c) 2007-11, WebThing Ltd
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq * Copyright (c) 2011-, The Apache Software Foundation
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq *
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq * Licensed to the Apache Software Foundation (ASF) under one or more
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq * contributor license agreements. See the NOTICE file distributed with
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq * this work for additional information regarding copyright ownership.
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq * The ASF licenses this file to You under the Apache License, Version 2.0
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq * (the "License"); you may not use this file except in compliance with
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq * the License. You may obtain a copy of the License at
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq *
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq * http://www.apache.org/licenses/LICENSE-2.0
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq *
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq * Unless required by applicable law or agreed to in writing, software
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq * distributed under the License is distributed on an "AS IS" BASIS,
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq * See the License for the specific language governing permissions and
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq * limitations under the License.
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq */
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq#if defined(WIN32)
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq#define XML2ENC_DECLARE_EXPORT
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq#endif
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq#include <ctype.h>
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq/* libxml2 */
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq#include <libxml/encoding.h>
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq#include "http_protocol.h"
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq#include "http_config.h"
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq#include "http_log.h"
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq#include "apr_strings.h"
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq#include "apr_xlate.h"
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq#include "apr_optional.h"
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq#include "mod_xml2enc.h"
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq
f4d3a92b319b23e2b8d67298acc289d52bc1c517niqmodule AP_MODULE_DECLARE_DATA xml2enc_module;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq
/* Size of the conversion buffer, and the sniffing buffer size used when the
 * length of the saved brigade cannot be determined.
 */
#define BUFLEN 8192
/* Amount of saved data below which sniffing is postponed (unless EOS seen) */
#define BUF_MIN 4096

/* Walk bucket pointer b over every bucket in brigade bb */
#define APR_BRIGADE_DO(b,bb) \
    for (b = APR_BRIGADE_FIRST(bb); \
         b != APR_BRIGADE_SENTINEL(bb); \
         b = APR_BUCKET_NEXT(b))

/* State bits kept in xml2ctx.flags */
#define ENC_INITIALISED 0x100
#define ENC_SEEN_EOS    0x200
#define ENC_SKIPTO      ENCIO_SKIPTO

/* True when enc is a usable libxml2 encoding id (neither NONE nor ERROR) */
#define HAVE_ENCODING(enc) \
    (((enc) != XML_CHAR_ENCODING_NONE) && ((enc) != XML_CHAR_ENCODING_ERROR))
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq
7307da1208ba7db743a5911cfccd2a549f1d3e34sf/*
7307da1208ba7db743a5911cfccd2a549f1d3e34sf * XXX: Check all those ap_assert()s and replace those that should not happen
7307da1208ba7db743a5911cfccd2a549f1d3e34sf * XXX: with AP_DEBUG_ASSERT and those that may happen with proper error
7307da1208ba7db743a5911cfccd2a549f1d3e34sf * XXX: handling.
7307da1208ba7db743a5911cfccd2a549f1d3e34sf */
f4d3a92b319b23e2b8d67298acc289d52bc1c517niqtypedef struct {
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq xmlCharEncoding xml2enc;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq char* buf;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq apr_size_t bytes;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq apr_xlate_t* convset;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq unsigned int flags;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq apr_off_t bblen;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq apr_bucket_brigade* bbnext;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq apr_bucket_brigade* bbsave;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq const char* encoding;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq} xml2ctx;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq
f4d3a92b319b23e2b8d67298acc289d52bc1c517niqtypedef struct {
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq const char* default_charset;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq xmlCharEncoding default_encoding;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq apr_array_header_t* skipto;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq} xml2cfg;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq
/* One entry of the xml2cfg.skipto array */
typedef struct {
    const char *val;    /* element name, compared case-insensitively */
} tattr;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq
f4d3a92b319b23e2b8d67298acc289d52bc1c517niqstatic ap_regex_t* seek_meta_ctype;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niqstatic ap_regex_t* seek_charset;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq
f4d3a92b319b23e2b8d67298acc289d52bc1c517niqstatic apr_status_t xml2enc_filter(request_rec* r, const char* enc,
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq unsigned int mode)
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq{
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq /* set up a ready-initialised ctx to convert to enc, and insert filter */
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq apr_xlate_t* convset;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq apr_status_t rv;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq unsigned int flags = (mode ^ ENCIO);
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq if ((mode & ENCIO) == ENCIO_OUTPUT) {
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq rv = apr_xlate_open(&convset, enc, "UTF-8", r->pool);
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq flags |= ENC_INITIALISED;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq }
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq else if ((mode & ENCIO) == ENCIO_INPUT) {
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq rv = apr_xlate_open(&convset, "UTF-8", enc, r->pool);
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq flags |= ENC_INITIALISED;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq }
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq else if ((mode & ENCIO) == ENCIO_INPUT_CHECKS) {
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq convset = NULL;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq rv = APR_SUCCESS; /* we'll initialise later by sniffing */
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq }
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq else {
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq rv = APR_EGENERAL;
185aa71728867671e105178b4c66fbc22b65ae26sf ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01426)
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq "xml2enc: bad mode %x", mode);
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq }
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq if (rv == APR_SUCCESS) {
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq xml2ctx* ctx = apr_pcalloc(r->pool, sizeof(xml2ctx));
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq ctx->flags = flags;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq if (flags & ENC_INITIALISED) {
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq ctx->convset = convset;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq ctx->bblen = BUFLEN;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq ctx->buf = apr_palloc(r->pool, (apr_size_t)ctx->bblen);
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq }
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq ap_add_output_filter("xml2enc", ctx, r, r->connection);
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq }
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq else {
185aa71728867671e105178b4c66fbc22b65ae26sf ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r, APLOGNO(01427)
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq "xml2enc: Charset %s not supported.", enc) ;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq }
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq return rv;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq}
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq/* This needs to operate only when we're using htmlParser */
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq/* Different modules may apply different rules here. Ho, hum. */
/**
 * Drop everything that precedes the first configured start element.
 *
 * Scans the NUL-terminated ctx->buf for the first '<' that is followed
 * (case-insensitively) by one of the names in cfg->skipto. On a match the
 * leading buckets are deleted from ctx->bbsave and ctx->buf / ctx->bytes
 * are advanced to the match. Does nothing unless a skipto list is
 * configured and ENC_SKIPTO is set in ctx->flags.
 *
 * @param r   the current request (configuration lookup and logging)
 * @param ctx filter context holding the sniffed buffer and saved brigade
 */
static void fix_skipto(request_rec* r, xml2ctx* ctx)
{
    apr_status_t rv;
    xml2cfg* cfg = ap_get_module_config(r->per_dir_config, &xml2enc_module);
    tattr* starts;
    char* p;

    /* test the flag bit with '&': '|' made this condition always true */
    if ((cfg->skipto == NULL) || !(ctx->flags & ENC_SKIPTO)) {
        return;
    }

    starts = (tattr*) cfg->skipto->elts;
    for (p = ap_strchr(ctx->buf, '<'); p != NULL; p = ap_strchr(p+1, '<')) {
        int i;
        for (i = 0; i < cfg->skipto->nelts; ++i) {
            if (!strncasecmp(p+1, starts[i].val, strlen(starts[i].val))) {
                /* found a starting element. Strip all that comes before. */
                apr_bucket* b;
                apr_bucket* bstart;
                rv = apr_brigade_partition(ctx->bbsave, (p-ctx->buf),
                                           &bstart);
                ap_assert(rv == APR_SUCCESS);
                while (b = APR_BRIGADE_FIRST(ctx->bbsave), b != bstart) {
                    apr_bucket_delete(b);
                }
                ctx->bytes -= (p-ctx->buf);
                ctx->buf = p ;
                ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01428)
                              "Skipped to first <%s> element",
                              starts[i].val) ;
                /* return at once so the failure warning below cannot be
                 * reached after a successful skip
                 */
                return;
            }
        }
    }

    ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, r, APLOGNO(01429)
                  "Failed to find start of recognised HTML!");
}
/**
 * Work out the character encoding of the buffered start of the response.
 *
 * Sources are consulted in this order: a "charset=" parameter in the
 * Content-Type, libxml2's detection on the buffer, a <meta> element
 * matched by seek_meta_ctype (which is also cut out of ctx->bbsave), the
 * configured default charset via apr_xlate, and finally the configured
 * default libxml2 encoding. On return ctx->xml2enc is set, ctx->encoding
 * and ctx->convset may be set, and r->content_type is rewritten to
 * advertise utf-8 when a charset name is known.
 *
 * @param r   the current request
 * @param ctx filter context; ctx->buf must be NUL-terminated and
 *            ctx->bbsave must hold the same data
 */
static void sniff_encoding(request_rec* r, xml2ctx* ctx)
{
    /* fetched up front so the last-resort branch never depends on an
     * earlier branch having run
     */
    xml2cfg* cfg = ap_get_module_config(r->per_dir_config, &xml2enc_module);
    char* p ;
    apr_bucket* cutb;
    apr_bucket* cute;
    apr_bucket* b;
    ap_regmatch_t match[2] ;
    apr_status_t rv;
    const char* ctype = r->content_type;

    if (ctype) {
        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01430)
                      "Content-Type is %s", ctype) ;

        /* If we've got it in the HTTP headers, there's nothing to do */
        p = ap_strcasestr(ctype, "charset=");
        if (p != NULL) {
            p += 8 ;
            ctx->encoding = apr_pstrndup(r->pool, p, strcspn(p, " ;"));
            if (ctx->encoding) {
                ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r, APLOGNO(01431)
                              "Got charset %s from HTTP headers", ctx->encoding) ;
                ctx->xml2enc = xmlParseCharEncoding(ctx->encoding);
            }
        }
    }

    /* to sniff, first we look for BOM */
    if (ctx->xml2enc == XML_CHAR_ENCODING_NONE) {
        ctx->xml2enc = xmlDetectCharEncoding((const xmlChar*)ctx->buf,
                                             ctx->bytes);
        if (HAVE_ENCODING(ctx->xml2enc)) {
            ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r, APLOGNO(01432)
                          "Got charset from XML rules.") ;
            ctx->encoding = xmlGetCharEncodingName(ctx->xml2enc);
        }
    }

    /* If none of the above, look for a META-thingey */
    /* also we're probably about to invalidate it, so we remove it. */
    if (ap_regexec(seek_meta_ctype, ctx->buf, 1, match, 0) == 0 ) {
        /* get markers on the start and end of the match */
        rv = apr_brigade_partition(ctx->bbsave, match[0].rm_eo, &cute);
        ap_assert(rv == APR_SUCCESS);
        rv = apr_brigade_partition(ctx->bbsave, match[0].rm_so, &cutb);
        ap_assert(rv == APR_SUCCESS);
        /* now set length of useful buf for start-of-data hooks */
        ctx->bytes = match[0].rm_so;
        if (ctx->encoding == NULL) {
            /* match[] is reused below; the offsets above are already
             * consumed
             */
            p = apr_pstrndup(r->pool, ctx->buf + match[0].rm_so,
                             match[0].rm_eo - match[0].rm_so) ;
            if (ap_regexec(seek_charset, p, 2, match, 0) == 0) {
                ctx->encoding = apr_pstrndup(r->pool, p+match[1].rm_so,
                                             match[1].rm_eo - match[1].rm_so);
                if (ctx->encoding) {
                    ctx->xml2enc = xmlParseCharEncoding(ctx->encoding);
                    if (HAVE_ENCODING(ctx->xml2enc))
                        ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r, APLOGNO(01433)
                                      "Got charset %s from HTML META", ctx->encoding) ;
                }
            }
        }

        /* cut out the <meta> we're invalidating */
        while (cutb != cute) {
            b = APR_BUCKET_NEXT(cutb);
            apr_bucket_delete(cutb);
            cutb = b;
        }
        /* and leave a string */
        ctx->buf[ctx->bytes] = 0;
    }

    /* either it's set to something we found or it's still the default */
    /* Aaargh!  libxml2 has undocumented <META-crap> support.  So this fails
     * if metafix is not active.  Have to make it conditional.
     *
     * No, that means no-metafix breaks things.  Deal immediately with
     * this particular instance of metafix.
     */
    if (!HAVE_ENCODING(ctx->xml2enc)) {
        if (!ctx->encoding) {
            ctx->encoding = cfg->default_charset?cfg->default_charset:"ISO-8859-1";
        }
        /* Unsupported charset. Can we get (iconv) support through apr_xlate? */
        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01434)
                      "Charset %s not supported by libxml2; trying apr_xlate",
                      ctx->encoding);
        if (apr_xlate_open(&ctx->convset, "UTF-8", ctx->encoding, r->pool)
            == APR_SUCCESS) {
            ctx->xml2enc = XML_CHAR_ENCODING_UTF8 ;
        } else {
            ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01435)
                          "Charset %s not supported.  Consider aliasing it?",
                          ctx->encoding) ;
        }
    }

    if (!HAVE_ENCODING(ctx->xml2enc)) {
        /* Use configuration default as a last resort */
        ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, r, APLOGNO(01436)
                  "No usable charset information; using configuration default");
        ctx->xml2enc = (cfg->default_encoding == XML_CHAR_ENCODING_NONE)
                        ? XML_CHAR_ENCODING_8859_1 : cfg->default_encoding ;
    }

    if (ctype && ctx->encoding) {
        if (ap_regexec(seek_charset, ctype, 2, match, 0)) {
            /* no charset parameter yet: append one */
            r->content_type = apr_pstrcat(r->pool, ctype, ";charset=utf-8",
                                          NULL);
        } else {
            /* replace just the captured charset value. Built from the
             * capture offsets alone, so the result size never depends on
             * how much text surrounds the capture in the pattern.
             */
            r->content_type = apr_pstrcat(r->pool,
                                          apr_pstrndup(r->pool, ctype,
                                                       match[1].rm_so),
                                          "utf-8",
                                          ctype + match[1].rm_eo,
                                          NULL);
        }
    }
}
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq
f4d3a92b319b23e2b8d67298acc289d52bc1c517niqstatic apr_status_t xml2enc_filter_init(ap_filter_t* f)
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq{
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq xml2ctx* ctx;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq if (!f->ctx) {
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq xml2cfg* cfg = ap_get_module_config(f->r->per_dir_config,
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq &xml2enc_module);
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq f->ctx = ctx = apr_pcalloc(f->r->pool, sizeof(xml2ctx));
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq ctx->xml2enc = XML_CHAR_ENCODING_NONE;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq if (cfg->skipto != NULL) {
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq ctx->flags |= ENC_SKIPTO;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq }
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq }
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq return APR_SUCCESS;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq}
f4d3a92b319b23e2b8d67298acc289d52bc1c517niqstatic apr_status_t xml2enc_ffunc(ap_filter_t* f, apr_bucket_brigade* bb)
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq{
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq xml2ctx* ctx = f->ctx;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq apr_status_t rv;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq apr_bucket* b;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq apr_bucket* bstart;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq apr_size_t insz = 0;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq char *ctype;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq char *p;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq if (!ctx || !f->r->content_type) {
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq /* log error about configuring this */
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq ap_remove_output_filter(f);
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq return ap_pass_brigade(f->next, bb) ;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq }
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq ctype = apr_pstrdup(f->r->pool, f->r->content_type);
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq for (p = ctype; *p; ++p)
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq if (isupper(*p))
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq *p = tolower(*p);
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq /* only act if starts-with "text/" or contains "xml" */
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq if (strncmp(ctype, "text/", 5) && !strstr(ctype, "xml")) {
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq ap_remove_output_filter(f);
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq return ap_pass_brigade(f->next, bb) ;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq }
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq if (ctx->bbsave == NULL) {
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq ctx->bbsave = apr_brigade_create(f->r->pool,
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq f->r->connection->bucket_alloc);
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq }
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq /* append to any data left over from last time */
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq APR_BRIGADE_CONCAT(ctx->bbsave, bb);
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq if (!(ctx->flags & ENC_INITIALISED)) {
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq /* some kind of initialisation required */
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq /* Turn all this off when post-processing */
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq /* if we don't have enough data to sniff but more's to come, wait */
7307da1208ba7db743a5911cfccd2a549f1d3e34sf apr_brigade_length(ctx->bbsave, 0, &ctx->bblen);
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq if ((ctx->bblen < BUF_MIN) && (ctx->bblen != -1)) {
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq APR_BRIGADE_DO(b, ctx->bbsave) {
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq if (APR_BUCKET_IS_EOS(b)) {
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq ctx->flags |= ENC_SEEN_EOS;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq break;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq }
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq }
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq if (!(ctx->flags & ENC_SEEN_EOS)) {
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq /* not enough data to sniff. Wait for more */
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq APR_BRIGADE_DO(b, ctx->bbsave) {
7307da1208ba7db743a5911cfccd2a549f1d3e34sf rv = apr_bucket_setaside(b, f->r->pool);
7307da1208ba7db743a5911cfccd2a549f1d3e34sf ap_assert(rv == APR_SUCCESS);
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq }
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq return APR_SUCCESS;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq }
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq }
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq if (ctx->bblen == -1) {
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq ctx->bblen = BUFLEN-1;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq }
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq /* flatten it into a NULL-terminated string */
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq ctx->buf = apr_palloc(f->r->pool, (apr_size_t)(ctx->bblen+1));
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq ctx->bytes = (apr_size_t)ctx->bblen;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq rv = apr_brigade_flatten(ctx->bbsave, ctx->buf, &ctx->bytes);
7307da1208ba7db743a5911cfccd2a549f1d3e34sf ap_assert(rv == APR_SUCCESS);
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq ctx->buf[ctx->bytes] = 0;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq sniff_encoding(f->r, ctx);
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq /* FIXME: hook here for rewriting start-of-data? */
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq /* nah, we only have one action here - call it inline */
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq fix_skipto(f->r, ctx);
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq
7f40ab64e74b7d1057b5ee6abc349e32e74b1b4cjim /* we might change the Content-Length, so let's force its re-calculation */
7f40ab64e74b7d1057b5ee6abc349e32e74b1b4cjim apr_table_unset(f->r->headers_out, "Content-Length");
7f40ab64e74b7d1057b5ee6abc349e32e74b1b4cjim
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq /* consume the data we just sniffed */
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq /* we need to omit any <meta> we just invalidated */
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq ctx->flags |= ENC_INITIALISED;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq ap_set_module_config(f->r->request_config, &xml2enc_module, ctx);
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq }
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq if (ctx->bbnext == NULL) {
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq ctx->bbnext = apr_brigade_create(f->r->pool,
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq f->r->connection->bucket_alloc);
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq }
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq if (!ctx->convset) {
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq rv = ap_pass_brigade(f->next, ctx->bbsave);
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq apr_brigade_cleanup(ctx->bbsave);
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq ap_remove_output_filter(f);
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq return rv;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq }
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq /* move the data back to bb */
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq APR_BRIGADE_CONCAT(bb, ctx->bbsave);
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq while (b = APR_BRIGADE_FIRST(bb), b != APR_BRIGADE_SENTINEL(bb)) {
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq ctx->bytes = 0;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq if (APR_BUCKET_IS_METADATA(b)) {
e6a70e704cf27a66177a1e8979ae05bc65385a0esf APR_BUCKET_REMOVE(b);
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq if (APR_BUCKET_IS_EOS(b)) {
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq /* send remaining data */
e6a70e704cf27a66177a1e8979ae05bc65385a0esf APR_BRIGADE_INSERT_TAIL(ctx->bbnext, b);
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq return ap_fflush(f->next, ctx->bbnext);
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq } else if (APR_BUCKET_IS_FLUSH(b)) {
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq ap_fflush(f->next, ctx->bbnext);
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq }
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq apr_bucket_destroy(b);
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq }
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq else { /* data bucket */
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq char* buf;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq apr_size_t bytes = 0;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq char fixbuf[BUFLEN];
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq apr_bucket* bdestroy = NULL;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq if (insz > 0) { /* we have dangling data. Flatten it. */
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq buf = fixbuf;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq bytes = BUFLEN;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq rv = apr_brigade_flatten(bb, buf, &bytes);
7307da1208ba7db743a5911cfccd2a549f1d3e34sf ap_assert(rv == APR_SUCCESS);
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq if (bytes == insz) {
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq /* this is only what we've already tried to convert.
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq * The brigade is exhausted.
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq * Save remaining data for next time round
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq */
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq
185aa71728867671e105178b4c66fbc22b65ae26sf ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r, APLOGNO(01437)
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq "xml2enc: Setting aside %" APR_SIZE_T_FMT
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq " unconverted bytes", bytes);
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq rv = ap_fflush(f->next, ctx->bbnext);
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq APR_BRIGADE_CONCAT(ctx->bbsave, bb);
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq APR_BRIGADE_DO(b, ctx->bbsave) {
7307da1208ba7db743a5911cfccd2a549f1d3e34sf ap_assert(apr_bucket_setaside(b, f->r->pool)
7307da1208ba7db743a5911cfccd2a549f1d3e34sf == APR_SUCCESS);
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq }
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq return rv;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq }
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq /* remove the data we've just read */
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq rv = apr_brigade_partition(bb, bytes, &bstart);
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq while (b = APR_BRIGADE_FIRST(bb), b != bstart) {
d29745cf3b7bc83a2b98a70d42ec5c18bc04c208jailletc apr_bucket_delete(b);
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq }
185aa71728867671e105178b4c66fbc22b65ae26sf ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r, APLOGNO(01438)
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq "xml2enc: consuming %" APR_SIZE_T_FMT
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq " bytes flattened", bytes);
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq }
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq else {
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq rv = apr_bucket_read(b, (const char**)&buf, &bytes,
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq APR_BLOCK_READ);
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq APR_BUCKET_REMOVE(b);
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq bdestroy = b; /* can't destroy until finished with the data */
185aa71728867671e105178b4c66fbc22b65ae26sf ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r, APLOGNO(01439)
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq "xml2enc: consuming %" APR_SIZE_T_FMT
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq " bytes from bucket", bytes);
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq }
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq /* OK, we've got some input we can use in [buf,bytes] */
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq if (rv == APR_SUCCESS) {
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq apr_size_t consumed;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq xml2enc_run_preprocess(f, &buf, &bytes);
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq consumed = insz = bytes;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq while (insz > 0) {
7307da1208ba7db743a5911cfccd2a549f1d3e34sf apr_status_t rv2;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq if (ctx->bytes == ctx->bblen) {
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq /* nothing was converted last time!
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq * break out of this loop!
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq */
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq b = apr_bucket_transient_create(buf+(bytes - insz), insz,
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq bb->bucket_alloc);
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq APR_BRIGADE_INSERT_HEAD(bb, b);
185aa71728867671e105178b4c66fbc22b65ae26sf ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r, APLOGNO(01440)
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq "xml2enc: reinserting %" APR_SIZE_T_FMT
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq " unconsumed bytes from bucket", insz);
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq break;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq }
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq ctx->bytes = (apr_size_t)ctx->bblen;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq rv = apr_xlate_conv_buffer(ctx->convset, buf+(bytes - insz),
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq &insz, ctx->buf, &ctx->bytes);
185aa71728867671e105178b4c66fbc22b65ae26sf ap_log_rerror(APLOG_MARK, APLOG_DEBUG, rv, f->r, APLOGNO(01441)
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq "xml2enc: converted %" APR_SIZE_T_FMT
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq "/%" APR_OFF_T_FMT " bytes", consumed - insz,
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq ctx->bblen - ctx->bytes);
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq consumed = insz;
7307da1208ba7db743a5911cfccd2a549f1d3e34sf rv2 = ap_fwrite(f->next, ctx->bbnext, ctx->buf,
7307da1208ba7db743a5911cfccd2a549f1d3e34sf (apr_size_t)ctx->bblen - ctx->bytes);
7307da1208ba7db743a5911cfccd2a549f1d3e34sf if (rv2 != APR_SUCCESS) {
185aa71728867671e105178b4c66fbc22b65ae26sf ap_log_rerror(APLOG_MARK, APLOG_DEBUG, rv2, f->r, APLOGNO(01442)
7307da1208ba7db743a5911cfccd2a549f1d3e34sf "ap_fwrite failed");
7307da1208ba7db743a5911cfccd2a549f1d3e34sf return rv2;
7307da1208ba7db743a5911cfccd2a549f1d3e34sf }
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq switch (rv) {
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq case APR_SUCCESS:
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq continue;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq case APR_EINCOMPLETE:
185aa71728867671e105178b4c66fbc22b65ae26sf ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r, APLOGNO(01443)
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq "INCOMPLETE");
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq continue; /* If outbuf too small, go round again.
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq * If it was inbuf, we'll break out when
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq * we test ctx->bytes == ctx->bblen
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq */
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq case APR_EINVAL: /* try skipping one bad byte */
185aa71728867671e105178b4c66fbc22b65ae26sf ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, f->r, APLOGNO(01444)
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq "Skipping invalid byte(s) in input stream!");
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq --insz;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq continue;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq default:
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq /* Erk! What's this?
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq * Bail out, flush, and hope to eat the buf raw
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq */
185aa71728867671e105178b4c66fbc22b65ae26sf ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, f->r, APLOGNO(01445)
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq "Failed to convert input; trying it raw") ;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq ctx->convset = NULL;
7307da1208ba7db743a5911cfccd2a549f1d3e34sf rv = ap_fflush(f->next, ctx->bbnext);
7307da1208ba7db743a5911cfccd2a549f1d3e34sf if (rv != APR_SUCCESS)
185aa71728867671e105178b4c66fbc22b65ae26sf ap_log_rerror(APLOG_MARK, APLOG_DEBUG, rv, f->r, APLOGNO(01446)
7307da1208ba7db743a5911cfccd2a549f1d3e34sf "ap_fflush failed");
7307da1208ba7db743a5911cfccd2a549f1d3e34sf else
7307da1208ba7db743a5911cfccd2a549f1d3e34sf rv = ap_pass_brigade(f->next, ctx->bbnext);
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq }
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq }
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq } else {
185aa71728867671e105178b4c66fbc22b65ae26sf ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, f->r, APLOGNO(01447)
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq "xml2enc: error reading data") ;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq }
7307da1208ba7db743a5911cfccd2a549f1d3e34sf if (bdestroy)
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq apr_bucket_destroy(bdestroy);
7307da1208ba7db743a5911cfccd2a549f1d3e34sf if (rv != APR_SUCCESS)
7307da1208ba7db743a5911cfccd2a549f1d3e34sf return rv;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq }
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq }
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq return APR_SUCCESS;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq}
f4d3a92b319b23e2b8d67298acc289d52bc1c517niqstatic apr_status_t xml2enc_charset(request_rec* r, xmlCharEncoding* encp,
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq const char** encoding)
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq{
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq xml2ctx* ctx = ap_get_module_config(r->request_config, &xml2enc_module);
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq if (!ctx || !(ctx->flags & ENC_INITIALISED)) {
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq return APR_EAGAIN;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq }
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq *encp = ctx->xml2enc;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq *encoding = ctx->encoding;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq return HAVE_ENCODING(ctx->xml2enc) ? APR_SUCCESS : APR_EGENERAL;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq}
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq#define PROTO_FLAGS AP_FILTER_PROTO_CHANGE|AP_FILTER_PROTO_CHANGE_LENGTH
f4d3a92b319b23e2b8d67298acc289d52bc1c517niqstatic void xml2enc_hooks(apr_pool_t* pool)
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq{
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq ap_register_output_filter_protocol("xml2enc", xml2enc_ffunc,
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq xml2enc_filter_init,
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq AP_FTYPE_RESOURCE, PROTO_FLAGS);
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq APR_REGISTER_OPTIONAL_FN(xml2enc_filter);
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq APR_REGISTER_OPTIONAL_FN(xml2enc_charset);
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq seek_meta_ctype = ap_pregcomp(pool,
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq "(<meta[^>]*http-equiv[ \t\r\n='\"]*content-type[^>]*>)",
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq AP_REG_EXTENDED|AP_REG_ICASE) ;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq seek_charset = ap_pregcomp(pool, "charset=([A-Za-z0-9_-]+)",
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq AP_REG_EXTENDED|AP_REG_ICASE) ;
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq}
/**
 * Handler for the xml2EncAlias directive: register one alias for a
 * charset name with libxml2.
 *
 * @param cmd     directive context, checked against GLOBAL_ONLY
 * @param CFG     unused
 * @param charset charset name the alias refers to
 * @param alias   alias to register
 * @return NULL on success, otherwise an error message (either the one
 *         from ap_check_cmd_context() or a fixed string when
 *         xmlAddEncodingAlias() reports failure).
 */
static const char* set_alias(cmd_parms* cmd, void* CFG,
                             const char* charset, const char* alias)
{
    const char* context_error = ap_check_cmd_context(cmd, GLOBAL_ONLY);

    if (context_error != NULL) {
        return context_error;
    }
    if (xmlAddEncodingAlias(charset, alias) != 0) {
        return "Error setting charset alias";
    }
    return NULL;
}
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq
/**
 * Handler for the xml2EncDefault directive: record the default charset
 * name and its libxml2 encoding value in the module configuration.
 *
 * Both fields are written before validation, so they are updated even
 * when an error message is returned.
 *
 * @param cmd     directive context (unused)
 * @param CFG     the xml2cfg being populated
 * @param charset charset name given in the configuration
 * @return NULL if xmlParseCharEncoding() yields a usable encoding,
 *         otherwise an error message.
 */
static const char* set_default(cmd_parms* cmd, void* CFG, const char* charset)
{
    xml2cfg* conf = CFG;

    conf->default_charset = charset;
    conf->default_encoding = xmlParseCharEncoding(charset);

    if (conf->default_encoding == XML_CHAR_ENCODING_NONE) {
        return "Default charset not found";
    }
    if (conf->default_encoding == XML_CHAR_ENCODING_ERROR) {
        return "Invalid or unsupported default charset";
    }
    return NULL;
}
/**
 * Handler for the xml2StartParse directive: append one element name to
 * the configuration's skipto list, creating the list on first use.
 *
 * @param cmd directive context; its pool backs a newly created list
 * @param CFG the xml2cfg being populated
 * @param arg value to store in the new entry
 * @return always NULL (no error is reported)
 */
static const char* set_skipto(cmd_parms* cmd, void* CFG, const char* arg)
{
    xml2cfg* conf = CFG;
    tattr* entry;

    if (!conf->skipto) {
        conf->skipto = apr_array_make(cmd->pool, 4, sizeof(tattr));
    }

    entry = apr_array_push(conf->skipto);
    entry->val = arg;

    return NULL;
}
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq
f4d3a92b319b23e2b8d67298acc289d52bc1c517niqstatic const command_rec xml2enc_cmds[] = {
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq AP_INIT_TAKE1("xml2EncDefault", set_default, NULL, OR_ALL,
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq "Usage: xml2EncDefault charset"),
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq AP_INIT_ITERATE2("xml2EncAlias", set_alias, NULL, RSRC_CONF,
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq "EncodingAlias charset alias [more aliases]"),
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq AP_INIT_ITERATE("xml2StartParse", set_skipto, NULL, OR_ALL,
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq "Ignore anything in front of the first of these elements"),
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq { NULL }
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq};
/**
 * Create a fresh module configuration.
 *
 * The record is zero-filled and default_encoding is set explicitly to
 * XML_CHAR_ENCODING_NONE, which xml2enc_merge() treats as "not set".
 *
 * @param pool pool the configuration is allocated from
 * @param x    unused
 * @return the new xml2cfg
 */
static void* xml2enc_config(apr_pool_t* pool, char* x)
{
    xml2cfg* conf = apr_pcalloc(pool, sizeof(*conf));

    conf->default_encoding = XML_CHAR_ENCODING_NONE;
    return conf;
}
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq
/**
 * Merge two module configurations: each field comes from ADD when ADD
 * has set it, and from BASE otherwise.
 *
 * "Set" means default_encoding differs from XML_CHAR_ENCODING_NONE, and
 * default_charset / skipto are non-NULL. The skipto list is taken whole
 * from one side; the two lists are not concatenated.
 *
 * @param pool pool the merged configuration is allocated from
 * @param BASE the xml2cfg providing fallback values
 * @param ADD  the xml2cfg whose values take precedence
 * @return the merged xml2cfg
 */
static void* xml2enc_merge(apr_pool_t* pool, void* BASE, void* ADD)
{
    xml2cfg* parent = BASE;
    xml2cfg* child = ADD;
    xml2cfg* merged = apr_pcalloc(pool, sizeof(*merged));

    if (child->default_encoding != XML_CHAR_ENCODING_NONE) {
        merged->default_encoding = child->default_encoding;
    }
    else {
        merged->default_encoding = parent->default_encoding;
    }

    if (child->default_charset) {
        merged->default_charset = child->default_charset;
    }
    else {
        merged->default_charset = parent->default_charset;
    }

    if (child->skipto) {
        merged->skipto = child->skipto;
    }
    else {
        merged->skipto = parent->skipto;
    }

    return merged;
}
c95fba8e1c76d7a16b372b4386efbac8eb4c832dsf
c95fba8e1c76d7a16b372b4386efbac8eb4c832dsfAP_DECLARE_MODULE(xml2enc) = {
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq STANDARD20_MODULE_STUFF,
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq xml2enc_config,
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq xml2enc_merge,
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq NULL,
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq NULL,
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq xml2enc_cmds,
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq xml2enc_hooks
f4d3a92b319b23e2b8d67298acc289d52bc1c517niq};
c95fba8e1c76d7a16b372b4386efbac8eb4c832dsf
/* Implementation of the optional "preprocess" hook. This provides
 * xml2enc_run_preprocess(), which the output filter calls with the
 * filter, a pointer to the data buffer and a pointer to its length
 * before converting each chunk of input.
 */
APR_IMPLEMENT_OPTIONAL_HOOK_RUN_ALL(
    xml2enc, XML2ENC, int, preprocess,
    (ap_filter_t *f, char** bufp, apr_size_t* bytesp),
    (f, bufp, bytesp),
    OK, DECLINED)