modules/metadata/mod_unique_id.c

	mod_unique_id.c revision 6865813dee5d3c1ebf12dd810368171792a0190a
830N/A/* ====================================================================
830N/A * The Apache Software License, Version 1.1
830N/A *
830N/A * Copyright (c) 2000-2001 The Apache Software Foundation.  All rights
830N/A * reserved.
830N/A *
830N/A * Redistribution and use in source and binary forms, with or without
830N/A * modification, are permitted provided that the following conditions
830N/A * are met:
830N/A *
830N/A * 1. Redistributions of source code must retain the above copyright
830N/A *    notice, this list of conditions and the following disclaimer.
830N/A *
830N/A * 2. Redistributions in binary form must reproduce the above copyright
830N/A *    notice, this list of conditions and the following disclaimer in
830N/A *    the documentation and/or other materials provided with the
830N/A *    distribution.
830N/A *
830N/A * 3. The end-user documentation included with the redistribution,
830N/A *    if any, must include the following acknowledgment:
1415N/A *       "This product includes software developed by the
830N/A *        Apache Software Foundation (http://www.apache.org/)."
839N/A *    Alternately, this acknowledgment may appear in the software itself,
839N/A *    if and wherever such third-party acknowledgments normally appear.
830N/A *
830N/A * 4. The names "Apache" and "Apache Software Foundation" must
1300N/A *    not be used to endorse or promote products derived from this
1258N/A *    software without prior written permission. For written
830N/A *    permission, please contact apache@apache.org.
830N/A *
830N/A * 5. Products derived from this software may not be called "Apache",
830N/A *    nor may "Apache" appear in their name, without prior written
830N/A *    permission of the Apache Software Foundation.
830N/A *
830N/A * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
830N/A * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
830N/A * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
1244N/A * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
1244N/A * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
830N/A * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
1244N/A * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
1244N/A * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
844N/A * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
844N/A * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
830N/A * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
830N/A * SUCH DAMAGE.
830N/A * ====================================================================
830N/A *
830N/A * This software consists of voluntary contributions made by many
830N/A * individuals on behalf of the Apache Software Foundation.  For more
830N/A * information on the Apache Software Foundation, please see
830N/A * <http://www.apache.org/>.
830N/A *
830N/A * Portions of this software are based upon public domain software
830N/A * originally written at the National Center for Supercomputing Applications,
830N/A * University of Illinois, Urbana-Champaign.
830N/A */
830N/A
844N/A/*
844N/A * mod_unique_id.c: generate a unique identifier for each request
830N/A *
902N/A * Original author: Dean Gaudet <dgaudet@arctic.org>
902N/A * UUencoding modified by: Alvaro Martinez Echevarria <alvaro@lander.es>
854N/A */
854N/A
830N/A#include "apr_general.h"    /* for APR_XtOffsetOf                */
830N/A#include "apr_network_io.h"
830N/A
902N/A#include "httpd.h"
854N/A#include "http_config.h"
854N/A#include "http_log.h"
854N/A#include "http_protocol.h"  /* for ap_hook_post_read_request */
854N/A
854N/A#if APR_HAVE_UNISTD_H
854N/A#include <unistd.h>         /* for getpid() */
865N/A#endif
1058N/A
/*
 * Raw (unencoded) contents of one unique identifier.
 *
 * Every field is kept in network byte order.  The structure may contain
 * compiler padding; gen_unique_id() copies the fields out one by one before
 * encoding so that the padding never ends up in the identifier.
 */
typedef struct {
    unsigned int stamp;         /* request time, taken from r->request_time */
    unsigned int in_addr;       /* IPv4 address looked up for this host */
    unsigned int pid;           /* process id of the child, from getpid() */
    unsigned short counter;     /* per-child counter, bumped on every request */
    unsigned int thread_index;  /* filled from r->connection->id */
} unique_id_rec;
902N/A
/* We are using thread_index (the index into the scoreboard), because we
 * cannot guarantee the thread_id will be an integer.
 *
 * At first glance this code looks like it cannot give a unique ID with the
 * new thread logic, because the counter is not incremented in a thread-safe
 * manner.  It does give a unique ID: the thread_index is now part of the ID
 * as well, so the unsynchronized counter does not matter.  For an ID to be
 * duplicated, the same thread would have to get the same counter value twice
 * in the same second.
 */
830N/A
830N/A/* Comments:
830N/A *
830N/A * We want an identifier which is unique across all hits, everywhere.
830N/A * "everywhere" includes multiple httpd instances on the same machine, or on
830N/A * multiple machines.  Essentially "everywhere" should include all possible
902N/A * httpds across all servers at a particular "site".  We make some assumptions
902N/A * that if the site has a cluster of machines then their time is relatively
902N/A * synchronized.  We also assume that the first address returned by a
830N/A * gethostbyname (gethostname()) is unique across all the machines at the
902N/A * "site".
830N/A *
 * We also further assume that pids fit in 32-bits.  If something uses more
 * than 32-bits, the fix is trivial, but it requires the unrolled uuencoding
 * loop to be extended.  A similar fix is needed to support multithreaded
 * servers, using a pid/tid combo.
830N/A *
830N/A * Together, the in_addr and pid are assumed to absolutely uniquely identify
830N/A * this one child from all other currently running children on all servers
830N/A * (including this physical server if it is running multiple httpds) from each
830N/A * other.
830N/A *
830N/A * The stamp and counter are used to distinguish all hits for a particular
830N/A * (in_addr,pid) pair.  The stamp is updated using r->request_time,
830N/A * saving cpu cycles.  The counter is never reset, and is used to permit up to
830N/A * 64k requests in a single second by a single child.
830N/A *
 * The 144 bits of unique_id_rec (18 bytes once any padding is removed,
 * assuming 32-bit ints and 16-bit shorts) are encoded using the alphabet
 * [A-Za-z0-9@-], resulting in 24 bytes of printable characters.  That is then
 * stuffed into the environment variable UNIQUE_ID so that it is available to
 * other modules.  The alphabet choice differs from normal base64 encoding
 * [A-Za-z0-9+/] because + and / are special characters in URLs and we want to
 * make it easy to use UNIQUE_ID in URLs.
990N/A *
990N/A * Note that UNIQUE_ID should be considered an opaque token by other
990N/A * applications.  No attempt should be made to dissect its internal components.
990N/A * It is an abstraction that may change in the future as the needs of this
990N/A * module change.
990N/A *
990N/A * It is highly desirable that identifiers exist for "eternity".  But future
990N/A * needs (such as much faster webservers, moving to 64-bit pids, or moving to a
990N/A * multithreaded server) may dictate a need to change the contents of
990N/A * unique_id_rec.  Such a future implementation should ensure that the first
990N/A * field is still a time_t stamp.  By doing that, it is possible for a site to
990N/A * have a "flag second" in which they stop all of their old-format servers,
990N/A * wait one entire second, and then start all of their new-servers.  This
990N/A * procedure will ensure that the new space of identifiers is completely unique
990N/A * from the old space.  (Since the first four unencoded bytes always differ.)
990N/A */
/*
 * Sun Jun  7 05:43:49 CEST 1998 -- Alvaro
 * More comments:
 * 1) The UUencoding procedure is now done in a general way, avoiding the problems
 * with sizes and paddings that can arise depending on the architecture. Now the
 * offsets and sizes of the elements of the unique_id_rec structure are calculated
 * in unique_id_global_init; and then used to duplicate the structure without the
 * paddings that might exist. The multithreaded server fix should now be very easy:
 * just add a new "tid" field to the unique_id_rec structure, and increase by one
 * UNIQUE_ID_REC_MAX.
 * 2) unique_id_rec.stamp has been changed from "time_t" to "unsigned int", because
 * its size is 64 bits on some platforms (linux/alpha), and this caused problems with
 * htonl/ntohl. Well, this shouldn't be a problem till year 2106.
 */
830N/A
830N/Astatic unsigned global_in_addr;
830N/A
830N/Astatic unique_id_rec cur_unique_id;
830N/A
/*
 * Number of fields in unique_id_rec.  Must be kept in step with the
 * structure, because the tables below hold one entry per field.
 */
#define UNIQUE_ID_REC_MAX 5

/* Byte offset of each unique_id_rec field inside the structure. */
static unsigned short unique_id_rec_offset[UNIQUE_ID_REC_MAX];

/* Size in bytes of each unique_id_rec field. */
static unsigned short unique_id_rec_size[UNIQUE_ID_REC_MAX];

/* Sum of all field sizes, i.e. the structure's size without padding. */
static unsigned short unique_id_rec_total_size;

/* Number of characters produced when the packed fields are encoded. */
static unsigned short unique_id_rec_size_uu;

/*
 * post_config hook: set up everything the children share.
 *
 * - Records the offset and size of every unique_id_rec field, and from those
 *   the packed size and the encoded length of an identifier.
 * - Looks up this host's name and its IPv4 address and stores the address in
 *   global_in_addr.
 * - Sleeps until the start of the next second so that a quick restart cannot
 *   reuse a time stamp handed out by the previous generation.
 *
 * p            pool used for the hostname and address lookups
 * plog, ptemp  not used
 * main_server  server record used for logging
 *
 * Logs an alert and calls exit(1) if the hostname or its IPv4 address cannot
 * be determined.
 */
static void unique_id_global_init(apr_pool_t *p, apr_pool_t *plog, apr_pool_t *ptemp, server_rec *main_server)
{
    char str[APRMAXHOSTLEN + 1];
    apr_short_interval_time_t pause;
    apr_status_t rv;
    char *ipaddrstr;
    apr_sockaddr_t *sockaddr;
    int i;

    /*
     * Calculate the sizes and offsets in cur_unique_id.
     */
    unique_id_rec_offset[0] = APR_XtOffsetOf(unique_id_rec, stamp);
    unique_id_rec_size[0] = sizeof(cur_unique_id.stamp);
    unique_id_rec_offset[1] = APR_XtOffsetOf(unique_id_rec, in_addr);
    unique_id_rec_size[1] = sizeof(cur_unique_id.in_addr);
    unique_id_rec_offset[2] = APR_XtOffsetOf(unique_id_rec, pid);
    unique_id_rec_size[2] = sizeof(cur_unique_id.pid);
    unique_id_rec_offset[3] = APR_XtOffsetOf(unique_id_rec, counter);
    unique_id_rec_size[3] = sizeof(cur_unique_id.counter);
    unique_id_rec_offset[4] = APR_XtOffsetOf(unique_id_rec, thread_index);
    unique_id_rec_size[4] = sizeof(cur_unique_id.thread_index);

    /* Start from zero each time: this hook can run more than once. */
    unique_id_rec_total_size = 0;
    for (i = 0; i < UNIQUE_ID_REC_MAX; i++) {
        unique_id_rec_total_size += unique_id_rec_size[i];
    }

    /*
     * Calculate the size of the structure when encoded: one output
     * character per 6 bits, rounded up.
     */
    unique_id_rec_size_uu = (unique_id_rec_total_size*8+5)/6;

    /*
     * Now get the global in_addr.  Note that it is not sufficient to use one
     * of the addresses from the main_server, since those aren't as likely to
     * be unique as the physical address of the machine
     */
    if ((rv = apr_gethostname(str, sizeof(str) - 1, p)) != APR_SUCCESS) {
        ap_log_error(APLOG_MARK, APLOG_ALERT, rv, main_server,
          "mod_unique_id: unable to find hostname of the server");
        exit(1);
    }
    /*
     * The last byte of str was held back from apr_gethostname() above.
     * Write the terminator there so that str is a valid string even if the
     * name filled the whole buffer without one.
     */
    str[sizeof(str) - 1] = '\0';

    /* XXX theoretically there are boxes out there which want to use
     *     mod_unique_id but which have no IPv4 address...  send in a patch :)
     */
    if ((rv = apr_sockaddr_info_get(&sockaddr, str, AF_INET, 0, 0, p)) != APR_SUCCESS) {
        ap_log_error(APLOG_MARK, APLOG_ALERT, rv, main_server,
                    "mod_unique_id: unable to find IPv4 address of \"%s\"", str);
        exit(1);
    }

    global_in_addr = sockaddr->sa.sin.sin_addr.s_addr;

    apr_sockaddr_ip_get(&ipaddrstr, sockaddr);
    ap_log_error(APLOG_MARK, APLOG_NOERRNO|APLOG_INFO, 0, main_server,
                "mod_unique_id: using ip addr %s",
                 ipaddrstr);

    /*
     * If the server is pummelled with restart requests we could possibly end
     * up in a situation where we're starting again during the same second
     * that has been used in previous identifiers.  Avoid that situation.
     *
     * In truth, for this to actually happen not only would it have to restart
     * in the same second, but it would have to somehow get the same pids as
     * one of the other servers that was running in that second. Which would
     * mean a 64k wraparound on pids ... not very likely at all.
     *
     * But protecting against it is relatively cheap.  We just sleep into the
     * next second.
     */
    pause = (apr_short_interval_time_t)(APR_USEC_PER_SEC - (apr_time_now() % APR_USEC_PER_SEC));
    apr_sleep(pause);
}

/*
 * child_init hook: fill in the parts of cur_unique_id that stay fixed for
 * the life of this child (in_addr, pid) and pick a starting counter value.
 *
 * p  not used
 * s  server record used for logging
 */
static void unique_id_child_init(apr_pool_t *p, server_rec *s)
{
    /*
     * The pid is part of the identifier because several servers may run on
     * the same physical machine (i.e. using Listen), yet none of them will
     * ever share a pid between their children.
     *
     * XXX: for multithread this needs to use a pid/tid combo and probably
     * needs to be expanded to 32 bits
     */
    const pid_t self = getpid();
    const unsigned int pid32 = (unsigned int) self;
    apr_time_t now;
    unsigned short seed;

    /*
     * Check the assumption that a pid survives the trip through 32 bits.
     * A 64-bit machine may declare pid_t as 64 bits and still only use 32
     * of them.  Testing this during global_init would have been nicer.
     */
    if ((pid_t) pid32 != self) {
        ap_log_error(APLOG_MARK, APLOG_NOERRNO|APLOG_CRIT, 0, s,
                    "oh no! pids are greater than 32-bits!  I'm broken!");
    }

    /*
     * Starting the counter at 0 would give a little less protection against
     * restart problems and against a clock that runs backwards, so seed it
     * from the current time instead.  Some systems have very low variance in
     * the low end of their system counter; dividing by 10 defends against
     * that.
     */
    now = apr_time_now();
    seed = (unsigned short)(now % APR_USEC_PER_SEC / 10);

    /*
     * Everything is stored in network byte order so that identifiers are
     * comparable between machines of different byte orderings.  in_addr is
     * already in network order and is copied as it is.
     */
    cur_unique_id.in_addr = global_in_addr;
    cur_unique_id.pid = htonl(pid32);
    cur_unique_id.counter = htons(seed);
}

/*
 * Maps a 6-bit value (0..63) to the character that represents it.
 *
 * NOTE: this is *NOT* the alphabet used by base64encode.  Standard base64
 * ends with '+' and '/', but those two characters have very special meanings
 * in URLs and we want identifiers to be easy to use in URLs, so '@' and '-'
 * take their place.
 */
static const char uuencoder[64] = {
    /*  0..15 */
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
    'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
    /* 16..31 */
    'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
    'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
    /* 32..47 */
    'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
    'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
    /* 48..63 */
    'w', 'x', 'y', 'z', '0', '1', '2', '3',
    '4', '5', '6', '7', '8', '9', '@', '-',
};

/*
 * post_read_request hook: produce the identifier for this request and store
 * it in r->subprocess_env under the name UNIQUE_ID.
 *
 * A request that has a predecessor (r->prev) and already carries
 * REDIRECT_UNIQUE_ID simply reuses that value.  Otherwise a new identifier
 * is assembled from the child's template, the request time and the
 * connection id, encoded with the uuencoder alphabet, and the child's
 * counter is advanced for the next call.
 *
 * Always returns DECLINED.
 */
static int gen_unique_id(request_rec *r)
{
    unique_id_rec id;
    /*
     * Receives the fields of id back to back, without whatever padding the
     * compiler put between them.  The two spare bytes at the end let the
     * encoder read a full 3-byte group even when the packed data stops
     * short of one.
     */
    struct {
        unique_id_rec rec;
        unsigned char spare[2];
    } packed;
    unsigned char *raw = (unsigned char *) &packed;
    unsigned char *src;
    const char *inherited;
    char *encoded;
    unsigned short next;
    int field, off, pos, out;

    /* copy the unique_id if this is an internal redirect (we're never
     * actually called for sub requests, so we don't need to test for
     * them) */
    if (r->prev) {
        inherited = apr_table_get(r->subprocess_env, "REDIRECT_UNIQUE_ID");
        if (inherited) {
            apr_table_setn(r->subprocess_env, "UNIQUE_ID", inherited);
            return DECLINED;
        }
    }

    /* per-request parts first, then the parts fixed for this child */
    id.stamp = htonl((unsigned int)r->request_time);
    id.thread_index = htonl((unsigned int)r->connection->id);
    id.in_addr = cur_unique_id.in_addr;
    id.pid = cur_unique_id.pid;
    id.counter = cur_unique_id.counter;

    /* pack the fields one after another, skipping any internal padding */
    pos = 0;
    for (field = 0; field < UNIQUE_ID_REC_MAX; field++) {
        src = (unsigned char *) &id + unique_id_rec_offset[field];
        for (off = 0; off < unique_id_rec_size[field]; off++) {
            raw[pos++] = src[off];
        }
    }
    /* zero the two bytes that may be read as filler by the last group */
    raw[pos] = '\0';
    raw[pos + 1] = '\0';

    /*
     * Encode: every 3 input bytes become 4 output characters of 6 bits
     * each.  The last group is cut short as soon as the expected number of
     * characters has been written.
     */
    encoded = (char *)apr_palloc(r->pool, unique_id_rec_size_uu + 1);
    out = 0;
    for (pos = 0; pos < unique_id_rec_total_size; pos += 3) {
        src = raw + pos;
        encoded[out++] = uuencoder[src[0] >> 2];
        encoded[out++] = uuencoder[((src[0] & 0x03) << 4) | (src[1] >> 4)];
        if (out == unique_id_rec_size_uu) {
            break;
        }
        encoded[out++] = uuencoder[((src[1] & 0x0f) << 2) | (src[2] >> 6)];
        if (out == unique_id_rec_size_uu) {
            break;
        }
        encoded[out++] = uuencoder[src[2] & 0x3f];
    }
    encoded[out] = '\0';

    /* set the environment variable */
    apr_table_setn(r->subprocess_env, "UNIQUE_ID", encoded);

    /* and increment the identifier for the next call; the counter is kept
     * in network order, so convert around the addition */
    next = ntohs(id.counter) + 1;
    cur_unique_id.counter = htons(next);

    return DECLINED;
}

/*
 * Attach this module's three callbacks to the server's hooks, all at
 * APR_HOOK_MIDDLE.
 *
 * p  not used
 */
static void register_hooks(apr_pool_t *p)
{
    const int order = APR_HOOK_MIDDLE;

    /* field layout, host address lookup and the start-up pause */
    ap_hook_post_config(unique_id_global_init, NULL, NULL, order);

    /* per-child pid, address and initial counter */
    ap_hook_child_init(unique_id_child_init, NULL, NULL, order);

    /* one identifier per request */
    ap_hook_post_read_request(gen_unique_id, NULL, NULL, order);
}

/*
 * Module record.  The module has no configuration of its own, so every
 * configuration slot is NULL and only the hook registration function is
 * supplied.
 */
module AP_MODULE_DECLARE_DATA unique_id_module = {
    STANDARD20_MODULE_STUFF,
    NULL,                       /* dir config creator */
    NULL,                       /* dir merger --- default is to override */
    NULL,                       /* server config */
    NULL,                       /* merge server configs */
    NULL,                       /* command table */
    register_hooks              /* register hooks */
};