mod_unique_id.c revision 4d12805e6c18253040223ea637acd6b3b3c18f60
/* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* mod_unique_id.c: generate a unique identifier for each request
*
* Original author: Dean Gaudet <dgaudet@arctic.org>
* UUencoding modified by: Alvaro Martinez Echevarria <alvaro@lander.es>
*/
#define APR_WANT_BYTEFUNC /* for htons() et al */
#include "apr_want.h"
#include "apr_general.h" /* for APR_OFFSETOF */
#include "apr_network_io.h"
#include "httpd.h"
#include "http_config.h"
#include "http_log.h"
#include "http_protocol.h" /* for ap_hook_post_read_request */
/* NOTE(review): ROOT_SIZE is not referenced by any code visible in this file
* view; presumably it is the byte length of the record's "root" member --
* confirm against that member's declaration.
*/
#define ROOT_SIZE 10
/* Record describing one identifier before it is encoded.
*
* NOTE(review): the closing brace / type name of this struct and the "root"
* member (referenced elsewhere as cur_unique_id.root) are not visible in
* this view of the file.
*/
typedef struct {
/* Time stamp; per the design notes in this file it is updated from
* r->request_time and must remain the first field. */
unsigned int stamp;
/* Request counter; per the design notes it is never reset and allows up to
* 64k requests in a single second by a single child. */
unsigned short counter;
unsigned int thread_index;
/* We are using thread_index (the index into the scoreboard), because we
* cannot guarantee the thread_id will be an integer.
*
* This code looks like it won't give a unique ID with the new thread logic.
* It will. The reason is, we don't increment the counter in a thread-safe
* manner. Because the thread_index is also in the unique ID now, this does
* not matter. In order for the id to not be unique, the same thread would
* have to get the same counter twice in the same second.
*/
/* Comments:
*
* We want an identifier which is unique across all hits, everywhere.
* "everywhere" includes multiple httpd instances on the same machine, or on
* multiple machines. Essentially "everywhere" should include all possible
* httpds across all servers at a particular "site". We make some assumptions
* that if the site has a cluster of machines then their time is relatively
* synchronized. We also assume that the first address returned by a
* gethostbyname (gethostname()) is unique across all the machines at the
* "site".
*
* The root is assumed to absolutely uniquely identify this one child
* from all other currently running children on all servers (including
* this physical server if it is running multiple httpds) from each
* other.
*
* The stamp and counter are used to distinguish all hits for a
* particular root. The stamp is updated using r->request_time,
* saving cpu cycles. The counter is never reset, and is used to
* permit up to 64k requests in a single second by a single child.
*
* The 144-bits of unique_id_rec are encoded using the alphabet
* [A-Za-z0-9@-], resulting in 24 bytes of printable characters. That is then
* stuffed into the environment variable UNIQUE_ID so that it is available to
* other modules. The alphabet choice differs from normal base64 encoding
* [A-Za-z0-9+/] because + and / are special characters in URLs and we want to
* make it easy to use UNIQUE_ID in URLs.
*
* Note that UNIQUE_ID should be considered an opaque token by other
* applications. No attempt should be made to dissect its internal components.
* It is an abstraction that may change in the future as the needs of this
* module change.
*
* It is highly desirable that identifiers exist for "eternity". But future
* needs (such as much faster webservers, or moving to a
* multithreaded server) may dictate a need to change the contents of
* unique_id_rec. Such a future implementation should ensure that the first
* field is still a time_t stamp. By doing that, it is possible for a site to
* have a "flag second" in which they stop all of their old-format servers,
* wait one entire second, and then start all of their new-servers. This
* procedure will ensure that the new space of identifiers is completely unique
* from the old space. (Since the first four unencoded bytes always differ.)
*
* Note: previous implementations used 32-bits of IP address plus pid
* in place of the PRNG output in the "root" field. This was
* insufficient for IPv6-only hosts, required working DNS to determine
* a unique IP address (fragile), and needed a [0, 1) second sleep
* call at startup to avoid pid reuse. Use of the PRNG avoids all
* these issues.
*/
/*
* Sun Jun 7 05:43:49 CEST 1998 -- Alvaro
* More comments:
* 1) The UUencoding procedure is now done in a general way, avoiding the problems
* with sizes and paddings that can arise depending on the architecture. Now the
* offsets and sizes of the elements of the unique_id_rec structure are calculated
* in unique_id_global_init; and then used to duplicate the structure without the
* paddings that might exist. The multithreaded server fix should be now very easy:
* just add a new "tid" field to the unique_id_rec structure, and increase by one
* UNIQUE_ID_REC_MAX.
* 2) unique_id_rec.stamp has been changed from "time_t" to "unsigned int", because
* time_t is 64 bits wide on some platforms, which caused problems with
* htonl()/ntohl().
*/
/*
* XXX: We should have a per-thread counter and not use cur_unique_id.counter
* XXX: in all threads, because this is bad for performance on multi-processor
* XXX: systems: Writing to the same address from several CPUs causes cache
* XXX: thrashing.
*/
/* File-scope unique_id_rec instance. The visible code refers to its "root"
* and "counter" members; the XXX note above is about that counter being
* shared by all threads.
*/
static unique_id_rec cur_unique_id;
/*
* Number of elements in the structure unique_id_rec.
*/
#define UNIQUE_ID_REC_MAX 4
/*
* Layout tables used when packing a unique_id_rec for encoding.
*
* unique_id_rec_offset[i]  - byte offset of field i inside the record.
* unique_id_rec_size[i]    - number of bytes copied for field i.
* unique_id_rec_total_size - bound of the encoding loop, which walks the
*                            packed bytes three at a time.
* unique_id_rec_size_uu    - number of encoded characters at which the
*                            encoding loop stops.
*
* The original declaration ended in a dangling comma, which ran into the
* next function definition and left the last three names (all used by
* gen_unique_id) undeclared. The declarator list is completed here with the
* same base type; all objects start out zeroed like any file-scope static.
*/
static unsigned short unique_id_rec_offset[UNIQUE_ID_REC_MAX],
                      unique_id_rec_size[UNIQUE_ID_REC_MAX],
                      unique_id_rec_total_size,
                      unique_id_rec_size_uu;
/*
 * unique_id_global_init
 *
 * Takes three pools (p, plog, ptemp) and the main server record; none of
 * them is touched by the body as it stands. Always returns OK.
 *
 * The design notes elsewhere in this file say the offsets and sizes of the
 * unique_id_rec fields, and the size of the encoded form, are worked out
 * in this function.
 * NOTE(review): no statements doing either calculation are visible in this
 * body -- confirm against the complete file before relying on the layout
 * tables being populated.
 */
static int unique_id_global_init(apr_pool_t *p, apr_pool_t *plog,
                                 apr_pool_t *ptemp, server_rec *main_server)
{
    /* Step 1 (per the notes): sizes and offsets within cur_unique_id. */

    /* Step 2 (per the notes): size of the structure once encoded. */

    return OK;
}
{
/* NOTE(review): orphaned fragment. The function header that owns this brace
* block, and the beginnings of the two calls whose last argument is the
* sizeof expression below, are not visible in this view. What is visible
* only shows that the sizes of cur_unique_id.root and cur_unique_id.counter
* are passed as trailing arguments.
*/
sizeof(cur_unique_id.root));
/*
* If we use 0 as the initial counter we have a little less protection
* against restart problems, and a little less protection against a clock
* going backwards in time.
*/
sizeof(cur_unique_id.counter));
}
/*
 * 64-entry output alphabet, indexed 0..63.
 *
 * This deliberately differs from the standard base64 alphabet in its last
 * two entries: base64 uses '+' and '/', but both have special meanings in
 * URLs. '@' and '-' are used instead so that identifiers can be dropped
 * into URLs without escaping.
 */
static const char uuencoder[64] = {
    /*  0..25: upper-case letters */
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
    'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
    'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
    'Y', 'Z',
    /* 26..51: lower-case letters */
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h',
    'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p',
    'q', 'r', 's', 't', 'u', 'v', 'w', 'x',
    'y', 'z',
    /* 52..61: digits */
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    /* 62..63: URL-friendly stand-ins for '+' and '/' */
    '@', '-'
};
/*
* gen_unique_id: build the printable identifier string for request r and
* return it.
*
* Visible steps: (1) copy each record field, located through
* unique_id_rec_offset[] / unique_id_rec_size[], into a contiguous byte
* buffer so that struct padding is not encoded; (2) append two zero bytes;
* (3) walk the packed bytes three at a time up to unique_id_rec_total_size,
* stopping once unique_id_rec_size_uu characters have been produced;
* (4) NUL-terminate and return str.
*
* NOTE(review): the body is incomplete as seen here --
*   - new_unique_id is used but not declared or filled in;
*   - str is written and returned but never assigned;
*   - paddedbuf as declared is only 2 bytes, so the copy loop would write
*     past it for any non-zero field size;
*   - the encoding loop stores no characters, and its two break tests are
*     identical back-to-back;
*   - "counter" is declared but unused, and nothing follows the
*     "increment the identifier" comment;
*   - r is not referenced.
* Confirm against the complete file before changing anything here.
*/
static const char *gen_unique_id(const request_rec *r)
{
char *str;
/*
* Buffer padded with two final bytes, used to copy the unique_id_rec
* structure without the internal paddings that it could have.
*/
struct {
unsigned char pad[2];
} paddedbuf;
unsigned char *x,*y;
unsigned short counter;
int i,j,k;
/* we'll use a temporary buffer to avoid uuencoding the possible internal
* paddings of the original structure */
x = (unsigned char *) &paddedbuf;
k = 0; /* k: write position in the packed buffer */
for (i = 0; i < UNIQUE_ID_REC_MAX; i++) {
/* y: start of field i inside the source record */
y = ((unsigned char *) &new_unique_id) + unique_id_rec_offset[i];
for (j = 0; j < unique_id_rec_size[i]; j++, k++) {
x[k] = y[j];
}
}
/*
* We reset two more bytes just in case padding is needed for the uuencoding.
*/
x[k++] = '\0';
x[k++] = '\0';
/* alloc str and do the uuencoding */
k = 0; /* k is reused as the write position in str */
for (i = 0; i < unique_id_rec_total_size; i += 3) {
y = x + i; /* y: current group of three packed bytes */
if (k == unique_id_rec_size_uu) break;
if (k == unique_id_rec_size_uu) break;
}
str[k++] = '\0';
/* and increment the identifier for the next call */
return str;
}
/*
* There are two ways the generation of a unique id can be triggered:
*
* - from the post_read_request hook which calls set_unique_id()
* - from error logging via the generate_log_id hook which calls
* generate_log_id(). This may happen before or after set_unique_id()
* has been called, or not at all.
*/
const char **id)
{
/* NOTE(review): the beginning of this definition's signature is not visible
* in this view; the comment block above refers to it as generate_log_id().
*
* Visible behaviour: returns DECLINED when r is NULL; otherwise, if *id is
* NULL, stores the result of gen_unique_id(r) in *id, and returns OK.
*/
/* we do not care about connection ids */
if (r == NULL)
return DECLINED;
/* XXX: do we need special handling for internal redirects? */
/* if set_unique_id() has been called for this request, use it */
/* NOTE(review): no lookup of a previously set id is visible before this
* test, so *id is whatever the caller passed in -- confirm. */
if (!*id)
*id = gen_unique_id(r);
return OK;
}
/*
* set_unique_id: obtain an identifier for request r, generating one with
* gen_unique_id() when none is available yet. Always returns DECLINED.
*
* NOTE(review): the body is incomplete as seen here -- "id" is used but not
* declared, the internal-redirect and log-id branches are empty, and no
* statement follows the "set the environment variable" comment. Confirm
* against the complete file.
*/
static int set_unique_id(request_rec *r)
{
/* copy the unique_id if this is an internal redirect (we're never
* actually called for sub requests, so we don't need to test for
* them) */
if (r->prev) {
}
if (!id) {
/* if we have a log id, it was set by our generate_log_id() function
* and we should reuse the same id
*/
}
/* still no id from either source above: generate a fresh one */
if (!id) {
id = gen_unique_id(r);
}
/* set the environment variable */
return DECLINED;
}
/*
* register_hooks: referenced from the "register hooks" slot of the
* initializer list that follows.
*
* NOTE(review): the body is empty in this view and p is unused, although the
* comments in this file describe a post_read_request hook and a
* generate_log_id hook -- confirm against the complete file.
*/
static void register_hooks(apr_pool_t *p)
{
}
/* NOTE(review): these are the trailing slots of an initializer list whose
* opening lines are not visible in this view; the per-slot comments are the
* only description available here. */
NULL, /* dir config creator */
NULL, /* dir merger --- default is to override */
NULL, /* server config */
NULL, /* merge server configs */
NULL, /* command apr_table_t */
register_hooks /* register hooks */
};