e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder/* Licensed to the Apache Software Foundation (ASF) under one or more
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder * contributor license agreements. See the NOTICE file distributed with
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder * this work for additional information regarding copyright ownership.
45ad02e03fb913ba373d8fdcfe50244be3df31eaChristian Maeder * The ASF licenses this file to You under the Apache License, Version 2.0
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder * (the "License"); you may not use this file except in compliance with
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder * the License. You may obtain a copy of the License at
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder * http://www.apache.org/licenses/LICENSE-2.0
45ad02e03fb913ba373d8fdcfe50244be3df31eaChristian Maeder * Unless required by applicable law or agreed to in writing, software
45ad02e03fb913ba373d8fdcfe50244be3df31eaChristian Maeder * distributed under the License is distributed on an "AS IS" BASIS,
45ad02e03fb913ba373d8fdcfe50244be3df31eaChristian Maeder * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
45ad02e03fb913ba373d8fdcfe50244be3df31eaChristian Maeder * See the License for the specific language governing permissions and
45ad02e03fb913ba373d8fdcfe50244be3df31eaChristian Maeder * limitations under the License.
45ad02e03fb913ba373d8fdcfe50244be3df31eaChristian Maeder * mod_unique_id.c: generate a unique identifier for each request
45ad02e03fb913ba373d8fdcfe50244be3df31eaChristian Maeder * Original author: Dean Gaudet <dgaudet@arctic.org>
45ad02e03fb913ba373d8fdcfe50244be3df31eaChristian Maeder * UUencoding modified by: Alvaro Martinez Echevarria <alvaro@lander.es>
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder#define APR_WANT_BYTEFUNC /* for htons() et al */
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder#include "apr_general.h" /* for APR_OFFSETOF */
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder#include "http_protocol.h" /* for ap_hook_post_read_request */
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maedertypedef struct {
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder/* We are using thread_index (the index into the scoreboard), because we
ee31a8a5f5d786472f2b5dfb271b38e6d401fa35Christian Maeder * cannot guarantee the thread_id will be an integer.
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder * This code looks like it won't give a unique ID with the new thread logic.
ee31a8a5f5d786472f2b5dfb271b38e6d401fa35Christian Maeder * It will, even though we don't increment the counter in a thread-safe
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder * manner: because the thread_index is also in the unique ID now, this does
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder * not matter. In order for the id to not be unique, the same thread would
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder * have to get the same counter twice in the same second.
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder * We want an identifier which is unique across all hits, everywhere.
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder * "everywhere" includes multiple httpd instances on the same machine, or on
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder * multiple machines. Essentially "everywhere" should include all possible
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder * httpds across all servers at a particular "site". We make some assumptions
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder * that if the site has a cluster of machines then their time is relatively
ee31a8a5f5d786472f2b5dfb271b38e6d401fa35Christian Maeder * synchronized. We also assume that the first address returned by a
45ad02e03fb913ba373d8fdcfe50244be3df31eaChristian Maeder * gethostbyname (gethostname()) is unique across all the machines at the "site".
ee31a8a5f5d786472f2b5dfb271b38e6d401fa35Christian Maeder * The root is assumed to absolutely uniquely identify this one child
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder * from all other currently running children on all servers (including
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder * this physical server if it is running multiple httpds) from each other.
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder * The stamp and counter are used to distinguish all hits for a
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder * particular root. The stamp is updated using r->request_time,
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder * saving cpu cycles. The counter is never reset, and is used to
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder * permit up to 64k requests in a single second by a single child.
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder * The 144-bits of unique_id_rec are encoded using the alphabet
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder * [A-Za-z0-9@-], resulting in 24 bytes of printable characters. That is then
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder * stuffed into the environment variable UNIQUE_ID so that it is available to
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder * other modules. The alphabet choice differs from normal base64 encoding
ee31a8a5f5d786472f2b5dfb271b38e6d401fa35Christian Maeder * [A-Za-z0-9+/] because + and / are special characters in URLs and we want to
ee31a8a5f5d786472f2b5dfb271b38e6d401fa35Christian Maeder * make it easy to use UNIQUE_ID in URLs.
45ad02e03fb913ba373d8fdcfe50244be3df31eaChristian Maeder * Note that UNIQUE_ID should be considered an opaque token by other
45ad02e03fb913ba373d8fdcfe50244be3df31eaChristian Maeder * applications. No attempt should be made to dissect its internal components.
45ad02e03fb913ba373d8fdcfe50244be3df31eaChristian Maeder * It is an abstraction that may change in the future as the needs of this
45ad02e03fb913ba373d8fdcfe50244be3df31eaChristian Maeder * module change.
45ad02e03fb913ba373d8fdcfe50244be3df31eaChristian Maeder * It is highly desirable that identifiers exist for "eternity". But future
45ad02e03fb913ba373d8fdcfe50244be3df31eaChristian Maeder * needs (such as much faster webservers, or moving to a
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder * multithreaded server) may dictate a need to change the contents of
ee31a8a5f5d786472f2b5dfb271b38e6d401fa35Christian Maeder * unique_id_rec. Such a future implementation should ensure that the first
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder * field is still a time_t stamp. By doing that, it is possible for a site to
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder * have a "flag second" in which they stop all of their old-format servers,
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder * wait one entire second, and then start all of their new-format servers. This
ee31a8a5f5d786472f2b5dfb271b38e6d401fa35Christian Maeder * procedure will ensure that the new space of identifiers is completely unique
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder * from the old space. (Since the first four unencoded bytes always differ.)
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder * Note: previous implementations used 32-bits of IP address plus pid
ee31a8a5f5d786472f2b5dfb271b38e6d401fa35Christian Maeder * in place of the PRNG output in the "root" field. This was
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder * insufficient for IPv6-only hosts, required working DNS to determine
45ad02e03fb913ba373d8fdcfe50244be3df31eaChristian Maeder * a unique IP address (fragile), and needed a [0, 1) second sleep
ee31a8a5f5d786472f2b5dfb271b38e6d401fa35Christian Maeder * call at startup to avoid pid reuse. Use of the PRNG avoids all
ee31a8a5f5d786472f2b5dfb271b38e6d401fa35Christian Maeder * these issues.
ee31a8a5f5d786472f2b5dfb271b38e6d401fa35Christian Maeder * Sun Jun 7 05:43:49 CEST 1998 -- Alvaro
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder * More comments:
ee31a8a5f5d786472f2b5dfb271b38e6d401fa35Christian Maeder * 1) The UUencoding procedure is now done in a general way, avoiding the problems
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder * with sizes and paddings that can arise depending on the architecture. Now the
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder * offsets and sizes of the elements of the unique_id_rec structure are calculated
7bffb8b0e6cae4bb7ecb59b99327add6106c06b9Christian Maeder * in unique_id_global_init; and then used to duplicate the structure without the
7bffb8b0e6cae4bb7ecb59b99327add6106c06b9Christian Maeder * paddings that might exist. The multithreaded server fix should now be very easy:
f52a4838c101d52bbbd689f6b51f2c1c9202f0a8Christian Maeder * just add a new "tid" field to the unique_id_rec structure, and increase by one
7bffb8b0e6cae4bb7ecb59b99327add6106c06b9Christian Maeder * UNIQUE_ID_REC_MAX.
45ad02e03fb913ba373d8fdcfe50244be3df31eaChristian Maeder * 2) unique_id_rec.stamp has been changed from "time_t" to "unsigned int", because
45ad02e03fb913ba373d8fdcfe50244be3df31eaChristian Maeder * its size is 64bits on some platforms (linux/alpha), and this caused problems with
7bffb8b0e6cae4bb7ecb59b99327add6106c06b9Christian Maeder * htonl/ntohl. Well, this shouldn't be a problem till year 2106.
ee31a8a5f5d786472f2b5dfb271b38e6d401fa35Christian Maeder * XXX: We should have a per-thread counter and not use cur_unique_id.counter
ee31a8a5f5d786472f2b5dfb271b38e6d401fa35Christian Maeder * XXX: in all threads, because this is bad for performance on multi-processor
ee31a8a5f5d786472f2b5dfb271b38e6d401fa35Christian Maeder * XXX: systems: Writing to the same address from several CPUs causes cache
7bffb8b0e6cae4bb7ecb59b99327add6106c06b9Christian Maeder * XXX: thrashing.
ee31a8a5f5d786472f2b5dfb271b38e6d401fa35Christian Maeder * Number of elements in the structure unique_id_rec.
adda0e6252b14215228e4071b347c49b808894f8Christian Maederstatic unsigned short unique_id_rec_offset[UNIQUE_ID_REC_MAX],
adda0e6252b14215228e4071b347c49b808894f8Christian Maederstatic int unique_id_global_init(apr_pool_t *p, apr_pool_t *plog, apr_pool_t *ptemp, server_rec *main_server)
ee31a8a5f5d786472f2b5dfb271b38e6d401fa35Christian Maeder * Calculate the sizes and offsets in cur_unique_id.
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder unique_id_rec_offset[0] = APR_OFFSETOF(unique_id_rec, stamp);
ee31a8a5f5d786472f2b5dfb271b38e6d401fa35Christian Maeder unique_id_rec_size[0] = sizeof(cur_unique_id.stamp);
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder unique_id_rec_offset[1] = APR_OFFSETOF(unique_id_rec, root);
ee31a8a5f5d786472f2b5dfb271b38e6d401fa35Christian Maeder unique_id_rec_size[1] = sizeof(cur_unique_id.root);
ee31a8a5f5d786472f2b5dfb271b38e6d401fa35Christian Maeder unique_id_rec_offset[2] = APR_OFFSETOF(unique_id_rec, counter);
ee31a8a5f5d786472f2b5dfb271b38e6d401fa35Christian Maeder unique_id_rec_size[2] = sizeof(cur_unique_id.counter);
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder unique_id_rec_offset[3] = APR_OFFSETOF(unique_id_rec, thread_index);
ee31a8a5f5d786472f2b5dfb271b38e6d401fa35Christian Maeder unique_id_rec_size[3] = sizeof(cur_unique_id.thread_index);
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder unique_id_rec_total_size = unique_id_rec_size[0] + unique_id_rec_size[1] +
ee31a8a5f5d786472f2b5dfb271b38e6d401fa35Christian Maeder unique_id_rec_size[2] + unique_id_rec_size[3];
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder * Calculate the size of the structure when encoded.
ee31a8a5f5d786472f2b5dfb271b38e6d401fa35Christian Maeder unique_id_rec_size_uu = (unique_id_rec_total_size*8+5)/6;
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maederstatic void unique_id_child_init(apr_pool_t *p, server_rec *s)
ee31a8a5f5d786472f2b5dfb271b38e6d401fa35Christian Maeder ap_random_insecure_bytes(&cur_unique_id.root,
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder * If we use 0 as the initial counter we have a little less protection
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder * against restart problems, and a little less protection against a clock
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder * going backwards in time.
45ad02e03fb913ba373d8fdcfe50244be3df31eaChristian Maeder ap_random_insecure_bytes(&cur_unique_id.counter,
ee31a8a5f5d786472f2b5dfb271b38e6d401fa35Christian Maeder/* NOTE: This is *NOT* the same encoding used by base64encode ... the last two
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder * characters should be + and /. But those two characters have very special
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder * meanings in URLs, and we want to make it easy to use identifiers in
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder * URLs. So we replace them with @ and -.
ee31a8a5f5d786472f2b5dfb271b38e6d401fa35Christian Maeder 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
ee31a8a5f5d786472f2b5dfb271b38e6d401fa35Christian Maeder 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
ee31a8a5f5d786472f2b5dfb271b38e6d401fa35Christian Maeder 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
ee31a8a5f5d786472f2b5dfb271b38e6d401fa35Christian Maeder 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '@', '-',
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maederstatic const char *gen_unique_id(const request_rec *r)
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder * Buffer padded with two final bytes, used to copy the unique_id_rec
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder * structure without the internal paddings that it could have.
45ad02e03fb913ba373d8fdcfe50244be3df31eaChristian Maeder unsigned char *x,*y;
ee31a8a5f5d786472f2b5dfb271b38e6d401fa35Christian Maeder memcpy(&new_unique_id.root, &cur_unique_id.root, ROOT_SIZE);
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder new_unique_id.counter = cur_unique_id.counter;
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder new_unique_id.stamp = htonl((unsigned int)apr_time_sec(r->request_time));
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder new_unique_id.thread_index = htonl((unsigned int)r->connection->id);
45ad02e03fb913ba373d8fdcfe50244be3df31eaChristian Maeder /* we'll use a temporary buffer to avoid uuencoding the possible internal
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder * paddings of the original structure */
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder x = (unsigned char *) &paddedbuf;
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder for (i = 0; i < UNIQUE_ID_REC_MAX; i++) {
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder y = ((unsigned char *) &new_unique_id) + unique_id_rec_offset[i];
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder for (j = 0; j < unique_id_rec_size[i]; j++, k++) {
ee31a8a5f5d786472f2b5dfb271b38e6d401fa35Christian Maeder * We reset two more bytes just in case padding is needed for the uuencoding.
ee31a8a5f5d786472f2b5dfb271b38e6d401fa35Christian Maeder x[k++] = '\0';
ee31a8a5f5d786472f2b5dfb271b38e6d401fa35Christian Maeder x[k++] = '\0';
ee31a8a5f5d786472f2b5dfb271b38e6d401fa35Christian Maeder /* alloc str and do the uuencoding */
ee31a8a5f5d786472f2b5dfb271b38e6d401fa35Christian Maeder str = (char *)apr_palloc(r->pool, unique_id_rec_size_uu + 1);
ee31a8a5f5d786472f2b5dfb271b38e6d401fa35Christian Maeder for (i = 0; i < unique_id_rec_total_size; i += 3) {
ee31a8a5f5d786472f2b5dfb271b38e6d401fa35Christian Maeder str[k++] = uuencoder[((y[0] & 0x03) << 4) | ((y[1] & 0xf0) >> 4)];
ee31a8a5f5d786472f2b5dfb271b38e6d401fa35Christian Maeder str[k++] = uuencoder[((y[1] & 0x0f) << 2) | ((y[2] & 0xc0) >> 6)];
ee31a8a5f5d786472f2b5dfb271b38e6d401fa35Christian Maeder /* and increment the identifier for the next call */
ee31a8a5f5d786472f2b5dfb271b38e6d401fa35Christian Maeder counter = ntohs(new_unique_id.counter) + 1;
ee31a8a5f5d786472f2b5dfb271b38e6d401fa35Christian Maeder * There are two ways the generation of a unique id can be triggered:
ee31a8a5f5d786472f2b5dfb271b38e6d401fa35Christian Maeder * - from the post_read_request hook which calls set_unique_id()
ee31a8a5f5d786472f2b5dfb271b38e6d401fa35Christian Maeder * - from error logging via the generate_log_id hook which calls
ee31a8a5f5d786472f2b5dfb271b38e6d401fa35Christian Maeder * generate_log_id(). This may happen before or after set_unique_id()
ee31a8a5f5d786472f2b5dfb271b38e6d401fa35Christian Maeder * has been called, or not at all.
ee31a8a5f5d786472f2b5dfb271b38e6d401fa35Christian Maederstatic int generate_log_id(const conn_rec *c, const request_rec *r,
ee31a8a5f5d786472f2b5dfb271b38e6d401fa35Christian Maeder const char **id)
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder /* we do not care about connection ids */
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder /* XXX: do we need special handling for internal redirects? */
ee31a8a5f5d786472f2b5dfb271b38e6d401fa35Christian Maeder /* if set_unique_id() has been called for this request, use it */
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder *id = apr_table_get(r->subprocess_env, "UNIQUE_ID");
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder /* copy the unique_id if this is an internal redirect (we're never
1fac054baed931dc57f0e41dd0ade39adac28c49Christian Maeder * actually called for sub requests, so we don't need to test for them)
7bffb8b0e6cae4bb7ecb59b99327add6106c06b9Christian Maeder id = apr_table_get(r->subprocess_env, "REDIRECT_UNIQUE_ID");
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder /* if we have a log id, it was set by our generate_log_id() function
1fac054baed931dc57f0e41dd0ade39adac28c49Christian Maeder * and we should reuse the same id
ee31a8a5f5d786472f2b5dfb271b38e6d401fa35Christian Maeder /* set the environment variable */
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder apr_table_setn(r->subprocess_env, "UNIQUE_ID", id);
ee31a8a5f5d786472f2b5dfb271b38e6d401fa35Christian Maeder ap_hook_post_config(unique_id_global_init, NULL, NULL, APR_HOOK_MIDDLE);
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder ap_hook_child_init(unique_id_child_init, NULL, NULL, APR_HOOK_MIDDLE);
7bffb8b0e6cae4bb7ecb59b99327add6106c06b9Christian Maeder ap_hook_post_read_request(set_unique_id, NULL, NULL, APR_HOOK_MIDDLE);
e2ca90217abd35b3d5f98bfe73ecffb34badd837Christian Maeder ap_hook_generate_log_id(generate_log_id, NULL, NULL, APR_HOOK_MIDDLE);
7bffb8b0e6cae4bb7ecb59b99327add6106c06b9Christian Maeder NULL, /* dir merger --- default is to override */