readbuf.c revision 1160694128cd3980cc06abe31af529a887efd310
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* nfs log - read buffer file and return structs in usable form
*/
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <signal.h>
#include <strings.h>
#include <errno.h>
#include <syslog.h>
#include <time.h>
#include <limits.h>
#include <libintl.h>
#include <values.h>
#include <search.h>
#include <pwd.h>
#include <netdb.h>
#include <netconfig.h>
#include <netdir.h>
#include "nfslogd.h"
#define MAX_LRS_READ_AHEAD 2048
#define MAX_RECS_TO_DELAY 32768
static int nfslog_init_buf(char *, struct nfslog_buf *, int *);
static void nfslog_free_buf(struct nfslog_buf *, int);
struct nfslog_lr *);
static void insert_lrp_to_lb(struct nfslog_buf *,
struct nfslog_lr *);
static void nfslog_rewrite_bufheader(struct nfslog_buf *);
/*
* Treat the provided path name as an NFS log buffer file.
* Allocate a data structure for its handling and initialize it.
* *error contains the previous error condition encountered for
* this object. This value can be used to avoid printing the last
* error endlessly.
* It will set *error appropriately after processing.
*/
struct nfslog_buf *
{
return (NULL);
}
return (NULL);
}
return (NULL);
}
return (lbp);
}
/*
* Free the log buffer struct with all of its baggage and free the data struct
*/
void
{
}
/*
* Set up the log buffer struct; simple things are opening and locking
* the buffer file and then on to mmap()ing it for later use by the
* XDR decode path. Make sure to read the buffer header before
* returning so that we will be at the first true log record.
*
* *error contains the last error encountered on this object. It can
* be used to avoid reporting the same error endlessly. It is reset
* to the current error code on return.
*/
static int
{
/*
* set these values so that the free routine will know what to do
*/
lbp->last_record_offset = 0;
lbp->num_pr_queued = 0;
}
return (*error);
}
}
return (*error);
}
/*
* Lock the entire buffer file to prevent conflicting access.
* We get a write lock because we want only 1 process to be
* generating records from it.
*/
}
return (*error);
}
}
return (*error);
}
/* This is part of the duality of the use of either mmap()|read() */
} else {
}
/* Read the header */
"error in reading file '%s': %s"),
}
return (*error);
}
"error in reading file '%s': %s"),
}
return (*error);
}
/*
* Set the pointer to the next record based on the buffer header.
* 'lbp->bh.bh_offset' contains the offset of where to begin
* processing relative to the buffer header.
*/
/*
* If we are going to be using read() for file data, then we may
* have to adjust the current file pointer to take into account
* a starting point other than the beginning of the file.
* If mmap is being used, this is taken care of as a side effect of
* setting up the value of next_rec.
*/
/* This is a special case of setting the last_record_offset */
} else {
}
return (*error = 0);
}
/*
* Free the nfslog buffer and its associated allocations
*/
static void
{
int error;
/* work to free the offset records and rewrite header */
/* adjust the offset for the entire buffer */
}
if (close_quick)
return;
do {
}
if (close_quick)
return;
/* Take care of the queued log records first */
do {
}
/* The buffer header was decoded and needs to be freed */
}
/* get rid of the bufheader lrp */
}
/* Clean up for mmap() usage */
}
}
/* Finally close the buffer file */
gettext("Cannot unlock file %s: %s"),
}
}
}
/*
* We are reading a record from the log buffer file. Since we are reading
* an XDR stream, we first have to read the first integer to determine
* how much to read in whole for this record. Our preference is to use
* mmap(), but if that failed initially we will be using read(). Need to be
* careful about proper initialization of the log record both from a field
* perspective and for XDR decoding.
*/
static struct nfslog_lr *
{
unsigned int record_size;
/* Check to see if mmap worked */
/*
* EOF or other failure; we don't try to recover, just return
*/
return (NULL);
}
} else {
/* EOF check for the mmap() case */
return (NULL);
}
}
/* We have to XDR the first int so we know how much is in this record */
return (NULL);
}
/*
* Read() case - shouldn't be used very much.
* Note: The 'buffer' field is used later on
* to determine which method is being used mmap()|read()
*/
/* partial record from buffer */
"Last partial record in work buffer %s "
return (NULL);
}
return (NULL);
}
return (NULL);
}
/* partial record from buffer */
"Last partial record in work buffer %s "
return (NULL);
}
/* other initializations */
/* Keep track of the offset at which this record was read */
else
/* This is the true address of the record */
/* Here is the logic for mmap() vs. read() */
/* Setup for the 'real' XDR decode of the entire record */
/* calculate the offset for the next record */
return (lrp);
}
/*
* Simple removal of the log record from the log buffer queue.
* Make sure to manage the count of records queued.
*/
static struct nfslog_lr *
{
} else {
}
} else {
}
return (lrp);
}
/*
* Insert a log record struct on the log buffer struct. The log buffer
* has a pointer to the head of a queue of log records that have been
* read from the buffer file but have not been processed yet because
* the record id did not match the sequence desired for processing.
* The insertion must be in the 'correct'/sorted order which adds
* to the complexity of this function.
*/
static void
{
/* that was easy */
} else {
/*
* Does this lrp go before the first on the list?
* If so, do the insertion by hand since insque is not
* as flexible when queueing an element to the head of
* a list.
*/
} else {
/*
* Search the queue for the correct insertion point.
* Be careful about the insque so that the record
* ends up in the right place.
*/
do {
if (ins_rec_id <
break;
else
}
}
/* always keep track of how many we have */
}
/*
* We are rewriting the buffer header at the start of the log buffer
* for the sole purpose of resetting the bh_offset field. This is
* supposed to represent the progress that the nfslogd daemon has made
* in its processing of the log buffer file.
* 'lbp->last_record_offset' contains the absolute offset of the end
* of the last element processed. The on-disk buffer offset is relative
* to the buffer header, therefore we subtract the length of the buffer
* header from the absolute offset.
*/
static void
{
/* size big enough for buffer header encode */
#define XBUFSIZE 128
unsigned int wsize;
/*
* if version 1 buffer is large and the current offset cannot be
* represented, then don't update the offset in the buffer.
*/
/* No need to update the header - offset too big */
return;
}
/*
* build the buffer header from the original that was saved
* on initialization; note that the offset is taken from the
* last record processed (the last offset that represents
* all records processed without any holes in the processing)
*/
/*
* if version 1 buffer is large and the current offset cannot be
* represented in 32 bits, then save only the last valid offset
* in the buffer and mark the flags to indicate that.
*/
} else {
/* don't update the offset in the buffer */
"nfslog_rewrite_bufheader: %s: offset does not fit "
}
"error in re-writing buffer file %s header\n"),
return;
}
/* go to the beginning of the file */
} else {
}
}
/*
* With the provided lrp, we will take and 'insert' the range that the
* record covered in the buffer file into a list of processed ranges
* for the buffer file. These ranges represent the records processed
* but not 'marked' in the buffer header as being processed.
* This insertion process is being done for two reasons. The first is that
* we do not want to pay the performance penalty of re-writing the buffer header
* for each record that we process. The second reason is that the records
* may be processed out of order because of the unique ids. This will occur
* if the kernel has written the records to the buffer file out of order.
* The read routine will 'sort' them as the records are read.
*
* We do not want to re-write the buffer header such that a record is
* represented as having been processed when it has not been. In the case
* that the nfslogd daemon restarts processing and the buffer header
* has been re-written improperly, some records could be skipped.
* We will be taking the conservative approach and only writing buffer
* header offsets when the entire offset range has been processed.
*/
static void
{
/* init the data struct as if it were the only one */
/* always add since we know we are going to insert */
/* Is this the first one? If so, take the easy way out */
} else {
/* sort on insertion... */
do {
break;
/* insert where appropriate (before the one we found) */
/*
* special case where the insertion was done at the
* head of the list
*/
/*
* now that the entry is in place, we need to see if it can
* be combined with the previous or following entries.
* combination is done by adding to the length.
*/
if (prp->start_offset ==
}
}
}
prp->start_offset) {
/* adjust the offset for the entire buffer */
else
}
}
}
/*
* nfslog_get_logrecord is responsible for retrieving the next log record
* from the buffer file. This would normally be very straightforward but there
* is the added complexity of attempting to order the requests coming out of
* the buffer file. The fundamental problem is that the kernel nfs logging
* functionality does not guarantee that the records were written to the file
* in the order that the NFS server processed them. This can cause a problem
* in the fh -> pathname mapping in the case where a lookup for a file comes
* later in the buffer file than other operations on the lookup's target.
* The fh mapping database will not have an entry and will therefore not
* be able to map the fh to a name.
*
* So to solve this problem, the kernel nfs logging code tags each record
* with a monotonically increasing id that is guaranteed to be allocated
* in the order that the requests were processed. Realize however that
* this processing guarantee is essentially for one thread on one client.
* This id mechanism does not order all requests since it is only the
* single client/single thread case that concerns us here.
*
* This function will do the 'sorting' of the requests as they are
* read from the buffer file. The sorting needs to take into account
* that some ids may be missing (operations not logged but ids allocated)
* and that the id field will eventually wrap over MAXINT.
*
* Complexity to solve the fh -> pathname mapping issue.
*/
struct nfslog_lr *
{
/* figure out what the next should be if the world were perfect */
/*
* First we check the queued records on the log buffer struct
* to see if the one we want is there. The records are sorted
* on the record id during the insertions to the queue so that
* this check is easy.
*/
/* Does the first record match ? */
} else {
/*
* Here we are checking for wrap of the record id
* since it is an unsigned int. The idea is that
* if there is a huge span between what we expect
* and what is queued, then we need to flush/empty
* the queued records first.
*/
if (next_rec_id <
lbp->last_rec_id =
}
}
}
/*
* So the first queued record didn't match (or there were no queued
* records to look at). Now we go to the buffer file looking for
* the expected log record based on its id. We loop looking for
* a matching record, queueing the records that do not match.
* Note that we will queue a maximum number to handle the case
* of a missing record id or a queue that is very confused. We don't
* want to consume too much memory.
*/
/* Have we queued too many for this buffer? */
break;
}
/*
* Get a record from the buffer file. If none are available,
* this is probably an EOF condition (could be a read error
* as well but that is masked. :-(). No records in the
* file means that we need to pull any queued records
* so that we don't miss any in the processing.
*/
lbp->last_rec_id =
} else {
return (NULL); /* it was really and EOF */
}
} else {
/*
* Just read a record from the buffer file and now we
* need to XDR the record header so that we can take
* a look at the record id.
*/
&lrp->log_record)) {
return (NULL);
}
/*
* If the new record is less than or matches the
* expected record id, then we return this record
*/
next_rec_id) {
lbp->last_rec_id =
} else {
/*
* This is not the one we were looking
* for; queue it for later processing
* (queueing sorts on record id)
*/
}
}
}
return (lrp);
}
/*
* Free the log record provided.
* This is complex because the associated XDR streams also need to be freed
* since allocation could have occurred during the DECODE phase. The record
* header, args and results need to be XDR_FREEd. The xdr functions will
* be provided if a free needs to be done.
*
* Note that caller tells us if the record being freed was processed.
* If so, then the buffer header should be updated. Updating the buffer
* header keeps track of where the nfslogd daemon left off in its processing
* if it is unable to complete the entire file.
*/
void
{
if (processing_complete) {
}
if (reqrec->re_rpc_arg)
if (reqrec->re_rpc_res)
}
static void
{
}
/*
* Utility function used elsewhere: formats the bytes at u_buf as two-digit
* hex text, stopping once outbufoffset reaches maxoffset.
*/
void
int maxoffset)
{
int i, j;
int outbufoffset = *outbufoffsetp;
if (len <= sizeof (int)) {
j++, u_buf++)
"%02x", *u_buf);
return;
}
/* More than 4 bytes, print with spaces in integer offsets */
i = 0;
if (j > 0) {
i = sizeof (int) - j;
for (; (j < sizeof (int)) && (outbufoffset < maxoffset);
j++, u_buf++)
"%02x", *u_buf);
}
/* LINTED */
i += sizeof (int), ip++) {
}
if (i < len) {
/* Last element not int */
if (i > j) /* not first element */
"%02x", *u_buf);
}
}
if (outbufoffset < maxoffset)
}