/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <nfs/nfs4_clnt.h>
/*
* Recovery kstats
*/
typedef struct rkstat {
} rkstat_t;
/*
* Name/type initializer entries for the recovery kstats. Every entry
* is typed KSTAT_DATA_ULONG.
*/
{ "badhandle", KSTAT_DATA_ULONG },
{ "badowner", KSTAT_DATA_ULONG },
{ "clientid", KSTAT_DATA_ULONG },
{ "dead_file", KSTAT_DATA_ULONG },
{ "delay", KSTAT_DATA_ULONG },
{ "fail_relock", KSTAT_DATA_ULONG },
{ "file_diff", KSTAT_DATA_ULONG },
{ "no_grace", KSTAT_DATA_ULONG },
{ "not_responding", KSTAT_DATA_ULONG },
{ "opens_changed", KSTAT_DATA_ULONG },
{ "siglost", KSTAT_DATA_ULONG },
{ "unexp_action", KSTAT_DATA_ULONG },
{ "unexp_errno", KSTAT_DATA_ULONG },
{ "unexp_status", KSTAT_DATA_ULONG },
{ "wrongsec", KSTAT_DATA_ULONG },
{ "bad_op", KSTAT_DATA_ULONG },
};
/* maximum number of messages allowed on the mi's mi_msg_list */
/*
* Sets the appropriate fields of "ep", given "id" and various parameters.
* The re_type and mount point info are expected to be set already.
*/
static void
{
int len;
/* One case per recovery event id. */
switch (id) {
case RE_BAD_SEQID:
else
break;
case RE_BADHANDLE:
else
break;
case RE_CLIENTID:
/* the error we failed with */
break;
case RE_DEAD_FILE:
else
/* why the file got killed */
if (why) {
} else
break;
case RE_END:
/* first rnode */
else
/* second rnode */
else
break;
case RE_FAIL_RELOCK:
/* error on fail relock */
/* process that failed */
/* nfs4 error */
else
break;
case RE_FAIL_REMAP_LEN:
/* length of returned filehandle */
break;
case RE_FAIL_REMAP_OP:
break;
case RE_FAILOVER:
/* server we're failing over to (if not picking original) */
} else {
}
break;
case RE_FILE_DIFF:
else
break;
case RE_LOST_STATE:
else
else
break;
case RE_OPENS_CHANGED:
/* original number of open files */
/* new number of open files */
break;
/* both SIGLOST flavors share the same handling */
case RE_SIGLOST:
case RE_SIGLOST_NO_DUMP:
else
break;
case RE_START:
else
else
break;
case RE_UNEXPECTED_ACTION:
case RE_UNEXPECTED_ERRNO:
/* the error that is unexpected */
break;
case RE_UNEXPECTED_STATUS:
/* nfsstat4 error */
break;
case RE_WRONGSEC:
/* the error we failed with */
else
else
break;
case RE_LOST_STATE_BAD_OP:
break;
case RE_REFERRAL:
/* server we're being referred to */
} else {
}
break;
default:
/* no event-specific handling for any other id */
break;
}
}
/*
* Sets the appropriate fields of the 'fact' for this 'id'.
*/
static void
{
/* One case per recovery fact id. */
switch (id) {
case RF_BADOWNER:
break;
case RF_RENEW_EXPIRED:
break;
case RF_ERR:
break;
case RF_SRV_OK:
break;
case RF_SRV_NOT_RESPOND:
break;
case RF_SRVS_OK:
break;
case RF_SRVS_NOT_RESPOND:
break;
case RF_DELMAP_CB_ERR:
else
break;
case RF_SENDQ_FULL:
break;
default:
/* no fact-specific handling for any other id */
break;
}
}
/*
* Returns 1 if the given event or fact indicates a successful communication
* from the server; 0 otherwise.
*/
static int
{
/* first branch: recovery event (RE_*) ids */
case RE_BAD_SEQID:
case RE_BADHANDLE:
case RE_FAIL_REMAP_LEN:
case RE_FAIL_REMAP_OP:
case RE_FILE_DIFF:
case RE_START:
case RE_UNEXPECTED_ACTION:
case RE_UNEXPECTED_ERRNO:
case RE_UNEXPECTED_STATUS:
case RE_WRONGSEC:
return (1);
case RE_CLIENTID:
case RE_DEAD_FILE:
case RE_END:
case RE_FAIL_RELOCK:
case RE_FAILOVER:
case RE_LOST_STATE:
case RE_OPENS_CHANGED:
case RE_SIGLOST:
case RE_SIGLOST_NO_DUMP:
case RE_LOST_STATE_BAD_OP:
case RE_REFERRAL:
/* placeholder */
return (0);
default:
/* unrecognized event id: report 0 */
return (0);
}
} else {
/* second branch: recovery fact (RF_*) ids */
case RF_BADOWNER:
case RF_ERR:
case RF_RENEW_EXPIRED:
case RF_SRV_OK:
case RF_SRVS_OK:
case RF_DELMAP_CB_ERR:
return (1);
case RF_SRV_NOT_RESPOND:
case RF_SRVS_NOT_RESPOND:
case RF_SENDQ_FULL:
return (0);
default:
/* unrecognized fact id: report 0 */
return (0);
}
}
}
/*
* Iterate backwards through the mi's mi_msg_list to find the earliest
* message from which we should look for relevant facts to investigate.
*/
static nfs4_debug_msg_t *
{
/* a positive lease period is handled separately from a non-positive one */
if (mi->mi_lease_period > 0)
else
while (cur_msg &&
/* find where we lost contact with the server */
while (cur_msg) {
break;
}
/*
* Find the first successful message before
* we lost contact with the server.
*/
if (cur_msg) {
cur_msg);
}
}
/*
* If we're not at the dummy head pointer,
* set the oldest and current message.
*/
if (cur_msg) {
}
} else
}
/* the oldest message determined above is the result */
return (oldest_msg);
}
/*
* Returns 1 if facts have been found; 0 otherwise.
*/
static int
mntinfo4_t *mi)
{
/* return value: starts at 0 and is set to 1 further down */
int found_a_fact = 0;
int len;
/* find the oldest msg to search backwards to */
/*
* Create a fact sheet by searching from our current message
* backwards to the 'oldest_msg', recording facts along the way
* until we find facts that have already been inspected at another time.
*/
continue;
}
/*
* If this fact has already been looked at, then so
* have all preceding facts. Return now.
*/
return (found_a_fact);
found_a_fact = 1;
case RF_BADOWNER:
break;
case RF_ERR:
/*
* Don't want to overwrite a fact that was
* previously found during our current search.
*/
break;
case RF_RENEW_EXPIRED:
}
break;
case RF_SRV_OK:
break;
case RF_SRV_NOT_RESPOND:
/*
* Okay to overwrite this fact as
* we want the earliest time.
*/
break;
case RF_SRVS_OK:
break;
case RF_SRVS_NOT_RESPOND:
break;
case RF_DELMAP_CB_ERR:
break;
case RF_SENDQ_FULL:
break;
default:
break;
}
}
return (found_a_fact);
}
/*
* Returns 1 if this fact is identical to the last fact recorded
* (only checks for a match within the last 2 lease periods).
*/
static int
mntinfo4_t *mi)
{
/* a positive lease period is handled separately from a non-positive one */
if (mi->mi_lease_period > 0)
else
while (cur_msg &&
continue;
}
return (0);
/* now actually compare the facts */
return (0);
return (0);
return (0);
return (0);
return (0);
return (0);
return (0);
return (0);
len) != 0)
return (0);
return (0);
}
return (0);
len) != 0)
return (0);
return (0);
}
return (0);
return (0);
return (0);
}
/* none of the mismatch checks above returned 0 */
return (1);
}
/* the search ended without returning 1 */
return (0);
}
/*
* Returns 1 if these two messages are identical; 0 otherwise.
*/
static int
mntinfo4_t *mi)
{
int len;
/* find the last event, bypassing all facts */
/* a NULL cur_msg yields 0 */
if (!cur_msg)
return (0);
return (0);
return (0);
/*
* Since we zalloc the buffer, then the two nfs4_debug_msg's
* must match up even if all the fields weren't filled in
* the first place.
*/
return (0);
return (0);
return (0);
return (0);
return (0);
return (0);
return (0);
return (0);
return (0);
return (0);
return (0);
return (0);
return (0);
}
return (0);
return (0);
return (0);
}
return (0);
return (0);
return (0);
}
return (0);
return (0);
return (0);
}
/* none of the mismatch checks above returned 0 */
return (1);
}
/*
* Free up a recovery event.
*/
static void
{
/* presumably a scratch byte count for the frees below -- TODO confirm */
int len;
}
}
}
/*
* Free up a recovery fact.
*/
static void
{
/* presumably a scratch byte count for the frees below -- TODO confirm */
int len;
}
}
/*
* Free up the message.
*/
void
{
/* presumably a scratch byte count for the frees below -- TODO confirm */
int len;
else
}
}
/* free up the data structure itself */
}
/*
* Prints out the interesting facts for recovery events:
* -DEAD_FILE
* -SIGLOST(_NO_DUMP)
*/
static void
{
char *mount_pt;
int len;
return;
"!NFS4 FACT SHEET: %s%s %s%s %s %s%s%s %s%s",
"",
}
/* mount_pt may be NULL; it is only acted on when set */
if (mount_pt) {
}
/* free the fact struct itself */
/* fp may be NULL as well; guard before using it */
if (fp)
}
/*
* The last argument to this function dictates the repeat status
* of the event. If set to 1, it means that we are dumping this
* event and it will _never_ be printed after this time. Else, if
* set to 0, it will be printed again.
*/
static void
{
/* One message format per recovery event id. */
case RE_BAD_SEQID:
"Operation %s for file %s (rnode_pt 0x%p), pid %d using "
"seqid %d got %s. Last good seqid was %d for "
"operation %s.",
break;
case RE_BADHANDLE:
"![NFS4][Server: %s][Mntpt: %s]"
"server %s said filehandle was "
"invalid for file: %s (rnode_pt 0x%p) on mount %s",
} else {
/* same message, without a file name */
"![NFS4][Server: %s][Mntpt: %s]"
"server %s said filehandle was "
"invalid for file: (rnode_pt 0x%p) on mount %s"
}
break;
case RE_CLIENTID:
"Can't recover clientid on mount point %s "
"(mi 0x%p) due to error %d (%s), for server %s. Marking "
"file system as unusable.",
break;
case RE_DEAD_FILE:
"File %s (rnode_pt: %p) was closed due to NFS "
break;
case RE_END:
"NFS Recovery done for mount %s (mi 0x%p) "
"on server %s, rnode_pt1 %s (0x%p), "
break;
case RE_FAIL_RELOCK:
"Couldn't reclaim lock for pid %d for "
"file %s (rnode_pt 0x%p) on (server %s): error %d",
break;
case RE_FAIL_REMAP_LEN:
"remap_lookup: server %s returned bad "
break;
case RE_FAIL_REMAP_OP:
"remap_lookup: didn't get expected OP_GETFH"
break;
case RE_FAILOVER:
"![NFS4][Server: %s][Mntpt: %s]"
else
"![NFS4][Server: %s][Mntpt: %s]"
"NFS4: failing over: selecting "
break;
case RE_FILE_DIFF:
"File %s (rnode_pt: %p) on server %s was closed "
"and failed attempted failover since its is different than "
break;
case RE_LOST_STATE:
"Lost %s request for fs %s, file %s (rnode_pt: 0x%p), "
break;
case RE_OPENS_CHANGED:
"The number of open files to reopen changed "
"for mount %s mi 0x%p (old %d, new %d) on server %s",
break;
/* both SIGLOST flavors print through the same code */
case RE_SIGLOST:
case RE_SIGLOST_NO_DUMP:
"![NFS4][Server: %s][Mntpt: %s]"
"Process %d lost its locks on "
"file %s (rnode_pt: %p) due to NFS recovery error "
else
"![NFS4][Server: %s][Mntpt: %s]"
"Process %d lost its locks on "
"file %s (rnode_pt: %p) due to NFS recovery error "
break;
case RE_START:
"NFS Starting recovery for mount %s "
"(mi 0x%p mi_recovflags [0x%x]) on server %s, "
break;
case RE_UNEXPECTED_ACTION:
"NFS recovery: unexpected action (%s) on server %s",
break;
case RE_UNEXPECTED_ERRNO:
"NFS recovery: unexpected errno (%d) on server %s",
break;
case RE_UNEXPECTED_STATUS:
"NFS recovery: unexpected NFS status code (%s) "
break;
case RE_WRONGSEC:
"NFS can't recover from NFS4ERR_WRONGSEC."
" error %d for server %s: rnode_pt1 %s (0x%p)"
break;
case RE_LOST_STATE_BAD_OP:
"NFS lost state with unrecognized op (%d)."
" fs %s, server %s, pid %d, file %s (rnode_pt: 0x%p), "
break;
case RE_REFERRAL:
"![NFS4][Server: %s][Mntpt: %s]"
else
/* the server being referred to is unknown */
"![NFS4][Server: %s][Mntpt: %s]"
"NFS4: being referred from %s to unknown server",
break;
default:
/* nothing is printed for any other id */
break;
}
/*
* If set this event will not be printed again and is considered
* dumped.
*/
if (dump)
}
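/*
* Prints the message for a recovery fact, selected by the fact's id.
* As with events, the "dump" argument marks the fact as dumped so that
* it is not printed again.
*/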
static void
{
/* One message format per recovery fact id. */
case RF_BADOWNER:
"NFSMAPID_DOMAIN does not match the server: %s domain\n"
break;
case RF_ERR:
/*
* Two forms: the first prints the error with %d, the second with %s.
* In both, the trailing "Client also suspects..." sentence is
* optional; its alternative is the empty string.
*/
"![NFS4][Server: %s][Mntpt: %s]NFS op %s got "
"error %d causing recovery action %s.%s",
" Client also suspects that the server rebooted,"
" or experienced a network partition." : "");
else
"![NFS4][Server: %s][Mntpt: %s]NFS op %s got "
"error %s causing recovery action %s.%s",
" Client also suspects that the server rebooted,"
" or experienced a network partition." : "");
break;
case RF_RENEW_EXPIRED:
"NFS4 renew thread detected client's "
break;
case RF_SRV_NOT_RESPOND:
"NFS server %s not responding; still trying\n",
break;
case RF_SRV_OK:
break;
case RF_SRVS_NOT_RESPOND:
break;
case RF_SRVS_OK:
break;
case RF_DELMAP_CB_ERR:
"NFS op %s got error %s when executing delmap on file %s "
"(rnode_pt 0x%p).",
break;
case RF_SENDQ_FULL:
"send queue to NFS server %s is full; still trying\n",
break;
default:
}
/*
* If set this fact will not be printed again and is considered
* dumped.
*/
if (dump)
}
/*
* Returns 1 if the entire queue should be dumped, 0 otherwise.
*/
static int
{
switch (id) {
/* event ids that call for dumping the entire queue */
case RE_DEAD_FILE:
case RE_SIGLOST:
case RE_WRONGSEC:
case RE_CLIENTID:
return (1);
default:
/* every other id: do not dump the whole queue */
return (0);
}
}
/*
* Returns 1 if the event (but not the entire queue) should be printed;
* 0 otherwise.
*/
static int
{
switch (id) {
/* event ids that are printed on their own */
case RE_BAD_SEQID:
case RE_BADHANDLE:
case RE_FAIL_REMAP_LEN:
case RE_FAIL_REMAP_OP:
case RE_FAILOVER:
case RE_OPENS_CHANGED:
case RE_SIGLOST_NO_DUMP:
case RE_UNEXPECTED_ACTION:
case RE_UNEXPECTED_ERRNO:
case RE_UNEXPECTED_STATUS:
case RE_LOST_STATE_BAD_OP:
case RE_REFERRAL:
return (1);
default:
/* every other id: not printed as a single event */
return (0);
}
}
/*
* Returns 1 if the fact (but not the entire queue) should be printed;
* 0 otherwise.
*/
static int
{
switch (id) {
/* fact ids that are printed on their own */
case RF_SRV_NOT_RESPOND:
case RF_SRV_OK:
case RF_SRVS_NOT_RESPOND:
case RF_SRVS_OK:
case RF_SENDQ_FULL:
return (1);
default:
/* every other id: not printed as a single fact */
return (0);
}
}
/*
* Update a kernel stat
*/
static void
{
/* mi_recov_ksp is NULL: nothing to update */
if (!mi->mi_recov_ksp)
return;
/* recovery event (RE_*) ids */
case RE_BADHANDLE:
break;
case RE_CLIENTID:
break;
case RE_DEAD_FILE:
break;
case RE_FAIL_RELOCK:
break;
case RE_FILE_DIFF:
break;
case RE_OPENS_CHANGED:
break;
case RE_SIGLOST:
case RE_SIGLOST_NO_DUMP:
break;
case RE_UNEXPECTED_ACTION:
break;
case RE_UNEXPECTED_ERRNO:
break;
case RE_UNEXPECTED_STATUS:
break;
case RE_WRONGSEC:
break;
case RE_LOST_STATE_BAD_OP:
break;
default:
break;
}
/* recovery fact (RF_*) ids */
case RF_BADOWNER:
break;
case RF_SRV_NOT_RESPOND:
break;
default:
break;
}
}
}
/*
* Dump the mi's mi_msg_list of recovery messages.
*/
static void
{
/* update kstats */
/*
* If we aren't supposed to dump the queue then see if we
* should just print this single message, then return.
*/
return;
}
/*
* previously written to disk.
*/
/* walk the messages until tmp_msg becomes NULL */
while (tmp_msg) {
}
}
}
/*
* Places the event into mi's debug recovery message queue. Some of the
* fields can be overloaded to be a generic value, depending on the event
* type. These include "count", "why".
*/
void
{
char *cur_srv;
/* vp1 and vp2 are each optional; they are only used when non-NULL */
if (vp1)
if (vp2)
/*
* information. Also place the relevant event related info.
*/
/* if this event is the same as the last event, drop it */
return;
}
/* queue the message at the end of the list */
/* remove the queue'd message at the front of the list */
} else {
/* presumably nothing was removed here, so the count grows -- confirm */
mi->mi_msg_count++;
}
}
/*
* Places the fact into mi's debug recovery messages queue.
*/
void
{
char *cur_srv;
/*
* information. Also place the relevant fact related info.
*/
/* srvname is optional; a different path is taken when it is NULL */
if (srvname)
else
/* if this fact is the same as the last fact, drop it */
return;
}
/* queue the message at the end of the list */
/* 0 presumably means "do not mark as dumped" -- confirm in queue_print_fact */
queue_print_fact(msg, 0);
/* remove the queue'd message at the front of the list */
} else {
/* presumably nothing was removed here, so the count grows -- confirm */
mi->mi_msg_count++;
}
}
/*
* Initialize the 'mi_recov_kstat' kstat.
*/
void
{
/*
* Create the version specific kstats.
*
* PSARC 2001/697 Contract Private Interface
* All nfs kstats are under SunMC contract
* Please refer to the PSARC listed above and contact
* SunMC before making any changes!
*
* Changes must be reviewed by Solaris File Sharing
* Changes must be communicated to contract-2001-697@sun.com
*
*/
/* entry count: size of rkstat_t divided by the size of one named kstat */
sizeof (rkstat_t) / sizeof (kstat_named_t),
"!mi_recov_kstat for mi %p failed\n",
(void *)mi);
return;
}
/* extra step taken only when not in the global zone */
if (zoneid != GLOBAL_ZONEID)
}
/*
* Increment the "delay" kstat.
*/
void
{
/* mi_recov_ksp is NULL: nothing to increment */
if (!mi->mi_recov_ksp)
return;
}
/*
* Increment the "no_grace" kstat.
*/
void
{
/* mi_recov_ksp is NULL: nothing to increment */
if (!mi->mi_recov_ksp)
return;
}