nskernd.c revision 570de38f63910201fdd77246630b7aa8f9dc5661
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <sys/resource.h>
#include <sys/priocntl.h>
#include <sys/rtpriocntl.h>
#include <sys/tspriocntl.h>
#include <strings.h>
#include <thread.h>
#include <stdlib.h>
#include <signal.h>
#include <errno.h>
#include <stdio.h>
#include <fcntl.h>
#include <locale.h>
#include <unistd.h>
#include <syslog.h>
#include <nsctl.h>
/*
* Define a minimal user stack size in bytes over and above the
* libthread THR_STACK_MIN minimum value.
*
* This stack size needs to be sufficient to run _newlwp() and then
* ioctl() down into the kernel.
*/
#define NSK_STACK_SIZE 512
/*
* LWP scheduling control switches.
*
* allow_pri - set to non-zero to enable priocntl() manipulations of
* created LWPs.
* allow_rt - set to non-zero to use the RT rather than the TS
* scheduling class when manipulating the scheduling
* parameters for an LWP. Only used if allow_pri is
* non-zero.
*/
static int allow_pri = 1;
static int allow_rt = 0; /* disallow - bad interactions with timeout() */
static int nsctl_fd = -1;
static int sigterm;
static int nthreads; /* number of threads in the kernel */
static int exiting; /* shutdown in progress flag */
static int cl_nodeid = -1;
static int display_msg = 0;
static int delay_time = 30;
/*
 * Terminate the process with exit status 255.
 *
 * Does not return to the caller.
 */
static void
usage(void)
{
	const int usage_status = 255;

	exit(usage_status);
}
/*
 * NOTE(review): this definition appears truncated in this view - the
 * function name and parameter list are missing and there is one closing
 * brace too many.  The only visible statement increments the file-scope
 * sigterm counter; presumably this is the SIGTERM handler - confirm
 * against the complete file.
 */
static void
{
sigterm++;
}
}
/*
 * Account for a thread that is about to enter the kernel.
 *
 * The test of the shutdown flag and the increment of the thread count
 * are performed together while holding thr_mutex.
 *
 * Returns: 1 - can enter kernel; 0 - shutdown in progress, do not enter kernel
 */
int
nthread_inc(void)
{
	int may_enter;

	(void) mutex_lock(&thr_mutex);
	/* nskernd is being shut down once 'exiting' is set - refuse entry */
	may_enter = exiting ? 0 : 1;
	if (may_enter) {
		nthreads++;
	}
	(void) mutex_unlock(&thr_mutex);

	return (may_enter);
}
/*
 * Drop the count of threads in the kernel by one, under thr_mutex.
 *
 * Callers in this file pair this with a successful nthread_inc().
 */
void
nthread_dec(void)
{
	(void) mutex_lock(&thr_mutex);
	nthreads -= 1;
	(void) mutex_unlock(&thr_mutex);
}
/*
 * Decide whether the daemon may shut down, based on the count of threads
 * currently in the kernel (nthreads).  On success the 'exiting' flag is
 * set, which makes nthread_inc() refuse further entries.
 *
 * returns: 1 - can shutdown; 0 - unable to shutdown
 *
 * NOTE(review): several lines of this function are missing in this view
 * (the output calls that wrap the gettext() strings, and apparently the
 * header of a delay loop around the sleep) - confirm against the
 * complete file before relying on the control flow shown here.
 */
int
canshutdown(void)
{
int rc = 1;
(void) mutex_lock(&thr_mutex);
if (nthreads > 0) {
/* message is only produced when -g was given (display_msg) */
if (display_msg) {
gettext("nskernd: unable to shutdown: "
"%d kernel threads in use\n"), nthreads);
}
start_delay = time(0);
/* thr_mutex is released across the one second sleep, then re-taken */
(void) mutex_unlock(&thr_mutex);
(void) sleep(1);
(void) mutex_lock(&thr_mutex);
gettext("nskernd: delay shutdown: "
"%d kernel threads in use\n"), nthreads);
}
/* re-check the thread count after the delay */
if (nthreads > 0) {
rc = 0;
} else {
exiting = 1;
}
} else {
/* flag shutdown in progress */
exiting = 1;
}
(void) mutex_unlock(&thr_mutex);
return (rc);
}
/*
 * Attempt to stop the daemon's kernel side.
 *
 * returns: 1 - shutdown successful; 0 - unable to shutdown
 *
 * NOTE(review): this function is truncated in this view - 'rc' is tested
 * without a visible assignment.  Judging by the error string the missing
 * call issues NSKERND_STOP; confirm against the complete file.
 */
int
shutdown(void)
{
int rc;
/* a negative nsctl_fd is reported as a successful shutdown */
if (nsctl_fd < 0)
return (1);
/* threads still in the kernel - refuse to shut down */
if (!canshutdown()) {
return (0);
}
if (rc < 0) {
gettext("nskernd: NSKERND_STOP failed\n"));
}
}
return (1);
}
/*
 * First function run by a NSKERND_NEWLWP thread.
 *
 * Determines if it needs to change the scheduling priority of the LWP,
 * and then calls back into the kernel.
 *
 * Always returns NULL.
 *
 * NOTE(review): this definition is truncated in this view - the function
 * name, its parameter list, the priocntl() calls and the call back into
 * the kernel are missing.  Other comments in the file refer to this
 * routine as _newlwp().
 */
static void *
{
/* copy arguments onto stack and free heap memory */
/* increase the scheduling priority of this LWP */
"nskernd: priocntl(PC_GETCID) failed: %s\n"),
goto pri_done;
}
/* allow_rt selects between the RT and TS scheduling classes */
if (allow_rt) {
(pri_t)0; /* minimum RT priority */
} else {
}
PC_SETPARMS, (char *)&pcparms) < 0) {
"nskernd: priocntl(PC_SETPARMS) failed: %s\n"),
}
}
/*
 * Only proceed when no shutdown is in progress; the thread count taken
 * by nthread_inc() is dropped again afterwards.
 */
if (nthread_inc()) {
nthread_dec();
}
return (NULL);
}
/*
 * Start a new thread bound to an LWP.
 *
 * This is the user level side of nsc_create_process().
 *
 * NOTE(review): this definition is truncated in this view - the function
 * name, its parameters, the allocation of 'nskp' and the thr_create()
 * call that sets 'rc' are missing.
 */
static void
{
int rc;
/* allocation failed - give up (only reported in DEBUG builds) */
if (!nskp) {
#ifdef DEBUG
sizeof (*nskp));
#endif
return;
}
/* copy args for child */
if (rc != 0) {
/* thr_create failed */
#ifdef DEBUG
gettext("nskernd: thr_create failed: %s\n"),
#endif
} else {
/* success - _newlwp() will free nskp */
}
}
/*
 * Record a deferred bitmap error for a set in the configuration; this
 * description is taken from the log strings in the body.
 *
 * Visible return values: EINVAL, ENXIO, errno (when pid is -1), 0 (when
 * pid is positive) and 'rc'.
 *
 * NOTE(review): this definition is heavily truncated in this view - the
 * function name, its parameters, the cfg open/lock calls, the fork call
 * and the set lookup loop are missing.  Confirm every statement below
 * against the complete file.
 */
static int
{
char key[CFG_MAX_KEY];
char buf[CFG_MAX_BUF];
char newflags[CFG_MAX_BUF];
char outbuf[CFG_MAX_BUF];
int setlen;
int rc = 0;
} else {
return (EINVAL);
}
(void) mutex_lock(&cfg_mutex);
if (!cfg) {
(void) mutex_unlock(&cfg_mutex);
return (ENXIO);
}
(void) mutex_unlock(&cfg_mutex);
/* pid == -1: failure, report errno; pid > 0: return 0 at once */
if (pid == -1) {
"nskernd: Error forking\n"));
return (errno);
} else if (pid > 0) {
"nskernd: Attempting deferred bitmap error\n"));
return (0);
}
/* from here on pid is neither -1 nor positive */
(void) mutex_lock(&cfg_mutex);
if (!cfg) {
(void) mutex_unlock(&cfg_mutex);
"nskernd: Failed cfg_open, deferred bitmap\n"));
return (ENXIO);
}
/* Sooner or later, this lock will be free */
(void) sleep(2);
}
/* find the proper set number */
break;
}
found = 1;
break;
}
}
if (found) {
/* were there flags in the options field already? */
while (opt) {
strlen(NSKERN_II_BMP_OPTION)) != 0) {
}
}
}
} else {
(void) cfg_commit(cfg);
rc = 0;
}
} else {
"nskernd: Failed deferred bitmap [%s]\n"), set);
}
(void) mutex_unlock(&cfg_mutex);
/*
* if we are the fork'ed client, just exit, if parent just return
*/
if (pid == 0) {
/*NOTREACHED*/
} else {
return (rc);
}
}
/*
 * First function run by a NSKERND_LOCK thread.
 *
 * Opens dscfg and locks it,
 * and then calls back into the kernel.
 *
 * Incoming:
 * data1 is the kernel address of the sync structure.
 * data2 is read(0)/write(1) lock mode.
 *
 * Returns:
 * data1 as incoming.
 * data2 errno.
 *
 * The function itself always returns NULL.
 *
 * NOTE(review): this definition is truncated in this view - the function
 * name, its parameter, the cfg_open()/cfg_lock() calls, the call back
 * into the kernel and the unlock/close calls are missing.  Other comments
 * in the file refer to this routine as _dolock().
 */
static void *
{
int locked;
int mode;
int rc = 0;
/* copy arguments onto stack and free heap memory */
/* cfg_mutex is held from here until just before the return */
(void) mutex_lock(&cfg_mutex);
#ifdef DEBUG
gettext("nskernd: cfg_open failed: %s\n"),
#endif
}
/* choose a read or write lock on the configuration */
mode = CFG_RDLOCK;
} else {
mode = CFG_WRLOCK;
}
/* 'locked' records whether the lock was obtained, for the cleanup below */
locked = 0;
if (rc == 0) {
locked = 1;
} else {
#ifdef DEBUG
gettext("nskernd: cfg_lock failed: %s\n"),
#endif
}
}
/* return to kernel */
if (nthread_inc()) {
nthread_dec();
}
/* cleanup */
if (locked) {
locked = 0;
}
}
(void) mutex_unlock(&cfg_mutex);
return (NULL);
}
/*
 * Inter-node lock thread.
 *
 * This is the user level side of nsc_rmlock().
 *
 * NOTE(review): this definition is truncated in this view - the function
 * name, its parameters, the allocation of 'nskp' and the thr_create()
 * call that sets 'rc' are missing.
 */
static void
{
int rc;
/* create a new thread to do the lock and return to kernel */
/* allocation failed - give up (only reported in DEBUG builds) */
if (!nskp) {
#ifdef DEBUG
gettext("nskernd:dolock: malloc(%d) failed\n"),
sizeof (*nskp));
#endif
return;
}
/* copy args for child */
if (rc != 0) {
/* thr_create failed */
#ifdef DEBUG
gettext("nskernd: thr_create failed: %s\n"),
#endif
} else {
/* success - _dolock() will free nskp */
}
}
/*
 * Convenience code for engineering test of multi-terabyte volumes.
 *
 * zvol (part of zfs) does not support DKIOCPARTITION but does use EFI
 * labels. This code allocates a simple efi label structure and ioctls
 * to extract the size of a zvol. It only handles the minimal EFI ioctl
 * implementation in zvol.
 *
 * NOTE(review): this definition is truncated in this view - the function
 * name, its parameters, and every call (open, allocation, ioctl) are
 * missing; only the declarations and bare return statements remain.
 */
static void
{
int fd = -1;
int rc;
return;
return;
}
return;
}
/* a non-negative rc is the success case; its body is not visible */
if (rc >= 0) {
}
}
/* ARGSUSED */
/*
 * Partition size lookup; presumably the handler for the NSKERND_BSIZE
 * request - confirm against the complete file.
 *
 * The caller's partition number (*partitionp) is set to -1 before the
 * descriptor is checked, so that is the value left behind on the early
 * failure return.
 *
 * NOTE(review): this definition is truncated in this view - the function
 * name, its parameters and all ioctl calls are missing.
 */
static void
{
struct nscioc_bsize bsize;
#ifdef DKIOCPARTITION
struct partition64 p64;
#endif
int fd;
*partitionp = -1;
if (fd < 0)
return;
/* assume part# is ok and just the size failed */
#ifdef DKIOCPARTITION
/* see if this is an EFI label */
} else {
/* see if this is a zvol */
} else {
}
}
#endif /* DKIOCPARTITION */
}
return;
}
return;
return;
return;
}
/*
 * Query cfg_iscluster() and store the result in the file-scope cl_nodeid.
 *
 * Returns TRUE when the result is positive and FALSE when it is zero.
 * For a negative result the process exits with status 1.
 *
 * NOTE(review): the output call that wraps the gettext() string below is
 * missing in this view.
 */
static int
iscluster(void)
{
/*
* Find out if we are running in a cluster
*/
cl_nodeid = cfg_iscluster();
if (cl_nodeid > 0) {
return (TRUE);
} else if (cl_nodeid == 0) {
return (FALSE);
}
gettext("nskernd: unable to ascertain environment"));
exit(1);
/* NOTREACHED */
}
/*
 * Runtime Solaris release checking - build release == runtime release
 * is always considered success, so only keep entries in the map for
 * the special cases.
 *
 * Each entry is a pair of release strings; which element is the build
 * release and which the runtime release is defined by nsc_release_t,
 * which is not visible here.
 *
 * NOTE(review): no terminating sentinel entry is visible in this table;
 * confirm how the consumer determines its length.
 */
static nsc_release_t nskernd_rel_map[] = {
/* { "5.10", "5.10" }, */
{ "5.11", "5.10" },
};
#ifdef lint
#define main nskernd_main
#endif
/* ARGSUSED1 */
/*
 * Daemon entry point: checks the Solaris release, parses the options,
 * forks a child that becomes the daemon, and then runs the service loop
 * that dispatches on the NSKERND_* commands.
 *
 * Returns 'rc'; most failure paths call exit(1) directly.
 *
 * NOTE(review): this definition is heavily truncated in this view - the
 * function name and parameters, several local declarations, and most of
 * the calls (release check, getopt, pipe, fork, signal setup, setrlimit,
 * open, ioctl and the switch header) are missing.  Confirm every
 * statement below against the complete file.
 */
int
{
const char *dir = "/";
int partition;
char *reqd;
int syncpipe[2];
int startup;
(void) textdomain("nskernd");
/* release check: both failure branches exit with status 1 */
if (rc < 0) {
gettext("nskernd: unable to determine the current "
exit(1);
gettext("nskernd: incorrect Solaris release "
"(requires %s)\n"), reqd);
exit(1);
}
rc = 0;
if (argc != 1)
usage();
/*
* Usage: <progname> [-g] [-d <seconds to delay>]
*/
switch (i) {
case 'g':
/* enables the "unable to shutdown" message in canshutdown() */
display_msg = 1;
break;
case 'd':
/* a non-positive delay falls back to 30 */
if (delay_time <= 0) {
delay_time = 30;
}
break;
default:
"Usage: nskernd [-g] [-d <seconds to delay>]");
exit(1);
break;
}
}
exit(1);
}
exit(1);
}
/*
* Determine if we are in a Sun Cluster or not, before fork'ing
*/
(void) iscluster();
/*
* create a pipe to synchronise the parent with the
* child just before it enters its service loop.
*/
gettext("nskernd: cannot create pipe: %s\n"),
exit(1);
}
/*
* Fork off a child that becomes the daemon.
*/
char c;
int n;
/*
* wait for the close of the pipe.
* If we get a char back, indicates good
* status from child, so exit 0.
* If we get a zero length read, then the
* child has failed, so we do too.
*/
exit((n <= 0) ? 1 : 0);
} else if (rc < 0) {
exit(1);
}
/*
* In child - become daemon.
*/
/* use closefrom(3C) from PSARC/2000/193 when possible */
/* closes every descriptor numbered below syncpipe[1] */
for (i = 0; i < syncpipe[1]; i++) {
(void) close(i);
}
(void) dup(0);
(void) dup(0);
(void) close(0);
(void) setpgrp();
/*
* Ignore all signals apart from SIGTERM.
*/
/* NOTE(review): the body of this loop is missing in this view */
for (i = 1; i < _sys_nsig; i++)
/*
* Increase the number of fd's that can be open.
*/
gettext("nskernd: could not increase RLIMIT_NOFILE: %s\n"),
gettext("nskernd: the maximum number of nsctl open "
"devices may be reduced\n"));
}
/*
 * NOTE(review): the text of this comment is missing in this view.  The
 * check that follows is on nsctl_fd, so presumably the nsctl control
 * device is opened here - confirm.
*/
if (nsctl_fd < 0) {
rdev);
exit(1);
}
/* service loop: runs until a branch below clears 'run' */
run = 1;
startup = 1;
while (run) {
if (rc < 0) {
/* try and do kernel cleanup and exit */
if (shutdown()) {
run = 0;
} else {
sigterm = 0;
}
gettext("nskernd: NSCIOC_NSKERND failed: %s\n"),
continue;
} else if (sigterm) {
/* SIGTERM received - terminate */
/* need to do kernel cleanup */
if (shutdown()) {
run = 0;
} else {
sigterm = 0;
}
} else {
/* just quit */
if (canshutdown()) {
run = 0;
} else {
/* cannot shutdown - threads active */
sigterm = 0;
}
}
continue;
}
/* runs once only: 'startup' is cleared here */
if (startup) {
char c = 0;
startup = 0;
}
case NSKERND_START: /* (re)start completion */
if (rc == 1) {
gettext("nskernd: already started\n"));
run = 0;
} else if (rc == 2) {
gettext("nskernd: stopped by kernel\n"));
run = 0;
}
break;
case NSKERND_STOP: /* kernel telling daemon to stop */
(void) shutdown();
run = 0;
}
break;
case NSKERND_BSIZE:
/*
* kernel requesting partsize
* data1 - size return
* data2 - raw_fd (entry)
* - partition number (return)
*/
partition = -1;
break;
case NSKERND_NEWLWP: /* kernel requesting a new LWP */
break;
case NSKERND_LOCK: /* kernel requesting lock */
break;
case NSKERND_WAIT: /* kernel retrying wait */
/*
* the kernel thread can be woken by the dr config
* utilities (ie cfgadm) therefore we just reissue
* the wait.
*/
break;
case NSKERND_IIBITMAP:
break;
default:
gettext("nskernd: unknown command %d"),
break;
}
}
return (rc);
}