/* lx_prvnops.c revision fb2f18f820d90b001aea4fb27dd654bc1263c440 */
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* lxpr_vnops.c: Vnode operations for the lx /proc file system
*
* Assumptions and Gotchas:
*
* In order to preserve the Solaris security policy, this file system's
* functionality does not override Solaris security policies, even if
* that means breaking Linux compatibility.
*
* Linux has no concept of lwps, so we only implement procs here, as in
* the old /proc interface.
*/
#include <sys/cpupart.h>
#include <sys/cpuvar.h>
#include <sys/session.h>
#include <sys/vmparam.h>
#include <sys/mman.h>
#include <vm/rm.h>
#include <vm/seg_vn.h>
#include <sys/sdt.h>
#include <lx_signum.h>
#include <sys/strlog.h>
#include <sys/stropts.h>
#include <sys/cmn_err.h>
#include <sys/lx_brand.h>
#include <sys/x86_archext.h>
#include <sys/archsystm.h>
#include <sys/fp.h>
#include <sys/pool_pset.h>
#include <sys/pset.h>
#include <sys/zone.h>
#include <sys/pghw.h>
/* Dependent on the Solaris procfs */
extern kthread_t *prchoose(proc_t *);
#include "lx_proc.h"
extern pgcnt_t swapfs_minfree;
extern volatile clock_t lbolt;
extern time_t boot_time;
/*
* Pointer to the vnode ops vector for this fs.
* This is instantiated in lxprinit() in lxpr_vfsops.c
*/
vnodeops_t *lxpr_vnodeops;
static int lxpr_open(vnode_t **, int, cred_t *);
static int lxpr_close(vnode_t *, int, int, offset_t, cred_t *);
static int lxpr_read(vnode_t *, uio_t *, int, cred_t *, caller_context_t *);
static int lxpr_getattr(vnode_t *, vattr_t *, int, cred_t *);
static int lxpr_access(vnode_t *, int, int, cred_t *);
static int lxpr_lookup(vnode_t *, char *, vnode_t **,
pathname_t *, int, vnode_t *, cred_t *);
static int lxpr_readdir(vnode_t *, uio_t *, cred_t *, int *);
static int lxpr_readlink(vnode_t *, uio_t *);
static int lxpr_cmp(vnode_t *, vnode_t *);
static int lxpr_realvp(vnode_t *, vnode_t **);
static int lxpr_sync(void);
static void lxpr_inactive(vnode_t *, cred_t *);
static vnode_t *lxpr_lookup_procdir(vnode_t *, char *);
static vnode_t *lxpr_lookup_piddir(vnode_t *, char *);
static vnode_t *lxpr_lookup_not_a_dir(vnode_t *, char *);
static vnode_t *lxpr_lookup_fddir(vnode_t *, char *);
static vnode_t *lxpr_lookup_netdir(vnode_t *, char *);
static int lxpr_readdir_procdir(lxpr_node_t *, uio_t *, int *);
static int lxpr_readdir_piddir(lxpr_node_t *, uio_t *, int *);
static int lxpr_readdir_not_a_dir(lxpr_node_t *, uio_t *, int *);
static int lxpr_readdir_fddir(lxpr_node_t *, uio_t *, int *);
static int lxpr_readdir_netdir(lxpr_node_t *, uio_t *, int *);
static void lxpr_read_invalid(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_empty(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_cpuinfo(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_isdir(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_fd(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_kmsg(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_loadavg(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_meminfo(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_mounts(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_partitions(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_stat(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_uptime(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_version(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_pid_cmdline(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_pid_maps(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_pid_stat(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_pid_statm(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_pid_status(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_arp(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_dev(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_dev_mcast(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_igmp(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_ip_mr_cache(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_ip_mr_vif(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_mcfilter(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_netstat(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_raw(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_route(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_rpc(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_rt_cache(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_sockstat(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_snmp(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_stat(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_tcp(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_udp(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_unix(lxpr_node_t *, lxpr_uiobuf_t *);
/*
* Simple conversion
*/
#define btok(x) ((x) >> 10) /* bytes to kbytes */
#define ptok(x) ((x) << (PAGESHIFT - 10)) /* pages to kbytes */
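/*
* For example, on x86 with 4 KB pages (PAGESHIFT == 12), ptok(3) is
* 3 << 2 == 12 kB and btok(8192) is 8192 >> 10 == 8 kB. (This is only
* an illustration; the real PAGESHIFT comes from the platform headers.)
*/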
/*
* The lx /proc vnode operations vector
*/
const fs_operation_def_t lxpr_vnodeops_template[] = {
VOPNAME_OPEN, lxpr_open,
VOPNAME_CLOSE, lxpr_close,
VOPNAME_READ, lxpr_read,
VOPNAME_GETATTR, lxpr_getattr,
VOPNAME_ACCESS, lxpr_access,
VOPNAME_LOOKUP, lxpr_lookup,
VOPNAME_READDIR, lxpr_readdir,
VOPNAME_READLINK, lxpr_readlink,
VOPNAME_FSYNC, lxpr_sync,
VOPNAME_SEEK, lxpr_sync,
VOPNAME_INACTIVE, (fs_generic_func_p) lxpr_inactive,
VOPNAME_CMP, lxpr_cmp,
VOPNAME_REALVP, lxpr_realvp,
NULL, NULL
};
/*
* file contents of an lx /proc directory.
*/
static lxpr_dirent_t lx_procdir[] = {
{ LXPR_CMDLINE, "cmdline" },
{ LXPR_CPUINFO, "cpuinfo" },
{ LXPR_DEVICES, "devices" },
{ LXPR_DMA, "dma" },
{ LXPR_FILESYSTEMS, "filesystems" },
{ LXPR_INTERRUPTS, "interrupts" },
{ LXPR_IOPORTS, "ioports" },
{ LXPR_KCORE, "kcore" },
{ LXPR_KMSG, "kmsg" },
{ LXPR_LOADAVG, "loadavg" },
{ LXPR_MEMINFO, "meminfo" },
{ LXPR_MOUNTS, "mounts" },
{ LXPR_NETDIR, "net" },
{ LXPR_PARTITIONS, "partitions" },
{ LXPR_SELF, "self" },
{ LXPR_STAT, "stat" },
{ LXPR_UPTIME, "uptime" },
{ LXPR_VERSION, "version" }
};
#define PROCDIRFILES (sizeof (lx_procdir) / sizeof (lx_procdir[0]))
/*
* Contents of an lx /proc/<pid> directory.
*/
static lxpr_dirent_t piddir[] = {
{ LXPR_PID_CMDLINE, "cmdline" },
{ LXPR_PID_CPU, "cpu" },
{ LXPR_PID_CURDIR, "cwd" },
{ LXPR_PID_ENV, "environ" },
{ LXPR_PID_EXE, "exe" },
{ LXPR_PID_MAPS, "maps" },
{ LXPR_PID_MEM, "mem" },
{ LXPR_PID_ROOTDIR, "root" },
{ LXPR_PID_STAT, "stat" },
{ LXPR_PID_STATM, "statm" },
{ LXPR_PID_STATUS, "status" },
{ LXPR_PID_FDDIR, "fd" }
};
#define PIDDIRFILES (sizeof (piddir) / sizeof (piddir[0]))
/*
* contents of lx /proc/net directory
*/
static lxpr_dirent_t netdir[] = {
{ LXPR_NET_ARP, "arp" },
{ LXPR_NET_DEV, "dev" },
{ LXPR_NET_DEV_MCAST, "dev_mcast" },
{ LXPR_NET_IGMP, "igmp" },
{ LXPR_NET_IP_MR_CACHE, "ip_mr_cache" },
{ LXPR_NET_IP_MR_VIF, "ip_mr_vif" },
{ LXPR_NET_MCFILTER, "mcfilter" },
{ LXPR_NET_NETSTAT, "netstat" },
{ LXPR_NET_RAW, "raw" },
{ LXPR_NET_ROUTE, "route" },
{ LXPR_NET_RPC, "rpc" },
{ LXPR_NET_RT_CACHE, "rt_cache" },
{ LXPR_NET_SOCKSTAT, "sockstat" },
{ LXPR_NET_SNMP, "snmp" },
{ LXPR_NET_STAT, "stat" },
{ LXPR_NET_TCP, "tcp" },
{ LXPR_NET_UDP, "udp" },
{ LXPR_NET_UNIX, "unix" }
};
#define NETDIRFILES (sizeof (netdir) / sizeof (netdir[0]))
/*
* lxpr_open(): Vnode operation for VOP_OPEN()
*/
static int
lxpr_open(vnode_t **vpp, int flag, cred_t *cr)
{
vnode_t *vp = *vpp;
lxpr_node_t *lxpnp = VTOLXP(vp);
lxpr_nodetype_t type = lxpnp->lxpr_type;
vnode_t *rvp;
int error = 0;
/*
* We only allow reading in this file system
*/
if (flag & FWRITE)
return (EROFS);
/*
* If we are opening an underlying fd file, only allow regular files
* and reject the open for anything else.
* Just do it if we are opening the current or root directory.
*/
if (lxpnp->lxpr_realvp != NULL) {
rvp = lxpnp->lxpr_realvp;
if (type == LXPR_PID_FD_FD && rvp->v_type != VREG)
error = EACCES;
else {
/*
* Need to hold rvp since VOP_OPEN() may release it.
*/
VN_HOLD(rvp);
error = VOP_OPEN(&rvp, flag, cr);
if (error) {
VN_RELE(rvp);
} else {
*vpp = rvp;
VN_RELE(vp);
}
}
}
if (type == LXPR_KMSG) {
ldi_ident_t li = VTOLXPM(vp)->lxprm_li;
struct strioctl str;
int rv;
/*
* Open the zone's console device using the layered driver
* interface.
*/
if ((error = ldi_open_by_name("/dev/log", FREAD, cr,
&lxpnp->lxpr_cons_ldih, li)) != 0)
return (error);
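/*
* The LDI handle saved in lxpr_cons_ldih is used later by
* lxpr_read_kmsg() to pull console messages via ldi_getmsg() and is
* released with ldi_close() in lxpr_close().
*/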
/*
* Send an ioctl to the underlying console device, letting it
* know we're interested in getting console messages.
*/
str.ic_cmd = I_CONSLOG;
str.ic_timout = 0;
str.ic_len = 0;
str.ic_dp = NULL;
if ((error = ldi_ioctl(lxpnp->lxpr_cons_ldih, I_STR,
(intptr_t)&str, FKIOCTL, cr, &rv)) != 0)
return (error);
}
return (error);
}
/*
* lxpr_close(): Vnode operation for VOP_CLOSE()
*/
/* ARGSUSED */
static int
lxpr_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr)
{
lxpr_node_t *lxpr = VTOLXP(vp);
lxpr_nodetype_t type = lxpr->lxpr_type;
int err;
/*
* we should never get here because the close is done on the realvp
* for these nodes
*/
ASSERT(type != LXPR_PID_FD_FD &&
type != LXPR_PID_CURDIR &&
type != LXPR_PID_ROOTDIR &&
type != LXPR_PID_EXE);
if (type == LXPR_KMSG) {
if ((err = ldi_close(lxpr->lxpr_cons_ldih, 0, cr)) != 0)
return (err);
}
return (0);
}
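/*
* Array of read functions, indexed by lx /proc file type. The entries
* here (and in the lookup and readdir tables below) are expected to be
* in the same order as the lxpr_nodetype_t enumeration (see lx_proc.h),
* since lxpr_read(), lxpr_lookup() and lxpr_readdir() index these
* tables directly by node type.
*/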
static void (*lxpr_read_function[LXPR_NFILES])() = {
lxpr_read_isdir, /* /proc */
lxpr_read_isdir, /* /proc/<pid> */
lxpr_read_pid_cmdline, /* /proc/<pid>/cmdline */
lxpr_read_empty, /* /proc/<pid>/cpu */
lxpr_read_invalid, /* /proc/<pid>/cwd */
lxpr_read_empty, /* /proc/<pid>/environ */
lxpr_read_invalid, /* /proc/<pid>/exe */
lxpr_read_pid_maps, /* /proc/<pid>/maps */
lxpr_read_empty, /* /proc/<pid>/mem */
lxpr_read_invalid, /* /proc/<pid>/root */
lxpr_read_pid_stat, /* /proc/<pid>/stat */
lxpr_read_pid_statm, /* /proc/<pid>/statm */
lxpr_read_pid_status, /* /proc/<pid>/status */
lxpr_read_isdir, /* /proc/<pid>/fd */
lxpr_read_fd, /* /proc/<pid>/fd/nn */
lxpr_read_empty, /* /proc/cmdline */
lxpr_read_cpuinfo, /* /proc/cpuinfo */
lxpr_read_empty, /* /proc/devices */
lxpr_read_empty, /* /proc/dma */
lxpr_read_empty, /* /proc/filesystems */
lxpr_read_empty, /* /proc/interrupts */
lxpr_read_empty, /* /proc/ioports */
lxpr_read_empty, /* /proc/kcore */
lxpr_read_kmsg, /* /proc/kmsg */
lxpr_read_loadavg, /* /proc/loadavg */
lxpr_read_meminfo, /* /proc/meminfo */
lxpr_read_mounts, /* /proc/mounts */
lxpr_read_isdir, /* /proc/net */
lxpr_read_net_arp, /* /proc/net/arp */
lxpr_read_net_dev, /* /proc/net/dev */
lxpr_read_net_dev_mcast, /* /proc/net/dev_mcast */
lxpr_read_net_igmp, /* /proc/net/igmp */
lxpr_read_net_ip_mr_cache, /* /proc/net/ip_mr_cache */
lxpr_read_net_ip_mr_vif, /* /proc/net/ip_mr_vif */
lxpr_read_net_mcfilter, /* /proc/net/mcfilter */
lxpr_read_net_netstat, /* /proc/net/netstat */
lxpr_read_net_raw, /* /proc/net/raw */
lxpr_read_net_route, /* /proc/net/route */
lxpr_read_net_rpc, /* /proc/net/rpc */
lxpr_read_net_rt_cache, /* /proc/net/rt_cache */
lxpr_read_net_sockstat, /* /proc/net/sockstat */
lxpr_read_net_snmp, /* /proc/net/snmp */
lxpr_read_net_stat, /* /proc/net/stat */
lxpr_read_net_tcp, /* /proc/net/tcp */
lxpr_read_net_udp, /* /proc/net/udp */
lxpr_read_net_unix, /* /proc/net/unix */
lxpr_read_partitions, /* /proc/partitions */
lxpr_read_invalid, /* /proc/self */
lxpr_read_stat, /* /proc/stat */
lxpr_read_uptime, /* /proc/uptime */
lxpr_read_version, /* /proc/version */
};
/*
* Array of lookup functions, indexed by lx /proc file type.
*/
static vnode_t *(*lxpr_lookup_function[LXPR_NFILES])() = {
lxpr_lookup_procdir, /* /proc */
lxpr_lookup_piddir, /* /proc/<pid> */
lxpr_lookup_not_a_dir, /* /proc/<pid>/cmdline */
lxpr_lookup_not_a_dir, /* /proc/<pid>/cpu */
lxpr_lookup_not_a_dir, /* /proc/<pid>/cwd */
lxpr_lookup_not_a_dir, /* /proc/<pid>/environ */
lxpr_lookup_not_a_dir, /* /proc/<pid>/exe */
lxpr_lookup_not_a_dir, /* /proc/<pid>/maps */
lxpr_lookup_not_a_dir, /* /proc/<pid>/mem */
lxpr_lookup_not_a_dir, /* /proc/<pid>/root */
lxpr_lookup_not_a_dir, /* /proc/<pid>/stat */
lxpr_lookup_not_a_dir, /* /proc/<pid>/statm */
lxpr_lookup_not_a_dir, /* /proc/<pid>/status */
lxpr_lookup_fddir, /* /proc/<pid>/fd */
lxpr_lookup_not_a_dir, /* /proc/<pid>/fd/nn */
lxpr_lookup_not_a_dir, /* /proc/cmdline */
lxpr_lookup_not_a_dir, /* /proc/cpuinfo */
lxpr_lookup_not_a_dir, /* /proc/devices */
lxpr_lookup_not_a_dir, /* /proc/dma */
lxpr_lookup_not_a_dir, /* /proc/filesystems */
lxpr_lookup_not_a_dir, /* /proc/interrupts */
lxpr_lookup_not_a_dir, /* /proc/ioports */
lxpr_lookup_not_a_dir, /* /proc/kcore */
lxpr_lookup_not_a_dir, /* /proc/kmsg */
lxpr_lookup_not_a_dir, /* /proc/loadavg */
lxpr_lookup_not_a_dir, /* /proc/meminfo */
lxpr_lookup_not_a_dir, /* /proc/mounts */
lxpr_lookup_netdir, /* /proc/net */
lxpr_lookup_not_a_dir, /* /proc/net/arp */
lxpr_lookup_not_a_dir, /* /proc/net/dev */
lxpr_lookup_not_a_dir, /* /proc/net/dev_mcast */
lxpr_lookup_not_a_dir, /* /proc/net/igmp */
lxpr_lookup_not_a_dir, /* /proc/net/ip_mr_cache */
lxpr_lookup_not_a_dir, /* /proc/net/ip_mr_vif */
lxpr_lookup_not_a_dir, /* /proc/net/mcfilter */
lxpr_lookup_not_a_dir, /* /proc/net/netstat */
lxpr_lookup_not_a_dir, /* /proc/net/raw */
lxpr_lookup_not_a_dir, /* /proc/net/route */
lxpr_lookup_not_a_dir, /* /proc/net/rpc */
lxpr_lookup_not_a_dir, /* /proc/net/rt_cache */
lxpr_lookup_not_a_dir, /* /proc/net/sockstat */
lxpr_lookup_not_a_dir, /* /proc/net/snmp */
lxpr_lookup_not_a_dir, /* /proc/net/stat */
lxpr_lookup_not_a_dir, /* /proc/net/tcp */
lxpr_lookup_not_a_dir, /* /proc/net/udp */
lxpr_lookup_not_a_dir, /* /proc/net/unix */
lxpr_lookup_not_a_dir, /* /proc/partitions */
lxpr_lookup_not_a_dir, /* /proc/self */
lxpr_lookup_not_a_dir, /* /proc/stat */
lxpr_lookup_not_a_dir, /* /proc/uptime */
lxpr_lookup_not_a_dir, /* /proc/version */
};
/*
* Array of readdir functions, indexed by /proc file type.
*/
static int (*lxpr_readdir_function[LXPR_NFILES])() = {
lxpr_readdir_procdir, /* /proc */
lxpr_readdir_piddir, /* /proc/<pid> */
lxpr_readdir_not_a_dir, /* /proc/<pid>/cmdline */
lxpr_readdir_not_a_dir, /* /proc/<pid>/cpu */
lxpr_readdir_not_a_dir, /* /proc/<pid>/cwd */
lxpr_readdir_not_a_dir, /* /proc/<pid>/environ */
lxpr_readdir_not_a_dir, /* /proc/<pid>/exe */
lxpr_readdir_not_a_dir, /* /proc/<pid>/maps */
lxpr_readdir_not_a_dir, /* /proc/<pid>/mem */
lxpr_readdir_not_a_dir, /* /proc/<pid>/root */
lxpr_readdir_not_a_dir, /* /proc/<pid>/stat */
lxpr_readdir_not_a_dir, /* /proc/<pid>/statm */
lxpr_readdir_not_a_dir, /* /proc/<pid>/status */
lxpr_readdir_fddir, /* /proc/<pid>/fd */
lxpr_readdir_not_a_dir, /* /proc/<pid>/fd/nn */
lxpr_readdir_not_a_dir, /* /proc/cmdline */
lxpr_readdir_not_a_dir, /* /proc/cpuinfo */
lxpr_readdir_not_a_dir, /* /proc/devices */
lxpr_readdir_not_a_dir, /* /proc/dma */
lxpr_readdir_not_a_dir, /* /proc/filesystems */
lxpr_readdir_not_a_dir, /* /proc/interrupts */
lxpr_readdir_not_a_dir, /* /proc/ioports */
lxpr_readdir_not_a_dir, /* /proc/kcore */
lxpr_readdir_not_a_dir, /* /proc/kmsg */
lxpr_readdir_not_a_dir, /* /proc/loadavg */
lxpr_readdir_not_a_dir, /* /proc/meminfo */
lxpr_readdir_not_a_dir, /* /proc/mounts */
lxpr_readdir_netdir, /* /proc/net */
lxpr_readdir_not_a_dir, /* /proc/net/arp */
lxpr_readdir_not_a_dir, /* /proc/net/dev */
lxpr_readdir_not_a_dir, /* /proc/net/dev_mcast */
lxpr_readdir_not_a_dir, /* /proc/net/igmp */
lxpr_readdir_not_a_dir, /* /proc/net/ip_mr_cache */
lxpr_readdir_not_a_dir, /* /proc/net/ip_mr_vif */
lxpr_readdir_not_a_dir, /* /proc/net/mcfilter */
lxpr_readdir_not_a_dir, /* /proc/net/netstat */
lxpr_readdir_not_a_dir, /* /proc/net/raw */
lxpr_readdir_not_a_dir, /* /proc/net/route */
lxpr_readdir_not_a_dir, /* /proc/net/rpc */
lxpr_readdir_not_a_dir, /* /proc/net/rt_cache */
lxpr_readdir_not_a_dir, /* /proc/net/sockstat */
lxpr_readdir_not_a_dir, /* /proc/net/snmp */
lxpr_readdir_not_a_dir, /* /proc/net/stat */
lxpr_readdir_not_a_dir, /* /proc/net/tcp */
lxpr_readdir_not_a_dir, /* /proc/net/udp */
lxpr_readdir_not_a_dir, /* /proc/net/unix */
lxpr_readdir_not_a_dir, /* /proc/partitions */
lxpr_readdir_not_a_dir, /* /proc/self */
lxpr_readdir_not_a_dir, /* /proc/stat */
lxpr_readdir_not_a_dir, /* /proc/uptime */
lxpr_readdir_not_a_dir, /* /proc/version */
};
/*
* lxpr_read(): Vnode operation for VOP_READ()
*
* As all the files that can be read in the lx procfs are human readable
* text rather than binary structures, there is no need for different
* read variants depending on whether the reading process model is 32 or
* 64 bits (at least in general, and certainly the difference is unlikely
* to be enough to justify having separate routines for 32-bit and 64-bit
* reads).
*/
/* ARGSUSED */
static int
lxpr_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
caller_context_t *ct)
{
lxpr_node_t *lxpnp = VTOLXP(vp);
lxpr_nodetype_t type = lxpnp->lxpr_type;
lxpr_uiobuf_t *uiobuf = lxpr_uiobuf_new(uiop);
int error;
ASSERT(type >= 0 && type < LXPR_NFILES);
lxpr_read_function[type](lxpnp, uiobuf);
error = lxpr_uiobuf_flush(uiobuf);
lxpr_uiobuf_free(uiobuf);
return (error);
}
/*
* lxpr_read_invalid(), lxpr_read_isdir(), lxpr_read_empty()
*
* Various special case reads:
* - trying to read a directory
* - invalid file (used to mean a file that should be implemented,
* but isn't yet)
* - empty file
* - wait to be able to read a file that will never have anything to read
*/
/* ARGSUSED */
static void
lxpr_read_isdir(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
lxpr_uiobuf_seterr(uiobuf, EISDIR);
}
/* ARGSUSED */
static void
lxpr_read_invalid(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
lxpr_uiobuf_seterr(uiobuf, EINVAL);
}
/* ARGSUSED */
static void
lxpr_read_empty(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
/*
* lxpr_read_pid_cmdline():
*
* This is not precisely compatible with Linux:
*
* The Linux cmdline returns argv with the correct separation
* using \0 between the arguments, but we cannot do that without
* copying the real argv from the correct process context.
* That is too difficult to attempt, so we pretend that the
* entire cmdline is just argv[0]. This is good enough for
* ps to display correctly, but might cause some other things
* not to work correctly.
*/
static void
lxpr_read_pid_cmdline(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
proc_t *p;
ASSERT(lxpnp->lxpr_type == LXPR_PID_CMDLINE);
p = lxpr_lock(lxpnp->lxpr_pid);
if (p == NULL) {
lxpr_uiobuf_seterr(uiobuf, EINVAL);
return;
}
if (PTOU(p)->u_argv != 0) {
char *buff = PTOU(p)->u_psargs;
int len = strlen(buff);
lxpr_unlock(p);
lxpr_uiobuf_write(uiobuf, buff, len+1);
} else {
lxpr_unlock(p);
}
}
/*
* lxpr_read_pid_maps(): memory map file
*/
static void
lxpr_read_pid_maps(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
proc_t *p;
struct as *as;
struct seg *seg;
char *buf;
int buflen = MAXPATHLEN;
struct print_data {
caddr_t saddr;
caddr_t eaddr;
int type;
char prot[5];
uint32_t offset;
vnode_t *vp;
struct print_data *next;
} *print_head = NULL;
struct print_data **print_tail = &print_head;
struct print_data *pbuf;
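/*
* Gather everything we need into a private list while the locks are
* held and only format the output afterwards: the VOP_GETATTR() and
* vnodetopath() calls used when printing can block, which we'd rather
* not do while holding the address space lock.
*/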
ASSERT(lxpnp->lxpr_type == LXPR_PID_MAPS);
p = lxpr_lock(lxpnp->lxpr_pid);
if (p == NULL) {
lxpr_uiobuf_seterr(uiobuf, EINVAL);
return;
}
as = p->p_as;
if (as == &kas) {
lxpr_unlock(p);
return;
}
mutex_exit(&p->p_lock);
/* Iterate over all segments in the address space */
AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
vnode_t *vp;
uint_t protbits;
pbuf = kmem_alloc(sizeof (*pbuf), KM_SLEEP);
pbuf->saddr = seg->s_base;
pbuf->eaddr = seg->s_base+seg->s_size;
pbuf->type = SEGOP_GETTYPE(seg, seg->s_base);
/*
* Cheat and only use the protection bits of the first page
* in the segment
*/
(void) strncpy(pbuf->prot, "----", sizeof (pbuf->prot));
(void) SEGOP_GETPROT(seg, seg->s_base, 0, &protbits);
if (protbits & PROT_READ) pbuf->prot[0] = 'r';
if (protbits & PROT_WRITE) pbuf->prot[1] = 'w';
if (protbits & PROT_EXEC) pbuf->prot[2] = 'x';
if (pbuf->type & MAP_SHARED) pbuf->prot[3] = 's';
else if (pbuf->type & MAP_PRIVATE) pbuf->prot[3] = 'p';
if (seg->s_ops == &segvn_ops &&
SEGOP_GETVP(seg, seg->s_base, &vp) == 0 &&
vp != NULL && vp->v_type == VREG) {
VN_HOLD(vp);
pbuf->vp = vp;
} else {
pbuf->vp = NULL;
}
pbuf->offset = (uint32_t)SEGOP_GETOFFSET(seg, pbuf->saddr);
pbuf->next = NULL;
*print_tail = pbuf;
print_tail = &pbuf->next;
}
AS_LOCK_EXIT(as, &as->a_lock);
mutex_enter(&p->p_lock);
lxpr_unlock(p);
buf = kmem_alloc(buflen, KM_SLEEP);
/* print the data we've extracted */
pbuf = print_head;
while (pbuf != NULL) {
struct print_data *pbuf_next;
vattr_t vattr;
int maj = 0;
int min = 0;
int inode = 0;
*buf = '\0';
if (pbuf->vp != NULL) {
vattr.va_mask = AT_FSID | AT_NODEID;
if (VOP_GETATTR(pbuf->vp, &vattr, 0, CRED()) == 0) {
maj = getmajor(vattr.va_fsid);
min = getminor(vattr.va_fsid);
inode = vattr.va_nodeid;
}
(void) vnodetopath(NULL, pbuf->vp, buf, buflen, CRED());
VN_RELE(pbuf->vp);
}
if (*buf != '\0') {
lxpr_uiobuf_printf(uiobuf,
"%08x-%08x %s %08x %02d:%03d %d %s\n",
pbuf->saddr, pbuf->eaddr, pbuf->prot, pbuf->offset,
maj, min, inode, buf);
} else {
lxpr_uiobuf_printf(uiobuf,
"%08x-%08x %s %08x %02d:%03d %d\n",
pbuf->saddr, pbuf->eaddr, pbuf->prot, pbuf->offset,
maj, min, inode);
}
pbuf_next = pbuf->next;
kmem_free(pbuf, sizeof (*pbuf));
pbuf = pbuf_next;
}
kmem_free(buf, buflen);
}
/*
* lxpr_read_pid_statm(): memory status file
*/
static void
lxpr_read_pid_statm(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
proc_t *p;
struct as *as;
size_t vsize;
size_t rss;
ASSERT(lxpnp->lxpr_type == LXPR_PID_STATM);
p = lxpr_lock(lxpnp->lxpr_pid);
if (p == NULL) {
lxpr_uiobuf_seterr(uiobuf, EINVAL);
return;
}
as = p->p_as;
mutex_exit(&p->p_lock);
AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
vsize = btopr(rm_assize(as));
rss = rm_asrss(as);
AS_LOCK_EXIT(as, &as->a_lock);
mutex_enter(&p->p_lock);
lxpr_unlock(p);
lxpr_uiobuf_printf(uiobuf,
"%lu %lu %lu %lu %lu %lu %lu\n",
vsize, rss, 0l, rss, 0l, 0l, 0l);
}
/*
* lxpr_read_pid_status(): status file
*/
static void
lxpr_read_pid_status(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
proc_t *p;
kthread_t *t;
user_t *up;
cred_t *cr;
const gid_t *groups;
int ngroups;
struct as *as;
char *status;
pid_t pid, ppid;
size_t vsize;
size_t rss;
k_sigset_t current, ignore, handle;
int i, lx_sig;
ASSERT(lxpnp->lxpr_type == LXPR_PID_STATUS);
p = lxpr_lock(lxpnp->lxpr_pid);
if (p == NULL) {
lxpr_uiobuf_seterr(uiobuf, EINVAL);
return;
}
pid = p->p_pid;
/*
* Convert pid to the Linux default of 1 if we're the zone's init
* process
*/
if (pid == curproc->p_zone->zone_proc_initpid) {
pid = 1;
ppid = 0; /* parent pid for init is 0 */
} else {
/*
* Make sure not to reference parent PIDs that reside outside
* the zone
*/
ppid = ((p->p_flag & SZONETOP)
? curproc->p_zone->zone_zsched->p_pid : p->p_ppid);
/*
* Convert ppid to the Linux default of 1 if our parent is the
* zone's init process
*/
if (ppid == curproc->p_zone->zone_proc_initpid)
ppid = 1;
}
t = prchoose(p);
if (t != NULL) {
switch (t->t_state) {
case TS_SLEEP:
status = "S (sleeping)";
break;
case TS_RUN:
case TS_ONPROC:
status = "R (running)";
break;
case TS_ZOMB:
status = "Z (zombie)";
break;
case TS_STOPPED:
status = "T (stopped)";
break;
default:
status = "! (unknown)";
break;
}
thread_unlock(t);
} else {
/*
* there is a hole in the exit code, where a proc can have
* no threads but has not yet been flagged SZOMB. We will
* assume it is about to become a zombie.
*/
status = "Z (zombie)";
}
up = PTOU(p);
mutex_enter(&p->p_crlock);
crhold(cr = p->p_cred);
mutex_exit(&p->p_crlock);
lxpr_uiobuf_printf(uiobuf,
"Name:\t%s\n"
"State:\t%s\n"
"Tgid:\t%d\n"
"Pid:\t%d\n"
"PPid:\t%d\n"
"TracerPid:\t%d\n"
"Uid:\t%d\t%d\t%d\t%d\n"
"Gid:\t%d\t%d\t%d\t%d\n"
"FDSize:\t%d\n"
"Groups:\t",
up->u_comm,
status,
pid, /* thread group id - same as pid until we map lwps to procs */
pid,
ppid,
0,
crgetruid(cr), crgetuid(cr), crgetsuid(cr), crgetuid(cr),
crgetrgid(cr), crgetgid(cr), crgetsgid(cr), crgetgid(cr),
p->p_fno_ctl);
ngroups = crgetngroups(cr);
groups = crgetgroups(cr);
for (i = 0; i < ngroups; i++) {
lxpr_uiobuf_printf(uiobuf,
"%d ",
groups[i]);
}
crfree(cr);
as = p->p_as;
if ((p->p_stat != SZOMB) && !(p->p_flag & SSYS) && (as != &kas)) {
mutex_exit(&p->p_lock);
AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
vsize = rm_assize(as);
rss = rm_asrss(as);
AS_LOCK_EXIT(as, &as->a_lock);
mutex_enter(&p->p_lock);
lxpr_uiobuf_printf(uiobuf,
"\n"
"VmSize:\t%8lu kB\n"
"VmLck:\t%8lu kB\n"
"VmRSS:\t%8lu kB\n"
"VmData:\t%8lu kB\n"
"VmStk:\t%8lu kB\n"
"VmExe:\t%8lu kB\n"
"VmLib:\t%8lu kB",
btok(vsize),
0l,
ptok(rss),
0l,
btok(p->p_stksize),
ptok(rss),
0l);
}
sigemptyset(&current);
sigemptyset(&ignore);
sigemptyset(&handle);
for (i = 1; i < MAXSIG; i++) {
lx_sig = stol_signo[i];
if ((lx_sig > 0) && (lx_sig < MAXSIG)) {
if (sigismember(&p->p_sig, i))
sigaddset(&current, lx_sig);
if (up->u_signal[i] == SIG_IGN)
sigaddset(&ignore, lx_sig);
else if (up->u_signal[i] != SIG_DFL)
sigaddset(&handle, lx_sig);
}
}
lxpr_uiobuf_printf(uiobuf,
"\n"
"SigPnd:\t%08x%08x\n"
"SigBlk:\t%08x%08x\n"
"SigIgn:\t%08x%08x\n"
"SigCgt:\t%08x%08x\n"
"CapInh:\t%016x\n"
"CapPrm:\t%016x\n"
"CapEff:\t%016x\n",
current.__sigbits[1], current.__sigbits[0],
0, 0, /* signals blocked on per thread basis */
ignore.__sigbits[1], ignore.__sigbits[0],
handle.__sigbits[1], handle.__sigbits[0],
/* Can't do anything with linux capabilities */
0,
0,
0);
lxpr_unlock(p);
}
/*
* lxpr_read_pid_stat(): pid stat file
*/
static void
lxpr_read_pid_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
proc_t *p;
kthread_t *t;
struct as *as;
char stat;
pid_t pid, ppid, pgpid, spid;
gid_t psgid;
dev_t psdev;
size_t rss, vsize;
int nice, pri;
caddr_t wchan;
processorid_t cpu;
ASSERT(lxpnp->lxpr_type == LXPR_PID_STAT);
p = lxpr_lock(lxpnp->lxpr_pid);
if (p == NULL) {
lxpr_uiobuf_seterr(uiobuf, EINVAL);
return;
}
pid = p->p_pid;
/*
* Set Linux defaults if we're the zone's init process
*/
if (pid == curproc->p_zone->zone_proc_initpid) {
pid = 1; /* PID for init */
ppid = 0; /* parent PID for init is 0 */
pgpid = 0; /* process group for init is 0 */
psgid = -1; /* credential GID for init is -1 */
spid = 0; /* session id for init is 0 */
psdev = 0; /* session device for init is 0 */
} else {
/*
* Make sure not to reference parent PIDs that reside outside
* the zone
*/
ppid = ((p->p_flag & SZONETOP)
? curproc->p_zone->zone_zsched->p_pid : p->p_ppid);
/*
* Convert ppid to the Linux default of 1 if our parent is the
* zone's init process
*/
if (ppid == curproc->p_zone->zone_proc_initpid)
ppid = 1;
pgpid = p->p_pgrp;
mutex_enter(&p->p_splock);
mutex_enter(&p->p_sessp->s_lock);
spid = p->p_sessp->s_sid;
/* XXBRAND psdev = DEV_TO_LXDEV(p->p_sessp->s_dev, VCHR); */
psdev = p->p_sessp->s_dev;
if (p->p_sessp->s_cred)
psgid = crgetgid(p->p_sessp->s_cred);
else
psgid = crgetgid(p->p_cred);
mutex_exit(&p->p_sessp->s_lock);
mutex_exit(&p->p_splock);
}
t = prchoose(p);
if (t != NULL) {
switch (t->t_state) {
case TS_SLEEP:
stat = 'S'; break;
case TS_RUN:
case TS_ONPROC:
stat = 'R'; break;
case TS_ZOMB:
stat = 'Z'; break;
case TS_STOPPED:
stat = 'T'; break;
default:
stat = '!'; break;
}
if (CL_DONICE(t, NULL, 0, &nice) != 0)
nice = 0;
pri = v.v_maxsyspri - t->t_pri;
wchan = t->t_wchan;
cpu = t->t_cpu->cpu_seqid;
thread_unlock(t);
} else {
/* Only zombies have no threads */
stat = 'Z';
nice = 0;
pri = 0;
wchan = 0;
cpu = 0;
}
as = p->p_as;
mutex_exit(&p->p_lock);
AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
vsize = rm_assize(as);
rss = rm_asrss(as);
AS_LOCK_EXIT(as, &as->a_lock);
mutex_enter(&p->p_lock);
lxpr_uiobuf_printf(uiobuf,
"%d (%s) %c %d %d %d %d %d "
"%lu %lu %lu %lu %lu "
"%lu %lu %ld %ld "
"%d %d "
"0 "
"%ld %lu "
"%lu %ld %llu "
"%lu %lu %u "
"%lu %lu "
"%lu %lu %lu %lu "
"%lu "
"%lu %lu "
"%d "
"%d"
"\n",
pid,
PTOU(p)->u_comm,
stat,
ppid, pgpid,
spid, psdev, psgid,
0l, 0l, 0l, 0l, 0l, /* flags, minflt, cminflt, majflt, cmajflt */
p->p_utime, p->p_stime, p->p_cutime, p->p_cstime,
pri, nice,
0l, PTOU(p)->u_ticks, /* ticks till next SIGALARM, start time */
vsize, rss, p->p_vmem_ctl,
0l, 0l, USRSTACK, /* startcode, endcode, startstack */
0l, 0l, /* kstkesp, kstkeip */
0l, 0l, 0l, 0l, /* signal, blocked, sigignore, sigcatch */
wchan,
0l, 0l, /* nswap, cnswap */
0, /* exit_signal */
cpu);
lxpr_unlock(p);
}
/* ARGSUSED */
static void
lxpr_read_net_arp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
/* ARGSUSED */
static void
lxpr_read_net_dev(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
lxpr_uiobuf_printf(uiobuf, "Inter-| Receive "
" | Transmit\n");
lxpr_uiobuf_printf(uiobuf, " face |bytes packets errs drop fifo"
" frame compressed multicast|bytes packets errs drop fifo"
" colls carrier compressed\n");
/*
* XXX: data about each interface should go here, but we'll wait to
* see if anybody wants to use it.
*/
}
/* ARGSUSED */
static void
lxpr_read_net_dev_mcast(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
/* ARGSUSED */
static void
lxpr_read_net_igmp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
/* ARGSUSED */
static void
lxpr_read_net_ip_mr_cache(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
/* ARGSUSED */
static void
lxpr_read_net_ip_mr_vif(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
/* ARGSUSED */
static void
lxpr_read_net_mcfilter(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
/* ARGSUSED */
static void
lxpr_read_net_netstat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
/* ARGSUSED */
static void
lxpr_read_net_raw(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
/* ARGSUSED */
static void
lxpr_read_net_route(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
/* ARGSUSED */
static void
lxpr_read_net_rpc(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
/* ARGSUSED */
static void
lxpr_read_net_rt_cache(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
/* ARGSUSED */
static void
lxpr_read_net_sockstat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
/* ARGSUSED */
static void
lxpr_read_net_snmp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
/* ARGSUSED */
static void
lxpr_read_net_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
/* ARGSUSED */
static void
lxpr_read_net_tcp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
/* ARGSUSED */
static void
lxpr_read_net_udp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
/* ARGSUSED */
static void
lxpr_read_net_unix(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
/*
* lxpr_read_kmsg(): read the contents of the kernel message queue. We
* translate this into the reception of console messages for this lx zone; each
* read copies out a single zone console message, or blocks until the next one
* is produced.
*/
#define LX_KMSG_PRI "<0>"
static void
lxpr_read_kmsg(lxpr_node_t *lxpnp, struct lxpr_uiobuf *uiobuf)
{
ldi_handle_t lh = lxpnp->lxpr_cons_ldih;
mblk_t *mp;
if (ldi_getmsg(lh, &mp, NULL) == 0) {
/*
* lx procfs doesn't like successive reads to the same file
* descriptor unless we do an explicit rewind each time.
*/
lxpr_uiobuf_seek(uiobuf, 0);
lxpr_uiobuf_printf(uiobuf, "%s%s", LX_KMSG_PRI,
mp->b_cont->b_rptr);
freemsg(mp);
}
}
/*
* lxpr_read_loadavg(): read the contents of the "loadavg" file.
*
* Just enough for uptime to work
*/
extern int nthread;
static void
lxpr_read_loadavg(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
ulong_t avenrun1;
ulong_t avenrun5;
ulong_t avenrun15;
ulong_t avenrun1_cs;
ulong_t avenrun5_cs;
ulong_t avenrun15_cs;
int loadavg[3];
int *loadbuf;
cpupart_t *cp;
uint_t nrunnable = 0;
rctl_qty_t nlwps;
ASSERT(lxpnp->lxpr_type == LXPR_LOADAVG);
mutex_enter(&cpu_lock);
/*
* Need to add up values over all CPU partitions. If pools are active,
* only report the values of the zone's partition, which by definition
* includes the current CPU.
*/
if (pool_pset_enabled()) {
psetid_t psetid = zone_pset_get(curproc->p_zone);
ASSERT(curproc->p_zone != &zone0);
cp = CPU->cpu_part;
nrunnable = cp->cp_nrunning + cp->cp_nrunnable;
(void) cpupart_get_loadavg(psetid, &loadavg[0], 3);
loadbuf = &loadavg[0];
/*
* We'll report the total number of lwps in the zone for the
* "nproc" parameter of /proc/loadavg; good enough for lx.
*/
nlwps = curproc->p_zone->zone_nlwps;
} else {
cp = cp_list_head;
do {
nrunnable += cp->cp_nrunning + cp->cp_nrunnable;
} while ((cp = cp->cp_next) != cp_list_head);
loadbuf = &avenrun[0];
/*
* This will report kernel threads as well as user lwps, but it
* should be good enough for lx consumers.
*/
nlwps = nthread;
}
mutex_exit(&cpu_lock);
avenrun1 = loadbuf[0] >> FSHIFT;
avenrun1_cs = ((loadbuf[0] & (FSCALE-1)) * 100) >> FSHIFT;
avenrun5 = loadbuf[1] >> FSHIFT;
avenrun5_cs = ((loadbuf[1] & (FSCALE-1)) * 100) >> FSHIFT;
avenrun15 = loadbuf[2] >> FSHIFT;
avenrun15_cs = ((loadbuf[2] & (FSCALE-1)) * 100) >> FSHIFT;
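/*
* The load averages are fixed-point values scaled by FSCALE
* (1 << FSHIFT). As a worked example, if FSHIFT were 8, a load of 1.25
* would be stored as 320: 320 >> 8 == 1 and ((320 & 255) * 100) >> 8
* == 25, which prints as "1.25". (The actual FSHIFT value comes from
* the system headers.)
*/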
lxpr_uiobuf_printf(uiobuf,
"%ld.%02d %ld.%02d %ld.%02d %d/%d %d\n",
avenrun1, avenrun1_cs,
avenrun5, avenrun5_cs,
avenrun15, avenrun15_cs,
nrunnable, nlwps, 0);
}
/*
* lxpr_read_meminfo(): read the contents of the "meminfo" file.
*/
static void
lxpr_read_meminfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
long total_mem = physmem * PAGESIZE;
long free_mem = freemem * PAGESIZE;
long total_swap = k_anoninfo.ani_max * PAGESIZE;
long used_swap = k_anoninfo.ani_phys_resv * PAGESIZE;
ASSERT(lxpnp->lxpr_type == LXPR_MEMINFO);
lxpr_uiobuf_printf(uiobuf,
" total: used: free: shared: buffers: cached:\n"
"Mem: %8lu %8lu %8lu %8u %8u %8u\n"
"Swap: %8lu %8lu %8lu\n"
"MemTotal: %8lu kB\n"
"MemFree: %8lu kB\n"
"MemShared: %8u kB\n"
"Buffers: %8u kB\n"
"Cached: %8u kB\n"
"SwapCached:%8u kB\n"
"Active: %8u kB\n"
"Inactive: %8u kB\n"
"HighTotal: %8u kB\n"
"HighFree: %8u kB\n"
"LowTotal: %8u kB\n"
"LowFree: %8u kB\n"
"SwapTotal: %8lu kB\n"
"SwapFree: %8lu kB\n",
total_mem, total_mem - free_mem, free_mem, 0, 0, 0,
total_swap, used_swap, total_swap - used_swap,
btok(total_mem), /* MemTotal */
btok(free_mem), /* MemFree */
0, /* MemShared */
0, /* Buffers */
0, /* Cached */
0, /* SwapCached */
0, /* Active */
0, /* Inactive */
0, /* HighTotal */
0, /* HighFree */
btok(total_mem), /* LowTotal */
btok(free_mem), /* LowFree */
btok(total_swap), /* SwapTotal */
btok(total_swap - used_swap)); /* SwapFree */
}
/*
* lxpr_read_mounts():
*/
/* ARGSUSED */
static void
lxpr_read_mounts(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
struct vfs *vfsp;
struct vfs *vfslist;
zone_t *zone = LXPTOZ(lxpnp);
struct print_data {
refstr_t *vfs_mntpt;
refstr_t *vfs_resource;
uint_t vfs_flag;
int vfs_fstype;
struct print_data *next;
} *print_head = NULL;
struct print_data **print_tail = &print_head;
struct print_data *printp;
vfs_list_read_lock();
if (zone == global_zone) {
vfsp = vfslist = rootvfs;
} else {
vfsp = vfslist = zone->zone_vfslist;
/*
* If the zone has a root entry, it will be the first in
* the list. If it doesn't, we conjure one up.
*/
if (vfslist == NULL ||
strcmp(refstr_value(vfsp->vfs_mntpt),
zone->zone_rootpath) != 0) {
struct vfs *tvfsp;
/*
* The root of the zone is not a mount point. The vfs
* we want to report is that of the zone's root vnode.
*/
tvfsp = zone->zone_rootvp->v_vfsp;
lxpr_uiobuf_printf(uiobuf,
"/ / %s %s 0 0\n",
vfssw[tvfsp->vfs_fstype].vsw_name,
tvfsp->vfs_flag & VFS_RDONLY ? "ro" : "rw");
}
if (vfslist == NULL) {
vfs_list_unlock();
return;
}
}
/*
* Later on we have to do a lookupname, which can end up causing
* another vfs_list_read_lock() to be called, which can lead to a
* deadlock. To avoid this, we extract the data we need into a local
* list, which we can then walk without holding vfs_list_read_lock().
* We keep the list in the same order as the vfs_list.
*/
do {
/* Skip mounts we shouldn't show */
if (vfsp->vfs_flag & VFS_NOMNTTAB) {
goto nextfs;
}
printp = kmem_alloc(sizeof (*printp), KM_SLEEP);
refstr_hold(vfsp->vfs_mntpt);
printp->vfs_mntpt = vfsp->vfs_mntpt;
refstr_hold(vfsp->vfs_resource);
printp->vfs_resource = vfsp->vfs_resource;
printp->vfs_flag = vfsp->vfs_flag;
printp->vfs_fstype = vfsp->vfs_fstype;
printp->next = NULL;
*print_tail = printp;
print_tail = &printp->next;
nextfs:
vfsp = (zone == global_zone) ?
vfsp->vfs_next : vfsp->vfs_zone_next;
} while (vfsp != vfslist);
vfs_list_unlock();
/*
* now we can run through what we've extracted without holding
* vfs_list_read_lock()
*/
printp = print_head;
while (printp != NULL) {
struct print_data *printp_next;
const char *resource;
char *mntpt;
struct vnode *vp;
int error;
mntpt = (char *)refstr_value(printp->vfs_mntpt);
resource = refstr_value(printp->vfs_resource);
if (mntpt != NULL && mntpt[0] != '\0')
mntpt = ZONE_PATH_TRANSLATE(mntpt, zone);
else
mntpt = "-";
error = lookupname(mntpt, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);
if (error != 0)
goto nextp;
if (!(vp->v_flag & VROOT)) {
VN_RELE(vp);
goto nextp;
}
VN_RELE(vp);
if (resource != NULL && resource[0] != '\0') {
if (resource[0] == '/') {
resource = ZONE_PATH_VISIBLE(resource, zone) ?
ZONE_PATH_TRANSLATE(resource, zone) :
mntpt;
}
} else {
resource = "-";
}
lxpr_uiobuf_printf(uiobuf,
"%s %s %s %s 0 0\n",
resource, mntpt, vfssw[printp->vfs_fstype].vsw_name,
printp->vfs_flag & VFS_RDONLY ? "ro" : "rw");
nextp:
printp_next = printp->next;
refstr_rele(printp->vfs_mntpt);
refstr_rele(printp->vfs_resource);
kmem_free(printp, sizeof (*printp));
printp = printp_next;
}
}
/*
* lxpr_read_partitions():
*
* We don't support partitions in a local zone because it requires access to
* physical devices. But we need to fake up enough of the file to show that we
* have no partitions.
*/
/* ARGSUSED */
static void
lxpr_read_partitions(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
lxpr_uiobuf_printf(uiobuf,
"major minor #blocks name rio rmerge rsect ruse "
"wio wmerge wsect wuse running use aveq\n\n");
}
/*
* lxpr_read_version(): read the contents of the "version" file.
*/
/* ARGSUSED */
static void
lxpr_read_version(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
lxpr_uiobuf_printf(uiobuf,
"%s version %s (%s version %d.%d.%d) "
"#%s SMP %s\n",
LX_UNAME_SYSNAME, LX_UNAME_RELEASE,
#if defined(__GNUC__)
"gcc",
__GNUC__,
__GNUC_MINOR__,
__GNUC_PATCHLEVEL__,
#else
"Sun C",
__SUNPRO_C / 0x100,
(__SUNPRO_C & 0xff) / 0x10,
__SUNPRO_C & 0xf,
#endif
LX_UNAME_VERSION,
"00:00:00 %E%");
}
/*
* lxpr_read_stat(): read the contents of the "stat" file.
*
*/
/* ARGSUSED */
static void
lxpr_read_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
cpu_t *cp, *cpstart;
int pools_enabled;
ulong_t idle_cum = 0;
ulong_t sys_cum = 0;
ulong_t user_cum = 0;
ulong_t pgpgin_cum = 0;
ulong_t pgpgout_cum = 0;
ulong_t pgswapout_cum = 0;
ulong_t pgswapin_cum = 0;
ulong_t intr_cum = 0;
ulong_t pswitch_cum = 0;
ulong_t forks_cum = 0;
hrtime_t msnsecs[NCMSTATES];
ASSERT(lxpnp->lxpr_type == LXPR_STAT);
mutex_enter(&cpu_lock);
pools_enabled = pool_pset_enabled();
/* Calculate cumulative stats */
cp = cpstart = CPU;
do {
int i;
/*
* Don't count CPUs that aren't even in the system
* or aren't up yet.
*/
if ((cp->cpu_flags & CPU_EXISTS) == 0) {
continue;
}
get_cpu_mstate(cp, msnsecs);
idle_cum += NSEC_TO_TICK(msnsecs[CMS_IDLE]);
sys_cum += NSEC_TO_TICK(msnsecs[CMS_SYSTEM]);
user_cum += NSEC_TO_TICK(msnsecs[CMS_USER]);
pgpgin_cum += CPU_STATS(cp, vm.pgpgin);
pgpgout_cum += CPU_STATS(cp, vm.pgpgout);
pgswapin_cum += CPU_STATS(cp, vm.pgswapin);
pgswapout_cum += CPU_STATS(cp, vm.pgswapout);
for (i = 0; i < PIL_MAX; i++)
intr_cum += CPU_STATS(cp, sys.intr[i]);
pswitch_cum += CPU_STATS(cp, sys.pswitch);
forks_cum += CPU_STATS(cp, sys.sysfork);
forks_cum += CPU_STATS(cp, sys.sysvfork);
if (pools_enabled)
cp = cp->cpu_next_part;
else
cp = cp->cpu_next;
} while (cp != cpstart);
lxpr_uiobuf_printf(uiobuf,
"cpu %ld %ld %ld %ld\n",
user_cum, 0, sys_cum, idle_cum);
/* Do per processor stats */
do {
ulong_t idle_ticks;
ulong_t sys_ticks;
ulong_t user_ticks;
/*
* Don't count CPUs that aren't even in the system
* or aren't up yet.
*/
if ((cp->cpu_flags & CPU_EXISTS) == 0) {
continue;
}
get_cpu_mstate(cp, msnsecs);
idle_ticks = NSEC_TO_TICK(msnsecs[CMS_IDLE]);
sys_ticks = NSEC_TO_TICK(msnsecs[CMS_SYSTEM]);
user_ticks = NSEC_TO_TICK(msnsecs[CMS_USER]);
lxpr_uiobuf_printf(uiobuf,
"cpu%d %ld %ld %ld %ld\n",
cp->cpu_id,
user_ticks, 0, sys_ticks, idle_ticks);
if (pools_enabled)
cp = cp->cpu_next_part;
else
cp = cp->cpu_next;
} while (cp != cpstart);
mutex_exit(&cpu_lock);
lxpr_uiobuf_printf(uiobuf,
"page %lu %lu\n"
"swap %lu %lu\n"
"intr %lu\n"
"ctxt %lu\n"
"btime %lu\n"
"processes %lu\n",
pgpgin_cum, pgpgout_cum,
pgswapin_cum, pgswapout_cum,
intr_cum,
pswitch_cum,
boot_time,
forks_cum);
}
/*
* lxpr_read_uptime(): read the contents of the "uptime" file.
*
* format is: "%.2lf, %.2lf",uptime_secs, idle_secs
* Use fixed point arithmetic to get 2 decimal places
*/
/* ARGSUSED */
static void
lxpr_read_uptime(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
cpu_t *cp, *cpstart;
int pools_enabled;
ulong_t idle_cum = 0;
ulong_t cpu_count = 0;
ulong_t idle_s;
ulong_t idle_cs;
ulong_t up_s;
ulong_t up_cs;
ASSERT(lxpnp->lxpr_type == LXPR_UPTIME);
/* Calculate cumulative stats */
mutex_enter(&cpu_lock);
pools_enabled = pool_pset_enabled();
cp = cpstart = CPU;
do {
/*
* Don't count CPUs that aren't even in the system
* or aren't up yet.
*/
if ((cp->cpu_flags & CPU_EXISTS) == 0) {
continue;
}
idle_cum += CPU_STATS(cp, sys.cpu_ticks_idle);
idle_cum += CPU_STATS(cp, sys.cpu_ticks_wait);
cpu_count += 1;
if (pools_enabled)
cp = cp->cpu_next_part;
else
cp = cp->cpu_next;
} while (cp != cpstart);
mutex_exit(&cpu_lock);
/* Capture lbolt in case it changes */
up_cs = lbolt;
up_s = up_cs / hz;
up_cs %= hz;
up_cs *= 100;
up_cs /= hz;
ASSERT(cpu_count > 0);
idle_cum /= cpu_count;
idle_s = idle_cum / hz;
idle_cs = idle_cum % hz;
idle_cs *= 100;
idle_cs /= hz;
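/*
* Worked example: with hz == 100 and lbolt == 12345 ticks, up_s is 123
* and the remaining 45 ticks become (45 * 100) / 100 == 45 centiseconds,
* so we print "123.45". (hz == 100 is just an illustration; the real
* value is whatever the system clock rate is.)
*/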
lxpr_uiobuf_printf(uiobuf,
"%ld.%02d %ld.%02d\n", up_s, up_cs, idle_s, idle_cs);
}
static const char *amd_x_edx[] = {
NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL,
NULL, NULL, NULL, "syscall",
NULL, NULL, NULL, NULL,
NULL, NULL, NULL, "mp",
"nx", NULL, "mmxext", NULL,
NULL, NULL, NULL, NULL,
NULL, "lm", "3dnowext", "3dnow"
};
static const char *amd_x_ecx[] = {
"lahf_lm", NULL, "svm", NULL,
"altmovcr8"
};
static const char *tm_x_edx[] = {
"recovery", "longrun", NULL, "lrti"
};
/*
* Intel calls no-execute "xd" in its docs, but Linux still reports it as "nx."
*/
static const char *intc_x_edx[] = {
NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL,
NULL, NULL, NULL, "syscall",
NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL,
"nx", NULL, NULL, NULL,
NULL, NULL, NULL, NULL,
NULL, "lm", NULL, NULL
};
static const char *intc_edx[] = {
"fpu", "vme", "de", "pse",
"tsc", "msr", "pae", "mce",
"cx8", "apic", NULL, "sep",
"mtrr", "pge", "mca", "cmov",
"pat", "pse36", "pn", "clflush",
NULL, "dts", "acpi", "mmx",
"fxsr", "sse", "sse2", "ss",
"ht", "tm", "ia64", "pbe"
};
/*
* "sse3" on linux is called "pni" (Prescott New Instructions).
*/
static const char *intc_ecx[] = {
"pni", NULL, NULL, "monitor",
"ds_cpl", NULL, NULL, "est",
"tm2", NULL, "cid", NULL,
NULL, "cx16", "xtpr"
};
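/*
* The tables above are indexed by bit position within the corresponding
* cpuid feature word; a NULL entry means the bit has no Linux flag name
* (or we do not report it). For example, bit 0 of the standard edx word
* is "fpu" and bit 11 is "sep", so the loops below simply print
* table[i] for every bit i that is set.
*/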
static void
lxpr_read_cpuinfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
int i;
uint32_t bits;
cpu_t *cp, *cpstart;
int pools_enabled;
const char **fp;
char brandstr[CPU_IDSTRLEN];
struct cpuid_regs cpr;
int maxeax;
int std_ecx, std_edx, ext_ecx, ext_edx;
ASSERT(lxpnp->lxpr_type == LXPR_CPUINFO);
mutex_enter(&cpu_lock);
pools_enabled = pool_pset_enabled();
cp = cpstart = CPU;
do {
/*
* This returns the maximum eax value for standard cpuid
* functions in eax.
*/
cpr.cp_eax = 0;
(void) cpuid_insn(cp, &cpr);
maxeax = cpr.cp_eax;
/*
* Get standard x86 feature flags.
*/
cpr.cp_eax = 1;
(void) cpuid_insn(cp, &cpr);
std_ecx = cpr.cp_ecx;
std_edx = cpr.cp_edx;
/*
* Now get extended feature flags.
*/
cpr.cp_eax = 0x80000001;
(void) cpuid_insn(cp, &cpr);
ext_ecx = cpr.cp_ecx;
ext_edx = cpr.cp_edx;
(void) cpuid_getbrandstr(cp, brandstr, CPU_IDSTRLEN);
lxpr_uiobuf_printf(uiobuf,
"processor\t: %d\n"
"vendor_id\t: %s\n"
"cpu family\t: %d\n"
"model\t\t: %d\n"
"model name\t: %s\n"
"stepping\t: %d\n"
"cpu MHz\t\t: %u.%03u\n",
cp->cpu_id, cpuid_getvendorstr(cp), cpuid_getfamily(cp),
cpuid_getmodel(cp), brandstr, cpuid_getstep(cp),
(uint32_t)(cpu_freq_hz / 1000000),
((uint32_t)(cpu_freq_hz / 1000)) % 1000);
lxpr_uiobuf_printf(uiobuf, "cache size\t: %u KB\n",
getl2cacheinfo(cp, NULL, NULL, NULL) / 1024);
if (x86_feature & X86_HTT) {
/*
* 'siblings' is used for HT-style threads
*/
lxpr_uiobuf_printf(uiobuf,
"physical id\t: %lu\n"
"siblings\t: %u\n",
pg_plat_hw_instance_id(cp, PGHW_CHIP),
cpuid_get_ncpu_per_chip(cp));
}
/*
* Since we're relatively picky about running on older hardware,
* we can be somewhat cavalier about the answers to these ones.
*
* In fact, given the hardware we support, we just say:
*
* fdiv_bug : no (if we're on a 64-bit kernel)
* hlt_bug : no
* f00f_bug : no
* coma_bug : no
* wp : yes (write protect in supervisor mode)
*/
lxpr_uiobuf_printf(uiobuf,
"fdiv_bug\t: %s\n"
"hlt_bug \t: no\n"
"f00f_bug\t: no\n"
"coma_bug\t: no\n"
"fpu\t\t: %s\n"
"fpu_exception\t: %s\n"
"cpuid level\t: %d\n"
"flags\t\t:",
#if defined(__i386)
fpu_pentium_fdivbug ? "yes" : "no",
#else
"no",
#endif /* __i386 */
fpu_exists ? "yes" : "no", fpu_exists ? "yes" : "no",
maxeax);
for (bits = std_edx, fp = intc_edx, i = 0;
i < sizeof (intc_edx) / sizeof (intc_edx[0]); fp++, i++)
if ((bits & (1 << i)) != 0 && *fp)
lxpr_uiobuf_printf(uiobuf, " %s", *fp);
/*
* name additional features where appropriate
*/
switch (x86_vendor) {
case X86_VENDOR_Intel:
for (bits = ext_edx, fp = intc_x_edx, i = 0;
i < sizeof (intc_x_edx) / sizeof (intc_x_edx[0]);
fp++, i++)
if ((bits & (1 << i)) != 0 && *fp)
lxpr_uiobuf_printf(uiobuf, " %s", *fp);
break;
case X86_VENDOR_AMD:
for (bits = ext_edx, fp = amd_x_edx, i = 0;
i < sizeof (amd_x_edx) / sizeof (amd_x_edx[0]);
fp++, i++)
if ((bits & (1 << i)) != 0 && *fp)
lxpr_uiobuf_printf(uiobuf, " %s", *fp);
for (bits = ext_ecx, fp = amd_x_ecx, i = 0;
i < sizeof (amd_x_ecx) / sizeof (amd_x_ecx[0]);
fp++, i++)
if ((bits & (1 << i)) != 0 && *fp)
lxpr_uiobuf_printf(uiobuf, " %s", *fp);
break;
case X86_VENDOR_TM:
for (bits = ext_edx, fp = tm_x_edx, i = 0;
i < sizeof (tm_x_edx) / sizeof (tm_x_edx[0]);
fp++, i++)
if ((bits & (1 << i)) != 0 && *fp)
lxpr_uiobuf_printf(uiobuf, " %s", *fp);
break;
default:
break;
}
for (bits = std_ecx, fp = intc_ecx, i = 0;
i < sizeof (intc_ecx) / sizeof (intc_ecx[0]); fp++, i++)
if ((bits & (1 << i)) != 0 && *fp)
lxpr_uiobuf_printf(uiobuf, " %s", *fp);
lxpr_uiobuf_printf(uiobuf, "\n\n");
if (pools_enabled)
cp = cp->cpu_next_part;
else
cp = cp->cpu_next;
} while (cp != cpstart);
mutex_exit(&cpu_lock);
}
/* ARGSUSED */
static void
lxpr_read_fd(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
ASSERT(lxpnp->lxpr_type == LXPR_PID_FD_FD);
lxpr_uiobuf_seterr(uiobuf, EFAULT);
}
/*
* lxpr_getattr(): Vnode operation for VOP_GETATTR()
*/
static int
lxpr_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr)
{
register lxpr_node_t *lxpnp = VTOLXP(vp);
lxpr_nodetype_t type = lxpnp->lxpr_type;
extern uint_t nproc;
int error;
/*
* Return the attributes of the underlying vnode if ATTR_REAL is
* specified, but keep fd files with the symlink permissions.
*/
if (lxpnp->lxpr_realvp != NULL && (flags & ATTR_REAL)) {
vnode_t *rvp = lxpnp->lxpr_realvp;
/*
* withhold attribute information to owner or root
*/
if ((error = VOP_ACCESS(rvp, 0, 0, cr)) != 0) {
return (error);
}
/*
* now fetch its attributes
*/
if ((error = VOP_GETATTR(rvp, vap, flags, cr)) != 0) {
return (error);
}
/*
* if it's a file in lx /proc/pid/fd/xx then set its
* mode and keep it looking like a symlink
*/
if (type == LXPR_PID_FD_FD) {
vap->va_mode = lxpnp->lxpr_mode;
vap->va_type = vp->v_type;
vap->va_size = 0;
vap->va_nlink = 1;
}
return (0);
}
/* Default attributes, that may be overridden below */
bzero(vap, sizeof (*vap));
vap->va_atime = vap->va_mtime = vap->va_ctime = lxpnp->lxpr_time;
vap->va_nlink = 1;
vap->va_type = vp->v_type;
vap->va_mode = lxpnp->lxpr_mode;
vap->va_fsid = vp->v_vfsp->vfs_dev;
vap->va_blksize = DEV_BSIZE;
vap->va_uid = lxpnp->lxpr_uid;
vap->va_gid = lxpnp->lxpr_gid;
vap->va_nodeid = lxpnp->lxpr_ino;
switch (type) {
case LXPR_PROCDIR:
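/* "." and "..", the fixed proc files, plus one directory per process */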
vap->va_nlink = nproc + 2 + PROCDIRFILES;
vap->va_size = (nproc + 2 + PROCDIRFILES) * LXPR_SDSIZE;
break;
case LXPR_PIDDIR:
vap->va_nlink = PIDDIRFILES;
vap->va_size = PIDDIRFILES * LXPR_SDSIZE;
break;
case LXPR_SELF:
vap->va_uid = crgetruid(curproc->p_cred);
vap->va_gid = crgetrgid(curproc->p_cred);
break;
default:
break;
}
vap->va_nblocks = (fsblkcnt64_t)btod(vap->va_size);
return (0);
}
/*
* lxpr_access(): Vnode operation for VOP_ACCESS()
*/
static int
lxpr_access(vnode_t *vp, int mode, int flags, cred_t *cr)
{
lxpr_node_t *lxpnp = VTOLXP(vp);
int shift = 0;
proc_t *tp;
/* lx /proc is a read only file system */
if (mode & VWRITE)
return (EROFS);
/*
* If this is a restricted file, check access permissions.
*/
switch (lxpnp->lxpr_type) {
case LXPR_PIDDIR:
return (0);
case LXPR_PID_CURDIR:
case LXPR_PID_ENV:
case LXPR_PID_EXE:
case LXPR_PID_MAPS:
case LXPR_PID_MEM:
case LXPR_PID_ROOTDIR:
case LXPR_PID_FDDIR:
case LXPR_PID_FD_FD:
if ((tp = lxpr_lock(lxpnp->lxpr_pid)) == NULL)
return (ENOENT);
if (tp != curproc && secpolicy_proc_access(cr) != 0 &&
priv_proc_cred_perm(cr, tp, NULL, mode) != 0) {
lxpr_unlock(tp);
return (EACCES);
}
lxpr_unlock(tp);
default:
break;
}
if (lxpnp->lxpr_realvp != NULL) {
/*
* For these we use the underlying vnode's accessibility.
*/
return (VOP_ACCESS(lxpnp->lxpr_realvp, mode, flags, cr));
}
/*
* Access check is based on only
* one of owner, group, public.
* If not owner, then check group.
* If not a member of the group, then
* check public access.
*/
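/*
* For example, when a user who is neither the owner nor a group member
* requests VREAD (0400) on a node whose lxpr_mode is 0444, shift ends
* up as 6; (0444 << 6) has the 0400 bit set, so the requested bit is
* cleared and access is granted.
*/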
if (crgetuid(cr) != lxpnp->lxpr_uid) {
shift += 3;
if (!groupmember((uid_t)lxpnp->lxpr_gid, cr))
shift += 3;
}
mode &= ~(lxpnp->lxpr_mode << shift);
if (mode == 0)
return (0);
return (EACCES);
}
/* ARGSUSED */
static vnode_t *
lxpr_lookup_not_a_dir(vnode_t *dp, char *comp)
{
return (NULL);
}
/*
* lxpr_lookup(): Vnode operation for VOP_LOOKUP()
*/
/* ARGSUSED */
static int
lxpr_lookup(vnode_t *dp, char *comp, vnode_t **vpp, pathname_t *pathp,
int flags, vnode_t *rdir, cred_t *cr)
{
lxpr_node_t *lxpnp = VTOLXP(dp);
lxpr_nodetype_t type = lxpnp->lxpr_type;
int error;
ASSERT(dp->v_type == VDIR);
ASSERT(type >= 0 && type < LXPR_NFILES);
/*
* we should never get here because the lookup
* is done on the realvp for these nodes
*/
ASSERT(type != LXPR_PID_FD_FD &&
type != LXPR_PID_CURDIR &&
type != LXPR_PID_ROOTDIR);
/*
* restrict lookup permission to owner or root
*/
if ((error = lxpr_access(dp, VEXEC, 0, cr)) != 0) {
return (error);
}
/*
* Just return the parent vnode
* if that's where we are trying to go
*/
if (strcmp(comp, "..") == 0) {
VN_HOLD(lxpnp->lxpr_parent);
*vpp = lxpnp->lxpr_parent;
return (0);
}
/*
* Special handling for directory searches
* Note: null component name is synonym for
* current directory being searched.
*/
if ((dp->v_type == VDIR) && (*comp == '\0' || strcmp(comp, ".") == 0)) {
VN_HOLD(dp);
*vpp = dp;
return (0);
}
*vpp = (lxpr_lookup_function[type](dp, comp));
return ((*vpp == NULL) ? ENOENT : 0);
}
/*
* Do a sequential search on the given directory table
*/
static vnode_t *
lxpr_lookup_common(vnode_t *dp, char *comp, proc_t *p,
lxpr_dirent_t *dirtab, int dirtablen)
{
lxpr_node_t *lxpnp;
int count;
for (count = 0; count < dirtablen; count++) {
if (strcmp(dirtab[count].d_name, comp) == 0) {
lxpnp = lxpr_getnode(dp, dirtab[count].d_type, p, 0);
dp = LXPTOV(lxpnp);
ASSERT(dp != NULL);
return (dp);
}
}
return (NULL);
}
static vnode_t *
lxpr_lookup_piddir(vnode_t *dp, char *comp)
{
proc_t *p;
ASSERT(VTOLXP(dp)->lxpr_type == LXPR_PIDDIR);
p = lxpr_lock(VTOLXP(dp)->lxpr_pid);
if (p == NULL)
return (NULL);
dp = lxpr_lookup_common(dp, comp, p, piddir, PIDDIRFILES);
lxpr_unlock(p);
return (dp);
}
/*
* Lookup one of the process's open files.
*/
static vnode_t *
lxpr_lookup_fddir(vnode_t *dp, char *comp)
{
lxpr_node_t *dlxpnp = VTOLXP(dp);
lxpr_node_t *lxpnp;
vnode_t *vp = NULL;
proc_t *p;
file_t *fp;
uint_t fd;
int c;
uf_entry_t *ufp;
uf_info_t *fip;
ASSERT(dlxpnp->lxpr_type == LXPR_PID_FDDIR);
/*
* convert the string rendition of the filename
* to a file descriptor
*/
fd = 0;
while ((c = *comp++) != '\0') {
int ofd;
if (c < '0' || c > '9')
return (NULL);
ofd = fd;
fd = 10*fd + c - '0';
/* integer overflow */
if (fd / 10 != ofd)
return (NULL);
}
/*
* get the proc to work with and lock it
*/
p = lxpr_lock(dlxpnp->lxpr_pid);
if (p == NULL)
return (NULL);
/*
* If the process is a zombie or system process
* it can't have any open files.
*/
if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas)) {
lxpr_unlock(p);
return (NULL);
}
/*
* get us a fresh node/vnode
*/
lxpnp = lxpr_getnode(dp, LXPR_PID_FD_FD, p, fd);
/*
* get open file info
*/
fip = (&(p)->p_user.u_finfo);
mutex_enter(&fip->fi_lock);
/*
* got the fd data so now done with this proc
*/
lxpr_unlock(p);
if (fd < fip->fi_nfiles) {
UF_ENTER(ufp, fip, fd);
/*
* ensure the fd is still kosher;
* it may have gone away between the readdir and
* the lookup
*/
if (fip->fi_list[fd].uf_file == NULL) {
mutex_exit(&fip->fi_lock);
UF_EXIT(ufp);
lxpr_freenode(lxpnp);
return (NULL);
}
if ((fp = ufp->uf_file) != NULL)
vp = fp->f_vnode;
UF_EXIT(ufp);
}
mutex_exit(&fip->fi_lock);
if (vp == NULL) {
lxpr_freenode(lxpnp);
return (NULL);
} else {
/*
* Fill in the lxpr_node so future references will
* be able to find the underlying vnode.
* The vnode is held on the realvp.
*/
lxpnp->lxpr_realvp = vp;
VN_HOLD(lxpnp->lxpr_realvp);
}
dp = LXPTOV(lxpnp);
ASSERT(dp != NULL);
return (dp);
}
static vnode_t *
lxpr_lookup_netdir(vnode_t *dp, char *comp)
{
ASSERT(VTOLXP(dp)->lxpr_type == LXPR_NETDIR);
dp = lxpr_lookup_common(dp, comp, NULL, netdir, NETDIRFILES);
return (dp);
}
static vnode_t *
lxpr_lookup_procdir(vnode_t *dp, char *comp)
{
ASSERT(VTOLXP(dp)->lxpr_type == LXPR_PROCDIR);
/*
* We know all the names of files & dirs in our
* file system structure except those that are pid names.
* These change as pids are created/deleted etc.
* So just look for a number as the first char to see if we
* are doing a pid lookup.
*
* Don't need to check for "self" as it is implemented as a symlink
*/
if (*comp >= '0' && *comp <= '9') {
pid_t pid = 0;
lxpr_node_t *lxpnp = NULL;
proc_t *p;
int c;
while ((c = *comp++) != '\0')
pid = 10*pid + c - '0';
/*
* Can't continue if the process is still loading
* or it doesn't really exist yet (or maybe it just died!)
*/
p = lxpr_lock(pid);
if (p == NULL)
return (NULL);
if (secpolicy_basic_procinfo(CRED(), p, curproc) != 0) {
lxpr_unlock(p);
return (NULL);
}
/*
* allocate and fill in a new lx /proc node
*/
lxpnp = lxpr_getnode(dp, LXPR_PIDDIR, p, 0);
lxpr_unlock(p);
dp = LXPTOV(lxpnp);
ASSERT(dp != NULL);
return (dp);
}
/* Lookup fixed names */
return (lxpr_lookup_common(dp, comp, NULL, lx_procdir, PROCDIRFILES));
}
/*
* lxpr_readdir(): Vnode operation for VOP_READDIR()
*/
/* ARGSUSED */
static int
lxpr_readdir(vnode_t *dp, uio_t *uiop, cred_t *cr, int *eofp)
{
lxpr_node_t *lxpnp = VTOLXP(dp);
lxpr_nodetype_t type = lxpnp->lxpr_type;
ssize_t uresid;
off_t uoffset;
int error;
ASSERT(dp->v_type == VDIR);
ASSERT(type >= 0 && type < LXPR_NFILES);
/*
* we should never get here because the readdir
* is done on the realvp for these nodes
*/
ASSERT(type != LXPR_PID_FD_FD &&
type != LXPR_PID_CURDIR &&
type != LXPR_PID_ROOTDIR);
/*
* restrict readdir permission to owner or root
*/
if ((error = lxpr_access(dp, VREAD, 0, cr)) != 0)
return (error);
uoffset = uiop->uio_offset;
uresid = uiop->uio_resid;
/* can't do negative reads */
if (uoffset < 0 || uresid <= 0)
return (EINVAL);
/* can't read directory entries that don't exist! */
if (uoffset % LXPR_SDSIZE)
return (ENOENT);
return (lxpr_readdir_function[lxpnp->lxpr_type](lxpnp, uiop, eofp));
}
/* ARGSUSED */
static int
lxpr_readdir_not_a_dir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
{
return (ENOTDIR);
}
/*
* This has the common logic for returning directory entries
*/
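/*
* Directory offsets are synthetic: every entry, including "." and "..",
* occupies LXPR_SDSIZE bytes of offset space. Offset 0 is ".", offset
* LXPR_SDSIZE is "..", and offset (i + 2) * LXPR_SDSIZE corresponds to
* dirtab[i], which is why dirindex below is computed as
* (uoffset / LXPR_SDSIZE) - 2.
*/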
static int
lxpr_readdir_common(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp,
lxpr_dirent_t *dirtab, int dirtablen)
{
/* bp holds one dirent64 structure */
longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
dirent64_t *dirent = (dirent64_t *)bp;
ssize_t oresid; /* save a copy for testing later */
ssize_t uresid;
oresid = uiop->uio_resid;
/* clear out the dirent buffer */
bzero(bp, sizeof (bp));
/*
* Satisfy user request
*/
while ((uresid = uiop->uio_resid) > 0) {
int dirindex;
off_t uoffset;
int reclen;
int error;
uoffset = uiop->uio_offset;
dirindex = (uoffset / LXPR_SDSIZE) - 2;
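/*
 * Directory offsets advance in LXPR_SDSIZE units: slot 0 is
 * ".", slot 1 is "..", and slot N + 2 corresponds to
 * dirtab[N].
 */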
if (uoffset == 0) {
dirent->d_ino = lxpnp->lxpr_ino;
dirent->d_name[0] = '.';
dirent->d_name[1] = '\0';
reclen = DIRENT64_RECLEN(1);
} else if (uoffset == LXPR_SDSIZE) {
dirent->d_ino = lxpr_parentinode(lxpnp);
dirent->d_name[0] = '.';
dirent->d_name[1] = '.';
dirent->d_name[2] = '\0';
reclen = DIRENT64_RECLEN(2);
} else if (dirindex < dirtablen) {
int slen = strlen(dirtab[dirindex].d_name);
dirent->d_ino = lxpr_inode(dirtab[dirindex].d_type,
lxpnp->lxpr_pid, 0);
ASSERT(slen < LXPNSIZ);
(void) strcpy(dirent->d_name, dirtab[dirindex].d_name);
reclen = DIRENT64_RECLEN(slen);
} else {
/* Run out of table entries */
if (eofp) {
*eofp = 1;
}
return (0);
}
dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
dirent->d_reclen = (ushort_t)reclen;
/*
 * If the size of the data to transfer is greater
 * than that requested, then we can't do it in this transfer.
 */
if (reclen > uresid) {
/*
* Error if no entries have been returned yet.
*/
if (uresid == oresid) {
return (EINVAL);
}
break;
}
/*
* uiomove() updates both uiop->uio_resid and
* uiop->uio_offset by the same amount. But we want
* uiop->uio_offset to change in increments
* of LXPR_SDSIZE, which is different from the number of bytes
* being returned to the user.
* So we set uiop->uio_offset separately, ignoring what
* uiomove() does.
*/
if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ, uiop)) != 0) {
return (error);
}
uiop->uio_offset = uoffset + LXPR_SDSIZE;
}
/* Have run out of space, but could have just done last table entry */
if (eofp) {
*eofp =
(uiop->uio_offset >= ((dirtablen+2) * LXPR_SDSIZE)) ? 1 : 0;
}
return (0);
}
static int
lxpr_readdir_procdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
{
/* bp holds one dirent64 structure */
longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
dirent64_t *dirent = (dirent64_t *)bp;
ssize_t oresid; /* save a copy for testing later */
ssize_t uresid;
off_t uoffset;
zoneid_t zoneid;
pid_t pid;
int error;
int ceof;
ASSERT(lxpnp->lxpr_type == LXPR_PROCDIR);
oresid = uiop->uio_resid;
zoneid = LXPTOZ(lxpnp)->zone_id;
/*
 * We return directory entries in the order:
 * "." and "..", then the unique lx procfs files, then the
 * directories corresponding to the running processes.
 *
 * This is a good order because it allows us to more easily
 * keep track of where we are between calls to getdents().
 * Even if the number of processes changes between calls, we
 * can't lose track of where we are in the lx procfs files.
 */
/* Do the fixed entries */
error = lxpr_readdir_common(lxpnp, uiop, &ceof, lx_procdir,
PROCDIRFILES);
/* Finished if we got an error or if we couldn't do all the table */
if (error != 0 || ceof == 0)
return (error);
/* clear out the dirent buffer */
bzero(bp, sizeof (bp));
/* Do the process entries */
while ((uresid = uiop->uio_resid) > 0) {
proc_t *p;
int len;
int reclen;
int i;
uoffset = uiop->uio_offset;
/*
* Stop when entire proc table has been examined.
*/
i = (uoffset / LXPR_SDSIZE) - 2 - PROCDIRFILES;
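/*
 * Slots 0 and 1 are "." and "..", the next PROCDIRFILES
 * slots are the fixed entries emitted above, and slot
 * (i + 2 + PROCDIRFILES) corresponds to pid_entry(i).
 */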
if (i >= v.v_proc) {
/* Run out of table entries */
if (eofp) {
*eofp = 1;
}
return (0);
}
mutex_enter(&pidlock);
/*
* Skip indices for which there is no pid_entry, PIDs for
* which there is no corresponding process, a PID of 0,
* and anything the security policy doesn't allow
* us to look at.
*/
if ((p = pid_entry(i)) == NULL || p->p_stat == SIDL ||
p->p_pid == 0 ||
secpolicy_basic_procinfo(CRED(), p, curproc) != 0) {
mutex_exit(&pidlock);
goto next;
}
mutex_exit(&pidlock);
/*
* Convert pid to the Linux default of 1 if we're the zone's
* init process, otherwise use the value from the proc
* structure
*/
pid = ((p->p_pid != curproc->p_zone->zone_proc_initpid) ?
p->p_pid : 1);
/*
* If this /proc was mounted in the global zone, view
* all procs; otherwise, only view zone member procs.
*/
if (zoneid != GLOBAL_ZONEID && p->p_zone->zone_id != zoneid) {
goto next;
}
ASSERT(p->p_stat != 0);
dirent->d_ino = lxpr_inode(LXPR_PIDDIR, pid, 0);
len = snprintf(dirent->d_name, LXPNSIZ, "%d", pid);
ASSERT(len < LXPNSIZ);
reclen = DIRENT64_RECLEN(len);
dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
dirent->d_reclen = (ushort_t)reclen;
/*
 * If the size of the data to transfer is greater
 * than that requested, then we can't do it in this transfer.
 */
if (reclen > uresid) {
/*
* Error if no entries have been returned yet.
*/
if (uresid == oresid)
return (EINVAL);
break;
}
/*
 * uiomove() updates both uiop->uio_resid and
 * uiop->uio_offset by the same amount. But we want
 * uiop->uio_offset to change in increments
 * of LXPR_SDSIZE, which is different from the number of bytes
 * being returned to the user.
 * So we set uiop->uio_offset separately, at the bottom of
 * the loop (at the next: label), ignoring what uiomove() does.
 */
*/
if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ, uiop)) != 0)
return (error);
next:
uiop->uio_offset = uoffset + LXPR_SDSIZE;
}
if (eofp)
*eofp =
(uiop->uio_offset >=
((v.v_proc + PROCDIRFILES + 2) * LXPR_SDSIZE)) ? 1 : 0;
return (0);
}
static int
lxpr_readdir_piddir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
{
proc_t *p;
ASSERT(lxpnp->lxpr_type == LXPR_PIDDIR);
/* can't read its contents if it died */
mutex_enter(&pidlock);
p = prfind((lxpnp->lxpr_pid == 1) ?
curproc->p_zone->zone_proc_initpid : lxpnp->lxpr_pid);
if (p == NULL || p->p_stat == SIDL) {
mutex_exit(&pidlock);
return (ENOENT);
}
mutex_exit(&pidlock);
return (lxpr_readdir_common(lxpnp, uiop, eofp, piddir, PIDDIRFILES));
}
static int
lxpr_readdir_netdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
{
ASSERT(lxpnp->lxpr_type == LXPR_NETDIR);
return (lxpr_readdir_common(lxpnp, uiop, eofp, netdir, NETDIRFILES));
}
static int
lxpr_readdir_fddir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
{
/* bp holds one dirent64 structure */
longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
dirent64_t *dirent = (dirent64_t *)bp;
ssize_t oresid; /* save a copy for testing later */
ssize_t uresid;
off_t uoffset;
int error;
int ceof;
proc_t *p;
int fddirsize;
uf_info_t *fip;
ASSERT(lxpnp->lxpr_type == LXPR_PID_FDDIR);
oresid = uiop->uio_resid;
/* can't read its contents if it died */
p = lxpr_lock(lxpnp->lxpr_pid);
if (p == NULL)
return (ENOENT);
/* Get open file info */
fip = (&(p)->p_user.u_finfo);
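/*
 * Zombie and system processes have no usable fd table, so
 * present their fd directory as empty.
 */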
if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas))
fddirsize = 0;
else
fddirsize = fip->fi_nfiles;
mutex_enter(&fip->fi_lock);
lxpr_unlock(p);
/* Do the fixed entries (in this case just "." & "..") */
error = lxpr_readdir_common(lxpnp, uiop, &ceof, NULL, 0);
/* Finished if we got an error or if we couldn't do all the table */
if (error != 0 || ceof == 0)
return (error);
/* clear out the dirent buffer */
bzero(bp, sizeof (bp));
/*
* Loop until user's request is satisfied or until
* all file descriptors have been examined.
*/
for (; (uresid = uiop->uio_resid) > 0;
uiop->uio_offset = uoffset + LXPR_SDSIZE) {
int reclen;
int fd;
int len;
uoffset = uiop->uio_offset;
/*
* Stop at the end of the fd list
*/
fd = (uoffset / LXPR_SDSIZE) - 2;
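/*
 * As above, slot 0 is ".", slot 1 is "..", and slot fd + 2
 * corresponds to file descriptor fd.
 */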
if (fd >= fddirsize) {
if (eofp) {
*eofp = 1;
}
goto out;
}
if (fip->fi_list[fd].uf_file == NULL)
continue;
dirent->d_ino = lxpr_inode(LXPR_PID_FD_FD, lxpnp->lxpr_pid, fd);
len = snprintf(dirent->d_name, LXPNSIZ, "%d", fd);
ASSERT(len < LXPNSIZ);
reclen = DIRENT64_RECLEN(len);
dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
dirent->d_reclen = (ushort_t)reclen;
if (reclen > uresid) {
/*
* Error if no entries have been returned yet.
*/
if (uresid == oresid)
error = EINVAL;
goto out;
}
if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ, uiop)) != 0)
goto out;
}
if (eofp)
*eofp =
(uiop->uio_offset >= ((fddirsize+2) * LXPR_SDSIZE)) ? 1 : 0;
out:
mutex_exit(&fip->fi_lock);
return (error);
}
/*
* lxpr_readlink(): Vnode operation for VOP_READLINK()
*/
static int
lxpr_readlink(vnode_t *vp, uio_t *uiop)
{
char bp[MAXPATHLEN + 1];
size_t buflen = sizeof (bp);
lxpr_node_t *lxpnp = VTOLXP(vp);
vnode_t *rvp = lxpnp->lxpr_realvp;
pid_t pid;
int error = 0;
/* must be a symbolic link file */
if (vp->v_type != VLNK)
return (EINVAL);
/*
* Try to produce a symlink name for anything that's really a regular
* file or directory (but not for anything else)
*/
if (rvp != NULL && (rvp->v_type == VDIR || rvp->v_type == VREG)) {
if ((error = lxpr_access(vp, VREAD, 0, CRED())) != 0)
return (error);
if ((error = vnodetopath(NULL, rvp, bp, buflen, CRED())) != 0)
return (error);
} else {
switch (lxpnp->lxpr_type) {
case LXPR_SELF:
/*
 * Convert the pid to the Linux default of 1 if we're the
 * zone's init process.  We don't need to check the
 * snprintf() result as every possible int fits within
 * MAXPATHLEN bytes.
 */
pid = ((curproc->p_pid !=
curproc->p_zone->zone_proc_initpid)
? curproc->p_pid : 1);
(void) snprintf(bp, buflen, "%d", pid);
break;
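/*
 * These nodes normally resolve through lxpr_realvp; if we
 * get here that vnode was missing or not a regular file or
 * directory, so refuse the readlink rather than return an
 * empty name.
 */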
case LXPR_PID_CURDIR:
case LXPR_PID_ROOTDIR:
case LXPR_PID_EXE:
return (EACCES);
default:
/*
 * Need to return an error here so that nothing thinks
 * the symlink is empty and hence "."
 */
return (EINVAL);
}
}
/* copy the link data to user space */
return (uiomove(bp, strlen(bp), UIO_READ, uiop));
}
/*
* lxpr_inactive(): Vnode operation for VOP_INACTIVE()
* Vnode is no longer referenced, deallocate the file
* and all its resources.
*/
/* ARGSUSED */
static void
lxpr_inactive(vnode_t *vp, cred_t *cr)
{
lxpr_freenode(VTOLXP(vp));
}
/*
* lxpr_sync(): Vnode operation for VOP_SYNC()
*/
static int
lxpr_sync()
{
/*
 * Nothing to sync, but this
 * function must never fail.
 */
return (0);
}
/*
* lxpr_cmp(): Vnode operation for VOP_CMP()
*/
static int
lxpr_cmp(vnode_t *vp1, vnode_t *vp2)
{
vnode_t *rvp;
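/*
 * Follow each lx /proc vnode down to its underlying real
 * vnode (if any) so that two nodes layered over the same
 * file compare as equal.
 */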
while (vn_matchops(vp1, lxpr_vnodeops) &&
(rvp = VTOLXP(vp1)->lxpr_realvp) != NULL)
vp1 = rvp;
while (vn_matchops(vp2, lxpr_vnodeops) &&
(rvp = VTOLXP(vp2)->lxpr_realvp) != NULL)
vp2 = rvp;
if (vn_matchops(vp1, lxpr_vnodeops) || vn_matchops(vp2, lxpr_vnodeops))
return (vp1 == vp2);
return (VOP_CMP(vp1, vp2));
}
/*
* lxpr_realvp(): Vnode operation for VOP_REALVP()
*/
static int
lxpr_realvp(vnode_t *vp, vnode_t **vpp)
{
vnode_t *rvp;
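/*
 * Hand back the underlying vnode; if that vnode is itself
 * stacked, let its own VOP_REALVP() peel off another layer.
 */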
if ((rvp = VTOLXP(vp)->lxpr_realvp) != NULL) {
vp = rvp;
if (VOP_REALVP(vp, &rvp) == 0)
vp = rvp;
}
*vpp = vp;
return (0);
}