#!/usr/bin/ksh
#
# iotop - display top disk I/O events by process.
# Written using DTrace (Solaris 10 3/05).
#
# This is measuring disk events that have made it past system caches.
#
# $Id: iotop 8 2007-08-06 05:55:26Z brendan $
#
# USAGE: iotop [-C] [-D|-o|-P] [-j|-Z] [-d device] [-f filename]
# [-m mount_point] [-t top] [interval [count]]
#
# iotop # default output, 5 second intervals
#
# -C # don't clear the screen
# -D # print delta times, elapsed, us
# -j # print project ID
# -o # print disk delta times, us
# -P # print %I/O (disk delta times)
# -Z # print zone ID
# -d device # instance name to snoop (eg, dad0)
# -f filename # full pathname of file to snoop
# -m mount_point # this FS only (will skip raw events)
# -t top # print top number only
# eg,
# iotop 1 # 1 second samples
# iotop -C # don't clear the screen
# iotop -P # print %I/O (time based)
# iotop -j # print project IDs
# iotop -Z # print zone IDs
# iotop -t 20 # print top 20 lines only
# iotop -C 5 12 # print 12 x 5 second samples
#
# FIELDS:
# UID user ID
# PID process ID
# PPID parent process ID
# PROJ project ID
# ZONE zone ID
# CMD process command name
# DEVICE device name
# MAJ device major number
# MIN device minor number
# D direction, Read or Write
# BYTES total size of operations, bytes
# ELAPSED total elapsed from request to completion, us
# DISKTIME total time for disk to complete request, us
# %I/O percent disk I/O, based on time (DISKTIME)
# load 1 min load average
# disk_r total disk read Kbytes for sample
# disk_w total disk write Kbytes for sample
#
# NOTE:
# * There are two different delta times reported. -D prints the
# elapsed time from the disk request (strategy) to the disk completion
# (iodone); -o prints the time for the disk to complete that event
# since its last event (time between iodones), or, the time to the
# strategy if the disk had been idle.
# * The %I/O value can exceed 100%. It represents how busy a process is
# making the disks, in terms of a single disk. A value of 200% could
# mean 2 disks are busy at 100%, or 4 disks at 50%...
#
# SEE ALSO: iosnoop
# Solaris Dynamic Tracing Guide, http://docs.oracle.com
# DTrace Tools, http://www.brendangregg.com/dtrace.html
#
# INSPIRATION: top(1) by William LeFebvre
#
# COPYRIGHT: Copyright (c) 2005, 2006 Brendan Gregg.
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License, Version 1.0 only
# (the "License"). You may not use this file except in compliance
# with the License.
#
# You can obtain a copy of the license at Docs/cddl1.txt
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# CDDL HEADER END
#
# KNOWN BUGS:
# - This can print errors while running on servers with Veritas volumes.
#
# Author: Brendan Gregg [Sydney, Australia]
#
# 15-Jul-2005 Brendan Gregg Created this.
# 20-Apr-2006 " " Last update.
#
##############################
# --- Process Arguments ---
#
### default variables
# Option flags: 0 = off, 1 = on. They are changed by the getopts loop and
# later handed to the D program as inline constants.
opt_clear=1		# clear the screen before each report
opt_def=1		# first column is the default one (UID)
opt_proj=0		# first column is the project ID
opt_zone=0		# first column is the zone ID
opt_bytes=1		# report bytes
opt_elapsed=0		# report elapsed times
opt_dtime=0		# report disk delta times
opt_percent=0		# report %I/O
opt_top=0		# limit output to the top lines
opt_device=0		# filter on a device instance name
opt_file=0		# filter on a file pathname
opt_mount=0		# filter on a mount point
filter=0		# becomes 1 when any of the three filters is active

# Option arguments; "." is the placeholder for "not given".
device=.
filename=.
mount=.
top=0

# Sampling: seconds per report, and number of reports (-1 = not limited).
interval=5
count=-1
### process options
# Print the usage message on stderr and exit with status 1.
function usage
{
	cat <<-END >&2
USAGE: iotop [-C] [-D|-o|-P] [-j|-Z] [-d device] [-f filename]
[-m mount_point] [-t top] [interval [count]]
-C # don't clear the screen
-D # print delta times, elapsed, us
-j # print project ID
-o # print disk delta times, us
-P # print %I/O (disk delta times)
-Z # print zone ID
-d device # instance name to snoop
-f filename # snoop this file only
-m mount_point # this FS only
-t top # print top number only
eg,
iotop # default output, 5 second samples
iotop 1 # 1 second samples
iotop -P # print %I/O (time based)
iotop -m / # snoop events on filesystem / only
iotop -t 20 # print top 20 lines only
iotop -C 5 12 # print 12 x 5 second samples
END
	exit 1
}

# Translate command line switches into the opt_* flags and their arguments.
# -D, -o and -P replace the default byte statistic (opt_bytes=0); -j and -Z
# replace the default UID column (opt_def=0).
while getopts CDd:f:hjm:oPt:Z name
do
	case $name in
	C)	opt_clear=0 ;;
	D)	opt_elapsed=1; opt_bytes=0 ;;
	d)	opt_device=1; device=$OPTARG ;;
	f)	opt_file=1; filename=$OPTARG ;;
	j)	opt_proj=1; opt_def=0 ;;
	m)	opt_mount=1; mount=$OPTARG ;;
	o)	opt_dtime=1; opt_bytes=0 ;;
	P)	opt_percent=1; opt_dtime=1; opt_bytes=0 ;;
	t)	# The value is pasted into the D program as an integer constant,
		# so anything that is not an integer is rejected here with a
		# readable message instead of a D compiler error. An optional
		# leading minus is still allowed so that integer values accepted
		# before this check keep working.
		if [[ "$OPTARG" != ?(-)+([0-9]) ]]; then
			echo "iotop: -t needs an integer line count, got '$OPTARG'" >&2
			usage
		fi
		opt_top=1; top=$OPTARG ;;
	Z)	opt_zone=1; opt_def=0 ;;
	h|?)	usage ;;
	esac
done

# Drop the parsed switches, leaving only [interval [count]].
shift $(( $OPTIND - 1 ))
### option logic
# Optional positional arguments: [interval [count]].
#
# Both values are pasted into the D program as integer constants, so they
# are matched against an integer pattern rather than compared as strings
# (a string comparison also lets through values such as "abc" or "5x").
# Leading zeros are stripped so that D does not read the number as octal.
# An argument consisting only of zeros is ignored and left in place, which
# keeps the default; anything else that is not a positive integer is an
# error.
if [[ "$1" = *(0)[1-9]*([0-9]) ]]; then
	interval=${1##+(0)}; shift
elif [[ -n "$1" && "$1" != +(0) ]]; then
	echo "iotop: interval must be a positive integer, got '$1'" >&2
	exit 1
fi
if [[ "$1" = *(0)[1-9]*([0-9]) ]]; then
	count=${1##+(0)}; shift
elif [[ -n "$1" && "$1" != +(0) ]]; then
	echo "iotop: count must be a positive integer, got '$1'" >&2
	exit 1
fi
# Resolve option combinations that cannot both apply.
# Zone ID wins over project ID for the first output column.
(( opt_proj && opt_zone )) && opt_proj=0
# Disk delta time wins over elapsed time as the reported statistic.
(( opt_elapsed && opt_dtime )) && opt_elapsed=0

# Any of the device, mount point or file options turns filtering on.
(( opt_device || opt_mount || opt_file )) && filter=1

# String printed before every report: the output of clear(1) when screen
# clearing is enabled, otherwise the "." placeholder.
if (( opt_clear )); then
	clearstr=$(clear)
else
	clearstr=.
fi
#################################
# --- Main Program, DTrace ---
#
/usr/sbin/dtrace -n '
 /*
  * Command line arguments
  *
  * Each value below is spliced in from a shell variable by closing the
  * single-quoted D program, expanding the variable, and reopening the
  * quote. The expansions are double-quoted so that a value containing
  * blanks or glob characters (a file name, a mount point, the terminal
  * clear sequence) stays inside this one dtrace -n argument instead of
  * being split into separate words by the shell.
  * NOTE(review): a value that itself contains a double quote or a
  * backslash would still alter the D string literal it lands in.
  */
 inline int OPT_def      = '"$opt_def"';
 inline int OPT_proj     = '"$opt_proj"';
 inline int OPT_zone     = '"$opt_zone"';
 inline int OPT_clear    = '"$opt_clear"';
 inline int OPT_bytes    = '"$opt_bytes"';
 inline int OPT_elapsed  = '"$opt_elapsed"';
 inline int OPT_dtime    = '"$opt_dtime"';
 inline int OPT_percent  = '"$opt_percent"';
 inline int OPT_device   = '"$opt_device"';
 inline int OPT_mount    = '"$opt_mount"';
 inline int OPT_file     = '"$opt_file"';
 inline int OPT_top      = '"$opt_top"';
 inline int INTERVAL     = '"$interval"';
 inline int COUNTER      = '"$count"';
 inline int FILTER       = '"$filter"';
 inline int TOP          = '"$top"';
 inline string DEVICE    = "'"$device"'";
 inline string FILENAME  = "'"$filename"'";
 inline string MOUNT     = "'"$mount"'";
 inline string CLEAR     = "'"$clearstr"'";

 /* print only what the program itself prints */
 #pragma D option quiet
 /* boost the following if you get "dynamic variable drops" */
 #pragma D option dynvarsize=8m
/*
* Print header
*/
dtrace:::BEGIN
{
last_event[""] = 0;
/* starting values */
counts = COUNTER;
secs = INTERVAL;
disk_r = 0;
disk_w = 0;
printf("Tracing... Please wait.\n");
}
/*
* Check event is being traced
*/
io:genunix::start,
io:genunix::done
{
/* default is to trace unless filtering, */
this->ok = FILTER ? 0 : 1;
/* check each filter, */
(OPT_device == 1 && DEVICE == args[1]->dev_statname)? this->ok = 1 : 1;
(OPT_file == 1 && FILENAME == args[2]->fi_pathname) ? this->ok = 1 : 1;
(OPT_mount == 1 && MOUNT == args[2]->fi_mount) ? this->ok = 1 : 1;
}
/*
* Reset last_event for disk idle -> start
* this prevents idle time being counted as disk time.
*/
io:genunix::start
/! pending[args[1]->dev_statname]/
{
/* save last disk event */
last_event[args[1]->dev_statname] = timestamp;
}
/*
* Store entry details
*/
io:genunix::start
/this->ok/
{
/* these are used as a unique disk event key, */
this->dev = args[0]->b_edev;
this->blk = args[0]->b_blkno;
/* save disk event details, */
start_uid[this->dev, this->blk] = uid;
start_pid[this->dev, this->blk] = pid;
start_ppid[this->dev, this->blk] = ppid;
start_comm[this->dev, this->blk] = execname;
start_time[this->dev, this->blk] = timestamp;
start_proj[this->dev, this->blk] = curpsinfo->pr_projid;
start_zone[this->dev, this->blk] = curpsinfo->pr_zoneid;
start_rw[this->dev, this->blk] = args[0]->b_flags & B_READ ? "R" : "W";
disk_r += args[0]->b_flags & B_READ ? args[0]->b_bcount : 0;
disk_w += args[0]->b_flags & B_READ ? 0 : args[0]->b_bcount;
/* increase disk event pending count */
pending[args[1]->dev_statname]++;
}
/*
* Process and Print completion
*/
io:genunix::done
/this->ok/
{
/* decrease disk event pending count */
pending[args[1]->dev_statname]--;
/*
* Process details
*/
/* fetch entry values */
this->dev = args[0]->b_edev;
this->blk = args[0]->b_blkno;
this->suid = start_uid[this->dev, this->blk];
this->spid = start_pid[this->dev, this->blk];
this->sppid = start_ppid[this->dev, this->blk];
this->sproj = start_proj[this->dev, this->blk];
this->szone = start_zone[this->dev, this->blk];
self->scomm = start_comm[this->dev, this->blk];
this->stime = start_time[this->dev, this->blk];
this->etime = timestamp; /* endtime */
this->elapsed = this->etime - this->stime;
self->rw = start_rw[this->dev, this->blk];
this->dtime = last_event[args[1]->dev_statname] == 0 ? 0 :
timestamp - last_event[args[1]->dev_statname];
/* memory cleanup */
start_uid[this->dev, this->blk] = 0;
start_pid[this->dev, this->blk] = 0;
start_ppid[this->dev, this->blk] = 0;
start_time[this->dev, this->blk] = 0;
start_comm[this->dev, this->blk] = 0;
start_zone[this->dev, this->blk] = 0;
start_proj[this->dev, this->blk] = 0;
start_rw[this->dev, this->blk] = 0;
/*
* Choose statistic to track
*/
OPT_bytes ? this->value = args[0]->b_bcount : 1;
OPT_elapsed ? this->value = this->elapsed / 1000 : 1;
OPT_dtime ? this->value = this->dtime / 1000 : 1;
/*
* Save details
*/
OPT_def ? @out[this->suid, this->spid, this->sppid, self->scomm,
args[1]->dev_statname, args[1]->dev_major, args[1]->dev_minor,
self->rw] = sum(this->value) : 1;
OPT_proj ? @out[this->sproj, this->spid, this->sppid, self->scomm,
args[1]->dev_statname, args[1]->dev_major, args[1]->dev_minor,
self->rw] = sum(this->value) : 1;
OPT_zone ? @out[this->szone, this->spid, this->sppid, self->scomm,
args[1]->dev_statname, args[1]->dev_major, args[1]->dev_minor,
self->rw] = sum(this->value) : 1;
/* save last disk event */
last_event[args[1]->dev_statname] = timestamp;
self->scomm = 0;
self->rw = 0;
}
/*
* Prevent pending from underflowing
* this can happen if this program is started during disk events.
*/
io:genunix::done
/pending[args[1]->dev_statname] < 0/
{
pending[args[1]->dev_statname] = 0;
}
/*
* Timer
*/
profile:::tick-1sec
{
secs--;
}
/*
* Print Report
*/
profile:::tick-1sec
/secs == 0/
{
/* fetch 1 min load average */
this->load1a = `hp_avenrun[0] / 65536;
this->load1b = ((`hp_avenrun[0] % 65536) * 100) / 65536;
/* convert counters to Kbytes */
disk_r /= 1024;
disk_w /= 1024;
/* print status */
OPT_clear ? printf("%s", CLEAR) : 1;
printf("%Y, load: %d.%02d, disk_r: %6d KB, disk_w: %6d KB\n\n",
walltimestamp, this->load1a, this->load1b, disk_r, disk_w);
/* print headers */
OPT_def ? printf(" UID ") : 1;
OPT_proj ? printf(" PROJ ") : 1;
OPT_zone ? printf(" ZONE ") : 1;
printf("%6s %6s %-16s %-7s %3s %3s %1s",
"PID", "PPID", "CMD", "DEVICE", "MAJ", "MIN", "D");
OPT_bytes ? printf(" %16s\n", "BYTES") : 1;
OPT_elapsed ? printf(" %16s\n", "ELAPSED") : 1;
OPT_dtime && ! OPT_percent ? printf(" %16s\n", "DISKTIME") : 1;
OPT_dtime && OPT_percent ? printf(" %6s\n", "%I/O") : 1;
/* truncate to top lines if needed */
OPT_top ? trunc(@out, TOP) : 1;
/* normalise to percentage if needed */
OPT_percent ? normalize(@out, INTERVAL * 10000) : 1;
/* print data */
! OPT_percent ?
printa("%5d %6d %6d %-16s %-7s %3d %3d %1s %16@d\n", @out) :
printa("%5d %6d %6d %-16s %-7s %3d %3d %1s %6@d\n", @out);
printf("\n");
/* clear data */
trunc(@out);
disk_r = 0;
disk_w = 0;
secs = INTERVAL;
counts--;
}
/*
* End of program
*/
profile:::tick-1sec
/counts == 0/
{
exit(0);
}
/*
* Cleanup for Ctrl-C
*/
dtrace:::END
{
trunc(@out);
}
'