1N/A#!/usr/bin/ksh
1N/A#
1N/A# dexplorer - DTrace system explorer, runs a collection of scripts.
1N/A# Written using DTrace (Solaris 10 3/05).
1N/A#
1N/A# This program automatically runs a collection of DTrace scripts to examine
1N/A# many areas of the system, and places the output in a meaningful directory
1N/A# structure that is tar'd and gzip'd.
1N/A#
1N/A# $Id: dexplorer 3 2007-08-01 10:50:08Z brendan $
1N/A#
1N/A# USAGE: dexplorer [-qyDT] [-d outputdir] [-i interval]
1N/A#
1N/A# -q # quiet mode
1N/A# -y # "yes", don't prompt for confirmation
1N/A# -D # don't delete output dir
1N/A# -T # don't create output tar.gz
1N/A# -d outputdir # output directory
1N/A# -i interval # interval for each sample
1N/A# eg,
1N/A# dexplorer # default is 5 second samples
1N/A# dexplorer -y -i30 # no prompting, with 30 second samples
1N/A#
1N/A# SEE ALSO: DTraceToolkit
1N/A#
1N/A# THANKS: David Visser, et al. for the idea and encouragement.
1N/A#
1N/A# COPYRIGHT: Copyright (c) 2005 Brendan Gregg.
1N/A#
1N/A# CDDL HEADER START
1N/A#
1N/A# The contents of this file are subject to the terms of the
1N/A# Common Development and Distribution License, Version 1.0 only
1N/A# (the "License"). You may not use this file except in compliance
1N/A# with the License.
1N/A#
1N/A# You can obtain a copy of the license at Docs/cddl1.txt
1N/A# or http://www.opensolaris.org/os/licensing.
1N/A# See the License for the specific language governing permissions
1N/A# and limitations under the License.
1N/A#
1N/A# CDDL HEADER END
1N/A#
1N/A# CODE:
1N/A#
1N/A# This is currently a monolithic script, and while it contains only
1N/A# a few dozen straightforward DTrace scripts I think it's desirable to
1N/A# keep it that way. The scripts themselves have been designed to be very
1N/A# generic (eg, switching on all sdt:::), and are aggregations to keep a
1N/A# limit on the size of the output.
1N/A#
1N/A# Author: Brendan Gregg [Sydney, Australia]
1N/A#
1N/A# 23-Jun-2005 Brendan Gregg Created this.
1N/A# 28-Jun-2005 " " Last update.
1N/A
1N/A#
1N/A# Default variables
1N/A#
# Built-in defaults. Several of these are overridden by the command-line
# options parsed further down (-i, -q, -y, -T, -D, -d).
interval=5                      # time of each sample
verbose=1                       # print screen output
prompt=1                        # prompt before run
tar=1                           # create tar file
delete=1                        # delete output dirs
dtrace=/usr/sbin/dtrace         # path to dtrace
root=.                          # default output dir
PATH=/usr/bin:/usr/sbin         # safe path
# The output name is built after PATH is pinned, so uname and date are the
# system copies. Resolution is one minute: de_<nodename>_<YYYYmmddHHMM>.
dir="de_$(uname -n)_$(date +%Y%m%d%H%M)"    # OUTPUT FILENAME
samples=20                      # max number of tests
current=0                       # current sample
1N/A
1N/A#
1N/A# Process options
1N/A#
# Parse the command line with getopts. -d and -i take an argument (the ':'
# after the letter in the option string); -q, -y, -D and -T clear the
# verbose, prompt, delete and tar flags respectively. -h, or any option
# getopts rejects (it sets name to '?'), prints the usage text to stderr
# and exits with status 1.
1N/Awhile getopts d:hi:qyDT name
1N/Ado
1N/A case $name in
1N/A d) root=$OPTARG ;;
1N/A i) interval=$OPTARG ;;
1N/A q) verbose=0 ;;
1N/A y) prompt=0 ;;
1N/A D) delete=0 ;;
1N/A T) tar=0 ;;
1N/A h|?) cat <<-END >&2
1N/A USAGE: dexplorer [-qyDT] [-d outputdir] [-i interval]
1N/A
1N/A -q # quiet mode
1N/A -y # "yes", don't prompt for confirmation
1N/A -D # don't delete output dir
1N/A -T # don't create output tar.gz
1N/A -d outputdir # output directory
1N/A -i interval # interval for each sample
1N/A eg,
1N/A dexplorer # default is 5 second samples
1N/A dexplorer -y -i30 # no prompting, with 30 second samples
1N/A END
1N/A exit 1
1N/A esac
1N/Adone
# Discard the options that were just parsed, leaving only any operands in
# the positional parameters.
1N/Ashift $(( OPTIND - 1 ))
1N/A
1N/A#
1N/A# Confirm path
1N/A#
# confirm_root - interactively confirm or replace the output directory.
#
# Globals:  root (read and possibly rewritten)
# Input:    one line from stdin; only the first word is used.
# Outputs:  prompt and status messages on stdout.
#
# An empty answer keeps the current $root. A bare "y"/"yes" answer is
# treated as a misunderstanding of the prompt: a warning is printed and the
# answer really is ignored (previously the warning said "ignoring" but the
# answer was still installed as the output directory). Any other answer
# becomes the new $root; its existence is verified by the sanity checks
# that follow.
function confirm_root {
	typeset ans junk

	if [[ "$root" == "." ]]; then
		print "Output dir will be the current dir ($PWD)."
	else
		print "Output dir will be $root"
	fi
	print -n "Hit enter for yes, or type path: "
	read ans junk
	if [[ "$ans" == [yY] || "$ans" == [yY]es ]]; then
		print "WARNING: I didn't ask for \"$ans\"!"
		print "\tI was asking for the path or just enter."
		print "\tignoring \"$ans\"..."
	elif [[ -n "$ans" ]]; then
		root=$ans
		print "Output is now $root."
	fi
}

# Only ask when prompting has not been disabled with -y.
if [[ "$prompt" == "1" ]]; then
	confirm_root
fi
1N/A
1N/A#
1N/A# Sanity checks
1N/A#
# The interval is spliced into every D script as "tick-<interval>sec" and is
# used in shell arithmetic, so it must be a positive whole number of
# seconds. Reject anything containing a non-digit (letters, signs, decimal
# points), not just letters.
if [[ "$interval" == *[!0-9]* ]]; then
	print "ERROR2: Invalid interval $interval.\n"
	print "Please use a number of seconds."
	exit 2
fi
# Strip leading zeros so a value such as "08" cannot be misread as octal by
# a shell's arithmetic evaluation later on.
while [[ "$interval" == 0?* ]]; do
	interval=${interval#0}
done
# An empty value or zero is too short; the old test only looked at the
# length of the string and therefore accepted "0".
if [[ -z "$interval" || "$interval" == 0 ]]; then
	print "ERROR3: Interval \"$interval\" too short.\n"
	print "Minimum 1 second."
	exit 3
fi
if [[ ! -d "$root" ]]; then
	print "ERROR4: Output directory \"$root\" does not exist.\n"
	print "Perhaps try a mkdir first?"
	print "or use an existing dir, eg \"/tmp\""
	exit 4
fi
if [[ ! -w "$root" ]]; then
	print "ERROR5: Can't write to output directory \"$root\".\n"
	print "Are you logged in as root?"
	print "Perhaps try another directory, eg \"/tmp\""
	exit 5
fi
# Smoke-test dtrace: a trivial BEGIN probe that prints the pid and exits.
# No output at all means dtrace could not run.
if [[ -z $("$dtrace" -b1k -qn 'BEGIN { trace(pid); exit(0); }') ]]; then
	print "ERROR6: Unable to run dtrace!\n"
	print "Perhaps this is a permission problem? Try running as root."
	exit 6
fi
1N/A
1N/A# calculate total time
# Estimated run length for the banner: every sample runs for $interval
# seconds. Anything over three minutes is reported in whole minutes,
# shorter runs in seconds.
if (( interval * samples > 180 )); then
	total="$(( interval * samples / 60 )) minutes"
else
	total="$(( interval * samples )) seconds"
fi
1N/A
1N/A#
1N/A# Common Functions
1N/A#
# decho - print the arguments as one line, but only when $verbose is
# non-zero (i.e. -q was not given). Always returns 0 in quiet mode.
function decho {
	(( verbose )) || return 0
	print "$*"
}
# Output filter used by every test below: sed deletes empty lines. It is
# expanded unquoted ("| $clean") so the shell splits it into the command
# and its argument.
1N/Aclean="sed /^\$/d"
# D prologue that is prepended to every DTrace script below. Its BEGIN
# clause prints a one-line banner (wall-clock time, the utsname fields and
# the sample length), and a tick-<interval>sec probe calls exit(0) so each
# script stops itself after one interval. The shell value of $interval is
# spliced in by closing and reopening the single quotes.
1N/Aheader='dtrace:::BEGIN {
1N/A printf("%Y, ", walltimestamp);
1N/A printf("%s %s %s %s %s, ", `utsname.sysname, `utsname.nodename,
1N/A `utsname.release, `utsname.version, `utsname.machine);
1N/A printf("%d secs\n",'$interval');
1N/A }
1N/A profile:::tick-'$interval'sec { exit(0); }
1N/A '
# dstatus - print a progress line "<NNN>% <message>" for the test that is
# about to start, then advance the progress counter.
#
# Globals:   verbose, samples (read); current (read and incremented)
# Arguments: the status message (all arguments are joined with spaces)
# Outputs:   one line on stdout when verbose is non-zero, nothing otherwise
#
# The message is passed to printf as data (%s) instead of being pasted into
# the format string, so a '%' in the message can no longer be misread as a
# conversion specification.
function dstatus {
	typeset percent

	if (( verbose )); then
		(( percent = current * 100 / samples ))
		printf '%3d%% %s\n' "$percent" "$*"
		(( current = current + 1 ))
	fi
}
1N/A
1N/A########################################
1N/A# START #
1N/A########################################
1N/A
1N/A#
1N/A# Make dirs
1N/A#
# Create the output directory under $root and move into it. The steps are
# chained so that a failed cd can no longer be followed by a mkdir in the
# wrong place, and the paths are quoted so names with spaces work.
if cd "$root" && mkdir "$dir" && cd "$dir"; then
	err=0
else
	err=1
fi
# Belt and braces: confirm that we really ended up inside the new directory
# before anything is written (and, later, removed).
base1=${PWD##*/}
base2=${dir##*/}
if [[ "$base1" != "$base2" || "$err" != "0" ]]; then
	print "ERROR7: tried to mkdir $dir from $root, but something failed.\n"
	print "Check directories before rerunning."
	exit 7
fi
# One sub-directory per test category plus Info for the static system data.
# Every later redirect depends on these, so stop if they cannot be made.
mkdir Cpu Disk Mem Net Proc Info || {
	print "ERROR7: unable to create the output sub-directories in $PWD.\n"
	print "Check directories before rerunning."
	exit 7
}
1N/A#
1N/A# Create Log
1N/A#
# Announce the run on screen (unless quiet) ...
decho "Starting dexplorer ver 0.76."
decho "Sample interval is $interval seconds. Total run is > $total."
# ... and start the log file with the version, system identity and start
# time. The closing "End:" stamp is appended once all tests have run.
(
	print "dexplorer ver 0.76\n------------------"
	print -n "System: "; uname -a
	print -n "Start: "; date
) > log
1N/A
1N/A#
1N/A# Capture Standard Info
1N/A#
# Column list handed to "ps -o" for the process snapshot.
args='pid,ppid,uid,gid,projid,zoneid,pset,pri,nice,class,vsz,rss,time,pcpu,pmem,args'

# Static, non-DTrace snapshots of the system, one file per command.
# System
uname -a > Info/uname-a
# CPU
psrinfo -v > Info/psrinfo-v
# Memory (+ devices)
prtconf > Info/prtconf
# Disk
df -k > Info/df-k
# Network
ifconfig -a > Info/ifconfig-a
# Processes
ps -eo "$args" > Info/ps-o
# Load
uptime > Info/uptime
1N/A
1N/A#
1N/A# Cpu Tests, DTrace
1N/A#
1N/A
# Each test below follows the same pattern: dstatus reports progress, then
# dtrace runs "$header" (banner + self-terminating tick probe) followed by
# the test's own clauses, and the output is stripped of blank lines by
# $clean and stored under Cpu/.

# Count sdt interrupt-start firings, keyed by the CPU they fired on.
1N/Adstatus "Interrupts by CPU..."
1N/A$dtrace -qn "$header"'
1N/A sdt:::interrupt-start { @num[cpu] = count(); }
1N/A dtrace:::END
1N/A {
1N/A printf("%-16s %16s\n", "CPU", "INTERRUPTS");
1N/A printa("%-16d %@16d\n", @num);
1N/A }
1N/A' | $clean > Cpu/interrupt_by_cpu
1N/A
# Sum the vtimestamp delta between interrupt-start and interrupt-complete,
# keyed by driver name and instance taken from the dev_info pointer in arg0.
# The second interrupt-complete clause always clears self->ts.
1N/Adstatus "Interrupt times..."
1N/A$dtrace -qn "$header"'
1N/A sdt:::interrupt-start { self->ts = vtimestamp; }
1N/A sdt:::interrupt-complete
1N/A /self->ts && arg0 != 0/
1N/A {
1N/A this->devi = (struct dev_info *)arg0;
1N/A self->name = this->devi != 0 ?
1N/A stringof(`devnamesp[this->devi->devi_major].dn_name) : "?";
1N/A this->inst = this->devi != 0 ? this->devi->devi_instance : 0;
1N/A @num[self->name, this->inst] = sum(vtimestamp - self->ts);
1N/A self->name = 0;
1N/A }
1N/A sdt:::interrupt-complete { self->ts = 0; }
1N/A dtrace:::END
1N/A {
1N/A printf("%11s %16s\n", "DEVICE", "TIME (ns)");
1N/A printa("%10s%-3d %@16d\n", @num);
1N/A }
1N/A' | $clean > Cpu/interrupt_time
1N/A
# Sample disp_nrunnable of the current CPU's dispatcher queue on the
# profile-1000 probe and record it per CPU as a linear distribution
# (0..100, step 1).
1N/Adstatus "Dispatcher queue length by CPU..."
1N/A$dtrace -qn "$header"'
1N/A profile:::profile-1000
1N/A {
1N/A this->num = curthread->t_cpu->cpu_disp->disp_nrunnable;
1N/A @length[cpu] = lquantize(this->num, 0, 100, 1);
1N/A }
1N/A dtrace:::END { printa(" CPU %d%@d\n", @length); }
1N/A' | $clean > Cpu/dispqlen_by_cpu
1N/A
# Enable every sdt probe and count firings by probe function and name.
1N/Adstatus "Sdt counts..."
1N/A$dtrace -qn "$header"'
1N/A sdt:::{ @num[probefunc, probename] = count(); }
1N/A dtrace:::END
1N/A {
1N/A printf("%-32s %-32s %10s\n", "FUNC", "NAME", "COUNT");
1N/A printa("%-32s %-32s %@10d\n", @num);
1N/A }
1N/A' | $clean > Cpu/sdt_count
1N/A
1N/A#
1N/A# Disk Tests, DTrace
1N/A#
1N/A
# Sum arg0 of vminfo:::pgpgin per pid and command name.
1N/Adstatus "Pages paged in by process..."
1N/A$dtrace -qn "$header"'
1N/A vminfo:::pgpgin { @pg[pid, execname] = sum(arg0); }
1N/A dtrace:::END
1N/A {
1N/A printf("%6s %-16s %16s\n", "PID", "CMD", "PAGES");
1N/A printa("%6d %-16s %@16d\n", @pg);
1N/A }
1N/A' | $clean > Disk/pgpgin_by_process
1N/A
# Remember the path (arg1) on entry to any openat* syscall and count it on
# return only when the return value is not -1; the last clause resets the
# thread-local state whether or not the open succeeded.
1N/Adstatus "Files opened successfully count..."
1N/A$dtrace -qn "$header"'
1N/A syscall::openat*:entry { self->file = copyinstr(arg1); self->ok = 1; }
1N/A syscall::openat*:return /self->ok && arg0 != -1/
1N/A {
1N/A @num[self->file] = count();
1N/A }
1N/A syscall::openat*:return /self->ok/ { self->file = 0; self->ok = 0; }
1N/A dtrace:::END
1N/A {
1N/A printf("%-64s %8s\n", "FILE", "COUNT");
1N/A printa("%-64s %@8d\n", @num);
1N/A }
1N/A' | $clean > Disk/fileopen_count
1N/A
# Power-of-two distribution of b_bcount at io:::start per pid and command.
# There is no END clause here, so the output is whatever dtrace prints for
# the aggregation by itself when the script exits.
1N/Adstatus "Disk I/O size distribution by process..."
1N/A$dtrace -qn "$header"'
1N/A io:::start { @size[pid, execname] = quantize(args[0]->b_bcount); }
1N/A' | $clean > Disk/sizedist_by_process
1N/A
1N/A#
1N/A# Mem Tests, DTrace
1N/A#
1N/A
# Sum arg0 of vminfo:::as_fault per pid and command name; the report
# labels this column MINFAULTS.
1N/Adstatus "Minor faults by process..."
1N/A$dtrace -qn "$header"'
1N/A vminfo:::as_fault { @mem[pid, execname] = sum(arg0); }
1N/A dtrace:::END
1N/A {
1N/A printf("%6s %-16s %16s\n", "PID", "CMD", "MINFAULTS");
1N/A printa("%6d %-16s %@16d\n", @mem);
1N/A }
1N/A' | $clean > Mem/minf_by_process
1N/A
1N/A
# Enable every vminfo probe and sum arg0 per pid, command name and probe
# name (reported as STATISTIC).
1N/Adstatus "Vminfo data by process..."
1N/A$dtrace -qn "$header"'
1N/A vminfo::: { @data[pid, execname, probename] = sum(arg0); }
1N/A dtrace:::END
1N/A {
1N/A printf("%6s %-16s %-16s %16s\n",
1N/A "PID", "CMD", "STATISTIC", "VALUE");
1N/A printa("%6d %-16s %-16s %@16d\n", @data);
1N/A }
1N/A' | $clean > Mem/vminfo_by_process
1N/A
1N/A#
1N/A# Net Tests, DTrace
1N/A#
1N/A
# Enable every mib probe and sum arg0 per probe name (system-wide, not per
# process).
1N/Adstatus "Mib data by mib statistic..."
1N/A$dtrace -qn "$header"'
1N/A mib::: { @data[probename] = sum(arg0); }
1N/A dtrace:::END
1N/A {
1N/A printf("%-32s %16s\n", "STATISTIC", "VALUE");
1N/A printa("%-32s %@16d\n", @data);
1N/A }
1N/A' | $clean > Net/mib_data
1N/A
# Sum msgdsize() of the second argument to the kernel function tcp_output
# (fbt probe in module ip) per pid and command name.
# NOTE(review): this depends on the kernel's tcp_output signature, which is
# not visible here - confirm on the target release.
1N/Adstatus "TCP write bytes by process..."
1N/A$dtrace -qn "$header"'
1N/A fbt:ip:tcp_output:entry
1N/A {
1N/A this->size = msgdsize(args[1]);
1N/A @size[pid, execname] = sum(this->size);
1N/A }
1N/A dtrace:::END
1N/A {
1N/A printf("%6s %-16s %12s\n", "PID", "CMD", "BYTES");
1N/A printa("%6d %-16s %@12d\n", @size);
1N/A }
1N/A' | $clean > Net/tcpw_by_process
1N/A
1N/A#
1N/A# Proc Tests, DTrace
1N/A#
1N/A
# Count profile-1000 samples per pid and process argument string
# (curpsinfo->pr_psargs), i.e. which processes were on CPU when sampled.
1N/Adstatus "Sample process @ 1000 Hz..."
1N/A$dtrace -qn "$header"'
1N/A profile:::profile-1000
1N/A {
1N/A @num[pid, curpsinfo->pr_psargs] = count();
1N/A }
1N/A dtrace:::END
1N/A {
1N/A printf("%6s %12s %s\n", "PID", "SAMPLES", "ARGS");
1N/A printa("%6d %@12d %S\n", @num);
1N/A }
1N/A' | $clean > Proc/sample_process
1N/A
# Count system call entries per pid, command name and syscall name.
1N/Adstatus "Syscall count by process..."
1N/A$dtrace -qn "$header"'
1N/A syscall:::entry { @num[pid, execname, probefunc] = count(); }
1N/A dtrace:::END
1N/A {
1N/A printf("%6s %-24s %-24s %8s\n",
1N/A "PID", "CMD", "SYSCALL", "COUNT");
1N/A printa("%6d %-24s %-24s %@8d\n", @num);
1N/A }
1N/A' | $clean > Proc/syscall_by_process
1N/A
# Count system call entries per syscall name only (system-wide totals).
1N/Adstatus "Syscall count by syscall..."
1N/A$dtrace -qn "$header"'
1N/A syscall:::entry { @num[probefunc] = count(); }
1N/A dtrace:::END
1N/A {
1N/A printf("%-32s %16s\n", "SYSCALL", "COUNT");
1N/A printa("%-32s %@16d\n", @num);
1N/A }
1N/A' | $clean > Proc/syscall_count
1N/A
# Sum arg0 of sysinfo:::readch per pid and command name (reported as BYTES).
1N/Adstatus "Read bytes by process..."
1N/A$dtrace -qn "$header"'
1N/A sysinfo:::readch { @bytes[pid, execname] = sum(arg0); }
1N/A dtrace:::END
1N/A {
1N/A printf("%6s %-16s %16s\n", "PID", "CMD", "BYTES");
1N/A printa("%6d %-16s %@16d\n", @bytes);
1N/A }
1N/A' | $clean > Proc/readb_by_process
1N/A
# Sum arg0 of sysinfo:::writech per pid and command name (reported as BYTES).
1N/Adstatus "Write bytes by process..."
1N/A$dtrace -qn "$header"'
1N/A sysinfo:::writech { @bytes[pid, execname] = sum(arg0); }
1N/A dtrace:::END
1N/A {
1N/A printf("%6s %-16s %16s\n", "PID", "CMD", "BYTES");
1N/A printa("%6d %-16s %@16d\n", @bytes);
1N/A }
1N/A' | $clean > Proc/writeb_by_process
1N/A
# Enable every sysinfo probe and sum arg0 per pid, command name and probe
# name (reported as STATISTIC).
1N/Adstatus "Sysinfo counts by process..."
1N/A$dtrace -qn "$header"'
1N/A sysinfo::: { @num[pid, execname, probename] = sum(arg0); }
1N/A dtrace:::END
1N/A {
1N/A printf("%6s %-16s %-16s %16s\n",
1N/A "PID", "CMD", "STATISTIC", "COUNT");
1N/A printa("%6d %-16s %-16s %@16d\n", @num);
1N/A }
1N/A' | $clean > Proc/sysinfo_by_process
1N/A
# Count successful exec events per pid, parent pid and argument string.
1N/Adstatus "New process counts with arguments..."
1N/A$dtrace -qn "$header"'
1N/A proc:::exec-success
1N/A {
1N/A @num[pid, ppid, curpsinfo->pr_psargs] = count();
1N/A }
1N/A dtrace:::END
1N/A {
1N/A printf("%6s %6s %8s %s\n", "PID", "PPID", "COUNT", "ARGS");
1N/A printa("%6d %6d %@8d %S\n", @num);
1N/A }
1N/A' | $clean > Proc/newprocess_count
1N/A
# Count signals sent, keyed by sending command name, signal number
# (args[2]) and the receiving process name (args[1]->pr_fname).
1N/Adstatus "Signal counts..."
1N/A$dtrace -qn "$header"'
1N/A proc:::signal-send {
1N/A @num[execname,args[2],stringof(args[1]->pr_fname)] = count();
1N/A }
1N/A dtrace:::END
1N/A {
1N/A printf("%-16s %-8s %-16s %8s\n",
1N/A "FROM", "SIG", "TO", "COUNT");
1N/A printa("%-16s %-8d %-16s %@8d\n", @num);
1N/A }
1N/A' | $clean > Proc/signal_count
1N/A
# Count system calls whose return value, cast to int, is -1, keyed by pid,
# command name, syscall name and errno.
1N/Adstatus "Syscall error counts..."
1N/A$dtrace -qn "$header"'
1N/A syscall:::return /(int)arg0 == -1/
1N/A {
1N/A @num[pid, execname, probefunc, errno] = count();
1N/A }
1N/A dtrace:::END
1N/A {
1N/A printf("%6s %-16s %-32s %-6s %8s\n",
1N/A "PID", "CMD", "SYSCALL", "ERRNO", "COUNT");
1N/A printa("%6d %-16s %-32s %-6d %@8d\n", @num);
1N/A }
1N/A' | $clean > Proc/syscall_errors
1N/A
1N/A
1N/A###########
1N/A# Done
1N/A#
# archive_output - pack directory $1 (relative to the current directory)
# into $1.tar and compress it to $1.tar.gz.
#
# Arguments: $1 - name of the directory to archive
# Returns:   0 only if both tar and gzip succeed; gzip is not attempted
#            when tar fails.
function archive_output {
	tar cf "$1.tar" "$1" && gzip "$1.tar"
}

# Close the log with the end time and report completion.
( print -n "End: "
date ) >> log
decho "100% Done."

# Build the archive from the parent directory. "archived" records whether
# it really exists, so the raw output is never deleted after a failed
# tar or gzip (for example when a stale $dir.tar.gz is in the way).
archived=0
if (( tar )); then
	cd ..
	if archive_output "$dir"; then
		archived=1
		decho "File is $dir.tar.gz"
	else
		print "ERROR8: unable to create $dir.tar.gz, keeping $dir.\n" >&2
	fi
fi

if (( delete && archived )); then
	# Never run the removals below from the wrong directory.
	cd "$dir" || exit 8
	# this could all be an "rm -r $dir", but since it will be run
	# as root on production servers - let's be overly cautious and
	# remove only the files this script created, by name.
	rm Cpu/interrupt_by_cpu Cpu/interrupt_time Cpu/dispqlen_by_cpu \
	    Cpu/sdt_count
	rm Disk/pgpgin_by_process Disk/fileopen_count Disk/sizedist_by_process
	rm Mem/minf_by_process Mem/vminfo_by_process
	rm Net/mib_data Net/tcpw_by_process
	rm Proc/sample_process Proc/syscall_by_process Proc/syscall_count \
	    Proc/readb_by_process Proc/writeb_by_process \
	    Proc/sysinfo_by_process Proc/newprocess_count \
	    Proc/signal_count Proc/syscall_errors
	rmdir Cpu
	rmdir Disk
	rmdir Mem
	rmdir Net
	rmdir Proc
	rm Info/uname-a Info/psrinfo-v Info/prtconf Info/df-k \
	    Info/ifconfig-a Info/ps-o Info/uptime
	rmdir Info
	rm log
	cd ..
	rmdir "$dir"
else
	decho "Directory is $dir"
	# An archive was requested but could not be written: signal failure.
	if (( tar && ! archived )); then
		exit 8
	fi
fi
1N/A