0N/A/*
0N/A * CDDL HEADER START
0N/A *
0N/A * The contents of this file are subject to the terms
0N/A * of the Common Development and Distribution License
0N/A * (the "License"). You may not use this file except
0N/A * in compliance with the License.
0N/A *
0N/A * You can obtain a copy of the license at
0N/A * src/OPENSOLARIS.LICENSE
0N/A * or http://www.opensolaris.org/os/licensing.
0N/A * See the License for the specific language governing
0N/A * permissions and limitations under the License.
0N/A *
0N/A * When distributing Covered Code, include this CDDL
0N/A * HEADER in each file and include the License file at
0N/A * usr/src/OPENSOLARIS.LICENSE. If applicable,
0N/A * add the following below this CDDL HEADER, with the
0N/A * fields enclosed by brackets "[]" replaced with your
0N/A * own identifying information: Portions Copyright [yyyy]
0N/A * [name of copyright owner]
0N/A *
0N/A * CDDL HEADER END
0N/A */
0N/A
0N/A/*
9N/A * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
0N/A * Use is subject to license terms.
0N/A */
0N/A
0N/A/*
0N/A * benchmarking routines
0N/A */
0N/A
0N/A#include <sys/types.h>
0N/A#include <sys/time.h>
0N/A#include <sys/ipc.h>
0N/A#include <sys/sem.h>
0N/A#include <sys/mman.h>
0N/A#include <sys/wait.h>
0N/A#include <ctype.h>
0N/A#include <string.h>
0N/A#include <strings.h>
0N/A#include <signal.h>
0N/A#include <stdio.h>
0N/A#include <unistd.h>
0N/A#include <stdlib.h>
0N/A#include <poll.h>
0N/A#include <pthread.h>
0N/A#include <dlfcn.h>
0N/A#include <errno.h>
0N/A#include <sys/resource.h>
0N/A#include <math.h>
0N/A#include <limits.h>
0N/A
0N/A#ifdef __sun
0N/A#include <sys/elf.h>
0N/A#endif
0N/A
0N/A#include "libmicro.h"
0N/A
0N/A
0N/A/*
0N/A * user visible globals
0N/A */
0N/A
0N/Aint lm_argc = 0;
0N/Achar ** lm_argv = NULL;
0N/A
0N/Aint lm_opt1;
0N/Aint lm_optA;
0N/Aint lm_optB;
0N/Aint lm_optC = 100;
0N/Aint lm_optD;
0N/Aint lm_optE;
0N/Aint lm_optH;
0N/Aint lm_optI;
0N/Aint lm_optL = 0;
0N/Aint lm_optM = 0;
0N/Achar *lm_optN;
0N/Aint lm_optP;
0N/Aint lm_optS;
0N/Aint lm_optT;
0N/Aint lm_optW;
0N/A
0N/Aint lm_def1 = 0;
0N/Aint lm_defB = 0; /* use lm_nsecs_per_op */
0N/Aint lm_defD = 10;
0N/Aint lm_defH = 0;
0N/Achar *lm_defN = NULL;
0N/Aint lm_defP = 1;
0N/A
0N/Aint lm_defS = 0;
0N/Aint lm_defT = 1;
0N/A
0N/A/*
9N/A * default on fast platform, should be overridden by individual
0N/A * benchmarks if significantly wrong in either direction.
0N/A */
0N/A
0N/Aint lm_nsecs_per_op = 5;
0N/A
0N/Achar *lm_procpath;
0N/Achar lm_procname[STRSIZE];
0N/Achar lm_usage[STRSIZE];
0N/Achar lm_optstr[STRSIZE];
0N/Achar lm_header[STRSIZE];
0N/Asize_t lm_tsdsize = 0;
0N/A
0N/A
0N/A/*
0N/A * Globals we do not export to the user
0N/A */
0N/A
0N/Astatic barrier_t *lm_barrier;
0N/Astatic pid_t *pids = NULL;
0N/Astatic pthread_t *tids = NULL;
0N/Astatic int pindex = -1;
0N/Astatic void *tsdseg = NULL;
0N/Astatic size_t tsdsize = 0;
0N/A
0N/A#ifdef USE_RDTSC
0N/Astatic long long lm_hz = 0;
0N/A#endif
0N/A
0N/A
0N/A/*
0N/A * Forward references
0N/A */
0N/A
0N/Astatic void worker_process();
0N/Astatic void usage();
0N/Astatic void print_stats(barrier_t *);
0N/Astatic void print_histo(barrier_t *);
9N/Astatic int remove_outliers(double *, int, stats_t *);
0N/Astatic long long nsecs_overhead;
0N/Astatic long long nsecs_resolution;
0N/Astatic long long get_nsecs_overhead();
0N/Astatic int crunch_stats(double *, int, stats_t *);
0N/Astatic void compute_stats(barrier_t *);
0N/A/*
0N/A * main routine; renamed in this file to allow linking with other
0N/A * files
0N/A */
0N/A
0N/Aint
0N/Aactual_main(int argc, char *argv[])
0N/A{
0N/A int i;
0N/A int opt;
0N/A extern char *optarg;
0N/A char *tmp;
0N/A char optstr[256];
0N/A barrier_t *b;
10N/A long long startnsecs;
0N/A
0N/A#ifdef USE_RDTSC
0N/A if (getenv("LIBMICRO_HZ") == NULL) {
0N/A (void) printf("LIBMICRO_HZ needed but not set\n");
0N/A exit(1);
0N/A }
0N/A lm_hz = strtoll(getenv("LIBMICRO_HZ"), NULL, 10);
0N/A#endif
0N/A
10N/A startnsecs = getnsecs();
10N/A
0N/A lm_argc = argc;
0N/A lm_argv = argv;
0N/A
0N/A /* before we do anything */
0N/A (void) benchmark_init();
0N/A
0N/A
0N/A nsecs_overhead = get_nsecs_overhead();
0N/A nsecs_resolution = get_nsecs_resolution();
0N/A
0N/A /*
0N/A * Set defaults
0N/A */
0N/A
0N/A lm_opt1 = lm_def1;
0N/A lm_optB = lm_defB;
0N/A lm_optD = lm_defD;
0N/A lm_optH = lm_defH;
0N/A lm_optN = lm_defN;
0N/A lm_optP = lm_defP;
0N/A
0N/A lm_optS = lm_defS;
0N/A lm_optT = lm_defT;
0N/A
0N/A /*
0N/A * squirrel away the path to the current
0N/A * binary in a way that works on both
0N/A * Linux and Solaris
0N/A */
0N/A
0N/A if (*argv[0] == '/') {
0N/A lm_procpath = strdup(argv[0]);
0N/A *strrchr(lm_procpath, '/') = 0;
0N/A } else {
0N/A char path[1024];
0N/A (void) getcwd(path, 1024);
0N/A (void) strcat(path, "/");
0N/A (void) strcat(path, argv[0]);
0N/A *strrchr(path, '/') = 0;
0N/A lm_procpath = strdup(path);
0N/A }
0N/A
0N/A /*
0N/A * name of binary
0N/A */
0N/A
0N/A if ((tmp = strrchr(argv[0], '/')) == NULL)
0N/A (void) strcpy(lm_procname, argv[0]);
0N/A else
0N/A (void) strcpy(lm_procname, tmp + 1);
0N/A
0N/A if (lm_optN == NULL) {
0N/A lm_optN = lm_procname;
0N/A }
0N/A
0N/A /*
0N/A * Parse command line arguments
0N/A */
0N/A
0N/A (void) sprintf(optstr, "1AB:C:D:EHI:LMN:P:RST:VW?%s", lm_optstr);
0N/A while ((opt = getopt(argc, argv, optstr)) != -1) {
0N/A switch (opt) {
0N/A case '1':
0N/A lm_opt1 = 1;
0N/A break;
0N/A case 'A':
0N/A lm_optA = 1;
0N/A break;
0N/A case 'B':
0N/A lm_optB = sizetoint(optarg);
0N/A break;
0N/A case 'C':
0N/A lm_optC = sizetoint(optarg);
0N/A break;
0N/A case 'D':
0N/A lm_optD = sizetoint(optarg);
0N/A break;
0N/A case 'E':
0N/A lm_optE = 1;
0N/A break;
0N/A case 'H':
0N/A lm_optH = 1;
0N/A break;
0N/A case 'I':
0N/A lm_optI = sizetoint(optarg);
0N/A break;
0N/A case 'L':
0N/A lm_optL = 1;
0N/A break;
0N/A case 'M':
0N/A lm_optM = 1;
0N/A break;
0N/A case 'N':
0N/A lm_optN = optarg;
0N/A break;
0N/A case 'P':
0N/A lm_optP = sizetoint(optarg);
0N/A break;
0N/A case 'S':
0N/A lm_optS = 1;
0N/A break;
0N/A case 'T':
0N/A lm_optT = sizetoint(optarg);
0N/A break;
0N/A case 'V':
0N/A (void) printf("%s\n", LIBMICRO_VERSION);
0N/A exit(0);
0N/A break;
0N/A case 'W':
0N/A lm_optW = 1;
0N/A lm_optS = 1;
0N/A break;
0N/A case '?':
0N/A usage();
0N/A exit(0);
0N/A break;
0N/A default:
0N/A if (benchmark_optswitch(opt, optarg) == -1) {
0N/A usage();
0N/A exit(0);
0N/A }
0N/A }
0N/A }
0N/A
0N/A /* deal with implicit and overriding options */
0N/A if (lm_opt1 && lm_optP > 1) {
0N/A lm_optP = 1;
0N/A (void) printf("warning: -1 overrides -P\n");
0N/A }
0N/A
0N/A if (lm_optE) {
0N/A (void) fprintf(stderr, "Running:%20s", lm_optN);
0N/A (void) fflush(stderr);
0N/A }
0N/A
9N/A if (lm_optB == 0) {
0N/A /*
0N/A * neither benchmark or user has specified the number
0N/A * of cnts/sample, so use computed value
0N/A */
0N/A if (lm_optI)
0N/A lm_nsecs_per_op = lm_optI;
0N/A
0N/A lm_optB = nsecs_resolution * 100 / lm_nsecs_per_op;
0N/A if (lm_optB == 0)
0N/A lm_optB = 1;
0N/A }
0N/A
9N/A /*
0N/A * now that the options are set
0N/A */
0N/A
0N/A if (benchmark_initrun() == -1) {
0N/A exit(1);
0N/A }
0N/A
0N/A /* allocate dynamic data */
0N/A pids = (pid_t *)malloc(lm_optP * sizeof (pid_t));
0N/A if (pids == NULL) {
0N/A perror("malloc(pids)");
0N/A exit(1);
0N/A }
0N/A tids = (pthread_t *)malloc(lm_optT * sizeof (pthread_t));
0N/A if (tids == NULL) {
0N/A perror("malloc(tids)");
0N/A exit(1);
0N/A }
0N/A
0N/A /* check that the case defines lm_tsdsize before proceeding */
0N/A if (lm_tsdsize == (size_t)-1) {
0N/A (void) fprintf(stderr, "error in benchmark_init: "
0N/A "lm_tsdsize not set\n");
0N/A exit(1);
0N/A }
0N/A
0N/A /* round up tsdsize to nearest 128 to eliminate false sharing */
0N/A tsdsize = ((lm_tsdsize + 127) / 128) * 128;
0N/A
0N/A /* allocate sufficient TSD for each thread in each process */
0N/A tsdseg = (void *)mmap(NULL, lm_optT * lm_optP * tsdsize + 8192,
0N/A PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0L);
0N/A if (tsdseg == NULL) {
0N/A perror("mmap(tsd)");
0N/A exit(1);
0N/A }
0N/A
0N/A /* initialise worker synchronisation */
0N/A b = barrier_create(lm_optT * lm_optP, DATASIZE);
0N/A if (b == NULL) {
0N/A perror("barrier_create()");
0N/A exit(1);
0N/A }
0N/A lm_barrier = b;
0N/A b->ba_flag = 1;
0N/A
0N/A /* need this here so that parent and children can call exit() */
0N/A (void) fflush(stdout);
0N/A (void) fflush(stderr);
0N/A
0N/A /* when we started and when to stop */
0N/A
0N/A b->ba_starttime = getnsecs();
0N/A b->ba_deadline = (long long) (b->ba_starttime + (lm_optD * 1000000LL));
0N/A
0N/A /* do the work */
0N/A if (lm_opt1) {
0N/A /* single process, non-fork mode */
0N/A pindex = 0;
0N/A worker_process();
0N/A } else {
0N/A /* create worker processes */
0N/A for (i = 0; i < lm_optP; i++) {
0N/A pids[i] = fork();
0N/A
0N/A switch (pids[i]) {
0N/A case 0:
0N/A pindex = i;
0N/A worker_process();
0N/A exit(0);
0N/A break;
0N/A case -1:
0N/A perror("fork");
0N/A exit(1);
0N/A break;
0N/A default:
0N/A continue;
0N/A }
0N/A }
0N/A
0N/A /* wait for worker processes */
0N/A for (i = 0; i < lm_optP; i++) {
0N/A if (pids[i] > 0) {
0N/A (void) waitpid(pids[i], NULL, 0);
0N/A }
0N/A }
0N/A }
0N/A
0N/A b->ba_endtime = getnsecs();
0N/A
0N/A /* compute results */
0N/A
0N/A compute_stats(b);
0N/A
0N/A /* print arguments benchmark was invoked with ? */
0N/A if (lm_optL) {
0N/A int l;
9N/A (void) printf("# %s ", argv[0]);
0N/A for (l = 1; l < argc; l++) {
9N/A (void) printf("%s ", argv[l]);
0N/A }
9N/A (void) printf("\n");
0N/A }
0N/A
0N/A /* print result header (unless suppressed) */
0N/A if (!lm_optH) {
0N/A (void) printf("%12s %3s %3s %12s %12s %8s %8s %s\n",
0N/A "", "prc", "thr",
0N/A "usecs/call",
0N/A "samples", "errors", "cnt/samp", lm_header);
0N/A }
0N/A
0N/A /* print result */
0N/A
0N/A (void) printf("%-12s %3d %3d %12.5f %12d %8lld %8d %s\n",
0N/A lm_optN, lm_optP, lm_optT,
0N/A (lm_optM?b->ba_corrected.st_mean:b->ba_corrected.st_median),
0N/A b->ba_batches, b->ba_errors, lm_optB,
0N/A benchmark_result());
0N/A
0N/A if (lm_optS) {
0N/A print_stats(b);
0N/A }
0N/A
0N/A /* just incase something goes awry */
0N/A (void) fflush(stdout);
0N/A (void) fflush(stderr);
0N/A
0N/A /* cleanup by stages */
0N/A (void) benchmark_finirun();
0N/A (void) barrier_destroy(b);
0N/A (void) benchmark_fini();
0N/A
0N/A if (lm_optE) {
0N/A (void) fprintf(stderr, " for %12.5f seconds\n",
0N/A (double)(getnsecs() - startnsecs) /
0N/A 1.e9);
0N/A (void) fflush(stderr);
0N/A }
0N/A return (0);
0N/A}
0N/A
0N/Avoid *
0N/Aworker_thread(void *arg)
0N/A{
0N/A result_t r;
0N/A long long last_sleep = 0;
0N/A long long t;
0N/A
0N/A r.re_errors = benchmark_initworker(arg);
0N/A
0N/A while (lm_barrier->ba_flag) {
0N/A r.re_count = 0;
0N/A r.re_errors += benchmark_initbatch(arg);
0N/A
0N/A /* sync to clock */
0N/A
0N/A if (lm_optA && ((t = getnsecs()) - last_sleep) > 75000000LL) {
0N/A (void) poll(0, 0, 10);
0N/A last_sleep = t;
0N/A }
0N/A /* wait for it ... */
0N/A (void) barrier_queue(lm_barrier, NULL);
0N/A
0N/A /* time the test */
0N/A r.re_t0 = getnsecs();
0N/A (void) benchmark(arg, &r);
0N/A r.re_t1 = getnsecs();
0N/A
0N/A /* time to stop? */
0N/A if (r.re_t1 > lm_barrier->ba_deadline &&
0N/A (!lm_optC || lm_optC < lm_barrier->ba_batches)) {
0N/A lm_barrier->ba_flag = 0;
0N/A }
0N/A
0N/A /* record results and sync */
0N/A (void) barrier_queue(lm_barrier, &r);
0N/A
0N/A (void) benchmark_finibatch(arg);
0N/A
0N/A r.re_errors = 0;
0N/A }
0N/A
0N/A (void) benchmark_finiworker(arg);
0N/A
0N/A return (0);
0N/A}
0N/A
0N/Avoid
0N/Aworker_process()
0N/A{
0N/A int i;
0N/A void *tsd;
0N/A
0N/A for (i = 1; i < lm_optT; i++) {
0N/A tsd = gettsd(pindex, i);
0N/A if (pthread_create(&tids[i], NULL, worker_thread, tsd) != 0) {
0N/A perror("pthread_create");
0N/A exit(1);
0N/A }
0N/A }
0N/A
0N/A tsd = gettsd(pindex, 0);
0N/A (void) worker_thread(tsd);
0N/A
0N/A for (i = 1; i < lm_optT; i++) {
0N/A (void) pthread_join(tids[i], NULL);
0N/A }
0N/A}
0N/A
0N/Avoid
0N/Ausage()
0N/A{
0N/A (void) printf(
0N/A "usage: %s\n"
0N/A " [-1] (single process; overrides -P > 1)\n"
0N/A " [-A] (align with clock)\n"
0N/A " [-B batch-size (default %d)]\n"
0N/A " [-C minimum number of samples (default 0)]\n"
0N/A " [-D duration in msecs (default %ds)]\n"
0N/A " [-E (echo name to stderr)]\n"
0N/A " [-H] (suppress headers)\n"
0N/A " [-I] nsecs per op (used to compute batch size)"
0N/A " [-L] (print argument line)\n"
0N/A " [-M] (reports mean rather than median)\n"
0N/A " [-N test-name (default '%s')]\n"
0N/A " [-P processes (default %d)]\n"
0N/A " [-S] (print detailed stats)\n"
0N/A " [-T threads (default %d)]\n"
0N/A " [-V] (print the libMicro version and exit)\n"
0N/A " [-W] (flag possible benchmark problems)\n"
0N/A "%s\n",
0N/A lm_procname,
0N/A lm_defB, lm_defD, lm_procname, lm_defP, lm_defT,
0N/A lm_usage);
0N/A}
0N/A
0N/Avoid
9N/Aprint_warnings(barrier_t *b)
9N/A{
9N/A int head = 0;
9N/A int increase;
9N/A
9N/A if (b->ba_quant) {
9N/A if (!head++) {
9N/A (void) printf("#\n# WARNINGS\n");
9N/A }
9N/A increase = (int)(floor((nsecs_resolution * 100.0) /
9N/A ((double)lm_optB * b->ba_corrected.st_median * 1000.0)) +
9N/A 1.0);
9N/A (void) printf("# Quantization error likely;"
9N/A "increase batch size (-B option) %dX to avoid.\n",
9N/A increase);
9N/A }
9N/A
9N/A /*
9N/A * XXX should warn on median != mean by a lot
9N/A */
9N/A
9N/A if (b->ba_errors) {
9N/A if (!head++) {
9N/A (void) printf("#\n# WARNINGS\n");
9N/A }
9N/A (void) printf("# Errors occured during benchmark.\n");
9N/A }
9N/A}
9N/A
9N/Avoid
0N/Aprint_stats(barrier_t *b)
0N/A{
0N/A (void) printf("#\n");
9N/A (void) printf("# STATISTICS %12s %12s\n",
0N/A "usecs/call (raw)",
0N/A "usecs/call (outliers removed)");
0N/A
0N/A if (b->ba_count == 0) {
0N/A (void) printf("zero samples\n");
0N/A return;
0N/A }
0N/A
0N/A (void) printf("# min %12.5f %12.5f\n",
0N/A b->ba_raw.st_min,
0N/A b->ba_corrected.st_min);
0N/A
9N/A (void) printf("# max %12.5f %12.5f\n",
0N/A b->ba_raw.st_max,
0N/A b->ba_corrected.st_max);
9N/A (void) printf("# mean %12.5f %12.5f\n",
0N/A b->ba_raw.st_mean,
0N/A b->ba_corrected.st_mean);
9N/A (void) printf("# median %12.5f %12.5f\n",
0N/A b->ba_raw.st_median,
0N/A b->ba_corrected.st_median);
9N/A (void) printf("# stddev %12.5f %12.5f\n",
0N/A b->ba_raw.st_stddev,
0N/A b->ba_corrected.st_stddev);
9N/A (void) printf("# standard error %12.5f %12.5f\n",
0N/A b->ba_raw.st_stderr,
0N/A b->ba_corrected.st_stderr);
9N/A (void) printf("# 99%% confidence level %12.5f %12.5f\n",
0N/A b->ba_raw.st_99confidence,
0N/A b->ba_corrected.st_99confidence);
9N/A (void) printf("# skew %12.5f %12.5f\n",
0N/A b->ba_raw.st_skew,
0N/A b->ba_corrected.st_skew);
9N/A (void) printf("# kurtosis %12.5f %12.5f\n",
0N/A b->ba_raw.st_kurtosis,
0N/A b->ba_corrected.st_kurtosis);
0N/A
9N/A (void) printf("# time correlation %12.5f %12.5f\n",
0N/A b->ba_raw.st_timecorr,
0N/A b->ba_corrected.st_timecorr);
0N/A (void) printf("#\n");
0N/A
0N/A (void) printf("# elasped time %12.5f\n", (b->ba_endtime -
0N/A b->ba_starttime) / 1.0e9);
0N/A (void) printf("# number of samples %12d\n", b->ba_batches);
9N/A (void) printf("# number of outliers %12d\n", b->ba_outliers);
0N/A (void) printf("# getnsecs overhead %12d\n", (int)nsecs_overhead);
0N/A
0N/A (void) printf("#\n");
0N/A (void) printf("# DISTRIBUTION\n");
0N/A
0N/A print_histo(b);
0N/A
0N/A if (lm_optW) {
9N/A print_warnings(b);
0N/A }
0N/A}
0N/A
0N/Avoid
0N/Aupdate_stats(barrier_t *b, result_t *r)
0N/A{
0N/A double time;
0N/A double nsecs_per_call;
0N/A
0N/A if (b->ba_waiters == 0) {
0N/A /* first thread only */
0N/A b->ba_t0 = r->re_t0;
0N/A b->ba_t1 = r->re_t1;
0N/A b->ba_count0 = 0;
0N/A b->ba_errors0 = 0;
0N/A } else {
0N/A /* all but first thread */
0N/A if (r->re_t0 < b->ba_t0) {
0N/A b->ba_t0 = r->re_t0;
0N/A }
0N/A if (r->re_t1 > b->ba_t1) {
0N/A b->ba_t1 = r->re_t1;
0N/A }
0N/A }
0N/A
0N/A b->ba_count0 += r->re_count;
0N/A b->ba_errors0 += r->re_errors;
0N/A
0N/A if (b->ba_waiters == b->ba_hwm - 1) {
0N/A /* last thread only */
0N/A
0N/A
9N/A time = (double)b->ba_t1 - (double)b->ba_t0 -
9N/A (double)nsecs_overhead;
0N/A
0N/A if (time < 100 * nsecs_resolution)
0N/A b->ba_quant++;
0N/A
0N/A /*
0N/A * normalize by procs * threads if not -U
0N/A */
0N/A
0N/A nsecs_per_call = time / (double)b->ba_count0 *
0N/A (double)(lm_optT * lm_optP);
0N/A
0N/A b->ba_count += b->ba_count0;
0N/A b->ba_errors += b->ba_errors0;
0N/A
0N/A b->ba_data[b->ba_batches % b->ba_datasize] =
0N/A nsecs_per_call;
0N/A
0N/A b->ba_batches++;
0N/A }
0N/A}
0N/A
0N/A#ifdef USE_SEMOP
0N/Abarrier_t *
0N/Abarrier_create(int hwm, int datasize)
0N/A{
0N/A struct sembuf s[1];
0N/A barrier_t *b;
0N/A
0N/A /*LINTED*/
0N/A b = (barrier_t *)mmap(NULL,
0N/A sizeof (barrier_t) + (datasize - 1) * sizeof (double),
0N/A PROT_READ | PROT_WRITE,
0N/A MAP_SHARED | MAP_ANON, -1, 0L);
0N/A if (b == (barrier_t *)MAP_FAILED) {
0N/A return (NULL);
0N/A }
0N/A b->ba_datasize = datasize;
0N/A
0N/A b->ba_flag = 0;
0N/A b->ba_hwm = hwm;
0N/A b->ba_semid = semget(IPC_PRIVATE, 3, 0600);
0N/A if (b->ba_semid == -1) {
0N/A (void) munmap((void *)b, sizeof (barrier_t));
0N/A return (NULL);
0N/A }
0N/A
0N/A /* [hwm - 1, 0, 0] */
0N/A s[0].sem_num = 0;
0N/A s[0].sem_op = hwm - 1;
0N/A s[0].sem_flg = 0;
0N/A if (semop(b->ba_semid, s, 1) == -1) {
0N/A perror("semop(1)");
0N/A (void) semctl(b->ba_semid, 0, IPC_RMID);
0N/A (void) munmap((void *)b, sizeof (barrier_t));
0N/A return (NULL);
0N/A }
0N/A
0N/A b->ba_waiters = 0;
0N/A b->ba_phase = 0;
0N/A
0N/A b->ba_count = 0;
0N/A b->ba_errors = 0;
0N/A
0N/A return (b);
0N/A}
0N/A
0N/Aint
0N/Abarrier_destroy(barrier_t *b)
0N/A{
0N/A (void) semctl(b->ba_semid, 0, IPC_RMID);
0N/A (void) munmap((void *)b, sizeof (barrier_t));
0N/A
0N/A return (0);
0N/A}
0N/A
0N/Aint
0N/Abarrier_queue(barrier_t *b, result_t *r)
0N/A{
0N/A struct sembuf s[2];
0N/A
0N/A /*
0N/A * {s0(-(hwm-1))}
0N/A * if ! nowait {s1(-(hwm-1))}
0N/A * (all other threads)
0N/A * update shared stats
0N/A * {s0(hwm-1), s1(1)}
0N/A * {s0(1), s2(-1)}
0N/A * else
0N/A * (last thread)
0N/A * update shared stats
0N/A * {s2(hwm-1)}
0N/A */
0N/A
0N/A s[0].sem_num = 0;
0N/A s[0].sem_op = -(b->ba_hwm - 1);
0N/A s[0].sem_flg = 0;
0N/A if (semop(b->ba_semid, s, 1) == -1) {
0N/A perror("semop(2)");
0N/A return (-1);
0N/A }
0N/A
0N/A s[0].sem_num = 1;
0N/A s[0].sem_op = -(b->ba_hwm - 1);
0N/A s[0].sem_flg = IPC_NOWAIT;
0N/A if (semop(b->ba_semid, s, 1) == -1) {
0N/A if (errno != EAGAIN) {
0N/A perror("semop(3)");
0N/A return (-1);
0N/A }
0N/A
0N/A /* all but the last thread */
0N/A
0N/A if (r != NULL) {
0N/A update_stats(b, r);
0N/A }
0N/A
0N/A b->ba_waiters++;
0N/A
0N/A s[0].sem_num = 0;
0N/A s[0].sem_op = b->ba_hwm - 1;
0N/A s[0].sem_flg = 0;
0N/A s[1].sem_num = 1;
0N/A s[1].sem_op = 1;
0N/A s[1].sem_flg = 0;
0N/A if (semop(b->ba_semid, s, 2) == -1) {
0N/A perror("semop(4)");
0N/A return (-1);
0N/A }
0N/A
0N/A s[0].sem_num = 0;
0N/A s[0].sem_op = 1;
0N/A s[0].sem_flg = 0;
0N/A s[1].sem_num = 2;
0N/A s[1].sem_op = -1;
0N/A s[1].sem_flg = 0;
0N/A if (semop(b->ba_semid, s, 2) == -1) {
0N/A perror("semop(5)");
0N/A return (-1);
0N/A }
0N/A
0N/A } else {
0N/A /* the last thread */
0N/A
0N/A if (r != NULL) {
0N/A update_stats(b, r);
0N/A }
0N/A
0N/A b->ba_waiters = 0;
0N/A b->ba_phase++;
0N/A
0N/A s[0].sem_num = 2;
0N/A s[0].sem_op = b->ba_hwm - 1;
0N/A s[0].sem_flg = 0;
0N/A if (semop(b->ba_semid, s, 1) == -1) {
0N/A perror("semop(6)");
0N/A return (-1);
0N/A }
0N/A }
0N/A
0N/A return (0);
0N/A}
0N/A
0N/A#else /* USE_SEMOP */
0N/A
0N/Abarrier_t *
0N/Abarrier_create(int hwm, int datasize)
0N/A{
0N/A pthread_mutexattr_t attr;
0N/A pthread_condattr_t cattr;
0N/A barrier_t *b;
0N/A
0N/A /*LINTED*/
0N/A b = (barrier_t *)mmap(NULL,
0N/A sizeof (barrier_t) + (datasize - 1) * sizeof (double),
0N/A PROT_READ | PROT_WRITE,
0N/A MAP_SHARED | MAP_ANON, -1, 0L);
0N/A if (b == (barrier_t *)MAP_FAILED) {
0N/A return (NULL);
0N/A }
0N/A b->ba_datasize = datasize;
0N/A
0N/A b->ba_hwm = hwm;
0N/A b->ba_flag = 0;
0N/A
0N/A (void) pthread_mutexattr_init(&attr);
0N/A (void) pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
0N/A
0N/A (void) pthread_condattr_init(&cattr);
0N/A (void) pthread_condattr_setpshared(&cattr, PTHREAD_PROCESS_SHARED);
0N/A
0N/A (void) pthread_mutex_init(&b->ba_lock, &attr);
0N/A (void) pthread_cond_init(&b->ba_cv, &cattr);
0N/A
0N/A b->ba_waiters = 0;
0N/A b->ba_phase = 0;
0N/A
0N/A b->ba_count = 0;
0N/A b->ba_errors = 0;
9N/A
0N/A return (b);
0N/A}
0N/A
0N/Aint
0N/Abarrier_destroy(barrier_t *b)
0N/A{
0N/A (void) munmap((void *)b, sizeof (barrier_t));
0N/A
0N/A return (0);
0N/A}
0N/A
0N/Aint
0N/Abarrier_queue(barrier_t *b, result_t *r)
0N/A{
0N/A int phase;
0N/A
0N/A (void) pthread_mutex_lock(&b->ba_lock);
0N/A
0N/A if (r != NULL) {
0N/A update_stats(b, r);
0N/A }
0N/A
0N/A phase = b->ba_phase;
0N/A
0N/A b->ba_waiters++;
0N/A if (b->ba_hwm == b->ba_waiters) {
0N/A b->ba_waiters = 0;
0N/A b->ba_phase++;
0N/A (void) pthread_cond_broadcast(&b->ba_cv);
0N/A }
0N/A
0N/A while (b->ba_phase == phase) {
0N/A (void) pthread_cond_wait(&b->ba_cv, &b->ba_lock);
0N/A }
0N/A
0N/A (void) pthread_mutex_unlock(&b->ba_lock);
0N/A return (0);
0N/A}
0N/A#endif /* USE_SEMOP */
0N/A
0N/Aint
0N/Agettindex()
0N/A{
0N/A int i;
0N/A
0N/A if (tids == NULL) {
0N/A return (-1);
0N/A }
0N/A
0N/A for (i = 1; i < lm_optT; i++) {
0N/A if (pthread_self() == tids[i]) {
0N/A return (i);
0N/A }
0N/A }
0N/A
0N/A return (0);
0N/A}
0N/A
0N/Aint
0N/Agetpindex()
0N/A{
0N/A return (pindex);
0N/A}
0N/A
0N/Avoid *
0N/Agettsd(int p, int t)
0N/A{
0N/A if ((p < 0) || (p >= lm_optP) || (t < 0) || (t >= lm_optT))
0N/A return (NULL);
0N/A
0N/A return ((void *)((unsigned long)tsdseg +
0N/A (((p * lm_optT) + t) * tsdsize)));
0N/A}
0N/A
/*
 * Clock access.  Exactly one implementation of getnsecs()/getusecs()
 * is compiled: gethrtime() with USE_GETHRTIME, the x86 time-stamp
 * counter with USE_RDTSC, gettimeofday() otherwise.
 */
#ifdef USE_GETHRTIME
long long
getnsecs()
{
	return (gethrtime());
}

long long
getusecs()
{
	return (gethrtime() / 1000);
}

#elif defined(USE_RDTSC) /* USE_GETHRTIME */

/*
 * Read the time-stamp counter.  RDTSC returns the low half in EAX and
 * the high half in EDX; naming both registers is correct on 32-bit
 * and 64-bit x86 alike, which the "A" constraint is not.
 */
__inline__ long long
rdtsc(void)
{
	unsigned int		lo;
	unsigned int		hi;

	__asm__ volatile(".byte 0x0f, 0x31" : "=a" (lo), "=d" (hi));
	return ((long long)(((unsigned long long)hi << 32) | lo));
}

/*
 * Convert counter ticks to a time unit with per_sec units per second.
 * Whole seconds and the remainder are scaled separately; ticks *
 * per_sec in one step overflows long long within seconds of uptime.
 */
static long long
ticks_to_units(long long ticks, long long per_sec)
{
	return ((ticks / lm_hz) * per_sec +
	    (ticks % lm_hz) * per_sec / lm_hz);
}

long long
getusecs()
{
	return (ticks_to_units(rdtsc(), 1000000LL));
}

long long
getnsecs()
{
	return (ticks_to_units(rdtsc(), 1000000000LL));
}

#else /* USE_GETHRTIME */

long long
getusecs()
{
	struct timeval		tv;

	(void) gettimeofday(&tv, NULL);

	return ((long long)tv.tv_sec * 1000000LL + (long long) tv.tv_usec);
}

long long
getnsecs()
{
	struct timeval		tv;

	(void) gettimeofday(&tv, NULL);

	/* gettimeofday() has microsecond granularity at best */
	return ((long long)tv.tv_sec * 1000000000LL +
	    (long long) tv.tv_usec * 1000LL);
}

#endif /* USE_GETHRTIME */
0N/A
/*
 * Make sure the soft limit on open files is at least limit, raising
 * the hard limit as well when it is lower.
 *
 * Returns 0.  Exits with status 1 if the limits cannot be read and
 * with status 3 if they cannot be set.
 */
int
setfdlimit(int limit)
{
	struct rlimit		lim;
	rlim_t			wanted = limit;

	if (getrlimit(RLIMIT_NOFILE, &lim) < 0) {
		perror("getrlimit");
		exit(1);
	}

	/* no worries: the soft limit already exceeds the request */
	if (lim.rlim_cur > wanted)
		return (0);

	lim.rlim_cur = wanted;
	if (lim.rlim_max < wanted)
		lim.rlim_max = wanted;

	if (setrlimit(RLIMIT_NOFILE, &lim) < 0) {
		perror("setrlimit");
		exit(3);
	}

	return (0);
}
0N/A
0N/A
#define	KILOBYTE		1024
#define	MEGABYTE		(KILOBYTE * KILOBYTE)
#define	GIGABYTE		(KILOBYTE * MEGABYTE)

/*
 * Work out the multiplier selected by an optional k/m/g suffix
 * (either case).  Without a trailing letter the multiplier is 1.
 *
 * Returns 0 on success, -1 for an unknown suffix or when anything
 * but digits precedes a suffix.
 */
static int
size_multiplier(const char *arg, long long *mult)
{
	size_t			len = strlen(arg);
	size_t			i;

	*mult = 1;

	/* <ctype.h> functions need an unsigned char value */
	if (len == 0 || !isalpha((unsigned char)arg[len - 1]))
		return (0);

	switch (arg[len - 1]) {
	case 'k':
	case 'K':
		*mult = KILOBYTE;
		break;
	case 'm':
	case 'M':
		*mult = MEGABYTE;
		break;
	case 'g':
	case 'G':
		*mult = GIGABYTE;
		break;
	default:
		return (-1);
	}

	for (i = 0; i + 1 < len; i++)
		if (!isdigit((unsigned char)arg[i]))
			return (-1);

	return (0);
}

/*
 * Convert a size argument such as "512", "64k", "2M" or "1g" to a
 * long long.  Returns -1 for a malformed suffixed value or when the
 * scaled value does not fit.
 */
long long
sizetoll(const char *arg)
{
	long long		mult;
	long long		val;

	if (size_multiplier(arg, &mult) == -1)
		return (-1);

	val = strtoll(arg, NULL, 10);

	/* refuse to overflow; mult is always >= 1 */
	if (val > LLONG_MAX / mult || val < LLONG_MIN / mult)
		return (-1);

	return (mult * val);
}

/*
 * Same as sizetoll() for callers that store the value in an int.
 * Returns -1 as well when the value is outside the range of int,
 * instead of truncating it silently.
 */
int
sizetoint(const char *arg)
{
	long long		val = sizetoll(arg);

	if (val > INT_MAX || val < INT_MIN)
		return (-1);

	return ((int)val);
}
0N/A
/*
 * Print one histogram bar to stdout: count scaled so that total
 * fills 32 columns, padded with blanks to at least 32 characters.
 * A non-zero count always gets at least one star.
 *
 * A total of zero (no bucket holds anything) yields an empty bar
 * instead of a division by zero.
 */
static void
print_bar(long count, long total)
{
	long			stars;
	long			i;

	stars = (total > 0) ? (32 * count) / total : 0;

	/* putchar() is plain ISO C; the output is unchanged */
	(void) putchar(count ? '*' : ' ');
	for (i = 1; i < stars; i++)
		(void) putchar('*');
	for (; i < 32; i++)
		(void) putchar(' ');
}
0N/A
/*
 * qsort() comparison callback ordering doubles ascending.
 * Returns 1, -1 or 0; values that compare neither greater nor less
 * (equal values, NaN) yield 0.
 */
static int
doublecmp(const void *p1, const void *p2)
{
	const double		lhs = *(const double *)p1;
	const double		rhs = *(const double *)p2;

	return ((lhs > rhs) - (lhs < rhs));
}
9N/A
0N/Astatic void
0N/Aprint_histo(barrier_t *b)
0N/A{
0N/A int n;
0N/A int i;
0N/A int j;
0N/A int last;
0N/A long long maxcount;
0N/A double sum;
0N/A long long min;
0N/A long long scale;
0N/A double x;
0N/A long long y;
0N/A long long count;
0N/A int i95;
0N/A double p95;
0N/A double r95;
0N/A double m95;
0N/A histo_t *histo;
0N/A
0N/A (void) printf("# %12s %12s %32s %12s\n", "counts", "usecs/call",
0N/A "", "means");
0N/A
0N/A /* calculate how much data we've captured */
0N/A n = b->ba_batches > b->ba_datasize ? b->ba_datasize : b->ba_batches;
0N/A
0N/A /* find the 95th percentile - index, value and range */
9N/A qsort((void *)b->ba_data, n, sizeof (double), doublecmp);
0N/A min = b->ba_data[0] + 0.000001;
0N/A i95 = n * 95 / 100;
0N/A p95 = b->ba_data[i95];
0N/A r95 = p95 - min + 1;
0N/A
0N/A /* find a suitable min and scale */
0N/A i = 0;
0N/A x = r95 / (HISTOSIZE - 1);
0N/A while (x >= 10.0) {
0N/A x /= 10.0;
0N/A i++;
0N/A }
0N/A y = x + 0.9999999999;
0N/A while (i > 0) {
0N/A y *= 10;
0N/A i--;
0N/A }
0N/A min /= y;
0N/A min *= y;
0N/A scale = y * (HISTOSIZE - 1);
0N/A if (scale < (HISTOSIZE - 1)) {
0N/A scale = (HISTOSIZE - 1);
0N/A }
0N/A
0N/A /* create and initialise the histogram */
0N/A histo = malloc(HISTOSIZE * sizeof (histo_t));
0N/A for (i = 0; i < HISTOSIZE; i++) {
0N/A histo[i].sum = 0.0;
0N/A histo[i].count = 0;
0N/A }
0N/A
0N/A /* populate the histogram */
0N/A last = 0;
0N/A sum = 0.0;
0N/A count = 0;
0N/A for (i = 0; i < i95; i++) {
0N/A j = (HISTOSIZE - 1) * (b->ba_data[i] - min) / scale;
0N/A
0N/A if (j >= HISTOSIZE) {
0N/A (void) printf("panic!\n");
0N/A j = HISTOSIZE - 1;
0N/A }
0N/A
0N/A histo[j].sum += b->ba_data[i];
0N/A histo[j].count++;
0N/A
0N/A sum += b->ba_data[i];
0N/A count++;
0N/A }
0N/A m95 = sum / count;
0N/A
0N/A /* find the larges bucket */
0N/A maxcount = 0;
0N/A for (i = 0; i < HISTOSIZE; i++)
0N/A if (histo[i].count > 0) {
0N/A last = i;
0N/A if (histo[i].count > maxcount)
0N/A maxcount = histo[i].count;
0N/A }
0N/A
0N/A /* print the buckets */
0N/A for (i = 0; i <= last; i++) {
0N/A (void) printf("# %12lld %12.5f |", histo[i].count,
0N/A (min + scale * (double)i / (HISTOSIZE - 1)));
0N/A
0N/A print_bar(histo[i].count, maxcount);
0N/A
0N/A if (histo[i].count > 0)
0N/A (void) printf("%12.5f\n",
0N/A histo[i].sum / histo[i].count);
0N/A else
0N/A (void) printf("%12s\n", "-");
0N/A }
0N/A
0N/A /* find the mean of values beyond the 95th percentile */
0N/A sum = 0.0;
0N/A count = 0;
0N/A for (i = i95; i < n; i++) {
0N/A sum += b->ba_data[i];
0N/A count++;
0N/A }
0N/A
0N/A /* print the >95% bucket summary */
0N/A (void) printf("#\n");
0N/A (void) printf("# %12lld %12s |", count, "> 95%");
0N/A print_bar(count, maxcount);
0N/A if (count > 0)
0N/A (void) printf("%12.5f\n", sum / count);
0N/A else
0N/A (void) printf("%12s\n", "-");
0N/A (void) printf("#\n");
0N/A (void) printf("# %12s %12.5f\n", "mean of 95%", m95);
0N/A (void) printf("# %12s %12.5f\n", "95th %ile", p95);
0N/A
0N/A /* quantify any buffer overflow */
0N/A if (b->ba_batches > b->ba_datasize)
0N/A (void) printf("# %12s %12d\n", "data dropped",
0N/A b->ba_batches - b->ba_datasize);
0N/A}
0N/A
0N/Astatic void
0N/Acompute_stats(barrier_t *b)
0N/A{
0N/A int i;
0N/A
0N/A if (b->ba_batches > b->ba_datasize)
0N/A b->ba_batches = b->ba_datasize;
0N/A
0N/A /*
0N/A * convert to usecs/call
0N/A */
0N/A
0N/A for (i = 0; i < b->ba_batches; i++)
0N/A b->ba_data[i] /= 1000.0;
0N/A
0N/A /*
0N/A * do raw stats
0N/A */
0N/A
9N/A (void) crunch_stats(b->ba_data, b->ba_batches, &b->ba_raw);
0N/A
0N/A /*
0N/A * recursively apply 3 sigma rule to remove outliers
0N/A */
0N/A
0N/A b->ba_corrected = b->ba_raw;
0N/A b->ba_outliers = 0;
0N/A
9N/A if (b->ba_batches > 40) { /* remove outliers */
0N/A int removed;
0N/A
0N/A do {
9N/A removed = remove_outliers(b->ba_data, b->ba_batches,
0N/A &b->ba_corrected);
0N/A b->ba_outliers += removed;
0N/A b->ba_batches -= removed;
9N/A (void) crunch_stats(b->ba_data, b->ba_batches,
9N/A &b->ba_corrected);
0N/A } while (removed != 0 && b->ba_batches > 40);
0N/A }
0N/A
0N/A}
0N/A
0N/A/*
0N/A * routine to compute various statistics on array of doubles.
0N/A */
0N/A
0N/Astatic int
0N/Acrunch_stats(double *data, int count, stats_t *stats)
0N/A{
0N/A double a;
0N/A double std;
0N/A double diff;
0N/A double sk;
0N/A double ku;
0N/A double mean;
0N/A int i;
0N/A int bytes;
9N/A double *dupdata;
0N/A
9N/A /*
0N/A * first we need the mean
0N/A */
0N/A
0N/A mean = 0.0;
0N/A
0N/A for (i = 0; i < count; i++) {
0N/A mean += data[i];
0N/A }
0N/A
0N/A mean /= count;
0N/A
0N/A stats->st_mean = mean;
0N/A
0N/A /*
0N/A * malloc and sort so we can do median
0N/A */
0N/A
0N/A dupdata = malloc(bytes = sizeof (double) * count);
9N/A (void) memcpy(dupdata, data, bytes);
0N/A qsort((void *)dupdata, count, sizeof (double), doublecmp);
0N/A stats->st_median = dupdata[count/2];
0N/A
0N/A /*
0N/A * reuse dupdata to compute time correlation of data to
0N/A * detect interesting time-based trends
0N/A */
0N/A
0N/A for (i = 0; i < count; i++)
9N/A dupdata[i] = (double)i;
0N/A
9N/A (void) fit_line(dupdata, data, count, &a, &stats->st_timecorr);
0N/A free(dupdata);
0N/A
0N/A std = 0.0;
0N/A sk = 0.0;
0N/A ku = 0.0;
0N/A
0N/A stats->st_max = -1;
0N/A stats->st_min = 1.0e99; /* hard to find portable values */
0N/A
0N/A for (i = 0; i < count; i++) {
0N/A if (data[i] > stats->st_max)
0N/A stats->st_max = data[i];
0N/A if (data[i] < stats->st_min)
0N/A stats->st_min = data[i];
0N/A
0N/A diff = data[i] - mean;
0N/A std += diff * diff;
0N/A sk += diff * diff * diff;
0N/A ku += diff * diff * diff * diff;
0N/A }
0N/A
0N/A stats->st_stddev = std = sqrt(std/(double)(count - 1));
0N/A stats->st_stderr = std / sqrt(count);
0N/A stats->st_99confidence = stats->st_stderr * 2.326;
0N/A stats->st_skew = sk / (std * std * std) / (double)(count);
9N/A stats->st_kurtosis = ku / (std * std * std * std) /
0N/A (double)(count) - 3;
9N/A
0N/A return (0);
0N/A}
0N/A
/*
 * Least squares fit of the line y = a + bx to count points (x, y).
 *
 * Stores the intercept in *a and the slope in *b and returns 0.
 * Returns -1, leaving *a and *b untouched, when the points do not
 * determine a line (the denominator of the fit is zero).
 */

int
fit_line(double *x, double *y, int count, double *a, double *b)
{
	double			sx = 0.0;
	double			sy = 0.0;
	double			sxy = 0.0;
	double			sxx = 0.0;
	double			det;
	int			k;

	/* accumulate the four sums the closed-form solution needs */
	for (k = 0; k < count; k++) {
		const double	xk = x[k];
		const double	yk = y[k];

		sx += xk;
		sxx += xk * xk;
		sy += yk;
		sxy += xk * yk;
	}

	det = count * sxx - sx * sx;
	if (det == 0.0)
		return (-1);

	*a = (sy * sxx - sx * sxy) / det;
	*b = (count * sxy - sx * sy) / det;

	return (0);
}
9N/A
/*
 * Empty function for measurement purposes; does nothing and
 * always returns 1.
 */

int
nop()
{
	return (1);
}
9N/A
9N/A#define NSECITER 1000
0N/A
0N/Astatic long long
0N/Aget_nsecs_overhead()
0N/A{
0N/A long long s;
0N/A
0N/A double data[NSECITER];
0N/A stats_t stats;
0N/A
0N/A int i;
0N/A int count;
0N/A int outliers;
0N/A
9N/A (void) getnsecs(); /* warmup */
9N/A (void) getnsecs(); /* warmup */
9N/A (void) getnsecs(); /* warmup */
0N/A
0N/A i = 0;
0N/A
0N/A count = NSECITER;
9N/A
0N/A for (i = 0; i < count; i++) {
9N/A s = getnsecs();
0N/A data[i] = getnsecs() - s;
0N/A }
0N/A
9N/A (void) crunch_stats(data, count, &stats);
9N/A
9N/A while ((outliers = remove_outliers(data, count, &stats)) != 0) {
0N/A count -= outliers;
9N/A (void) crunch_stats(data, count, &stats);
0N/A }
9N/A
9N/A return ((long long)stats.st_mean);
0N/A
0N/A}
0N/A
/*
 * Determine the resolution of the system's high resolution counter.
 * Most hardware has a nanosecond resolution counter, but some systems
 * still use coarse resolution (e.g. derived from a periodic interrupt).
 *
 * Algorithm:
 * Determine a busy loop that is long enough for successive nanosecond
 * counter reads to report different times.  Then take 1000 samples,
 * with the busy loop lengthened by that amount for each successive
 * sample.  The counter resolution is taken to be the smallest positive
 * step between consecutive samples.
 *
 * One last wrinkle is all 1000 samples may have the same delta on a
 * system with a very fast and consistent hardware counter based
 * getnsecs().  In that case assume the resolution is 1ns.
 *
 * Returns the resolution in nanoseconds, never less than 1.
 */
long long
get_nsecs_resolution()
{
	long long		y[1000];

	/* volatile keeps the empty delay loops from being removed */
	volatile int		i;
	volatile long long	j;
	int			nops;
	long long		res;
	long long		start, stop;

	/*
	 * first, figure out how many nops to use
	 * to get any delta between time measurements.
	 * use a minimum of one.
	 */

	/*
	 * warm cache
	 */

	stop = start = getnsecs();

	for (i = 1; i < 10000000; i++) {
		start = getnsecs();
		for (j = i; j; j--)
			;
		stop = getnsecs();
		if (stop > start)
			break;
	}

	nops = i;

	/*
	 * now collect data at linearly varying intervals
	 */

	for (i = 0; i < 1000; i++) {
		start = getnsecs();
		/* 64-bit product: nops * i can exceed the range of int */
		for (j = (long long)nops * i; j; j--)
			;
		stop = getnsecs();
		y[i] = stop - start;
	}

	/*
	 * find smallest positive difference between samples;
	 * this is the counter resolution
	 *
	 * y[0] is only an initial upper bound.  It comes from an empty
	 * delay loop and is often 0 on a coarse clock; a non-positive
	 * bound is treated as "not set" so that it cannot hide every
	 * real step and pin the result to 1ns.
	 */

	res = y[0];
	for (i = 1; i < 1000; i++) {
		long long diff = y[i] - y[i-1];

		if (diff > 0 && (res <= 0 || diff < res))
			res = diff;

	}
	if (res <= 0)
		res = 1;

	return (res);
}
0N/A
0N/A/*
0N/A * remove any data points from the array more than 3 sigma out
0N/A */
0N/A
0N/Astatic int
9N/Aremove_outliers(double *data, int count, stats_t *stats)
0N/A{
0N/A double outmin = stats->st_mean - 3 * stats->st_stddev;
0N/A double outmax = stats->st_mean + 3 * stats->st_stddev;
0N/A
0N/A int i, j, outliers;
0N/A
0N/A for (outliers = i = j = 0; i < count; i++)
0N/A if (data[i] > outmax || data[i] < outmin)
0N/A outliers++;
0N/A else
0N/A data[j++] = data[i];
0N/A
0N/A return (outliers);
0N/A}