/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms
* of the Common Development and Distribution License
* (the "License"). You may not use this file except
* in compliance with the License.
*
* You can obtain a copy of the license at
* src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing
* permissions and limitations under the License.
*
* When distributing Covered Code, include this CDDL
* HEADER in each file and include the License file at
* usr/src/OPENSOLARIS.LICENSE. If applicable,
* add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your
* own identifying information: Portions Copyright [yyyy]
* [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* Benchmark of cache-to-cache transfer times.  It relies on
* Solaris-specific features to find and bind to the CPUs in the
* current processor set, so it is not likely to work elsewhere.
*/
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <fcntl.h>
#include <string.h>
#include <sys/processor.h>
#include <sys/types.h>
#include <stdio.h>
#include <errno.h>
#include <sys/pset.h>
#include "libmicro.h"
/*
 * Size in bytes of each thread's access area.  Defaults to 512K and is
 * overridden by the -s option.
 */
static long opts = 1024*512;
/*
 * Per-thread state.  benchmark_init() reports its size through lm_tsdsize.
 */
typedef struct {
/* malloc'd area of opts bytes, linked into a pointer chain */
long **ts_data;
/* running sum of the values returned by traverse_ptrchain() */
long ts_result;
/* taken around every traversal of ts_data in benchmark() */
pthread_mutex_t ts_lock;
} tsd_t;
/*
 * Passed by address to pset_info() together with cpus[]; its initial value
 * equals the length of cpus[].  Afterwards it is the modulus used to pick a
 * CPU for each worker.
 */
static unsigned int ncpu = 1024;
/*
 * Worker state, stored at index pthread_self() - 1 by
 * benchmark_initworker() and read back by benchmark().
 */
static tsd_t *thread_data[1024];
/* Processor ids handed to pset_info() to fill in; used for processor_bind(). */
static processorid_t cpus[1024];
/* Pointer-chain walker, defined later in this file. */
int traverse_ptrchain(long **, int, int);
/*
 * Register this benchmark's static properties: the per-thread data size,
 * the option letters it accepts, its usage text and the header for its
 * result column.
 *
 * Always returns 0.
 */
int
benchmark_init()
{
	(void) sprintf(lm_header, "%8s", "size");

	(void) sprintf(lm_usage,
	    " [-s size] size of access area in bytes (default %ld)\n"
	    "notes: measures cache to cache transfer times on Solaris\n",
	    opts);

	/* -s takes an argument */
	(void) strcpy(lm_optstr, "s:");

	lm_tsdsize = sizeof (tsd_t);

	return (0);
}
/*
 * Handle one benchmark-specific command line option.
 *
 * opt		- option letter
 * optarg	- the option's argument text
 *
 * Returns 0 when the option was consumed, -1 for any option other
 * than -s.
 */
int
benchmark_optswitch(int opt, char *optarg)
{
	if (opt != 's')
		return (-1);

	/* -s: size of the access area */
	opts = sizetoint(optarg);
	return (0);
}
/*
 * Per-run setup: ask pset_info() for the processors in the current
 * processor set and record them in cpus[] / ncpu so that workers can
 * bind to them.
 *
 * Returns 0 on success, 1 on failure (after reporting on stderr).
 */
int
benchmark_initrun()
{
	const unsigned int capacity = sizeof (cpus) / sizeof (cpus[0]);

	/* Tell pset_info() how many ids cpus[] can hold on every call. */
	ncpu = capacity;

	if (pset_info(PS_MYID, NULL, &ncpu, cpus) < 0) {
		perror("pset_info");
		return (1);
	}

	/*
	 * Defensive: never let ncpu exceed what cpus[] can hold, in case
	 * the reported count is larger than the number of ids stored.
	 */
	if (ncpu > capacity)
		ncpu = capacity;

	/* ncpu is later used as a modulus, so it must not be zero. */
	if (ncpu == 0) {
		(void) fprintf(stderr, "pset_info: no processors reported\n");
		return (1);
	}

	return (0);
}
/*
 * Per-thread setup.  Allocates this thread's access area, binds the
 * calling LWP to one of the processors recorded in cpus[], links the
 * area into a pointer chain and publishes the thread's state in
 * thread_data[] so that benchmark() can reach it.
 *
 * The value of pthread_self() is used as a small 1-based integer, both
 * to choose a processor and to choose the thread_data[] slot.
 *
 * tsd	- this thread's tsd_t, supplied by the caller
 *
 * Returns 0 on success, 1 on any failure.  Nothing stays allocated on
 * the failure paths.
 */
int
benchmark_initworker(void *tsd)
{
	tsd_t *ts = (tsd_t *)tsd;
	const size_t stride = 128;
	const size_t nslots = sizeof (thread_data) / sizeof (thread_data[0]);
	size_t slot = (size_t)pthread_self() - 1;
	size_t nelem, back, i;
	processorid_t cpu;

	/* A thread id of 0 wraps to a huge slot and is rejected here too. */
	if (slot >= nslots) {
		(void) fprintf(stderr,
		    "thread id %lu is outside the %lu supported threads\n",
		    (unsigned long)pthread_self(), (unsigned long)nslots);
		return (1);
	}

	/* The chain needs at least one element; also rejects negatives. */
	if (opts < (long)sizeof (long)) {
		(void) fprintf(stderr,
		    "size %ld is too small to hold a pointer chain\n", opts);
		return (1);
	}
	nelem = (size_t)opts / sizeof (long);

	ts->ts_data = malloc(opts);
	if (ts->ts_data == NULL) {
		return (1);
	}
	(void) pthread_mutex_init(&ts->ts_lock, NULL);

	cpu = cpus[slot % ncpu];
	if (processor_bind(P_LWPID, P_MYID, cpu, NULL) < 0) {
		/* report first so errno is still the bind failure */
		perror("processor_bind:");
		(void) pthread_mutex_destroy(&ts->ts_lock);
		free(ts->ts_data);
		ts->ts_data = NULL;
		return (1);
	}
	(void) printf("# thread %d using processor %d\n",
	    (int)pthread_self(), (int)cpu);

	/*
	 * use lmbench style backwards stride: every element points at the
	 * element `stride' positions before it, wrapping around the start
	 * of the area.  Reducing the stride modulo the element count keeps
	 * every link inside the area even when it is shorter than one
	 * stride.
	 */
	back = stride % nelem;
	for (i = 0; i < nelem; i++) {
		size_t j = (i >= back) ? i - back : i + nelem - back;

		ts->ts_data[i] = (long *)&(ts->ts_data[j]);
	}

	thread_data[slot] = ts;
	return (0);
}
/*
 * Timed body: lm_optB times over, walk the pointer chain of each of
 * the lm_optT workers registered in thread_data[], holding that
 * worker's lock during the walk.
 *
 * Here we go in order for each thread, causing inherent serialization.
 * This is normally not a good idea, but in this case we're trying to
 * measure cache-to-cache transfer times, and if we run threads in
 * parallel we're likely to see saturation effects rather than
 * cache-to-cache, esp. on wimpy memory platforms like P4.
 *
 * tsd	- unused
 * res	- re_count receives lm_optB * lm_optT * (hops requested per walk)
 *
 * Always returns 0.
 */
/*ARGSUSED*/
int
benchmark(void *tsd, result_t *res)
{
	/* one hop per 128 longs of the access area */
	const int hops = opts / 128 / sizeof (long);
	int batch, t;

	for (batch = 0; batch < lm_optB; batch++) {
		for (t = 0; t < lm_optT; t++) {
			tsd_t *ts = thread_data[t];

			(void) pthread_mutex_lock(&ts->ts_lock);
			/* keep the result live so the walk is not elided */
			ts->ts_result += traverse_ptrchain(
			    (long **)ts->ts_data, hops, 0);
			(void) pthread_mutex_unlock(&ts->ts_lock);
		}
	}

	/*
	 * Widen before multiplying so that large batch counts and large
	 * access areas cannot overflow narrower integer arithmetic.
	 */
	res->re_count = (long long)lm_optB * lm_optT * hops;
	return (0);
}
/*
 * One link of the chain: add `value' (in units of long) to the pointer
 * stored at the current position, then move to wherever it now points.
 */
#define	CHASE_LINK()	\
	do { *ptr = *ptr + value; ptr = (long **)*ptr; } while (0)

/*
 * Follow a chain of pointers starting at ptr.
 *
 * ptr		- first element of the chain; every element holds the
 *		  address of the next one to visit
 * count	- number of hops wanted; hops are taken ten at a time, so
 *		  the number actually taken is count rounded up to a
 *		  multiple of ten (none when count <= 0)
 * value	- added to each stored pointer before it is followed;
 *		  0 leaves the chain untouched
 *
 * The ten hops are written out by hand to keep loop overhead out of the
 * measured work.  After each group of ten the element just reached is
 * updated once more without being followed.
 *
 * Returns the pointer stored at the final position, cast to int; the
 * value is meaningless and only keeps the traversal live.
 */
int
traverse_ptrchain(long **ptr, int count, int value)
{
	int done = 0;

	while (done < count) {
		CHASE_LINK();
		CHASE_LINK();
		CHASE_LINK();
		CHASE_LINK();
		CHASE_LINK();
		CHASE_LINK();
		CHASE_LINK();
		CHASE_LINK();
		CHASE_LINK();
		CHASE_LINK();
		/* trailing update of the element just reached */
		*ptr = *ptr + value;
		done += 10;
	}
	return ((int)*ptr); /* bogus return */
}
#undef	CHASE_LINK
/*
 * Format this benchmark's result column: the access area size,
 * right-aligned in eight characters and followed by one space.
 *
 * Returns a pointer to a static buffer that is overwritten by the
 * next call.
 */
char *
benchmark_result()
{
	static char line[256];

	(void) snprintf(line, sizeof (line), "%8ld ", opts);
	return (line);
}