utility.c revision 7c478bd95313f5f23a4c958a745db2134aa03244
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include "utility.h"
#include "initialize.h"
#include "statistics.h"
#include "streams_common.h"
#include "streams.h"
/*
* utility
*
* Overview
* utility.c contains the general purpose routines used in various locations
* throughout sort. It provides a number of interfaces that maintain local
* state relevant to this instance of sort. We discuss the more significant
* of these interfaces below.
*
* Output guard
* sort is one of the few Unix utilities that is capable of working "in
* place"; that is, sort can manipulate an input file and place its output in
* a file of the same name safely. This is handled in this implementation by
* the output guard facility. In the case of an interrupt or other fatal
* signal, sort attempts to restore the original input file.
*
* Temporary file cleanup
* Similar to the output guard facility, sort cleans up its temporary files in
* the case of interruption (or normal exit, for that matter); this is handled
* by registering a list of file pointers for later use by the atexit handler.
*
* Temporary filename security
* sort protects against "open-through-link" security attacks by verifying
* that the selected temporary file name is unused. If the file name is in
* use, the pattern is readjusted until an available name pattern is
* discovered.
*
* Buffered I/O
* sort has a simple buffered I/O facility of its own, to facilitate writing
* data in large quantities (particularly for multibyte locales). cxwrite()
* is the base routine, while wxwrite(), which handles multibyte buffers, is
* built on top of cxwrite().
*/
/* Size of the static staging buffer declared inside cxwrite(). */
#define XBUFFER_SIZE (32 * KILOBYTE)
/*
 * Locally defined exit status values.
 * NOTE(review): the exit() calls visible in this file use E_USAGE and
 * E_ERROR instead; confirm whether these definitions are still consumed.
 */
#define EXIT_OK 0
#define EXIT_FAILURE 1
#define EXIT_ERROR 2
#define EXIT_INTERNAL 3
/*
 * Descriptor reserved by hold_file_descriptor(); -1 while nothing is held.
 */
static int held_fd = -1;
/*
 * Address of the stream list head walked by atexit_handler(); installed by
 * set_cleanup_chain().
 */
static stream_t **cleanup_chain = NULL;
/*
 * Output guard state, filled in by establish_output_guard():
 *   output_guard_tempname	name of the temporary copy (NULL if none)
 *   output_guard_size		number of bytes copied
 *   output_guard_filename	name of the guarded output file
 *   output_guard_copy_complete	set to 1 once the copy has finished
 */
static char *output_guard_tempname = NULL;
static ssize_t output_guard_size = 0;
static char *output_guard_filename = NULL;
static int output_guard_copy_complete = 0;
/*
 * Pieces from which reset_file_template() assembles a temporary file name:
 * directory, mktemp(3C) pattern, and the numeric suffix that
 * bump_file_template() increments.
 */
static const char *default_tmpdir = "/var/tmp";
static const char *default_template = "/stmAAAXXXXXX";
static const char *default_template_count = ".00000000";
/* Directory and current name pattern selected by set_file_template(). */
static char *current_tmpdir;
static char *current_template;
/* Prefix and suffix formats used by warn() and die(). */
static const char PNAME_FMT[] = "%s: ";
static const char ERRNO_FMT[] = ": %s\n";
/* Program name printed in front of warn()/die() messages. */
static const char *pname = "sort";
/*
 * swap() exchanges the two pointers stored at a and b.  The __S() wrapper
 * around stats_incr_swaps() is defined elsewhere; presumably it is a
 * statistics hook that records the exchange.
 */
void
swap(void **a, void **b)
{
	void *held = *b;

	*b = *a;
	*a = held;

	__S(stats_incr_swaps());
}
/*
* Temporary file name template handling.
*/
/*
 * reset_file_template() rebuilds current_template from the temporary
 * directory, the mktemp(3C) pattern and the zeroed counter suffix.  It
 * repeats until lstat() reports that no file of that name exists.
 */
static void
reset_file_template()
{
	struct stat sb;

	for (;;) {
		(void) strcpy(current_template, current_tmpdir);
		(void) strcat(current_template, default_template);
		(void) mktemp(current_template);
		(void) strcat(current_template, default_template_count);

		/* Stop as soon as the candidate name is not in use. */
		if (lstat(current_template, &sb) == -1)
			break;
	}
}
/*
 * bump_file_template() advances the decimal counter at the end of
 * current_template by one, carrying leftwards odometer-style.  If the carry
 * runs off the front of the digits, or if the resulting name already
 * exists, the whole template is regenerated by reset_file_template().
 *
 * Always returns 0.
 */
int
bump_file_template()
{
	struct stat sb;
	int pos = (int)strlen(current_template) - 1;

	while (isdigit((uchar_t)current_template[pos])) {
		if (++current_template[pos] <= '9')
			break;

		/* Digit wrapped; zero it and carry into the next one. */
		current_template[pos] = '0';
		pos--;
	}

	if (!isdigit((uchar_t)current_template[pos])) {
		/*
		 * The carry walked past the first digit: every counter value
		 * has been used, so start over with a new pattern.
		 */
		reset_file_template();
	}

	if (lstat(current_template, &sb) == 0) {
		/*
		 * Somebody has anticipated the bumped name; pick a new
		 * pattern to avoid a possible "link-through" attack.
		 */
		reset_file_template();
	}

	return (0);
}
void
set_file_template(char **T)
{
struct stat s;
int check_tmpdir = 0;
if (*T != NULL) {
current_tmpdir = strdup(*T);
check_tmpdir = 1;
} else if ((current_tmpdir = getenv("TMPDIR")) != NULL) {
check_tmpdir = 1;
} else {
current_tmpdir = (char *)default_tmpdir;
}
/*
* Check that the temporary directory given exists, and is a directory.
*/
if (check_tmpdir) {
if (stat(current_tmpdir, &s) != 0) {
warn(gettext("cannot stat temporary directory %s"),
current_tmpdir);
current_tmpdir = (char *)default_tmpdir;
} else if (!S_ISDIR(s.st_mode)) {
warn(gettext("%s is not a directory; "
"using default temporary directory"),
current_tmpdir);
current_tmpdir = (char *)default_tmpdir;
}
}
ASSERT(current_tmpdir != NULL);
current_template = safe_realloc(NULL, strlen(current_tmpdir)
+ strlen(default_template) + strlen(default_template_count) + 1);
reset_file_template();
}
/*
 * get_file_template() returns the current temporary file name pattern.  The
 * pointer refers to the module's own buffer, which bump_file_template() and
 * reset_file_template() modify in place.
 */
char *
get_file_template()
{
	char *tmpl = current_template;

	return (tmpl);
}
/*
* Output guard routines.
*/
/*
 * establish_output_guard() protects an input file that is also the output
 * file.  When output is not going to stdout and the output file already
 * exists, the input streams are searched for a non-empty regular file with
 * the same device and inode as the output file.  The first match is copied
 * to a freshly named temporary file, the stream is redirected to read from
 * that copy, and the guard state is recorded for atexit_handler().
 */
void
establish_output_guard(sort_t *S)
{
	struct stat out_sb;
	stream_t *in;

	if (S->m_output_to_stdout)
		return;

	if (stat(S->m_output_filename, &out_sb) != 0)
		return;

	for (in = S->m_input_streams; in != NULL; in = in->s_next) {
		if (in->s_status & STREAM_NOTFILE)
			continue;

		if (in->s_dev != out_sb.st_dev || in->s_ino != out_sb.st_ino)
			continue;

		/*
		 * We needn't protect an empty file.
		 */
		if (!(in->s_filesize > 0))
			continue;

		output_guard_filename = S->m_output_filename;
		output_guard_size = in->s_filesize;

		ASSERT(output_guard_filename != NULL);

		if (bump_file_template() < 0)
			die(EMSG_TEMPORARY);

		/* The stream now reads from the temporary copy. */
		output_guard_tempname = strdup(get_file_template());
		in->s_filename = output_guard_tempname;
		if (output_guard_tempname == NULL)
			die(EMSG_ALLOC);

		xcp(output_guard_tempname, output_guard_filename,
		    output_guard_size);

		output_guard_copy_complete = 1;
		return;
	}
}
/*
 * remove_output_guard() unlinks the output guard's temporary copy, if one
 * was made, warning when the unlink fails.  The recorded name is cleared in
 * every case.
 */
void
remove_output_guard()
{
	if (output_guard_tempname != NULL) {
		if (unlink(output_guard_tempname) == -1) {
			warn(gettext("unable to unlink %s"),
			    output_guard_tempname);
		}
	}

	output_guard_tempname = NULL;
}
/*
 * set_cleanup_chain() records the address of the head pointer of the stream
 * list that atexit_handler() walks when removing temporary files.  strp
 * must not be NULL.
 */
void
set_cleanup_chain(stream_t **strp)
{
	ASSERT(strp != NULL);

	cleanup_chain = strp;
}
/*
 * atexit_handler() cleans up any temporary files outstanding after a fatal
 * signal, a call to die() or at exit().
 *
 * Ordering matters under low storage conditions (when the output file and
 * the temporary files live on the same filesystem): every temporary file on
 * the cleanup chain is removed first, and only then is the guarded file
 * restored from its temporary copy.  The restore is attempted only if that
 * copy was completed.  This is not foolproof, as another writer may have
 * exhausted storage in the meantime.
 */
void
atexit_handler()
{
	stream_t *tmp = NULL;

	if (cleanup_chain != NULL)
		tmp = *cleanup_chain;

	while (tmp != NULL) {
		stream_unlink_temporary(tmp);
		tmp = tmp->s_next;
	}

	if (output_guard_tempname != NULL) {
		if (output_guard_copy_complete) {
			xcp(output_guard_filename, output_guard_tempname,
			    output_guard_size);
		}

		remove_output_guard();
	}

	__S(stats_display());
}
/*
 * strtomem() converts a memory size specification to a byte count.
 *
 * S is parsed as a floating point number optionally followed by a single
 * unit character:
 *
 *	b, B	bytes
 *	k, K	kilobytes (also used when no unit or an unknown unit is given)
 *	m, M	megabytes
 *	g, G	gigabytes
 *	t, T	terabytes
 *	%	percentage (0-100) of physical memory
 *
 * Returns the size in bytes, or 0 if the string cannot be parsed, the value
 * is negative or not a number, a percentage is out of range, physical
 * memory cannot be determined, or the result does not fit in a size_t.
 */
size_t
strtomem(char *S)
{
	double val = 0.0;
	char units = 'k';

	if (sscanf(S, "%lf%c", &val, &units) < 1)
		return (0);

	/*
	 * Phrased so that NaN is rejected as well: every comparison with NaN
	 * is false, and converting NaN to an integer is undefined.
	 */
	if (!(val >= 0.0))
		return (0);

	if (units == '%') {
		long pages, pagesize;

		if (val > 100.0)
			return (0);

		pages = sysconf(_SC_PHYS_PAGES);
		pagesize = sysconf(_SC_PAGESIZE);
		if (pages < 0 || pagesize < 0)
			return (0);

		/*
		 * Scale in floating point so that neither the page count
		 * product nor the division by 100 loses bits.
		 */
		val = val / 100.0 * (double)pages * (double)pagesize;
	} else {
		switch (units) {
		case 't' :		/* terabytes */
		case 'T' :
			val *= 1024;
			/*FALLTHROUGH*/
		case 'g' :		/* gigabytes */
		case 'G' :
			val *= 1024;
			/*FALLTHROUGH*/
		case 'm' :		/* megabytes */
		case 'M' :
			val *= 1024;
			/*FALLTHROUGH*/
		case 'k' :		/* kilobytes */
		case 'K' :
			val *= 1024;
			/*FALLTHROUGH*/
		case 'b' :		/* bytes */
		case 'B' :
			break;
		default :
			/*
			 * default is kilobytes
			 */
			val *= 1024;
			break;
		}
	}

	/*
	 * (double)SIZE_MAX + 1.0 is the first value that no longer fits in a
	 * size_t; anything at or beyond it (including infinity) is refused
	 * rather than converted.
	 */
	if (!(val < (double)SIZE_MAX + 1.0))
		return (0);

	return ((size_t)val);
}
/*
 * available_memory() decides how many bytes of memory sort may use.
 *
 * With an explicit, non-zero mem_limit the limit is honoured, subject to a
 * floor of 64 kilobytes (the floor is not applied in DEBUG builds).  With a
 * zero mem_limit the value AV_MEM_MULTIPLIER / AV_MEM_DIVISOR of the
 * currently available physical memory is used, capped at 16 megabytes and
 * with the same 64 kilobyte floor.
 *
 * The chosen value is handed to stats_set_available_memory() through the
 * __S() wrapper and returned.
 */
size_t
available_memory(size_t mem_limit)
{
	size_t phys_avail = sysconf(_SC_AVPHYS_PAGES) * sysconf(_SC_PAGESIZE);
	size_t avail;

	if (mem_limit == 0) {
		avail = MAX(64 * KILOBYTE, MIN(AV_MEM_MULTIPLIER * phys_avail /
		    AV_MEM_DIVISOR, 16 * MEGABYTE));
	} else {
#ifdef DEBUG
		/*
		 * In the debug case, we want to test the temporary files
		 * handling, so no lower bound on the memory limit is imposed.
		 */
		avail = mem_limit;
#else
		avail = MAX(64 * KILOBYTE, mem_limit);
#endif /* DEBUG */
	}

	__S(stats_set_available_memory(avail));

	return (avail);
}
/*
 * set_memory_ratio() stores a numerator/denominator pair selected by the
 * locale class recorded in S: the C locale, any other single-byte locale,
 * or (by elimination) a wide-character locale.  In each case the
 * denominator is the size of a line_rec_t plus a pointer to one, plus two
 * further terms built from the average line length constants.
 */
void
set_memory_ratio(sort_t *S, int *numerator, int *denominator)
{
	if (S->m_c_locale) {
		*numerator = CHAR_AVG_LINE;
		*denominator = sizeof (line_rec_t) + sizeof (line_rec_t *) +
		    CHAR_AVG_LINE + CHAR_AVG_LINE;
	} else if (S->m_single_byte_locale) {
		*numerator = CHAR_AVG_LINE;
		*denominator = sizeof (line_rec_t) + sizeof (line_rec_t *) +
		    CHAR_AVG_LINE + XFRM_MULTIPLIER * CHAR_AVG_LINE;
	} else {
		*numerator = WCHAR_AVG_LINE;
		*denominator = sizeof (line_rec_t) + sizeof (line_rec_t *) +
		    WCHAR_AVG_LINE + WCHAR_AVG_LINE;
	}
}
/*
 * safe_realloc() is realloc() that does not return on failure: if the
 * allocation cannot be satisfied, die() is called.  It is not meant as an
 * alternative free() mechanism, so a size of zero is disallowed.
 */
void *
safe_realloc(void *ptr, size_t sz)
{
	void *grown;

	ASSERT(sz != 0);

	grown = realloc(ptr, sz);
	if (grown == NULL) {
		die(gettext("unable to reallocate buffer"));
		/*NOTREACHED*/
	}

	return (grown);	/* NULL only if die() returned; keeps gcc happy */
}
/*
 * safe_free() releases ptr; a NULL pointer is accepted and ignored, which
 * free() already guarantees.
 */
void
safe_free(void *ptr)
{
	free(ptr);
}
/*
 * xzmap() maps anonymous memory: it calls mmap() with MAP_ANON added to
 * flags and no file descriptor.  A failed mapping is reported through
 * die(); otherwise the mapped address is returned.
 */
void *
xzmap(void *addr, size_t len, int prot, int flags, off_t off)
{
	void *region = mmap(addr, len, prot, flags | MAP_ANON, -1, off);

	if (region != MAP_FAILED)
		return (region);

	die(gettext("can't mmap anonymous memory"));
	/*NOTREACHED*/
	return (region);
}
/*
 * usage() prints the command synopsis on stderr and exits with status
 * E_USAGE.  It does not return.
 */
void
usage()
{
	(void) fprintf(stderr, gettext(
	    "usage: %s [-cmu] [-o output] [-T directory] [-S mem]"
	    " [-z recsz]\n\t[-dfiMnr] [-b] [-t char] [-k keydef]"
	    " [+pos1 [-pos2]] files...\n"),
	    CMDNAME);

	exit(E_USAGE);
}
/*
* hold_file_descriptor() and release_file_descriptor() reserve a single file
* descriptor entry for later use. We issue the hold prior to any loop that has
* an exit condition based on the receipt of EMFILE from an open() call; once we
* have exited, we can release, typically prior to opening a file for output.
*/
/*
 * Reserve one descriptor by opening /dev/null read-only.  No descriptor may
 * already be held.  If the open fails the program exits through die().
 */
void
hold_file_descriptor()
{
	int fd;

	ASSERT(held_fd == -1);

	fd = open("/dev/null", O_RDONLY);
	held_fd = fd;
	if (fd == -1)
		die(gettext("insufficient available file descriptors\n"));
}
/*
 * Close the descriptor reserved by hold_file_descriptor() and mark the
 * reservation as empty.  A descriptor must currently be held.
 */
void
release_file_descriptor()
{
	ASSERT(held_fd != -1);

	(void) close(held_fd);

	held_fd = -1;
}
/*
 * copy_line_rec() copies the line record at a over the one at b, byte for
 * byte.  Note the argument order: the source comes first.
 */
void
copy_line_rec(const line_rec_t *a, line_rec_t *b)
{
	(void) memcpy(b, a, sizeof (*b));
}
/*
 * trip_eof() makes a pending end-of-file visible: unless the stream's EOF
 * indicator is already set, one character is read and immediately pushed
 * back.  If that read hits end-of-file the indicator becomes set, and
 * pushing back EOF has no effect; otherwise the stream position is
 * unchanged.
 */
void
trip_eof(FILE *f)
{
	int c;

	if (feof(f))
		return;

	c = fgetc(f);
	(void) ungetc(c, f);
}
/*
 * static int cxwrite_flush(int, char *, size_t *)
 *
 * Overview
 *   Writes the first *lenp bytes of buf to fd, resuming after partial writes
 *   from the point reached rather than from the start of the buffer.  On
 *   success *lenp is set to zero.  On failure the bytes that were not
 *   written are moved to the front of buf and *lenp is set to their count,
 *   so the buffer stays consistent; errno is left as write() set it.
 *
 * Return values
 *   0 on success, -1 on error.
 */
static int
cxwrite_flush(int fd, char *buf, size_t *lenp)
{
	size_t done = 0;

	while (done < *lenp) {
		ssize_t n = write(fd, buf + done, *lenp - done);

		if (n < 0) {
			*lenp -= done;
			(void) memmove(buf, buf + done, *lenp);
			return (-1);
		}

		done += (size_t)n;
	}

	*lenp = 0;
	return (0);
}

/*
 * int cxwrite(int, char *, size_t)
 *
 * Overview
 *   cxwrite() implements a buffered version of fwrite(ptr, nbytes, 1, .) on
 *   file descriptors.  Data is accumulated in a single static buffer of
 *   XBUFFER_SIZE bytes, which is written out whenever it fills while more
 *   input remains.  Calling cxwrite() with a NULL ptr flushes whatever is
 *   buffered to fd (nbytes is ignored in that case).
 *
 *   Because there is only one buffer, cxwrite() must be flushed before being
 *   applied to a new file descriptor.
 *
 * Return values
 *   0 on success, -1 if write() fails while emptying the buffer.
 */
int
cxwrite(int fd, char *ptr, size_t nbytes)
{
	static char buffer[XBUFFER_SIZE];
	static size_t offset = 0;
	size_t mbytes;

	if (ptr == NULL)
		return (cxwrite_flush(fd, buffer, &offset));

	while (nbytes != 0) {
		/* Take as much of the input as the buffer has room for. */
		if (offset + nbytes > XBUFFER_SIZE)
			mbytes = XBUFFER_SIZE - offset;
		else
			mbytes = nbytes;

		(void) memcpy(buffer + offset, ptr, mbytes);
		nbytes -= mbytes;
		offset += mbytes;
		ptr += mbytes;

		/* Input left over means the buffer is full; empty it. */
		if (nbytes != 0 && cxwrite_flush(fd, buffer, &offset) != 0)
			return (-1);
	}

	return (0);
}
/*
 * int wxwrite(int, wchar_t *)
 *
 * Overview
 *   wxwrite() implements a buffered write() function for null-terminated wide
 *   character buffers with similar calling semantics to cxwrite().  The wide
 *   string is converted to its multibyte form in a private, growable buffer
 *   and handed to cxwrite(); the terminating null is not written.  Calling
 *   wxwrite() with a NULL ptr flushes cxwrite()'s buffer to fd.
 *
 *   wxwrite() must be flushed before being applied to a new file descriptor.
 *
 * Return values
 *   0 on success, -1 if the string cannot be converted or if cxwrite() fails
 *   to write the current buffer contents.
 */
int
wxwrite(int fd, wchar_t *ptr)
{
	static char *convert_buffer;
	static size_t convert_bufsize = 1024;
	size_t req_bufsize;

	/* Flush request: forward it to cxwrite() for this descriptor. */
	if (ptr == NULL)
		return (cxwrite(fd, NULL, 0));

	if (convert_buffer == NULL)
		convert_buffer = safe_realloc(NULL, convert_bufsize);

	/*
	 * We use wcstombs(NULL, ., .) to find the number of bytes the
	 * conversion needs.  The input was converted into wide character
	 * format earlier, so conversion back is expected to succeed, but
	 * wcstombs() reports failure as (size_t)-1 and that value must not
	 * be used as a length.
	 */
	req_bufsize = wcstombs(NULL, ptr, convert_bufsize);
	if (req_bufsize == (size_t)-1)
		return (-1);

	if (req_bufsize > convert_bufsize) {
		convert_bufsize = req_bufsize + 1;
		convert_buffer = safe_realloc(convert_buffer, convert_bufsize);
	}

	(void) wcstombs(convert_buffer, ptr, convert_bufsize);

	return (cxwrite(fd, convert_buffer, req_bufsize));
}
/*
 * xstreql() returns 1 if the strings a and b are equal, 0 otherwise.
 */
int
xstreql(const char *a, const char *b)
{
	if (strcmp(a, b) != 0)
		return (0);

	return (1);
}
/*
 * xstrneql() returns 1 if the first l characters of a and b are equal (as
 * strncmp() compares them), 0 otherwise.
 */
int
xstrneql(const char *a, const char *b, const size_t l)
{
	if (strncmp(a, b, l) != 0)
		return (0);

	return (1);
}
/*
 * xstrnchr() searches the first n bytes starting at S for the character c.
 * The search is bounded by n alone; a null byte does not end it.
 *
 * Returns a pointer to the first matching byte, or NULL if none of the n
 * bytes matches.  When n is zero no byte is examined and NULL is returned.
 */
char *
xstrnchr(const char *S, const int c, const size_t n)
{
	/*
	 * memchr() compares bytes after converting both sides to unsigned
	 * char, which matches exactly the bytes that compare equal to
	 * (char)c.
	 */
	return (memchr(S, c, n));
}
/*
 * xstrninv() replaces each of the length characters of s beginning at index
 * start with UCHAR_MAX minus its value.  Nothing is changed when length is
 * zero or negative.
 */
void
xstrninv(char *s, ssize_t start, ssize_t length)
{
	const ssize_t end = start + length;
	ssize_t pos = start;

	while (pos < end) {
		s[pos] = UCHAR_MAX - s[pos];
		pos++;
	}
}
/*
 * xwcsneql() returns 1 if the first length wide characters of a and b are
 * equal (as wcsncmp() compares them), 0 otherwise.
 */
int
xwcsneql(const wchar_t *a, const wchar_t *b, const size_t length)
{
	if (wcsncmp(a, b, length) != 0)
		return (0);

	return (1);
}
/*
 * xwsnchr() searches the first n wide characters starting at ws for wc.
 * The search is bounded by n alone; a null wide character does not end it.
 *
 * Returns a pointer to the first match, or NULL if none of the n elements
 * matches.  When n is zero no element is examined and NULL is returned.
 */
wchar_t *
xwsnchr(const wchar_t *ws, const wint_t wc, const size_t n)
{
	return (wmemchr(ws, (wchar_t)wc, n));
}
/*
 * xwcsninv() replaces each of the length wide characters of s beginning at
 * index start with WCHAR_MAX minus its value.  Nothing is changed when
 * length is zero or negative.
 */
void
xwcsninv(wchar_t *s, ssize_t start, ssize_t length)
{
	const ssize_t end = start + length;
	ssize_t pos = start;

	while (pos < end) {
		s[pos] = WCHAR_MAX - s[pos];
		pos++;
	}
}
#ifdef _LITTLE_ENDIAN
/*
 * xwcsntomsb() reverses the byte order of each of the first length wide
 * characters of s in place.  It is compiled only when _LITTLE_ENDIAN is
 * defined and requires wchar_t to be four bytes wide.
 */
void
xwcsntomsb(wchar_t *s, ssize_t length)
{
	ssize_t remaining;

	ASSERT(sizeof (wchar_t) == sizeof (uint32_t));

	for (remaining = length; remaining > 0; remaining--, s++) {
		char *bytes = (char *)s;
		char outer = bytes[0];
		char inner = bytes[1];

		/* Mirror the four bytes: 0 <-> 3 and 1 <-> 2. */
		bytes[0] = bytes[3];
		bytes[1] = bytes[2];
		bytes[2] = inner;
		bytes[3] = outer;
	}
}
#endif /* _LITTLE_ENDIAN */
/*
 * xmemwchar() searches the first length wide characters starting at s for
 * w.  The search is bounded by length alone; a null wide character does not
 * end it.
 *
 * Returns a pointer to the first match, or NULL if there is none or length
 * is not positive.
 *
 * NOTE(review): the previous loop stopped one element early and never
 * examined s[length - 1]; confirm that no caller compensated by passing a
 * length one larger than its buffer.
 */
wchar_t *
xmemwchar(wchar_t *s, wchar_t w, ssize_t length)
{
	if (length <= 0)
		return (NULL);

	return (wmemchr(s, w, (size_t)length));
}
/*
 * xcp() copies the first size bytes of the file named src into the file
 * named dst, creating dst with mode OUTPUT_MODE or truncating it if it
 * exists.  The source is mapped and written in chunks of at most two
 * megabytes.  Nothing is done if either name is NULL.
 *
 * Any failure to open, map, write or close is fatal and is reported through
 * die().
 *
 * NOTE(review): atexit_handler() in this file also calls xcp(); confirm that
 * reaching die(), and therefore exit(), from that context is acceptable.
 */
void
xcp(char *dst, char *src, off_t size)
{
	const size_t chunksize = 2 * MEGABYTE;
	int fd_in, fd_out;
	off_t done = 0;

	if (dst == NULL || src == NULL)
		return;

	if ((fd_in = open(src, O_RDONLY)) < 0)
		die(EMSG_OPEN, src);
	if ((fd_out = open(dst, O_RDWR | O_CREAT | O_TRUNC, OUTPUT_MODE)) < 0)
		die(EMSG_OPEN, dst);

	/*
	 * The file offset is tracked in an off_t so that it cannot wrap for
	 * large files; it is always a multiple of chunksize when passed to
	 * mmap().
	 */
	while (done < size) {
		off_t left = size - done;
		size_t len = (left > (off_t)chunksize) ?
		    chunksize : (size_t)left;
		size_t put = 0;
		void *mm_in;

		if ((mm_in = mmap(0, len, PROT_READ, MAP_SHARED, fd_in,
		    done)) == MAP_FAILED)
			die(EMSG_MMAP, src);

		/* write() may accept fewer bytes than asked; keep going. */
		while (put < len) {
			ssize_t n = write(fd_out, (char *)mm_in + put,
			    len - put);

			if (n <= 0)
				die(EMSG_WRITE, dst);
			put += (size_t)n;
		}

		(void) munmap(mm_in, len);
		done += (off_t)len;
	}

	(void) close(fd_in);

	if (close(fd_out) == -1)
		die(EMSG_CLOSE, dst);
}
/*
 * warn() prints a diagnostic on stderr: the program name (when set), then
 * the caller's printf-style message.  If the format contains no newline,
 * ": " and the text for the value errno had on entry are appended, followed
 * by a newline.
 */
/*PRINTFLIKE1*/
void
warn(const char *format, ...)
{
	const int saved_errno = errno;	/* before stdio can disturb it */
	va_list ap;

	if (pname != NULL)
		(void) fprintf(stderr, gettext(PNAME_FMT), pname);

	va_start(ap, format);
	(void) vfprintf(stderr, format, ap);
	va_end(ap);

	/* A format that supplies its own newline gets no errno text. */
	if (strchr(format, '\n') != NULL)
		return;

	(void) fprintf(stderr, gettext(ERRNO_FMT), strerror(saved_errno));
}
/*
 * die() prints a diagnostic on stderr in the same form as warn() -- program
 * name (when set), the caller's printf-style message, and, if the format
 * contains no newline, the text for the value errno had on entry -- and
 * then exits with status E_ERROR.  It does not return.
 */
/*PRINTFLIKE1*/
void
die(const char *format, ...)
{
	const int saved_errno = errno;	/* before stdio can disturb it */
	const int has_newline = (strchr(format, '\n') != NULL);
	va_list ap;

	if (pname != NULL)
		(void) fprintf(stderr, gettext(PNAME_FMT), pname);

	va_start(ap, format);
	(void) vfprintf(stderr, format, ap);
	va_end(ap);

	if (!has_newline) {
		(void) fprintf(stderr, gettext(ERRNO_FMT),
		    strerror(saved_errno));
	}

	exit(E_ERROR);
}
#ifdef DEBUG
/*
 * pprintc() is called only by xdump().  It prints one character of the text
 * column: whitespace as a blank, other printable characters as themselves,
 * and everything else as a period.
 */
#define BYTES_PER_LINE 16
static void
pprintc(FILE *fp, char c)
{
	if (isspace((uchar_t)c)) {
		(void) fprintf(fp, " ");
		return;
	}

	if (isprint((uchar_t)c)) {
		(void) fprintf(fp, "%c", c);
		return;
	}

	(void) fprintf(fp, ".");
}
/*
 * pprintwc() is the wide character counterpart of pprintc(): whitespace is
 * printed as a blank, other printable wide characters as themselves (using
 * the "%wc" conversion), and everything else as a period.
 */
static void
pprintwc(FILE *fp, wchar_t c)
{
	if (iswspace(c)) {
		(void) fprintf(fp, " ");
		return;
	}

	if (iswprint(c)) {
		(void) fprintf(fp, "%wc", c);
		return;
	}

	(void) fprintf(fp, ".");
}
/*
* xdump() is used only for debugging purposes.
*/
void
xdump(FILE *fp, uchar_t *buf, size_t bufsize, int wide)
{
int i;
size_t nc = 0;
uchar_t d[BYTES_PER_LINE];
for (; nc < bufsize; buf++) {
d[nc % BYTES_PER_LINE] = *buf;
if (nc % BYTES_PER_LINE == 0) {
(void) fprintf(fp, "%08x:", nc);
}
(void) fprintf(fp, " %02x", *buf);
nc++;
if (nc % BYTES_PER_LINE == 0) {
(void) fprintf(fp, " ");
if (wide) {
for (i = 0; i < BYTES_PER_LINE;
i += sizeof (wchar_t))
pprintwc(fp, *(wchar_t *)(d + i));
} else {
for (i = 0; i < BYTES_PER_LINE; i++)
pprintc(fp, d[i]);
}
(void) fprintf(fp, "\n");
}
}
for (i = nc % BYTES_PER_LINE; i < BYTES_PER_LINE; i++)
(void) fprintf(fp, " ");
(void) fprintf(fp, " ");
if (wide) {
for (i = 0; i < nc % BYTES_PER_LINE; i += sizeof (wchar_t))
pprintwc(fp, *(wchar_t *)(d + i));
} else {
for (i = 0; i < nc % BYTES_PER_LINE; i++)
pprintc(fp, d[i]);
}
(void) fprintf(fp, "\n");
}
#endif /* DEBUG */