od.c revision 84441f85b19f6b8080883f30109e58e43c893709
/*
* This file and its contents are supplied under the terms of the
* Common Development and Distribution License ("CDDL"), version 1.0.
* You may only use this file in accordance with the terms of version
* 1.0 of the CDDL.
*
* A full copy of the text of the CDDL should have accompanied this
* source. A copy of the CDDL is also available via the Internet
* http://www.illumos.org/license/CDDL.
*/
/*
* Copyright 2010 Nexenta Systems, Inc. All rights reserved.
*/
/*
* od - octal dump. Not really just octal anymore; read the POSIX
* specification for it -- it's more complex than you think!
*
* NB: We followed the POSIX semantics fairly strictly, where the
* legacy code's behavior was in conflict. In many cases the legacy
* Solaris code was so completely broken as to be completely unusable.
* (For example, the long double support was broken beyond
* imagination!) Note that GNU coreutils violates POSIX in a few
* interesting ways, such as changing the numbering of the addresses
* when skipping. (Address starts should always be at 0, according to
* the sample output in the Open Group man page.)
*/
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <string.h>
#include <limits.h>
#include <err.h>
#include <wchar.h>
#include <wctype.h>
#include <locale.h>
#include <unistd.h>
#include <sys/stat.h>
#define _(x) gettext(x)
#ifndef TEXT_DOMAIN
#define TEXT_DOMAIN "SYS_TEST"
#endif
/*
 * Program-wide state shared by option parsing, input handling and the
 * dump loop.
 */

/* printf format applied to the current offset to print the address column */
static char *afmt = "%07llo";
/*
 * Filler printed instead of the address on the second and later output
 * formats of a line.  It must be as wide as the address that afmt prints
 * (seven columns) so that the data columns of every format line up.
 */
static char *cfmt = "       ";
/* stream currently being read; NULL once all input is exhausted */
static FILE *input = NULL;
/* least common multiple of the widths of all selected output formats */
static size_t lcm = 1;
/* number of input bytes shown per output line; derived from lcm */
static size_t blocksize = 16;
/* number of entries in files[] */
static int numfiles = 0;
/* index of the next entry of files[] to open */
static int curfile = 0;
/* file operands, or NULL when reading standard input */
static char **files = NULL;
/* bytes still allowed by -N; -1 means no limit */
static off_t limit = -1;
/*
* This structure describes our ring buffer. It's always a power of 2
* in size to make wrap around calculations fast using a mask instead
* of doing modulo.
*
* The size is calculated thusly: We need three "blocks" of data, as
* we process a block at a time (one block == one line of od output.)
*
* We need lookahead of an extra block to support multibyte chars. We
* also have a look behind so that we can avoid printing lines that
* are identical to what we've already printed. Finally, we need the
* current block.
*
* The block size is determined by the least common multiple of the
* data items being displayed. Usually it will be 16, but sometimes
* it is 24 (when 12-byte long doubles are presented.)
*
* The data buffer is allocated via memalign to make sure it is
* properly aligned.
*/
/*
 * Ring buffer shared by refill(), which stores input bytes into it, and
 * the output routines, which read them back.  Indexes are wrapped by
 * ANDing them with mask.
 */
typedef struct buffer {
char *data; /* backing storage; main() allocates mask + 1 bytes */
int prod; /* producer index: where refill() stores the next byte */
int cons; /* consumer index: start of the block main() prints next */
int mask; /* buffer size - 1, ANDed with an index to wrap it around */
int navail; /* bytes read from input and not yet consumed */
} buffer_t;
/*
* This structure is used to provide information on a specific output
* format. We link them together in a list representing the output
* formats that the user has selected.
*/
/*
 * One selected output format.  main() walks the list once per block and
 * calls func for every item of width bytes in that block.
 */
typedef struct output {
int width; /* bytes consumed per call of func */
/* prints the item that starts at the given index of the buffer */
void (*func)(buffer_t *, int); /* output function */
struct output *next; /* next selected format, NULL at the end of the list */
} output_t;
/*
* Specifiers
*/
/*
 * Short names for the item types od can display: uN is an unsigned
 * integer, sN a signed integer (N is the size in bytes on the supported
 * platforms), and fF/fD/fL are float, double and long double.  The names
 * are pasted into function names by DECL_GET/DECL_OUT, so each must be a
 * single identifier.
 */
typedef unsigned char u8;
typedef unsigned short u16;
typedef unsigned int u32;
typedef unsigned long long u64;
/*
 * Plain char may be unsigned (its signedness is implementation-defined),
 * which would make signed one-byte output print unsigned values; spell
 * the signedness out.
 */
typedef signed char s8;
typedef short s16;
typedef int s32;
typedef long long s64;
typedef float fF;
typedef double fD;
typedef long double fL;
/*
 * Write the (localized) command synopsis to standard error and terminate
 * the program with exit status 1.  Does not return.
 */
static void
usage(void)
{
	(void) fprintf(stderr,
	    _("usage: od [-bcCdDfFoOsSvxX] "
	    "[-t types ]... [-A base] [-j skip] [-N count] [file]...\n"));

	exit(1);
}
/*
 * DECL_GET(typ) defines get_<typ>(b, index), which returns the object of
 * type typ whose first byte is at position index of ring buffer b.
 *
 * The value is gathered one byte at a time, wrapping each position with
 * b->mask.  Unlike a load through a cast pointer this has no alignment
 * or aliasing requirements, and it never reads past the end of the
 * buffer when an item straddles the wrap-around point (possible when the
 * block size does not divide the buffer size, e.g. 24-byte blocks).
 */
#define	DECL_GET(typ)							\
static typ								\
get_ ## typ(buffer_t *b, int index)					\
{									\
	typ val;							\
	unsigned char *dst = (unsigned char *)&val;			\
	size_t i;							\
									\
	for (i = 0; i < sizeof (val); i++) {				\
		dst[i] = (unsigned char)				\
		    b->data[(index + (int)i) & b->mask];		\
	}								\
	return (val);							\
}

DECL_GET(u8)
DECL_GET(u16)
DECL_GET(u32)
DECL_GET(u64)
DECL_GET(s8)
DECL_GET(s16)
DECL_GET(s32)
DECL_GET(s64)
DECL_GET(fF)
DECL_GET(fD)
DECL_GET(fL)
/*
 * DECL_OUT(nm, typ, fmt) generates everything needed for one fixed-width
 * numeric format:
 *
 *	do_<nm>(buf, index)	fetches a typ from the buffer with
 *				get_<typ>() and prints it with printf
 *				format fmt;
 *	output_<nm>		the output_t template (width sizeof (typ),
 *				function do_<nm>, no successor) that is
 *				handed to add_out() when the format is
 *				selected.
 *
 * fmt must be a conversion that matches typ after the default argument
 * promotions; in particular the o, u and x conversions take unsigned
 * types, which is why the hexadecimal formats use u32/u64 just like the
 * octal and unsigned decimal ones.
 */
#define	DECL_OUT(nm, typ, fmt)						\
static void								\
do_ ## nm(buffer_t *buf, int index)					\
{									\
	typ v = get_ ## typ(buf, index);				\
	(void) printf(fmt, v);						\
}									\
									\
static output_t output_ ## nm = {					\
	sizeof (typ), do_ ## nm						\
};

/* octal */
DECL_OUT(oct_b, u8, " %03o")
DECL_OUT(oct_w, u16, " %06ho")
DECL_OUT(oct_d, u32, " %011o")
DECL_OUT(oct_q, u64, " %022llo")
/* unsigned decimal */
DECL_OUT(dec_b, u8, " %03u")
DECL_OUT(dec_w, u16, " %05hu")
DECL_OUT(dec_d, u32, " %010u")
DECL_OUT(dec_q, u64, " %020llu")
/* signed decimal */
DECL_OUT(sig_b, s8, " %03d")
DECL_OUT(sig_w, s16, " %6.05hd")
DECL_OUT(sig_d, s32, " %11.010d")
DECL_OUT(sig_q, s64, " %20.019lld")
/* hexadecimal */
DECL_OUT(hex_b, u8, " %02x")
DECL_OUT(hex_w, u16, " %04hx")
DECL_OUT(hex_d, u32, " %08x")
DECL_OUT(hex_q, u64, " %016llx")
/* floating point */
DECL_OUT(float, fF, " %14.7e")
DECL_OUT(double, fD, " %21.14e")
DECL_OUT(ldouble, fL, " %24.14Le")
/*
 * Names printed for "named character" output, indexed by the low seven
 * bits of the byte.  Control characters, space and delete use their
 * POSIX names (0x07 is "bel"); every other character stands for itself.
 *
 * Every entry is exactly three columns wide (right justified) so that,
 * together with the separating blank written by do_ascii(), each byte
 * occupies the same four columns as the other byte-wide formats.
 */
static const char *const ascii[] = {
	"nul", "soh", "stx", "etx", "eot", "enq", "ack", "bel",
	" bs", " ht", " lf", " vt", " ff", " cr", " so", " si",
	"dle", "dc1", "dc2", "dc3", "dc4", "nak", "syn", "etb",
	"can", " em", "sub", "esc", " fs", " gs", " rs", " us",
	" sp", "  !", "  \"", "  #", "  $", "  %", "  &", "  '",
	"  (", "  )", "  *", "  +", "  ,", "  -", "  .", "  /",
	"  0", "  1", "  2", "  3", "  4", "  5", "  6", "  7",
	"  8", "  9", "  :", "  ;", "  <", "  =", "  >", "  ?",
	"  @", "  A", "  B", "  C", "  D", "  E", "  F", "  G",
	"  H", "  I", "  J", "  K", "  L", "  M", "  N", "  O",
	"  P", "  Q", "  R", "  S", "  T", "  U", "  V", "  W",
	"  X", "  Y", "  Z", "  [", "  \\", "  ]", "  ^", "  _",
	"  `", "  a", "  b", "  c", "  d", "  e", "  f", "  g",
	"  h", "  i", "  j", "  k", "  l", "  m", "  n", "  o",
	"  p", "  q", "  r", "  s", "  t", "  u", "  v", "  w",
	"  x", "  y", "  z", "  {", "  |", "  }", "  ~", "del"
};
static void
do_ascii(buffer_t *buf, int index)
{
uint8_t v = get_u8(buf, index);
(void) fputc(' ', stdout);
(void) fputs(ascii[v & 0x7f], stdout);
}
static output_t output_ascii = {
1, do_ascii,
};
/*
 * Output routine for character output.  It is called once per input
 * byte, in order, and prints one four-column field per byte:
 *
 *   - the first byte of a printable (possibly multibyte) character
 *     prints the character itself;
 *   - the remaining bytes of such a character print "**";
 *   - NUL, backspace, form feed, newline, carriage return and tab print
 *     as the C escapes \0 \b \f \n \r \t;
 *   - everything else (invalid sequences, other non-printable
 *     characters and their continuation bytes) prints as three octal
 *     digits.
 *
 * Two pieces of state are kept between calls: nresid, the number of
 * bytes of the current multibyte character still to be accounted for,
 * and printable, whether that character was printed as itself.
 */
static void
do_char(buffer_t *buf, int index)
{
	static int nresid = 0;
	static int printable = 0;
	/*
	 * Holds up to one multibyte character plus a terminating NUL.
	 * MB_CUR_MAX never exceeds MB_LEN_MAX, so this cannot overflow
	 * whatever the locale is.
	 */
	char scratch[MB_LEN_MAX + 1];
	wchar_t wc = 0;
	int avail;
	int cnt;
	int which;
	int nb;
	uint8_t v = get_u8(buf, index);

	/*
	 * If there were residual bytes from an earlier character, then
	 * just display the continuation indication for this one.
	 */
	if (nresid) {
		if (printable) {
			(void) fputs("  **", stdout);
		} else {
			(void) printf(" %03o", v);
		}
		nresid--;
		return;
	}

	/*
	 * Peek ahead up to MB_CUR_MAX bytes.  The bytes may continue in
	 * the next block, so every position is wrapped with the mask.
	 */
	avail = buf->navail;
	if (avail > (int)MB_CUR_MAX)
		avail = (int)MB_CUR_MAX;
	if (avail > MB_LEN_MAX)
		avail = MB_LEN_MAX;
	if (avail < 1)
		avail = 1;

	scratch[0] = v;
	for (cnt = 1, which = index + 1; cnt < avail; cnt++, which++) {
		scratch[cnt] = buf->data[which & buf->mask];
	}

	/* now see if the value is a real character */
	nb = mbtowc(&wc, scratch, avail);
	if (nb < 0) {
		/*
		 * Invalid or truncated sequence.  Put the converter back
		 * into its initial shift state so the failure cannot
		 * affect how the following bytes are decoded.
		 */
		(void) mbtowc(NULL, NULL, 0);
		(void) printf(" %03o", v);
		return;
	}
	if (nb == 0) {
		(void) fputs("  \\0", stdout);
		return;
	}

	/* the next nb - 1 calls belong to this same character */
	nresid = nb - 1;

	if (iswprint(wc)) {
		scratch[nb] = 0;
		(void) fputs("   ", stdout);
		(void) fputs(scratch, stdout);
		printable = 1;
		return;
	}

	printable = 0;
	switch (wc) {
	case 0:
		(void) fputs("  \\0", stdout);
		break;
	case '\b':
		(void) fputs("  \\b", stdout);
		break;
	case '\f':
		(void) fputs("  \\f", stdout);
		break;
	case '\n':
		(void) fputs("  \\n", stdout);
		break;
	case '\r':
		(void) fputs("  \\r", stdout);
		break;
	case '\t':
		(void) fputs("  \\t", stdout);
		break;
	default:
		(void) printf(" %03o", v);
		break;
	}
}

/* template handed to add_out(); one byte is consumed per call */
static output_t output_char = {
	1, do_char,
};
/*
* List of output formatting structures.
*/
/* first selected output format, in command-line order */
static output_t *head = NULL;
/* where the next format gets linked in: &head, or the last node's next */
static output_t **tailp = &head;

/*
 * Append a private copy of the format template src to the list of
 * selected output formats, and recompute the line geometry:
 *
 *   lcm       becomes the smallest multiple of the old lcm that is also
 *             a multiple of the new format's item width;
 *   blocksize becomes lcm doubled until it is at least 16.
 *
 * Exits with a diagnostic if the copy cannot be allocated.
 */
static void
add_out(output_t *src)
{
	output_t *node;
	int multiple;

	node = malloc(sizeof (*node));
	if (node == NULL) {
		err(1, "malloc");
	}

	for (multiple = lcm; (multiple % src->width) != 0; multiple += lcm)
		;
	lcm = multiple;

	for (blocksize = lcm; blocksize < 16; blocksize *= 2)
		;

	*node = *src;
	*tailp = node;
	tailp = &node->next;
}
/*
 * Advance to the next file operand that can be opened.
 *
 * Starting at files[curfile], each name is opened for reading, reusing
 * the current stream through freopen() when there is one.  A name that
 * cannot be opened is reported with warn() and skipped.  curfile is left
 * just past the name that was opened.
 *
 * Returns the stream (also stored in the global input), or NULL when no
 * file operands remain.
 */
static FILE *
next_input(void)
{
	while (curfile < numfiles) {
		const char *path = files[curfile];

		if (input != NULL)
			input = freopen(path, "r", input);
		else
			input = fopen(path, "r");

		if (input != NULL) {
			curfile++;
			return (input);
		}

		warn("open: %s", path);
		curfile++;
	}
	return (NULL);
}
/*
 * Top up ring buffer b from the input stream(s) until it holds at least
 * two blocks of unread data or the input is exhausted.
 *
 * Data is loaded one block at a time.  A block may be assembled from
 * several reads and from several files: when the current stream reaches
 * end of file (or fails) reading continues with next_input().  Reading
 * stops for good once the -N limit has been used up, at which point the
 * stream is closed and input is set to NULL.
 *
 * Whatever part of the last block could not be filled with data is
 * zero-filled, so the output routines always see a complete block.
 * Only bytes actually read are added to b->navail.
 */
static void
refill(buffer_t *b)
{
	while ((input != NULL) && (b->navail < (2 * blocksize))) {
		int want;
		int zero;

		/* we preload the next one in advance */
		if (limit == 0) {
			(void) fclose(input);
			input = NULL;
			continue;
		}

		/* we want to read a whole block if possible */
		want = blocksize;
		if ((limit >= 0) && (want > limit)) {
			want = limit;
		}
		zero = blocksize;

		while (want && input) {
			size_t n;
			int room;
			int c;

			b->prod &= b->mask;

			/*
			 * A single read must not run off the end of the
			 * buffer.  There are (mask + 1) - prod bytes left
			 * before the wrap-around point, which is always at
			 * least one, so we never issue a zero-length read
			 * that would be mistaken for end of file.
			 */
			room = (b->mask + 1) - b->prod;
			c = (want > room) ? room : want;

			n = fread(b->data + b->prod, 1, (size_t)c, input);
			if (n == 0) {
				/* fread() reports errors via ferror() */
				if (ferror(input)) {
					warn("read: %s",
					    files ? files[curfile-1] : "stdin");
				}
				input = next_input();
				continue;
			}
			if (limit >= 0)
				limit -= n;
			b->navail += (int)n;
			b->prod += (int)n;
			want -= (int)n;
			zero -= (int)n;
		}

		/* pad the unfilled remainder of the block with zeros */
		while (zero) {
			b->data[b->prod & b->mask] = 0;
			b->prod++;
			b->prod &= b->mask;
			zero--;
		}
	}
}
#define STR1 "C1"
#define STR2 "S2"
#ifdef _LP64
#define STR8 "L8"
#define STR4 "I4"
#else
#define STR8 "8"
#define STR4 "IL4"
#endif
static void
do_type_string(char *typestr)
{
if (*typestr == 0) {
errx(1, _("missing type string"));
}
while (*typestr) {
switch (*typestr) {
case 'a':
typestr++;
add_out(&output_ascii);
break;
case 'c':
add_out(&output_char);
typestr++;
break;
case 'f':
typestr++;
switch (*typestr) {
case 'F':
case '4':
add_out(&output_float);
typestr++;
break;
case '8':
case 'D':
add_out(&output_double);
typestr++;
break;
case 'L':
add_out(&output_ldouble);
typestr++;
break;
default:
add_out(&output_float);
break;
}
break;
case 'd':
typestr++;
if (strchr(STR1, *typestr)) {
typestr++;
add_out(&output_sig_b);
} else if (strchr(STR2, *typestr)) {
typestr++;
add_out(&output_sig_w);
} else if (strchr(STR4, *typestr)) {
typestr++;
add_out(&output_sig_d);
} else if (strchr(STR8, *typestr)) {
typestr++;
add_out(&output_sig_q);
} else {
add_out(&output_sig_d);
}
break;
case 'u':
typestr++;
if (strchr(STR1, *typestr)) {
typestr++;
add_out(&output_dec_b);
} else if (strchr(STR2, *typestr)) {
typestr++;
add_out(&output_dec_w);
} else if (strchr(STR4, *typestr)) {
typestr++;
add_out(&output_dec_d);
} else if (strchr(STR8, *typestr)) {
typestr++;
add_out(&output_dec_q);
} else {
add_out(&output_dec_d);
}
break;
case 'o':
typestr++;
if (strchr(STR1, *typestr)) {
typestr++;
add_out(&output_oct_b);
} else if (strchr(STR2, *typestr)) {
typestr++;
add_out(&output_oct_w);
} else if (strchr(STR4, *typestr)) {
typestr++;
add_out(&output_oct_d);
} else if (strchr(STR8, *typestr)) {
typestr++;
add_out(&output_oct_q);
} else {
add_out(&output_oct_d);
}
break;
case 'x':
typestr++;
if (strchr(STR1, *typestr)) {
typestr++;
add_out(&output_hex_b);
} else if (strchr(STR2, *typestr)) {
typestr++;
add_out(&output_hex_w);
} else if (strchr(STR4, *typestr)) {
typestr++;
add_out(&output_hex_d);
} else if (strchr(STR8, *typestr)) {
typestr++;
add_out(&output_hex_q);
} else {
add_out(&output_hex_d);
}
break;
default:
errx(1, _("unrecognized type string character: %c"),
*typestr);
exit(1);
}
}
}
int
main(int argc, char **argv)
{
int c;
int i;
buffer_t buffer;
boolean_t first = B_TRUE;
boolean_t doall = B_FALSE;
boolean_t same = B_FALSE;
boolean_t newarg = B_FALSE;
off_t offset = 0;
off_t skip = 0;
char *eptr;
char *offstr = 0;
input = stdin;
(void) setlocale(LC_ALL, "");
(void) textdomain(TEXT_DOMAIN);
while ((c = getopt(argc, argv, "A:bCcdDfFj:N:oOsSxXvt:")) != EOF) {
switch (c) {
case 'A':
newarg = B_TRUE;
if (strlen(optarg) > 1) {
afmt = NULL;
}
switch (*optarg) {
case 'o':
afmt = "%07llo";
cfmt = " ";
break;
case 'd':
afmt = "%07lld";
cfmt = " ";
break;
case 'x':
afmt = "%07llx";
cfmt = " ";
break;
case 'n':
/*
* You could argue that the code should
* use the same 7 spaces. Legacy uses 8
* though. Oh well. Better to avoid
* gratuitous change.
*/
afmt = " ";
cfmt = " ";
break;
default:
afmt = NULL;
break;
}
if (strlen(optarg) != 1) {
afmt = NULL;
}
if (afmt == NULL)
warnx(_("invalid address base, "
"must be o, d, x, or n"));
break;
case 'b':
add_out(&output_oct_b);
break;
case 'c':
case 'C':
add_out(&output_char);
break;
case 'f':
add_out(&output_float);
break;
case 'F':
add_out(&output_double);
break;
case 'd':
add_out(&output_dec_w);
break;
case 'D':
add_out(&output_dec_d);
break;
case 't':
newarg = B_TRUE;
do_type_string(optarg);
break;
case 'o':
add_out(&output_oct_w);
break;
case 'O':
add_out(&output_oct_d);
break;
case 's':
add_out(&output_sig_w);
break;
case 'S':
add_out(&output_sig_d);
break;
case 'x':
add_out(&output_hex_w);
break;
case 'X':
add_out(&output_hex_d);
break;
case 'v':
doall = B_TRUE;
break;
case 'j':
newarg = B_TRUE;
skip = strtoll(optarg, &eptr, 0);
if (*eptr == 'b') {
skip <<= 9; /* 512 bytes */
eptr++;
} else if (*eptr == 'k') {
skip <<= 10; /* 1k */
eptr++;
} else if (*eptr == 'm') {
skip <<= 20; /* 1m */
eptr++;
} else if (*eptr == 'g') {
skip <<= 30; /* 1g */
eptr++;
}
if ((skip < 0) || (eptr[0] != 0)) {
warnx(_("invalid skip count '%s' specified"),
optarg);
exit(1);
}
break;
case 'N':
newarg = B_TRUE;
limit = strtoll(optarg, &eptr, 0);
/*
* POSIX doesn't specify this, but I think these
* may be helpful.
*/
if (*eptr == 'b') {
limit <<= 9;
eptr++;
} else if (*eptr == 'k') {
limit <<= 10;
eptr++;
} else if (*eptr == 'm') {
limit <<= 20;
eptr++;
} else if (*eptr == 'g') {
limit <<= 30;
eptr++;
}
if ((limit < 0) || (eptr[0] != 0)) {
warnx(_("invalid byte count '%s' specified"),
optarg);
exit(1);
}
break;
default:
usage();
break;
}
}
/* this finds the smallest power of two size we can use */
buffer.mask = (1 << (ffs(blocksize * 3) + 1)) - 1;
buffer.data = memalign(16, buffer.mask + 1);
if (buffer.data == NULL) {
err(1, "memalign");
}
/*
* Wow. This option parsing is hideous.
*
* If the we've not seen a new option, and there is just one
* operand, if it starts with a "+", then treat it as an
* offset. Otherwise if two operands, and the second operand
* starts with + or a digit, then it is an offset.
*/
if (!newarg) {
if (((argc - optind) == 1) && (argv[optind][0] == '+')) {
offstr = argv[optind];
argc--;
} else if (((argc - optind) == 2) &&
(strchr("+0123456789", (argv[optind + 1][0])) != NULL)) {
offstr = argv[optind + 1];
argc--;
}
}
if (offstr) {
int base = 0;
int mult = 1;
int l;
if (*offstr == '+') {
offstr++;
}
l = strlen(offstr);
if ((strncmp(offstr, "0x", 2) == 0)) {
afmt = "%07llx";
base = 16;
offstr += 2;
if (offstr[l - 1] == 'B') {
offstr[l - 1] = 0;
l--;
mult = 512;
}
} else {
base = 8;
afmt = "%07llo";
if ((offstr[l - 1] == 'B') || (offstr[l - 1] == 'b')) {
offstr[l - 1] = 0;
l--;
mult = 512;
}
if (offstr[l - 1] == '.') {
offstr[l - 1] = 0;
base = 10;
afmt = "%07lld";
}
}
skip = strtoll(offstr, &eptr, base);
if (*eptr != '\0') {
errx(1, _("invalid offset string specified"));
}
skip *= mult;
offset += skip;
}
/*
* Allocate an array for all the input files.
*/
if (argc > optind) {
files = calloc(sizeof (char *), argc - optind);
for (i = 0; i < argc - optind; i++) {
files[i] = argv[optind + i];
numfiles++;
}
input = next_input();
} else {
input = stdin;
}
/*
* We need to seek ahead. fseek would be faster.
*/
while (skip && (input != NULL)) {
struct stat sbuf;
/*
* Only fseek() on regular files. (Others
* we have to read().
*/
if (fstat(fileno(input), &sbuf) < 0) {
warn("fstat: %s", files[curfile-1]);
input = next_input();
continue;
}
if (S_ISREG(sbuf.st_mode)) {
/*
* No point in seeking a file that is too
* short to begin with.
*/
if (sbuf.st_size < skip) {
skip -= sbuf.st_size;
input = next_input();
continue;
}
if (fseeko(input, skip, SEEK_SET) < 0) {
err(1, "fseek:%s", files[curfile-1]);
}
/* Done seeking. */
skip = 0;
break;
}
/*
* fgetc seems like it would be slow, but it uses
* buffered I/O, so it should be fast enough.
*/
flockfile(input);
while (skip) {
if (getc_unlocked(input) == EOF) {
funlockfile(input);
if (ferror(input)) {
warn("read: %s", files[curfile-1]);
}
input = next_input();
if (input != NULL) {
flockfile(input);
}
break;
}
skip--;
}
if (input != NULL)
funlockfile(input);
}
if (head == NULL) {
add_out(&output_oct_w);
}
buffer.navail = 0;
buffer.prod = 0;
buffer.cons = 0;
for (refill(&buffer); buffer.navail > 0; refill(&buffer)) {
output_t *out;
int mx;
int j, k;
/*
* If this buffer was the same as last, then just
* dump an asterisk.
*/
if ((!first) && (buffer.navail >= blocksize) && (!doall)) {
j = buffer.cons;
k = j - blocksize;
for (i = 0; i < blocksize; i++) {
if (buffer.data[j & buffer.mask] !=
buffer.data[k & buffer.mask]) {
break;
}
j++;
k++;
}
if (i == blocksize) {
if (!same) {
(void) fputs("*\n", stdout);
same = B_TRUE;
}
buffer.navail -= blocksize;
offset += blocksize;
buffer.cons += blocksize;
buffer.cons &= buffer.mask;
continue;
}
}
first = B_FALSE;
same = B_FALSE;
mx = (buffer.navail > blocksize) ? blocksize : buffer.navail;
for (out = head; out != NULL; out = out->next) {
if (out == head) {
/*LINTED E_SEC_PRINTF_VAR_FMT*/
(void) printf(afmt, offset);
} else {
(void) fputs(cfmt, stdout);
}
for (i = 0, j = buffer.cons; i < mx; i += out->width) {
out->func(&buffer, j);
j += out->width;
j &= buffer.mask;
}
(void) fputs("\n", stdout);
}
buffer.cons += mx;
buffer.cons &= buffer.mask;
offset += mx;
buffer.navail -= mx;
}
/*LINTED E_SEC_PRINTF_VAR_FMT*/
(void) printf(afmt, offset);
(void) fputs("\n", stdout);
return (0);
}