/***********************************************************************
* *
* This software is part of the ast package *
* Copyright (c) 1992-2010 AT&T Intellectual Property *
* and is licensed under the *
* Common Public License, Version 1.0 *
* by AT&T Intellectual Property *
* *
* A copy of the License is available at *
* http://www.opensource.org/licenses/cpl1.0.txt *
* (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
* *
* Information and Software Systems Research *
* AT&T Research *
* Florham Park NJ *
* *
* Glenn Fowler <gsf@research.att.com> *
* David Korn <dgk@research.att.com> *
* *
***********************************************************************/
#pragma prototyped
/*
* David Korn
* AT&T Research
*
* strings
*/
/*
 * Option and usage description handed to optget() by b_strings().
 * Each bracketed entry describes one option (short name, long name(s),
 * help text and, where present, the argument); the text after the
 * options describes the operands and the SEE ALSO section.
 * This is runtime data: the text is what the command parses and prints.
 */
static const char usage[] =
"[-?\n@(#)$Id: strings (AT&T Research) 2000-04-01 $\n]"
USAGE_LICENSE
"[+NAME?strings - find and display printable strings in files]"
"[+DESCRIPTION?\bstrings\b searches for printable strings in regular files"
" and writes those strings to the standard output. A printable string"
" is any sequence of four (by default) or more printable characters"
" terminated by a newline or NUL character.]"
"[a:all?Scan the entire file. Always enabled in this implementation.]"
"[l:long-strings?Ignore \anewline\a characters as string terminators and"
" display strings using C character escape sequences. These strings"
" are suitably escaped for placement inside C \"...\" and"
" \bksh\b(1) $'...' string literals.]"
"[m:multi-byte?Scan for multibyte strings.]"
"[n:length|bytes?Set the minimum matched string length to \alength\a. For"
" compatibility -\anumber\a is equivalent to"
" \b--length\b=\anumber\a.]#[number:=4]"
"[t:radix|format?Write each string preceded by its byte offset from the"
" start of the file. The offset radix is determined by:]:[format]{"
" [+d?decimal]"
" [+o?octal]"
" [+x?hexadecimal]"
"}"
"[o:octal?Equivalent to \b--radix=o\b.]"
"\n"
"\n[ file ... ]\n"
"\n"
"[+SEE ALSO?\bgrep\b(1), \bnm\b(1), \bwhat\b(1)]"
;
#include <cmd.h>
#include <ctype.h>
/*
 * scan mode bits kept in the flags argument of strings() and outstring()
 */
#define MULTIBYTE 0x01 /* -m; must be 1 because (flags & MULTIBYTE) + 1 is used as the bytes-per-character stride */
#define MULTILINE 0x02 /* -l; records are NUL delimited and special() characters are written as escapes */
/* default minimum string length (initial value of width in b_strings()) */
#define WIDTH 4
/* non-printable characters that strings() still accepts inside a string when MULTILINE is set */
#define special(c) (isspace(c) || (c) == '\a' || (c) == '\b')
/*
 * return the letter that follows the backslash in the C escape
 * sequence for c, or 0 if c is written as is
 *
 * the two tables are parallel: raw[i] is escaped as '\\' sym[i]
 */
static int
mapchar(register int c)
{
	static const char	raw[] = "\a\b\f\n\r\t\v\"'\\";
	static const char	sym[] = "abfnrtv\"'\\";
	register int		i;

	/* the loop stops at the table's terminating NUL, so c == 0 never matches */
	for (i = 0; raw[i]; i++)
		if (raw[i] == c)
			return sym[i];
	return 0;
}
/*
 * write the nbytes characters starting at cp to out followed by a newline
 *
 * with MULTIBYTE set only every second byte is written (the stride is 2)
 * with MULTILINE set any character that mapchar() knows is written as
 * a backslash followed by its escape letter
 *
 * returns the number of characters written, including the added
 * backslashes and the trailing newline
 */
static int
outstring(Sfio_t* out, char* cp, int nbytes, register int flags)
{
	const int	stride = (flags & MULTIBYTE) ? 2 : 1;
	const int	escape = (flags & MULTILINE) != 0;
	int		written = nbytes;
	int		left;
	int		ch;
	int		esc;

	for (left = nbytes; left > 0; left--, cp += stride)
	{
		ch = *cp;
		if (escape && (esc = mapchar(ch)))
		{
			sfputc(out, '\\');
			written++;
			ch = esc;
		}
		sfputc(out, ch);
	}
	sfputc(out, '\n');
	return written + 1;
}
/*
 * scan the stream in for printable strings of at least width characters
 * and write each one, newline terminated, to out
 *
 * format	if not 0, printf flags/width followed by one conversion
 *		character (e.g. "d" or "07o"); each string is then
 *		preceded by its byte offset and a space
 * flags	MULTIBYTE: a counted character is a printable byte that
 *			must be followed by a NUL byte
 *		MULTILINE: records are NUL (not newline) delimited,
 *			special() characters are accepted inside a string
 *			and the output is escaped by outstring()
 *
 * returns 1 on success, 0 if a write to out failed
 */
static int
strings(Sfio_t* in, Sfio_t* out, register int width, char* format, register int flags)
{
	register int		n = 0;		/* characters in the current candidate */
	register int		c;
	register unsigned char*	inp;
	register unsigned char*	inend;
	register int		state = 0;	/* MULTIBYTE: 1 while the NUL after a counted byte is pending */
	unsigned char*		start;		/* first byte of the string being written */
	int			sep;
	int			atend;		/* record ran out without a terminating byte */
	int			escaped;	/* string body is written by outstring() */
	off_t			offset = 0;	/* file offset of the end of the current record */
	off_t			where;		/* file offset of start */
	char			fmt[64];

	escaped = (flags & (MULTIBYTE|MULTILINE)) != 0;
	if (format)
	{
		c = strlen(format) - 1;
		/*
		 * the format must match the arguments passed below: when
		 * outstring() writes the body only the offset is formatted,
		 * otherwise offset and string are written by one sfprintf()
		 */
		if (escaped)
			sfsprintf(fmt, sizeof(fmt), "%%%.*sI*%c ", c, format, format[c]);
		else
			sfsprintf(fmt, sizeof(fmt), "%%%.*sI*%c %%.*s\n", c, format, format[c]);
		if ((offset = sfseek(in, (off_t)0, SEEK_CUR)) < 0)
			offset = 0;
	}
	sep = (flags & MULTILINE) ? 0 : '\n';
	/* complete records first, then whatever unterminated data remains */
	while ((inp = (unsigned char*)sfgetr(in, sep, 0)) || (inp = (unsigned char*)sfgetr(in, sep, -1)))
	{
		c = sfvalue(in);
		inend = inp + c;
		offset += c;
		for (;;)
		{
			atend = inp >= inend;
			if (atend || !(c = *inp++) || (!isprint(c) && (!(flags & MULTILINE) || !special(c))))
			{
				if (n >= width && !state)
				{
					/*
					 * step back over the string and, unless the
					 * record simply ran out, over the terminating
					 * byte that was just consumed
					 *
					 * NOTE(review): n and state persist across
					 * records, so with -l -m start may lie before
					 * the current record; this relies on the
					 * earlier bytes still being addressable --
					 * confirm against the sfgetr() buffer rules
					 */
					start = inp - ((flags & MULTIBYTE) + 1) * n - !atend;
					if (format)
					{
						/* off_t temporary so the value matches the sizeof(offset) given for I* */
						where = offset - (off_t)(inend - start);
						if (escaped)
						{
							if (sfprintf(out, fmt, sizeof(offset), where) < 0)
								return 0;
							n = outstring(out, (char*)start, n, flags);
						}
						else
							n = sfprintf(out, fmt, sizeof(offset), where, n, start);
					}
					else if (escaped)
						n = outstring(out, (char*)start, n, flags);
					else
						n = sfprintf(out, "%.*s\n", n, start);
					if (n < 0)
						return 0;
				}
				/*
				 * a NUL while state is set is the pad byte of the
				 * previous character: keep the count, clear state;
				 * anything else ends the candidate
				 */
				if (c || !state)
					n = 0;
				else
					state = 0;
				if (inp >= inend)
					break;
			}
			else if (state)
				/* printable byte where the NUL pad was expected */
				n = 0;
			else
			{
				if (flags & MULTIBYTE)
					state = 1;
				n++;
			}
		}
	}
	return 1;
}
/*
 * strings builtin entry point
 *
 * parses the options described by usage, then runs strings() on each
 * file operand, or on the standard input when there is no operand or
 * the operand is "-", writing to the standard output
 *
 * returns error_info.errors, which is set non-zero when a file cannot
 * be opened or scanning fails
 */
int
b_strings(int argc, char** argv, void* context)
{
	register int		n;
	register int		width = WIDTH;
	register int		flags = 0;
	register Sfio_t*	fp;
	register char*		cp;
	register char*		format = 0;

	cmdinit(argc, argv, context, ERROR_CATALOG, 0);
	/* a lone "-" first argument is skipped before option parsing */
	if (argv[1] && streq(argv[1], "-"))
		argv++;
	for (;;)
	{
		switch (optget(argv, usage))
		{
		case 'a':
			/* ignore this */
			continue;
		case 'l':
			flags |= MULTILINE;
			continue;
		case 'm':
			flags |= MULTIBYTE;
			continue;
		case 'n':
			if ((width = opt_info.num) <= 0)
				error(2, "%d: width must be positive", opt_info.num);
			continue;
		case 'o':
			/*
			 * NOTE(review): usage says -o is equivalent to
			 * --radix=o but this format yields zero padded
			 * decimal offsets -- confirm which is intended
			 */
			format = "07d";
			continue;
		case 't':
			/* skip optional printf flags, width and precision */
			for (cp = opt_info.arg; (n = *(unsigned char*)cp) == '+' || n == '-' || n == '.' || isdigit(n); cp++);
			/* the conversion character is tested first so that cp[1] is only read when cp[0] is not the terminating NUL */
			if ((n == 'd' || n == 'o' || n == 'x') && !*(cp + 1))
				format = opt_info.arg;
			else
				error(2, "%s: format must be d, o, or x", opt_info.arg);
			continue;
		case ':':
			error(2, "%s", opt_info.arg);
			continue;
		case '?':
			error(ERROR_usage(2), "%s", opt_info.arg);
			continue;
		}
		break;
	}
	argv += opt_info.index;
	if (error_info.errors)
		error(ERROR_usage(2), "%s", optusage(NiL));
	/* cp stays 0 when there are no operands so that the loop reads the standard input once */
	if ((cp = *argv))
		argv++;
	do
	{
		if (!cp || streq(cp, "-"))
			fp = sfstdin;
		else if (!(fp = sfopen(NiL, cp, "r")))
		{
			error(ERROR_system(0), "%s: cannot open", cp);
			error_info.errors = 1;
			continue;
		}
		if (!strings(fp, sfstdout, width, format, flags))
		{
			/* cp is 0 when reading the standard input without an operand */
			error(ERROR_system(0), "%s: failed", cp ? cp : "/dev/stdin");
			error_info.errors = 1;
		}
		if (fp != sfstdin)
			sfclose(fp);
	} while ((cp = *argv++));
	return error_info.errors;
}