/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2004 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* grep - pattern matching program - combined grep, egrep, and fgrep.
* Based on MKS grep command, with XCU & Solaris mods.
*/
/*
* Copyright 1985, 1992 by Mortice Kern Systems Inc. All rights reserved.
*
*/
/* Copyright 2012 Nexenta Systems, Inc. All rights reserved. */
/*
* Copyright 2013 Damian Bogel. All rights reserved.
*/
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include <stdarg.h>
#include <regex.h>
#include <limits.h>
#include <fcntl.h>
#include <stdio.h>
#include <locale.h>
#include <wchar.h>
#include <errno.h>
#include <unistd.h>
#include <wctype.h>
#include <ftw.h>
/*
* Pattern record type. No members are visible in this view of the
* definition.
* NOTE(review): presumably one record per search pattern (a comment in
* fixpatterns() refers to patterns->pattern) -- confirm against the
* complete definition.
*/
typedef struct _PATTERN {
} PATTERN;
static char *cmdname;
static char *prntbuf;
static void addpattern(char *s);
static void fixpatterns(void);
static void usage(void);
static int grep(int, const char *);
static void bmgcomp(char *, int);
static char *bmgexec(char *, char *);
static void process_path(const char *);
static void process_file(const char *, int);
/*
* mainline for grep
*
* Handles each option letter in the switch below, rejects illegal flag
* combinations through usage(), registers patterns with addpattern(),
* prepares them with fixpatterns() and then hands path operands to
* process_path().
*
* Return value: 2 if errors is set, otherwise 0 when matched is non-zero
* and 1 when it is zero.
*/
int
{
char *ap;
int c;
int fflag = 0;
#if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */
#endif
(void) textdomain(TEXT_DOMAIN);
/*
* true if this is running on the multibyte locale
*/
/*
* Skip leading slashes
*/
/*
* One branch turns on REG_EXTENDED and marks egrep mode; the other
* marks fgrep mode.
*/
regflags |= REG_EXTENDED;
egrep++;
} else {
fgrep++;
}
}
switch (c) {
case 'v': /* POSIX: negate matches */
/* nvflag is the "not negated" flag, so -v clears it */
nvflag = 0;
break;
case 'c': /* POSIX: write count */
cflag++;
break;
case 'i': /* POSIX: ignore case */
iflag++;
break;
case 'l': /* POSIX: Write filenames only */
lflag++;
break;
case 'n': /* POSIX: Write line numbers */
nflag++;
break;
case 'r': /* Solaris: search recursively */
rflag++;
break;
case 'b': /* Solaris: Write file block numbers */
bflag++;
break;
case 's': /* POSIX: No error msgs for files */
sflag++;
break;
case 'e': /* POSIX: pattern list */
/* one more -e operand; the list is sized for n_pattern pointers */
n_pattern++;
sizeof (char *) * n_pattern);
if (pattern_list == NULL) {
gettext("%s: out of memory\n"),
cmdname);
exit(2);
}
break;
case 'f': /* POSIX: pattern file */
fflag = 1;
/* one more -f operand; the list is sized for n_file pointers */
n_file++;
sizeof (char *) * n_file);
gettext("%s: out of memory\n"),
cmdname);
exit(2);
}
break;
/* based on options order h or H is set as in GNU grep */
case 'h': /* Solaris: suppress printing of file name */
hflag = 1;
Hflag = 0;
break;
/* Solaris: precede every matching line with the file name */
case 'H':
Hflag = 1;
hflag = 0;
break;
case 'q': /* POSIX: quiet: status only */
qflag++;
break;
case 'w': /* Solaris: treat pattern as word */
wflag++;
break;
case 'x': /* POSIX: full line matches */
xflag++;
regflags |= REG_ANCHOR;
break;
case 'E': /* POSIX: Extended RE's */
regflags |= REG_EXTENDED;
Eflag++;
break;
case 'F': /* POSIX: strings, not RE's */
Fflag++;
break;
case 'R': /* Solaris: like rflag, but follow symlinks */
/* -R implies -r: both flags are raised */
Rflag++;
rflag++;
break;
default:
usage();
}
}
/*
* If we're invoked as egrep or fgrep we need to do some checks
*/
/*
* Use of -E or -F with egrep or fgrep is illegal
*/
usage();
/*
* Don't allow use of wflag with egrep / fgrep
*/
if (wflag)
usage();
/*
* For Solaris the -s flag is equivalent to XCU -q
*/
if (sflag)
qflag++;
/*
* done with above checks - set the appropriate flags
*/
if (egrep)
Eflag++;
else /* Else fgrep */
Fflag++;
}
/*
* -w cannot be specified with grep -F
*/
usage();
}
/*
* -E and -F flags are mutually exclusive - check for this
*/
usage();
/*
* -l overrides -H like in GNU grep
*/
if (lflag)
Hflag = 0;
/*
* -c, -l and -q flags are mutually exclusive
* We have -c override -l like in Solaris.
* -q overrides -l & -c programmatically in grep() function.
*/
lflag = 0;
/*
* Now handling -e and -f option
*/
if (pattern_list) {
for (i = 0; i < n_pattern; i++) {
addpattern(pattern_list[i]);
}
}
if (file_list) {
for (i = 0; i < n_file; i++) {
}
}
/*
* No -e or -f? Make sure there is one more arg, use it as the pattern.
*/
if (argc < 2)
usage();
argc--;
argv++;
}
/*
* If -x flag is not specified or -i flag is specified
* with fgrep in a multibyte locale, need to use
* the wide character APIs. Otherwise, byte-oriented
* processing will be done.
*/
/*
* Compile Patterns and also decide if BMG can be used
*/
fixpatterns();
/* Process all files: stdin, or rest of arg list */
if (argc < 2) {
} else {
process_path(*argv);
}
}
/*
* Return() here is used instead of exit
*/
if (errors)
return (2);
return (matched ? 0 : 1);
}
/*
* Process one path operand.
*
* With rflag set the recursive-search branch below is entered; on
* failure there a "can't open" message is prepared unless sflag is set,
* errors is set to 1, and the function returns. Otherwise the path is
* handed to process_file() with 0 as its second argument.
*/
static void
{
if (rflag) {
/*
* Add trailing slash if arg
* is directory, to resolve symlinks.
*/
}
/*
* Search through subdirs if path is directory.
* Don't follow symlinks if Rflag is not set.
*/
if (!Rflag)
if (!sflag)
gettext("%s: can't open \"%s\"\n"),
errors = 1;
}
return;
}
}
process_file(path, 0);
}
/*
* Read and process all files in directory recursively.
*
* Every return path visible here yields 0.
* NOTE(review): presumably the per-entry callback handed to the file
* tree walk from <ftw.h> -- confirm against the caller.
*/
static int
{
/*
* Process files and follow symlinks if Rflag set.
*/
/* Report broken symlinks and unreadable files */
if (!sflag &&
}
return (0);
}
/* Skip devices and pipes if Rflag is not set */
return (0);
/* Pass offset to relative name from FTW_CHDIR */
return (0);
}
/*
* Opens the file and calls the grep function.
*
* On the open-failure path errors is set to 1, a "can't open" message
* is prepared unless sflag is set, and the function returns.
* The process exits with status 2 on "error writing to stdout".
*/
static void
{
int fd;
errors = 1;
if (!sflag) /* report only when not in silent mode */
"%s: can't open \"%s\"\n"),
return;
}
"%s: error writing to stdout\n"),
cmdname);
exit(2);
}
}
/*
* Add a file of strings to the pattern list.
*
* Every failure path visible here ends the process with exit status 2.
*/
static void
{
char *inbuf;
char *bufp;
/*
* Open the pattern file
*/
exit(2);
}
exit(2);
}
bufused = 0;
/*
* Read in the file, reallocing as we need more memory
*/
/*
* If this line does not fit in the buffer,
* realloc a larger buffer.
*/
gettext("%s: out of memory\n"),
cmdname);
exit(2);
}
continue;
}
}
/* start the next line from an empty buffer */
bufused = 0;
}
}
/*
* Add a string to the pattern list.
*
* The string s is consumed piece by piece: each pass cuts it at np by
* storing a NUL there, and the next pass resumes at np + 1.
* NOTE(review): presumably np marks a newline separating patterns --
* confirm. Allocation failures exit with status 2.
*/
static void
addpattern(char *s)
{
char *wordbuf;
char *np;
for (; ; ) {
/* terminate the current piece in place */
*np = '\0';
"%s: out of memory\n"),
cmdname);
exit(2);
}
if (wflag) {
/*
* Solaris wflag support: Add '<' '>' to pattern to
* select it as a word. Doesn't make sense with -F
* but we're Libertarian.
*/
gettext("%s: out of memory\n"),
cmdname);
exit(2);
}
} else {
gettext("%s: out of memory\n"),
cmdname);
exit(2);
}
}
break;
/* continue with whatever follows the cut point */
s = np + 1;
}
}
/*
* Fix patterns.
* Must do after all arguments read, in case later -i option.
*
* Counts the patterns in npatterns, wraps a pattern in '^' ... '$'
* when fix_pattern is set, prepares fgrep (Fflag) patterns for
* wide-character or byte-oriented matching, and for non-fgrep compiles
* the regular expression. Every failure path exits with status 2.
*/
static void
fixpatterns(void)
{
/*
* As REG_ANCHOR flag is not supported in the current Solaris,
* need to fix the specified pattern if -x is specified with
* grep or egrep
*/
npatterns++;
if (fix_pattern) {
/* '^' pattern '$' */
gettext("%s: out of memory\n"),
cmdname);
exit(2);
}
*cq++ = '^';
*cq++ = '$';
*cq = '\0';
}
if (Fflag) {
if (use_wchar) {
/*
* Fflag && mblocale && iflag
* Fflag && mblocale && !xflag
*/
size_t n;
gettext("%s: out of memory\n"),
cmdname);
exit(2);
}
(size_t)-1) {
gettext("%s: failed to convert "
"\"%s\" to wide-characters\n"),
exit(2);
}
if (iflag) {
wp++) {
}
}
} else {
/*
* Fflag && mblocale && !iflag
* Fflag && !mblocale && iflag
* Fflag && !mblocale && !iflag
*/
if (iflag) {
unsigned char *cp;
}
}
}
/*
* fgrep: No regular expressions.
*/
continue;
}
/*
* For non-fgrep, compile the regular expression,
* give an informative error message, and exit if
* it didn't compile.
*/
gettext("%s: RE error in %s: %s\n"),
exit(2);
}
}
/*
* Decide if we are able to run the Boyer-Moore-Gosper algorithm.
* Use the Boyer-Moore-Gosper algorithm if:
* - fgrep (Fflag)
* - singlebyte locale (!mblocale)
* - no ignoring case (!iflag)
* - no printing line numbers (!nflag)
* - no negating the output (nvflag)
* - only one pattern (npatterns == 1)
* - non zero length pattern (strlen(patterns->pattern) != 0)
*
* It's guaranteed patterns->pattern is still alive
* when Fflag && !mblocale.
*/
}
/*
* Search for a newline from the beginning of the string.
*
* Examines at most len bytes starting at ptr. Returns a pointer to the
* first '\n' found, or NULL when none of those bytes is a newline.
*/
static char *
{
while (len-- != 0) {
if (*ptr++ == '\n') {
/* ptr was advanced past the newline; step back onto it */
return ((char *)--ptr);
}
}
return (NULL);
}
/*
* Search for a newline from the end of the string.
*
* Steps uptr backwards at most len times, testing each byte it lands
* on. Returns a pointer to the first '\n' met that way, or NULL when
* none is found.
* NOTE(review): uptr's starting value is not visible here; presumably
* one past the last byte of the string -- confirm.
*/
static char *
{
while (len--) {
if (*--uptr == '\n') {
return ((char *)uptr);
}
}
return (NULL);
}
/*
* Duplicate the specified string converting each character
* into a lower case.
*
* Returns ibuf, which is not declared locally in the visible code.
* NOTE(review): presumably a buffer reused across calls, so a later
* call may overwrite an earlier result -- confirm.
*/
static char *
{
char *p;
/* ibuf is too small to hold s1 */
exit(2);
}
}
p = ibuf;
/* do/while so that the terminating NUL is processed as well */
do {
} while (*s1++ != '\0');
return (ibuf);
}
/*
* Do grep on a single file.
* Return true if any lines matched.
*
* We have two strategies:
* The fast one is used when we have a single pattern with
* a string known to occur in the pattern. We can then
* do a BMG match on the whole buffer.
* This is an order of magnitude faster.
* Otherwise we split the buffer into lines,
* and check for a match on each line.
*
* Returns 0 early on the no-patterns path and on a read error
* (count < 0); otherwise returns (matches != 0). With qflag set the
* process exits with status 0 at the first match.
*/
static int
{
long long lineno;
return (0); /* no patterns to match -- just return */
if (use_bmg) {
}
cmdname);
exit(2);
}
}
cmdname);
exit(2);
}
}
line_offset = 0;
lineno = 0;
/* newlinep is cleared below when the last line has no newline */
newlinep = 1;
data_len = 0;
for (; ; ) {
long count;
if (data_len == 0) {
/*
* If no data in the buffer, reset ptr
*/
}
/*
* The current data chunk starts from prntbuf.
* This means either the buffer has no data
* or the buffer has no newline.
* So, read more data from input.
*/
if (count < 0) {
/* read error */
if (cflag) {
"%s:", fn);
}
matches);
}
}
return (0);
} else if (count == 0) {
/* no new data */
if (data_len == 0) {
/* end of file already reached */
break;
}
/* last line of file has no newline */
newlinep = 0;
goto L_start_process;
}
}
/*
* Look for newline in the chunk
* between ptr + offset and ptr + data_len - offset.
*/
/* no newline found in this chunk */
/*
* Move remaining data to the beginning
* of the buffer.
* Remaining data lie from ptr for
* data_len bytes.
*/
}
if (data_len == prntbuflen) {
/*
* Not enough room in the buffer
*/
prntbuflen += BUFSIZE;
gettext("%s: out of memory\n"),
cmdname);
exit(2);
}
}
/* read the next input */
continue;
}
/*
* Beginning of the chunk: ptr
* End of the chunk: ptr + data_len
* Beginning of the line: ptr
* End of the line: ptrend
*/
if (use_bmg) {
/*
* Use Boyer-Moore-Gosper algorithm to find out if
* this chunk (not this line) contains the specified
* pattern. If not, restart from the last line
* of this chunk.
*/
char *bline;
/*
* No pattern found in this chunk.
* Need to find the last line
* in this chunk.
*/
/*
* When this chunk does not contain newline,
* ptrend becomes NULL, which should happen
* when the last line of file does not end
* with a newline. At such a point,
* newlinep should have been set to 0.
* Therefore, just after jumping to
* L_skip_line, the main for-loop quits,
* and the line_len value won't be
* used.
*/
goto L_skip_line;
}
/*
* Pattern found not in the first line
* of this chunk.
* Discard the first line.
*/
goto L_skip_line;
}
/*
* Pattern found in the first line of this chunk.
* Using this result.
*/
*ptrend = '\0';
/*
* before jumping to L_next_line,
* need to handle xflag if specified
*/
/* didn't match */
} else {
}
goto L_next_line;
}
lineno++;
/*
* Line starts from ptr and ends at ptrend.
* line_len will be the length of the line.
*/
*ptrend = '\0';
/*
* From now, the process will be performed based
* on the line from ptr to ptrend.
*/
if (use_wchar) {
gettext("%s: out of memory\n"),
cmdname);
exit(2);
}
}
"%s: input file \"%s\": line %lld: invalid multibyte character\n"),
/* never match a line with invalid sequence */
goto L_skip_line;
}
if (iflag) {
}
}
if (xflag) {
/* matched */
break;
}
}
} else {
!= NULL) {
/* matched */
break;
}
}
}
} else if (Fflag) {
/* fgrep in byte-oriented handling */
char *fptr;
if (iflag) {
} else {
}
if (xflag) {
/* fgrep -x */
/* matched */
break;
}
}
} else {
/* matched */
break;
}
}
}
} else {
/* grep or egrep */
int rv;
/* matched */
break;
}
switch (rv) {
case REG_NOMATCH:
break;
case REG_ECHAR:
"%s: input file \"%s\": line %lld: invalid multibyte character\n"),
break;
default:
sizeof (errstr));
"%s: input file \"%s\": line %lld: %s\n"),
exit(2);
}
}
}
/*
* Here, if pp points to non-NULL, something has been matched
* to the pattern.
*/
matches++;
/*
* Handle q, l, and c flags.
*/
if (qflag) {
/* no need to continue */
/*
* End of this line is ptrend.
* We have read up to ptr + data_len.
*/
exit(0);
}
if (lflag) {
break;
}
if (!cflag) {
}
if (bflag) {
(line_offset / BSIZE));
}
if (nflag) {
}
/* put back the newline that was replaced by NUL above */
*ptrend = '\n';
}
return (0);
}
}
if (!newlinep)
break;
}
if (cflag) {
}
if (!qflag) {
}
}
return (matches != 0);
}
/*
* usage message for grep
*
* The first group of synopses lists the option set without -w; the
* group after the else adds -w and the -E and -F forms. Always ends the
* process with exit status 2, so it never returns to the caller.
*/
static void
usage(void)
{
gettext(" [-c|-l|-q] [-r|-R] [-bhHinsvx] "
"pattern_list [file ...]\n"));
gettext(" [-c|-l|-q] [-r|-R] [-bhHinsvx] "
"[-e pattern_list]... "
"[-f pattern_file]... [file...]\n"));
} else {
gettext(" [-c|-l|-q] [-r|-R] [-bhHinsvwx] "
"pattern_list [file ...]\n"));
gettext(" [-c|-l|-q] [-r|-R] [-bhHinsvwx] "
"[-e pattern_list]... "
"[-f pattern_file]... [file...]\n"));
gettext(" -E [-c|-l|-q] [-r|-R] [-bhHinsvx] "
"pattern_list [file ...]\n"));
gettext(" -E [-c|-l|-q] [-r|-R] [-bhHinsvx] "
"[-e pattern_list]... "
"[-f pattern_file]... [file...]\n"));
gettext(" -F [-c|-l|-q] [-r|-R] [-bhHinsvx] "
"pattern_list [file ...]\n"));
gettext(" -F [-c|-l|-q] [-bhHinsvx] [-e pattern_list]... "
"[-f pattern_file]... [file...]\n"));
}
exit(2);
/* NOTREACHED */
}
/*
* Compile literal pattern into BMG tables
*
* The first loop visits every index from 0 to M_CSETSIZE - 1.
* NOTE(review): presumably it seeds a per-character table before the
* pattern is walked -- confirm against the complete body.
*/
static void
{
int i;
int tlen;
for (i = 0; i < M_CSETSIZE; i++) {
}
len--;
}
}
/*
* BMG search.
*
* Returns NULL once k has reached or passed end, and returns s when
* p has reached bmgpat.
* NOTE(review): presumably p walks the pattern backwards, so
* p == bmgpat means the whole pattern matched at s -- confirm.
*/
static char *
{
int t;
char *k, *s, *p;
/* a pattern of length 1 is handled separately */
if (bmglen == 1) {
}
for (; ; ) {
/* inner loop, should be most optimized */
k += t;
}
if (k >= end) {
return (NULL);
}
if (p == bmgpat) {
return (s);
}
}
k++;
}
/* NOTREACHED */
}