glob.c revision 7c478bd95313f5f23a4c958a745db2134aa03244
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2001 by Sun Microsystems, Inc.
* All rights reserved.
*
* logadm/glob.c -- globbing routines
*
* these routines support two kinds of globs. first, the
* usual kind of filename globbing, like:
*
* *.c
* /var/log/syslog.?
* log[0-9]*file
* /var/apache/logs/x*{access,error}_log
*
* this is basically the same syntax that csh supports for globs and
* is provided by the routine glob_glob() which takes a filename and
* returns a list of filenames that match the glob.
*
* the second type is something called a "reglob" which is a pathname
* where the components are regular expressions as described in regex(3c).
* some examples:
*
* .*\.c
* /var/log/syslog\..
* log[0-9].*file
* /var/log/syslog\.([0-9]+)$0
*
* the last example uses the ()$n form to assign a numeric extension
* on a filename to the "n" value kept by the fn routines with each
* filename (see fn_setn() in fn.c). logadm uses this mechanism to
* correctly sort lognames when templates containing $n are used.
*
* the routine glob_reglob() is used to expand reglobs. glob_glob()
* is implemented by expanding the curly braces, converting the globs
* to reglobs, and then passing the work to glob_reglob().
*
* finally, since expanding globs and reglobs requires doing a stat(2)
* on the files, we store the resulting stat information in the filename
* struct (see fn_setstat() in fn.c).
*
* the glob(3c) routines are not used here since they don't support
* braces, and don't support the more powerful reglobs required by logadm.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <stdio.h>
#include <libintl.h>
#include <stdlib.h>
#include <libgen.h>
#include <strings.h>
#include <sys/types.h>
#include <sys/param.h>
#include <sys/stat.h>
#include <dirent.h>
#include "err.h"
#include "fn.h"
#include "glob.h"
/* forward declarations for functions used internally by this module */
static struct fn_list *glob_debrace(struct fn *fnp);
static struct fn_list *glob_reglob_list(struct fn_list *fnlp);
static boolean_t glob_magic(struct fn *fnp);
/* expand curly braces (like file{one,two,three}name) */
static struct fn_list *
glob_debrace(struct fn *fnp)
{
struct fn_list *ret = fn_list_new(NULL);
struct fn_list *newret;
char *sp = fn_s(fnp);
char *left;
char *right;
char *comma;
/* start with an empty string in the list */
fn_list_adds(ret, "");
/* while braces remain... */
while ((left = strchr(sp, '{')) != NULL)
if ((right = strchr(left, '}')) == NULL) {
err(EF_FILE|EF_JMP, "Missing }");
fn_list_free(ret);
return (NULL);
} else {
/* stuff before "left" is finished */
fn_list_appendrange(ret, sp, left);
/* stuff after "right" still need processing */
sp = right + 1;
if (left + 1 == right)
continue; /* just an empty {} */
/* stuff between "left" and "right" is comma-sep list */
left++;
newret = fn_list_new(NULL);
while ((comma = strchr(left, ',')) != NULL) {
struct fn_list *dup = fn_list_dup(ret);
/* stuff from left to comma is one variant */
fn_list_appendrange(dup, left, comma);
fn_list_addfn_list(newret, dup);
left = comma + 1;
}
/* what's left is the last item in the list */
fn_list_appendrange(ret, left, right);
fn_list_addfn_list(newret, ret);
ret = newret;
}
/* anything remaining in "s" is finished */
fn_list_appendrange(ret, sp, &sp[strlen(sp)]);
return (ret);
}
/* return true if filename contains any "magic" characters (*,?,[) */
static boolean_t
glob_magic(struct fn *fnp)
{
char *s = fn_s(fnp);
for (; *s; s++)
if (*s == '*' ||
*s == '?' ||
*s == '[')
return (B_TRUE);
return (B_FALSE);
}
/*
* glob_glob -- given a filename glob, return the list of matching filenames
*
* fn_setn() and fn_setstat() are called to set the "n" and stat information
* for the resulting filenames.
*/
struct fn_list *
glob_glob(struct fn *fnp)
{
struct fn_list *tmplist = glob_debrace(fnp);
struct fn_list *ret;
struct fn *nextfnp;
struct fn *newfnp;
int magic = 0;
/* debracing produced NULL list? */
if (tmplist == NULL)
return (NULL);
/* see if anything in list contains magic characters */
fn_list_rewind(tmplist);
while ((nextfnp = fn_list_next(tmplist)) != NULL)
if (glob_magic(nextfnp)) {
magic = 1;
break;
}
if (!magic)
return (tmplist); /* no globs to expand */
/* foreach name in the list, call glob_glob() to expand it */
fn_list_rewind(tmplist);
ret = fn_list_new(NULL);
while ((nextfnp = fn_list_next(tmplist)) != NULL) {
newfnp = glob_to_reglob(nextfnp);
fn_list_addfn(ret, newfnp);
}
fn_list_free(tmplist);
tmplist = ret;
ret = glob_reglob_list(tmplist);
fn_list_free(tmplist);
return (ret);
}
/*
* glob_glob_list -- given a list of filename globs, return all matches
*/
struct fn_list *
glob_glob_list(struct fn_list *fnlp)
{
struct fn_list *ret = fn_list_new(NULL);
struct fn *fnp;
fn_list_rewind(fnlp);
while ((fnp = fn_list_next(fnlp)) != NULL)
fn_list_addfn_list(ret, glob_glob(fnp));
return (ret);
}
/*
* glob_reglob -- given a filename reglob, return a list of matching filenames
*
* this routine does all the hard work in this module.
*/
struct fn_list *
glob_reglob(struct fn *fnp)
{
struct fn_list *ret = fn_list_new(NULL);
struct fn_list *newret;
struct fn *nextfnp;
char *mys = STRDUP(fn_s(fnp));
char *sp = mys;
char *slash;
int skipdotfiles;
char *re;
char ret0[MAXPATHLEN];
/* start with the initial directory in the list */
if (*sp == '/') {
fn_list_adds(ret, "/");
while (*sp == '/')
sp++;
} else
fn_list_adds(ret, "./");
/* while components remain... */
do {
if ((slash = strchr(sp, '/')) != NULL) {
*slash++ = '\0';
/* skip superfluous slashes */
while (*slash == '/')
slash++;
}
/* dot files are skipped unless a dot was specifically given */
if (sp[0] == '\\' && sp[1] == '.')
skipdotfiles = 0;
else
skipdotfiles = 1;
/* compile the regex */
if ((re = regcmp("^", sp, "$", (char *)0)) == NULL)
err(EF_FILE|EF_JMP, "regcmp failed on <%s>", sp);
/* apply regex to every filename we've matched so far */
newret = fn_list_new(NULL);
fn_list_rewind(ret);
while ((nextfnp = fn_list_next(ret)) != NULL) {
DIR *dirp;
struct dirent *dp;
/* go through directory looking for matches */
if ((dirp = opendir(fn_s(nextfnp))) == NULL)
continue;
while ((dp = readdir(dirp)) != NULL) {
if (skipdotfiles && dp->d_name[0] == '.')
continue;
*ret0 = '\0';
if (regex(re, dp->d_name, ret0)) {
struct fn *matchfnp = fn_dup(nextfnp);
struct stat stbuf;
int n;
fn_puts(matchfnp, dp->d_name);
if (stat(fn_s(matchfnp), &stbuf) < 0) {
fn_free(matchfnp);
continue;
}
/* skip non-dirs if more components */
if (slash &&
(stbuf.st_mode & S_IFMT) !=
S_IFDIR) {
fn_free(matchfnp);
continue;
}
/*
* component matched, fill in "n"
* value, stat information, and
* append component to directory
* name just searched.
*/
if (*ret0)
n = atoi(ret0);
else
n = -1;
fn_setn(matchfnp, n);
fn_setstat(matchfnp, &stbuf);
if (slash)
fn_putc(matchfnp, '/');
fn_list_addfn(newret, matchfnp);
}
}
(void) closedir(dirp);
}
fn_list_free(ret);
ret = newret;
sp = slash;
} while (slash);
FREE(mys);
return (ret);
}
/* reglob a list of filenames */
static struct fn_list *
glob_reglob_list(struct fn_list *fnlp)
{
struct fn_list *ret = fn_list_new(NULL);
struct fn *fnp;
fn_list_rewind(fnlp);
while ((fnp = fn_list_next(fnlp)) != NULL)
fn_list_addfn_list(ret, glob_reglob(fnp));
return (ret);
}
/*
* glob_to_reglob -- convert a glob (*, ?, etc) to a reglob (.*, ., etc.)
*/
struct fn *
glob_to_reglob(struct fn *fnp)
{
int c;
struct fn *ret = fn_new(NULL);
fn_rewind(fnp);
while ((c = fn_getc(fnp)) != '\0')
switch (c) {
case '.':
case '(':
case ')':
case '^':
case '+':
case '{':
case '}':
case '$':
/* magic characters need backslash */
fn_putc(ret, '\\');
fn_putc(ret, c);
break;
case '?':
/* change '?' to a single dot */
fn_putc(ret, '.');
break;
case '*':
/* change '*' to ".*" */
fn_putc(ret, '.');
fn_putc(ret, '*');
break;
default:
fn_putc(ret, c);
}
return (ret);
}
#ifdef TESTMODULE
/*
* test main for glob module, usage: a.out [-r] [pattern...]
* -r means the patterns are reglobs instead of globs
*/
main(int argc, char *argv[])
{
int i;
int reglobs = 0;
struct fn *argfnp = fn_new(NULL);
struct fn *fnp;
struct fn_list *fnlp;
err_init(argv[0]);
setbuf(stdout, NULL);
for (i = 1; i < argc; i++) {
if (strcmp(argv[i], "-r") == 0) {
reglobs = 1;
continue;
}
if (SETJMP) {
printf(" skipped due to errors\n");
continue;
} else {
printf("<%s>:\n", argv[i]);
fn_renew(argfnp, argv[i]);
if (reglobs)
fnlp = glob_reglob(argfnp);
else
fnlp = glob_glob(argfnp);
}
fn_list_rewind(fnlp);
while ((fnp = fn_list_next(fnlp)) != NULL)
printf(" <%s>\n", fn_s(fnp));
printf("total size: %d\n", fn_list_totalsize(fnlp));
while ((fnp = fn_list_popoldest(fnlp)) != NULL) {
printf(" oldest <%s>\n", fn_s(fnp));
fn_free(fnp);
}
fn_list_free(fnlp);
}
fn_free(argfnp);
err_done(0);
}
#endif /* TESTMODULE */