/***********************************************************************
* *
* This software is part of the ast package *
* Copyright (c) 1985-2011 AT&T Intellectual Property *
* and is licensed under the *
* Common Public License, Version 1.0 *
* by AT&T Intellectual Property *
* *
* A copy of the License is available at *
* (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
* *
* Information and Software Systems Research *
* AT&T Research *
* Florham Park NJ *
* *
* Glenn Fowler <gsf@research.att.com> *
* David Korn <dgk@research.att.com> *
* Phong Vo <kpv@research.att.com> *
* *
***********************************************************************/
#pragma prototyped
/*
* original code
*
* James A. Woods, Informatics General Corporation,
* NASA Ames Research Center, 6/81.
*
* discipline/method interface
*
* Glenn Fowler
* AT&T Research
* modified from the original BSD source
*
* 'fastfind' scans a file list for the full pathname of a file
* given only a piece of the name. The list is processed with
* with "front-compression" and bigram coding. Front compression reduces
* space by a factor of 4-5, bigram coding by a further 20-25%.
*
* there are 4 methods:
*
* FF_old original with 7 bit bigram encoding (no magic)
* FF_gnu 8 bit clean front compression (FF_gnu_magic)
* FF_typ FF_dir with (mime) types (FF_typ_magic)
*
* the bigram encoding steals the eighth bit (that's why its FF_old)
* maybe one day we'll limit it to readonly:
*
* 0-2*FF_OFF likeliest differential counts + offset to make nonnegative
* FF_ESC 4 byte big-endian out-of-range count+FF_OFF follows
* FF_MIN-FF_MAX ascii residue
* >=FF_MAX bigram codes
*
* a two-tiered string search technique is employed
*
* a metacharacter-free subpattern and partial pathname is matched
* backwards to avoid full expansion of the pathname list
*
* then the actual shell glob-style regular expression (if in this form)
* is matched against the candidate pathnames using the slower regexec()
*
* The original BSD code is covered by the BSD license:
*
* Copyright (c) 1985, 1993, 1999
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "findlib.h"
/*
* this db could be anywhere
* findcodes[] directories are checked for findnames[i]
*/
static char* findcodes[] =
{
0,
0,
};
static char* findnames[] =
{
"find/find.codes",
"locatedb",
"locate.database",
"slocate.db",
};
/*
* convert t to lower case and drop leading x- and x- after /
* converted value copied to b of size n
*/
char*
{
register int c;
register char* b = buf;
if ((*t == 'x' || *t == 'X') && *(t + 1) == '-')
t += 2;
while (c = *t++)
{
if (isupper(c))
c = tolower(c);
if ((*b++ = c) == '/' && (*t == 'x' || *t == 'X') && *(t + 1) == '-')
t += 2;
}
*b = 0;
return buf;
}
/*
* return a fastfind stream handle for pattern
*/
{
register char* p;
register char* s;
register char* b;
register int i;
register int j;
char* path;
int brace = 0;
int paren = 0;
int k;
int q;
int fd;
int uid;
goto nospace;
/*
* NOTE: searching for FIND_CODES would be much simpler if we
* just stuck with our own, but we also support GNU
* locate codes and have to search for the one of a
* bazillion possible names for that file
*/
if (!findcodes[1])
{
goto nospace;
file = 0;
/*
* look for the codes file, but since it may not exist yet,
* also look for the containing directory if i<2 or if
* it is sufficiently qualified (FIND_MATCH)
*/
for (i = 0; i < j; i++)
{
if (*path == '/')
{
{
{
for (k = 0; k < elementsof(findnames); k++)
{
{
break;
}
{
*b = 0;
{
*b = '/';
break;
}
}
}
if (k < elementsof(findnames))
break;
}
{
break;
}
}
{
{
*b = 0;
{
*b = '/';
break;
}
}
}
}
else if (pathpath(path, "", PATH_REGULAR|PATH_READ|PATH_WRITE, fp->encode.file, sizeof(fp->encode.file)))
{
break;
}
{
if (pathpath(fp->encode.file, "", PATH_EXECUTE|PATH_READ|PATH_WRITE, fp->encode.temp, sizeof(fp->encode.temp)) &&
{
break;
}
}
}
if (i >= j)
{
goto drop;
}
{
/*
* FF_old generates temp data that is read
* in a second pass to generate the real codes
*/
{
goto drop;
}
}
else
{
/*
* the rest generate into a temp file that
* is simply renamed on completion
*/
{
*s = 0;
p = path;
}
else
p = ".";
{
(*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "%s: cannot create tmp file in this directory", p ? p : ".");
goto drop;
}
if (s)
*s = '/';
{
goto drop;
}
{
if (!(fp->encode.namedict = dtopen(&fp->encode.namedisc, Dttree)) || !(fp->encode.indexdict = dtopen(&fp->encode.indexdisc, Dttree)) || !(tp = newof(0, Type_t, 1, strlen(s) + 1)))
{
goto drop;
}
/*
*/
}
{
}
else
{
}
}
}
else
{
pattern = "*";
{
return 0;
}
for (i = 0; i < j; i++)
{
if (*path == '/')
{
{
{
for (k = 0; k < elementsof(findnames); k++)
{
{
break;
}
}
break;
}
break;
}
}
else if ((path = pathpath(path, "", PATH_REGULAR|PATH_READ, fp->decode.path, sizeof(fp->decode.path))) && (fp->fp = sfopen(NiL, path, "r")))
break;
}
{
goto drop;
}
{
goto drop;
}
if (fp->secure = ((st.st_mode & (S_IRGRP|S_IROTH)) == S_IRGRP) && st.st_gid == getegid() && getegid() != getgid())
{
goto invalid;
{
i = -i;
break;
}
goto invalid;
if (!(*s++ = fp->decode.bigram2[i] = j) && (i || fp->decode.bigram1[0] >= '0' && fp->decode.bigram1[0] <= '1'))
break;
}
if (streq(b, FF_typ_magic))
{
if (type)
{
}
for (j = 0, i = 1;; i++)
{
goto invalid;
if (!*s)
break;
{
FF_SET_TYPE(fp, i);
j++;
}
}
if (type && !j)
goto drop;
}
else if (streq(b, FF_dir_magic))
else if (streq(b, FF_gnu_magic))
else if (!*b && *--b >= '0' && *b <= '1')
{
{
goto invalid;
}
}
else
{
if (i < 0)
{
goto invalid;
}
{
goto invalid;
goto invalid;
}
goto invalid;
}
/*
* set up the physical dir table
*/
{
{
k = 0;
if (k)
{
goto drop;
goto drop;
p = 0;
/*
* fill the dir list with logical and
* physical names since we don't know
* which way the db was encoded (it
* could be *both* ways)
*/
for (i = q = 0; i < k; i++)
{
goto nospace;
else
*s = '/';
*(s + 1) = 0;
goto nospace;
if (j)
q++;
*s = 0;
*s = '/';
*(s + 1) = 0;
{
goto nospace;
if (j)
q++;
}
}
for (i = 0; i < q; i++)
}
}
}
{
{
(*fp->disc->errorf)(fp, fp->disc, 2, "%s: %s code format does not support directory verification", path, fp->method == FF_gnu ? FF_gnu_magic : "OLD-BIGRAM");
goto drop;
}
}
/*
* extract last glob-free subpattern in name for fast pre-match
* prepend 0 for backwards match
*/
if (p = s = (char*)pattern)
{
for (;;)
{
switch (*b++ = *p++)
{
case 0:
break;
case '\\':
s = p;
if (!*p++)
break;
continue;
case '[':
if (!brace)
{
brace++;
if (*p == ']')
p++;
}
continue;
case ']':
if (brace)
{
brace--;
s = p;
}
continue;
case '(':
if (!brace)
paren++;
continue;
case ')':
s = p;
continue;
case '|':
case '&':
{
s = "";
break;
}
continue;
case '*':
case '?':
s = p;
continue;
default:
continue;
}
break;
}
{
if (i = regcomp(&fp->decode.re, pattern, REG_SHELL|REG_AUGMENTED|(fp->decode.ignorecase?REG_ICASE:0)))
{
{
}
goto drop;
}
}
if (*s)
{
*b++ = 0;
while (i = *s++)
*b++ = i;
*b-- = 0;
if (isupper(*s))
*s = tolower(*s);
}
}
}
return fp;
if (!vm)
return 0;
if (!fp)
{
return 0;
}
goto drop;
drop:
return 0;
}
/*
* return the next fastfind path
* 0 returned when list exhausted
*/
char*
{
register char* p;
register char* q;
register char* s;
register char* b;
register char* e;
register int c;
register int n;
register int m;
int ignorecase;
int t;
unsigned char w[4];
return 0;
{
}
next:
for (;;)
{
{
case FF_dir:
t = 0;
goto grab;
case FF_gnu:
return 0;
if (c == 0x80)
{
return 0;
n = c << 8;
return 0;
n |= c;
if (n & 0x8000)
n = (n - 0xffff) - 1;
}
else if ((n = c) & 0x80)
n = (n - 0xff) - 1;
t = 0;
goto grab;
case FF_typ:
grab:
do
{
return 0;
} while (*p++ = c);
p -= 2;
break;
case FF_old:
if (c == EOF)
{
return 0;
}
if (c == FF_ESC)
{
return 0;
{
{
/*
* the old format uses machine
* byte order; this test uses
* the smallest magnitude of
* both byte orders on the
* first encoded path motion
* to determine the original
* byte order
*/
m = c;
if (m < 0)
m = -m;
if (n < 0)
n = -n;
if (m < n)
else
{
}
}
}
else
}
{
}
else
*p++ = c;
*p-- = 0;
t = 0;
break;
}
else
for (;;)
{
return 0;
/*
* use the ordering and lengths to prune
* comparison function calls
* (*fp->dirs)[*fp->lens]=='/' if its
* already been matched
*/
{
goto next;
break;
}
else if (n == m)
{
{
if (!(n = strcasecmp(*fp->dirs, fp->decode.path)) && (ignorecase || !strcmp(*fp->dirs, fp->decode.path)))
{
if (m > 0)
{
}
break;
}
if (n >= 0)
goto next;
}
}
goto next;
}
{
*p = 0;
else
n = 1;
n = -1;
n = 1;
else
n = 0;
*p = '/';
/*
* n<0 skip this subtree
* n==0 keep as is
* n>0 read this dir now
*/
/* NOT IMPLEMENTED YET */
}
if (FF_OK_TYPE(fp, t))
{
{
if (*(s = p) == '/')
s--;
b--;
for (; s >= b; s--)
{
if (ignorecase)
else
if (!*e)
{
if (!fp->decode.match || strgrpmatch(fp->decode.path, fp->decode.pattern, NiL, 0, STR_MAXIMAL|STR_LEFT|STR_RIGHT|ignorecase))
{
if (*p == '/')
}
break;
}
}
}
{
}
else if (n != REG_NOMATCH)
{
{
}
return 0;
}
}
}
}
/*
* add path to the code table
* paths are assumed to be in sort order
*/
int
{
register unsigned char* s;
register unsigned char* e;
register unsigned char* p;
register int n;
register int d;
register Type_t* x;
register unsigned long u;
return -1;
{
}
s = (unsigned char*)path;
if (len <= 0)
e = s + len++;
else
{
e = s + len;
}
while (s < e)
{
if (*s != *p++)
break;
s++;
}
n = s - (unsigned char*)path;
{
case FF_gnu:
if (d >= -127 && d <= 127)
else
{
}
break;
case FF_old:
p = s;
while (s < e)
{
n = *s++;
if (s >= e)
break;
}
while (p < e)
{
n = '?';
}
break;
case FF_typ:
if (type)
{
u = x->index;
u = 0;
else
{
}
}
else
u = 0;
/*FALLTHROUGH...*/
case FF_dir:
break;
}
return 0;
}
/*
* findsync() helper
*/
static int
{
int r;
{
return -1;
}
{
return -1;
}
if (r)
{
return -1;
}
return 0;
}
/*
* finish the code table
*/
static int
{
register char* s;
register int n;
register int m;
register int d;
register Type_t* x;
char* t;
int b;
long z;
{
case FF_dir:
case FF_gnu:
/*
* replace the real file with the temp file
*/
goto bad;
{
(*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "%s: cannot rename from tmp file %s", fp->encode.file, fp->encode.temp);
return -1;
}
break;
case FF_old:
/*
* determine the top FF_MAX bigrams
*/
for (n = 0; n < FF_MAX; n++)
for (m = 0; m < FF_MAX; m++)
m = 1;
for (n = USHRT_MAX; n >= 0; n--)
{
if ((m += d) > FF_MAX)
break;
}
while (--n >= 0)
for (n = FF_MAX - 1; n >= 0; n--)
for (m = FF_MAX - 1; m >= 0; m--)
{
}
else
/*
* commit the real file
*/
{
return -1;
}
goto badcreate;
/*
* dump the bigrams
*/
/*
* encode the massaged paths
*/
{
z = strtol(s, &t, 0);
s = t;
if (z < 0 || z > 2 * FF_OFF)
{
}
else
while (n = *s++)
{
if (!(m = *s++))
{
break;
}
else
{
}
}
}
goto bad;
break;
case FF_typ:
goto bad;
{
return -1;
}
/*
* commit the output file
*/
goto badcreate;
/*
* write the header magic
*/
/*
* write the type table in index order starting with 1
*/
/*
* append the front compressed strings
*/
{
goto bad;
}
goto bad;
break;
}
return 0;
bad:
{
}
return -1;
}
/*
* close an open fastfind stream
*/
int
{
int n = 0;
if (!fp)
return -1;
{
}
else
{
n = 0;
}
return n;
}