ctags.c revision 23a1ccea6aac035f084a7a4cdc968687d1b02daf
/*
*/
/* Copyright (c) 1988 AT&T */
/* All Rights Reserved */
/*
* Copyright (c) 1980 Regents of the University of California.
* All rights reserved. The Berkeley software License Agreement
* specifies the terms and conditions for redistribution.
*/
/*
* Modify ctags to handle C++ in C_entries(), etc:
* - Handles C++ comment token "//"
* - Handles C++ scope operator "::".
* This helps to distinguish between xyz()
* definition and X::xyz() definition.
* - Recognizes C++ reserved word "class" in typedef processing
* (for "-t" option)
* - Handles Sun C++ special file name extensions: .c, .C, .cc, and .cxx.
* Doesn't handle yet:
* - inline functions in class definition (currently they get
* swallowed within a class definition)
* - Tags with scope operator :: with spaces in between,
* e.g. classz ::afunc
*
* Enhance operator functions support:
* - Control flow for scanning operator tokens is now
* consistent with that of other function tokens - the original
* hacking method for 2.0 is removed. This will accurately
* identify tags for declarations of the form 'operator+()'
* (bugid 1027806) as well as allowing spaces in between
* 'operator' and 'oprtk', e.g. 'operator + ()'.
*
*/
#ifndef lint
char copyright[] = "@(#) Copyright (c) 1980 Regents of the University of "
"California.\nAll rights reserved.\n";
#endif
#include <stdio.h>
#include <ctype.h>
#include <locale.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
/*
* ctags: create a tags file
*/
#define bool char
#define TRUE (1)
#define FALSE (0)
/* an overloaded operator token */
/*
* Sorting structure: one record per tag entry.
* "bool" is a macro for char in this file, so the two flag members
* are single bytes.
*/
struct nd_st { /* sorting structure */
char *entry; /* tagged name: a function or type name */
char *file; /* name of the file the entry belongs to */
bool f; /* selects search pattern vs. line number */
int lno; /* line number, used for the -x option */
char *pat; /* search pattern for the entry */
bool been_warned; /* set once a duplicate has been noticed */
};
long ftell();
static bool
number, /* T if on line starting with # */
gotone, /* found a func already on line */
/* boolean "func" (see init) */
/* boolean array for overloadable operator symbols */
static bool _opr[0177];
/*
* typedefs are recognized using a simple finite automaton;
* tydef is its state variable.
*/
static int lineno; /* line number of current line */
static char
*curfile, /* current input file name */
*endtk = " \t\n\"'#()[]{}=-+%*/&|^~!<>;,.:?",
/* token ending chars */
*begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz",
/* token starting chars */
*intk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz"
"0123456789",
/* valid in-token chars */
static int file_num; /* current file number */
static int aflag; /* -a: append to tags */
#ifndef XPG4 /* XPG4: handle typedefs by default */
static int tflag; /* -t: create tags for typedefs */
#endif /* !XPG4 */
static int uflag; /* -u: update tags */
static int wflag; /* -w: suppress warnings */
static int vflag; /* -v: create vgrind style index output */
static int xflag; /* -x: create cxref style output */
static FILE
*inf, /* ioptr for current input file */
*outf; /* ioptr for tags file */
static long lineftell; /* ftell after getc( inf ) == '\n' */
#ifdef __STDC__
#else
#endif
static int infile_fail; /* Count of bad opens. Fix bug ID #1082298 */
static int pfcnt;
static int mac; /* our modified argc, after parseargs() */
static char **mav; /* our modified argv, after parseargs() */
/* our local functions: */
static void init();
static void find_entries(char *file);
static void pfnote();
static void C_entries();
static void Y_entries();
static char *toss_comment(char *start);
static void takeprec();
static void getit();
static int first_char();
static void toss_yysec();
static void Usage();
int
{
int i;
char cmd[100];
#if !defined(TEXT_DOMAIN)
#define TEXT_DOMAIN "SYS_TEST"
#endif
(void) textdomain(TEXT_DOMAIN);
switch (i) {
case 'a': /* -a: Append output to existing tags file */
aflag++;
break;
case 'B': /* -B: Use backward search patterns (?...?) */
searchar = '?';
break;
case 'F': /* -F: Use forward search patterns (/.../) */
searchar = '/';
break;
case 't': /* -t: Create tags for typedefs. */
/* for XPG4 , we silently ignore "-t". */
#ifndef XPG4
tflag++;
#endif /* !XPG4 */
break;
case 'u': /* -u: Update the specified tags file */
uflag++;
break;
case 'v': /* -v: Index listing on stdout */
vflag++;
xflag++;
break;
case 'w': /* -w: Suppress warnings */
wflag++;
break;
case 'x': /* -x: Produce a simple index */
xflag++;
break;
case 'f': /* -f tagsfile: output to tagsfile */
break;
default:
Usage(); /* never returns */
break;
}
}
/* if we didn't specify any source code to parse, complain and die. */
Usage(); /* never returns */
}
init(); /* set up boolean "functions" */
/*
* loop through files finding functions
*/
if (xflag) {
}
if (uflag) {
for (i = 1; i < mac; i++) {
"mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
}
aflag++;
}
exit(1);
}
if (uflag) {
}
}
/*
* This routine sets up the boolean pseudo-functions which work
* by setting boolean flags dependent upon the corresponding character.
* Every char which is NOT in that string is not a white char. Therefore,
* all of the array "_wht" is set to FALSE, and then the elements
* subscripted by the chars in "white" are set to TRUE. Thus "_wht"
* of a char is TRUE if it is in the string "white", else FALSE.
*
* NOTE(review): the statements that fill the tables are not visible in
* this view of the routine; only the loop skeleton is.
*/
static void
init()
{
char *sp;
int i;
/* visits every index below 0177, the size _opr[] is declared with */
for (i = 0; i < 0177; i++) {
/* array of operator symbols */
}
/* mark overloadable operator symbols */
}
/*
* This routine opens the specified file and calls the function
* which finds the function and type definitions.
*/
static void
char *file;
{
char *cp;
/* skip anything that isn't a regular file */
return;
infile_fail++; /* Count bad opens. ID #1082298 */
return;
}
lineno = 0;
#ifdef __STDC__
#else
#endif
/* .l implies lisp or lex source code */
#ifdef __STDC__
#else
#endif
{
return;
} else { /* lex */
/*
* throw away all the code before the second "%%"
*/
toss_yysec();
toss_yysec();
C_entries();
return;
}
}
/* .y implies a yacc file */
toss_yysec();
Y_entries();
C_entries();
return;
}
/*
* Add in file name extension support for Sun C++ which
* permits .C/.c (AT&T), .cc (G++) and .cxx (Gloksp.)
*/
/* if not a .c, .C, .cc, .cxx or .h file, try fortran */
return;
}
}
C_entries();
}
static void
char *name;
int ln;
bool f; /* f == TRUE when function */
{
char *fp;
char *nametk; /* hold temporary tokens from name */
gettext("ctags: too many entries to sort\n"));
}
#ifdef __STDC__
#else
#endif
if (fp == 0)
else
fp++;
#ifdef __STDC__
#else
#endif
/* Chop off .cc and .cxx as well as .c, .h, etc */
*fp = 0;
}
/* remove in-between blanks operator function tags */
#ifdef __STDC__
#else
#endif
{
}
np->f = f;
if (xflag == 0) {
lbuf[50] = 0;
lbuf[50] = 0;
}
else
}
/*
* This routine finds functions and typedefs in C syntax and adds them
* to the list.
*/
static void
{
int c;
int level;
char *sp;
long int tokftell;
level = 0;
lineno++;
for (;;) {
break;
if (c == '\n') {
lineno++;
} else if (c == '\\') {
lineno++;
c = ' ';
}
} else if (incomm) {
if (c == '*') {
continue;
/* c == EOF 1091005 */
if ((c == '\n') || (c == EOF)) {
lineno++;
}
if (c == '/')
}
} else if (inquote) {
/*
* Too dumb to know about \" not being magic, but
* they usually occur in pairs anyway.
*/
if (c == '"')
continue;
} else if (inchar) {
if (c == '\'')
continue;
goto dotoken;
} else switch (c) {
case '"':
continue;
case '\'':
continue;
case '/':
/* Handles the C++ comment token "//" */
if (c == '*')
else if (c == '/') {
/*
* Skip over all the characters after
* "//" until a newline character. Now also
* includes fix for 1091005, check for EOF.
*/
do {
/* 1091005: */
} while ((c != '\n') && (c != EOF));
/*
* Fixed bugid 1030014
* Return the current position of the
* file after the newline.
*/
lineno++;
*--sp = c;
}
else
continue;
case '#':
continue;
case '{':
}
level++;
continue;
case '}':
/*
* Heuristic for function or structure end;
* common for #ifdef/#else blocks to add extra "{"
*/
level = 0; /* reset */
else
level--;
}
goto dotoken;
continue;
}
if (midtoken) {
if (endtoken(c)) {
/*
*
* ':' +---> ':' -> midtok
*
* +---> operator{+,-, etc} -> midtok
* (continue)
* +---> endtok
*/
/*
* Enhance operator function support and
* fix bugid 1027806
*
* For operator token, scanning will continue until
* '(' is found. Spaces between 'operator' and
* 'oprtk' are allowed (e.g. 'operator + ()'), but
* will be removed when the actual entry for the tag
* is made.
* Note that functions of the form 'operator ()(int)'
* will be recognized, but 'operator ()' will not,
* even though this is legitimate in C.
*/
if (optoken(c)) {
if (isoperator) {
if (optfound) {
if (c != '(') {
tp++;
goto next_char;
}
} else {
if (c != ' ') {
}
tp++;
goto next_char;
}
} else {
/* start: this code shifted left for cstyle */
/* This is an overloaded operator */
isoperator = TRUE;
if (c != ' ') {
}
tp++;
goto next_char;
} else if (c == '~') {
/* This is a destructor */
tp++;
goto next_char;
}
/* end: above code shifted left for cstyle */
}
} else if (c == ':') {
tp += 2;
c = *sp;
goto next_char;
} else {
--sp;
}
}
/* start: this code shifted left for cstyle */
{
int f;
gotone = f; /* function */
}
}
/* end: above code shifted left for cstyle */
} else if (intoken(c))
tp++;
} else if (begtoken(c)) {
}
}
sp++;
/* The "c == }" was added to fix #1034126 */
}
}
}
/*
* This routine checks to see if the current token is
* at the start of a function, or corresponds to a typedef.
* It updates the input line so that the '(' will be
* in it when it returns.
*/
static int
int *f;
{
char *sp;
int c;
static bool found;
bool firsttok; /* T if have seen first token in ()'s */
int bad;
*f = 1; /* a function */
c = *sp;
if (!number) { /* space is not allowed in macro defs */
while (iswhite(c)) {
lineno++;
goto ret;
}
}
/* the following tries to make it so that a #define a b(c) */
/* doesn't count as a define of b. */
} else {
found = 0;
else
found++;
if (found >= 2) {
goto ret;
}
}
/* check for the typedef cases */
#ifdef XPG4
#else /* !XPG4 */
#endif /* XPG4 */
goto badone;
}
/* Handles 'class' besides 'struct' etc. */
goto badone;
}
goto badone;
}
goto badone;
}
goto gottydef; /* Fall through to "tydef==end" */
}
*f = 0;
goto ret;
}
if (c != '(')
goto badone;
lineno++;
goto ret;
}
/*
* This line used to confuse ctags:
* int (*oldhup)();
* This fixes it. A nonwhite char before the first
* token, other than a / (in case of a comment in there)
* makes this not a declaration.
*/
if (begtoken(c) || c == '/')
goto badone;
}
lineno++;
break;
}
ret:
if (c == '\n')
lineno--;
/* hack for typedefs */
}
/*
* Y_entries:
* Find the yacc tags and put them in.
*/
static void
{
int brace;
brace = 0;
switch (*sp) {
case '\n':
lineno++;
/* FALLTHROUGH */
case ' ':
case '\t':
case '\f':
case '\r':
break;
case '"':
do {
while (*++sp != '"')
continue;
break;
case '\'':
do {
while (*++sp != '\'')
continue;
break;
case '/':
if (*++sp == '*')
else
--sp;
break;
case '{':
brace++;
break;
case '}':
brace--;
break;
case '%':
return;
break;
case '|':
case ';':
break;
default:
*sp == '.' ||
*sp == '_')) {
++sp;
*sp == '.')
sp++;
sp++;
first_char() == ':')) {
}
else
sp--;
}
break;
}
}
static char *
char *start;
{
char *sp;
/*
* first, see if the end-of-comment is on the same line
*/
do {
#ifdef __STDC__
#else
#endif
return (++sp);
else
lineno++;
/*
* Running this through lint revealed that the original version
* of this routine didn't explicitly return anything, even though
* the return value was always used, so I've added this
* next line.
*/
return (sp);
}
static void
long int where;
{
char *cp;
#ifdef __STDC__
#else
#endif
if (cp)
*cp = 0;
}
static void
{
while (node) {
}
}
static void
{
int dif;
if (dif == 0) {
if (!wflag) {
gettext("Duplicate entry in file %s, line %d: %s\n"),
gettext("Second entry ignored\n"));
}
return;
}
if (!cur_node->been_warned)
if (!wflag) {
"entry in files %s and %s: %s "
"(Warning only)\n"),
}
return;
}
if (dif < 0) {
else
return;
}
else
}
static void
{
char *sp;
return;
/*
* while the code in the following #ifdef section could be combined,
* it's explicitly separated here to make maintenance easier.
*/
#ifdef XPG4
/*
* POSIX 2003: we no longer have a "-t" flag; the logic is
* automatically assumed to be "turned on" here.
*/
if (xflag == 0) {
if (*sp == '\\')
else
} else if (vflag)
else
#else /* XPG4 */
/*
* original way of doing things. "-t" logic is only turned on
* when the user has specified it via a command-line argument.
*/
if (xflag == 0)
if (node->f) { /* a function */
if (*sp == '\\')
else
} else { /* a typedef; text pattern inadequate */
} else if (vflag)
else
#endif /* XPG4 */
}
/*
* Keyword scanner for Fortran-style source text. The function header
* and the enclosing per-line loop are not visible in this view.
*
* Starting with pfcnt at zero, it first tries an optional type keyword
* (integer, real, logical, double precision) and then a unit keyword
* (function, subroutine, program, procedure). A matching unit keyword
* hands over to getit(), which increments pfcnt.
*
* Returns the final value of pfcnt.
*/
static int
{
pfcnt = 0;
lineno++;
dbp++;
if (*dbp == 0)
continue;
/*
* OR-ing in ' ' (0x20) folds an ASCII upper-case letter to lower case,
* so each case label below matches either case of the first letter.
*/
switch (*dbp |' ') {
case 'i':
if (tail("integer"))
takeprec();
break;
case 'r':
if (tail("real"))
takeprec();
break;
case 'l':
if (tail("logical"))
takeprec();
break;
case 'c':
takeprec();
break;
case 'd':
/* "double" must be followed by "precision" to count as a type */
if (tail("double")) {
dbp++;
if (*dbp == 0)
continue;
if (tail("precision"))
break;
continue;
}
break;
}
dbp++;
if (*dbp == 0)
continue;
/* same case-folding trick as above, now for the unit keyword */
switch (*dbp|' ') {
case 'f':
if (tail("function"))
getit();
continue;
case 's':
if (tail("subroutine"))
getit();
continue;
case 'p':
/* 'p' starts two keywords; try "program" before "procedure" */
if (tail("program")) {
getit();
continue;
}
if (tail("procedure"))
getit();
continue;
}
}
return (pfcnt);
}
static int
char *cp;
{
int len = 0;
if (*cp == 0) {
return (1);
}
return (0);
}
/*
* takeprec:
* Called after a type keyword has matched. It moves the scan
* pointer dbp forward and returns early unless the character under
* dbp is '*'.
* NOTE(review): presumably this steps over a "*<size>" suffix that
* follows the type keyword -- the loop conditions are not visible
* in this view, so confirm against the full routine.
*/
static void
takeprec()
{
dbp++;
/* without a '*' there is nothing further to do */
if (*dbp != '*')
return;
dbp++;
dbp++;
/* backing dbp up by one makes the caller's next match fail */
--dbp; /* force failure */
return;
}
do
dbp++;
}
/*
* getit:
* Called once a unit keyword (function, subroutine, program or
* procedure) has matched. When the end of the routine is reached,
* pfcnt is incremented; an earlier return leaves it unchanged.
* NOTE(review): several statements, including whatever consumes the
* temporarily terminated string, are not visible in this view.
*/
static void
getit()
{
char *cp;
char c;
;
*--cp = 0; /* zap newline */
dbp++;
return;
continue;
/*
* Save the character at cp, overwrite it with a NUL terminator, then
* put it back so the underlying buffer is left unmodified.
*/
c = cp[0];
cp[0] = 0;
cp[0] = c;
pfcnt++;
}
static char *
char *cp;
{
int len;
char *dp;
return (dp);
}
#ifndef __STDC__
/*
* Return the ptr in sp at which the character c last
* appears; NULL if not found.
*
* Identical to v7 rindex, included for portability. This fallback is
* compiled only when __STDC__ is not defined.
*
* The comparison runs before the end-of-string test, so the terminating
* NUL itself is examined: searching for '\0' yields a pointer to the
* terminator rather than NULL.
*/
static char *
char *sp, c;
{
char *r;
r = NULL;
do {
/* keep overwriting r so that the last match wins */
if (*sp == c)
r = sp;
} while (*sp++);
return (r);
}
#endif
/*
* lisp tag functions
* just look for (def or (DEF
*/
static void
{
int special;
pfcnt = 0;
lineno++;
if (dbp[0] == '(' &&
dbp += 4;
else
dbp++;
dbp++;
}
}
}
static void
int special;
{
char *cp;
char c;
continue;
*--cp = 0; /* zap newline */
if (*dbp == 0)
return;
if (special) {
#ifdef __STDC__
#else
#endif
return;
cp--;
return;
cp++;
}
else
continue;
c = cp[0];
cp[0] = 0;
cp[0] = c;
pfcnt++;
}
/*
* striccmp:
* Compare two strings over the length of the second, ignoring
* case distinctions. If they are the same, return 0. If they
* are different, return the difference of the first two different
* characters. It is assumed that the pattern (second string) is
* completely lower case.
*/
static int
{
int c1;
while (*pat) {
else
pat++;
str++;
}
return (0);
}
/*
* first_char:
* Return the first non-blank character in the file. After
* finding it, rewind the input file so we start at the beginning
* again.
*/
static int
{
int c;
long off;
if (!isspace(c) && c != '\r') {
return (c);
}
return (EOF);
}
/*
* toss_yysec:
* Toss away code until the next "%%" line.
*/
static void
{
for (;;) {
return;
lineno++;
return;
}
}
/*
* Usage:
* Emit the command synopsis and terminate the process with exit
* status 1; this routine never returns to its caller.
* NOTE(review): the #ifdef XPG4 split presumably selects between two
* option lists -- the branch bodies are not visible in this view.
*/
static void
Usage()
{
#ifdef XPG4
#else /* !XPG4 */
#endif /* XPG4 */
"[-f tagsfile] file ...\n"));
exit(1); /* a usage error always ends the run with status 1 */
}
/*
* parseargs(): modify the args
* the purpose of this routine is to transform any ancient argument
* usage into a format which is acceptable to getopt(3C), so that we
* retain backwards Solaris 2.[0-4] compatibility.
*
* This routine allows us to make full use of getopts, without any
* funny argument processing in main().
*
* The other alternative would be to hand-craft the processed arguments
* during and after getopt(3C) - which usually leads to uglier code
* in main(). I've opted to keep the ugliness isolated down here,
* instead of in main().
*
* In a nutshell, if the user has used the old Solaris syntax of:
* ctags [-aBFtuvwx] [-f tagsfile] filename ...
* We simply change this into:
* ctags [-a] [-B] [-F] [-t] [-u] [-v] [-w] [-x] [-f tags] file...
*
* If the user has specified the new getopt(3C) syntax, we merely
* copy that into our modified argument space.
*/
static void
int ac; /* argument count */
char **av; /* ptr to original argument space */
{
int i; /* current argument */
int a; /* used to parse combined arguments */
int fflag; /* 1 = we're only parsing filenames */
perror("Can't malloc argument space");
exit(1);
}
/* for each argument, see if we need to change things: */
}
/*
* if the argument starts with a "-", and has more than
* 1 flag, then we have to search through each character,
* and separate any flags which have been combined.
*
* so, if we've found a "-" string which needs separating:
*/
if (fflag == 0 && /* not handling filename args */
/* then for each flag after the "-" sign: */
for (a = 1; av[i][a]; a++) {
/* copy the flag into mav space. */
if (a > 1) {
/*
* we need to call realloc() after the
* 1st combined flag, because "ac"
* doesn't include combined args.
*/
mav_sz += sizeof (char *);
(char **)NULL) {
perror("Can't realloc "
"argument space");
exit(1);
}
}
(char *)NULL) {
perror("Can't malloc argument space");
exit(1);
}
++mac;
}
} else {
/* otherwise, just copy the argument: */
perror("Can't malloc argument space");
exit(1);
}
++mac;
}
}
}