tr.c revision 3f54fd611f536639ec30dd53c48e5ec1897cc7d9
/***********************************************************************
* *
* This software is part of the ast package *
* Copyright (c) 1992-2012 AT&T Intellectual Property *
* and is licensed under the *
* Eclipse Public License, Version 1.0 *
* by AT&T Intellectual Property *
* *
* A copy of the License is available at *
* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
* *
* Information and Software Systems Research *
* AT&T Research *
* Florham Park NJ *
* *
* Glenn Fowler <gsf@research.att.com> *
* David Korn <dgk@research.att.com> *
* *
***********************************************************************/
#pragma prototyped
/*
* David Korn
* Glenn Fowler
* AT&T Research
*
* tr
*/
/*
 * Usage/help text for tr.
 *
 * The string describes the options (-c, -d, -s, -t), the syntax accepted
 * for set1/set2 (escapes, ranges, repeats, character and equivalence
 * classes) and the operand synopsis "[ set1 [ set2 ] ]".
 * The \a and \b escapes embed literal control characters around argument
 * and command names; presumably they are markup consumed by the option
 * parser, which is not visible in this file -- confirm before editing them.
 */
static const char usage[] =
"[-?\n@(#)$Id: tr (AT&T Research) 2012-05-31 $\n]"
"[+DESCRIPTION?\btr\b copies the standard input to the standard output"
" with substitution or deletion of selected characters. Input"
" characters in \aset1\a are mapped to corresponding characters"
" in \aset2\a.]"
"[c:complement?Complement \aset1\a.]"
"[d:delete?Delete characters in \aset1\a but do not translate.]"
"[s:squeeze-repeats?Replace sequences of the same character with one.]"
"[t:truncate-set1?Truncate \aset1\a to the length of \aset2\a.]"
"[+?\asets\a are specified as strings of characters. Most represent"
" themselves. Interpreted sequences are:]{"
" [+\\nnn?character with octal value \annn\a]"
" [+\\xnn?character with hexadecimal value \ann\a]"
" [+\\\\?backslash]"
" [+\\a?alert]"
" [+\\b?backspace]"
" [+\\f?form feed]"
" [+\\r?return]"
" [+\\t?horizontal tab]"
" [+\\v?vertical tab]"
" [+\\E?escape]"
" [+c1-c2?all characters from \ac1\a to \ac2\a in ascending order]"
" [+[c1-c2]]?same as \ac1-c2\a if both \asets\a use this form]"
" [+[[c*]]]]?in \aset2\a, copies of \\ac\\a until length of \aset1\a]"
" [+[[c*n]]]]?\\an\\a copies of \\ac\\a]"
" [+[[::alnum::]]]]?all letters and digits]"
" [+[[::alpha::]]]]?all letters]"
" [+[[::blank::]]]]?all horizontal whitespace]"
" [+[[::cntrl::]]]]?all control characters]"
" [+[[::digit::]]]]?all digits]"
" [+[[::graph::]]]]?all printable characters, not including space]"
" [+[[::lower::]]]]?all lower case letters]"
" [+[[::print::]]]]?all printable characters, including space]"
" [+[[::punct::]]]]?all punctuation characters]"
" [+[[::space::]]]]?all horizontal or vertical whitespace]"
" [+[[::upper::]]]]?all upper case letters]"
" [+[[::xdigit::]]]]?all hexadecimal digits]"
" [+[[=c=]]]]?all characters which are equivalent to \\ac\\a]"
" }"
"[+?Translation occurs if \b-d\b is not given and both \aset1\a"
" and \aset2\a appear. \b-t\b may be used only when translating."
" \aset2\a is extended to the length of \aset1\a by repeating its last"
" character as necessary. Excess characters in \aset2\a are ignored."
" Only [:lower:]] and [:upper:]] are guaranteed to expand in ascending"
" order. They may only be used in pairs to specify case conversion."
" \b-s\b uses \aset1\a if neither translating nor deleting, otherwise"
" squeeze uses \aset2\a and occurs after translation or deletion.]"
"\n"
"\n[ set1 [ set2 ] ]\n"
"\n"
"[+SEE ALSO?\bsed\b(1), \bascii\b(5)]"
;
#include <cmd.h>
#include <ctype.h>
#include <error.h>
#include <regex.h>
/*
 * TR_COMPLEMENT: flag bit OR-ed into the option flags for -c and tested
 * when the source set is built.
 * NOTE(review): TR_DELETE, TR_SQUEEZE and TR_TRUNCATE are used further down
 * but their definitions are not visible here -- confirm they exist.
 */
#define TR_COMPLEMENT (1<<0)
/*
 * setchar(p,s,t): reset the string-scanning state of handle p to the start
 * of string s with type t: prev and last become -1, isit and count become 0,
 * and both base and next point at s. The expression's value is s.
 * NOTE(review): this assigns (p)->isit, but no isit member is visible in
 * the Tr_t declaration in this view -- confirm against the full file.
 */
#define setchar(p,s,t) ((p)->type=(t),(p)->prev=(p)->last=(-1),(p)->isit=0,(p)->count=0,(p)->base=(p)->next=(s))
/*
 * State of one tr handle.
 * NOTE(review): only the members below are visible; setchar() also touches
 * an isit member that is not declared here, so this declaration may be
 * incomplete in this view.
 */
typedef struct
{
int convert; /* 'l' selects tolower(); any other nonzero value selects toupper() when type is set */
int count; /* reset to 0 by setchar() */
int prev; /* reset to -1 by setchar(); value handed back (possibly case-mapped) by the scanner */
int last; /* reset to -1 by setchar(); >= 0 when the string contains a char class */
int level; /* not referenced in the visible code */
int position; /* not referenced in the visible code */
int src; /* total number of source chars (per the scanner's comment) */
int dst; /* number of destination chars so far (per the scanner's comment) */
int type; /* set by setchar() from its t argument; zero disables case mapping */
int truncate; /* presumably records the -t option -- TODO confirm */
unsigned char* base; /* start of the string being scanned (set by setchar()) */
unsigned char* next; /* current scan position (set to the string start by setchar()) */
unsigned char* hold; /* not referenced in the visible code */
} Tr_t;
/*
 * Return the next character of the set string being scanned.
 * The string pointer is advanced.
 * Returns -1 for end of string.
 * Returns -2 for a string format error.
 *
 * NOTE(review): the function name and parameter list are not visible in
 * this view, and several controlling statements around the bare braces
 * below appear to be missing. The comments here describe only the
 * statements that are actually shown.
 */
static int
{
register int c;
int q;
unsigned char* e;
regclass_t f;
char buf[32];
/*
*/
{
}
/*
 * tr.last>=0 when string contains char class
 */
next:
{
/* hand back tr->prev as is unless both type and convert are set; then 'l' lowercases and anything else uppercases */
return (!tr->type || !tr->convert) ? tr->prev : tr->convert == 'l' ? tolower(tr->prev) : toupper(tr->prev);
}
{
case 0:
break;
case '\\':
break;
case '[':
{
case ':':
f = 0;
{
{
goto member;
}
{
/* string format error */
return -2;
}
}
{
if (f)
c = ':';
goto member;
}
if (f)
case '.':
case '=':
{
/* first byte of buf when q is nonzero, otherwise 0 */
c = q ? buf[0] : 0;
break;
}
/*FALLTHROUGH*/
{
/* advance e until end of string, the delimiter c, or a closing bracket */
while (*++e && *e != c && *e != ']');
if (*e != ']' && *++e == ']')
return -2;
}
default:
{
{
{
return -2;
}
return -2;
{
/*
 * tr->src chars total
 * tr->dst chars so far
 * count what's left
 */
}
goto next;
}
}
break;
}
break;
case '-':
{
/* string format error */
return -2;
goto next;
}
break;
case ']':
{
}
break;
}
}
/*
 * Return a tr handle for <src,dst>.
 * Returns 0 on failure: both the early exit and the bad: label return 0.
 *
 * NOTE(review): the function name and parameter list are not visible here
 * (the caller at the end of the file invokes tropen(src, dst, flags)), and
 * several controlling statements around the bare braces below appear to be
 * missing. Comments describe only what is shown.
 */
static Tr_t*
{
register int c;
register int n;
register int x;
register int squeeze;
{
return 0;
}
{
case TR_DELETE:
case TR_SQUEEZE:
case TR_DELETE|TR_SQUEEZE:
break;
default:
break;
}
dst = 0;
/*
 * NOTE(review): as shown, the statement following this for is "n = 0;",
 * which would make it the loop body and the loop would never terminate.
 * A line between the two appears to be missing -- confirm against the
 * full file.
 */
for (n = 0; n < (1<<CHAR_BIT); n++)
n = 0;
if (src)
{
{
#if DEBUG_TRACE
#endif
/* append the scanned character to the source set */
set[n++] = c;
}
/* a value below -1 is the scanner's format-error code */
if (c < -1)
goto bad;
}
if (flags & TR_COMPLEMENT)
{
/* walk every possible byte value; as shown each one is stored into set */
for (n = c = 0; n < (1<<CHAR_BIT); n++)
set[c++] = n;
}
{
else if (dst)
{
{
#if DEBUG_TRACE
#endif
}
else if (x < -1)
goto bad;
{
{
}
break;
}
}
else
{
/* x is c when squeezing, otherwise 0 */
x = squeeze ? c : 0;
}
}
{
{
#if DEBUG_TRACE
#endif
}
if (x < -1)
goto bad;
}
return tr;
bad:
return 0;
}
/*
 * Close a tr handle.
 *
 * NOTE(review): the function name, parameter list and body are not visible
 * in this view; as shown the body is empty. Confirm against the full file
 * what the handle's release actually does.
 */
void
{
}
/*
 * Translate each char of ip and put the results to op.
 * Stop after <ncopy> bytes are written.
 * Returns nwrite on the normal path, or -1 on the error paths shown below.
 *
 * NOTE(review): the function name and parameter list are not visible here,
 * and several controlling statements around the bare braces below appear
 * to be missing. Comments describe only what is shown.
 */
static ssize_t
{
register int c;
register int oldc = -1;
register unsigned char* inp = 0;
register unsigned char* outp = 0;
register unsigned char* inend;
register unsigned char* outend = 0;
unsigned char* inbuff = 0;
unsigned char* outbuff = 0;
{
{
{
return -1;
}
break;
}
/*
 * process the next input buffer
 */
{
{
/*
 * write out current buffer
 */
{
break;
}
/*
 * get write buffer space:
 * unbounded when ncopy is negative, otherwise the bytes still allowed
 * (ncopy - nwrite); stop if the reservation fails
 */
if (!(outbuff = (unsigned char*)sfreserve(op, (ncopy < 0) ? SF_UNBOUND : (ncopy - nwrite), SF_LOCKR)))
break;
}
{
/* store the output character and advance the output pointer */
*outp++ = c;
}
}
}
{
/* fail unless errno satisfies ERROR_PIPE() */
if (!ERROR_PIPE(errno))
return -1;
}
return nwrite;
}
/*
 * Command entry point: collect option flags, open a tr handle for the
 * operands argv[0] (set1) and argv[1] (set2), and return nonzero when
 * error_info.errors is nonzero.
 *
 * NOTE(review): the function name, parameter list and the option-parsing
 * call that drives the switch are not visible in this view. Comments
 * describe only what is shown.
 */
int
{
register int flags = 0;
/* NOTE(review): flags is already initialized to 0 in its declaration */
flags = 0;
for (;;)
{
{
case 'c':
flags |= TR_COMPLEMENT;
continue;
case 'd':
/* NOTE(review): no flag is set here as shown; a line may be missing -- confirm */
continue;
case 's':
flags |= TR_SQUEEZE;
continue;
case 't':
flags |= TR_TRUNCATE;
continue;
case ':':
continue;
case '?':
continue;
}
break;
}
/* NOTE(review): the statement controlled by this if appears to be missing; as shown it guards the tropen() call below */
if (error_info.errors)
/* set2 (argv[1]) is passed only when set1 (argv[0]) is present */
if (tr = tropen((unsigned char*)argv[0], (unsigned char*)argv[0] ? (unsigned char*)argv[1] : (unsigned char*)0, flags))
{
}
return error_info.errors != 0;
}