iconv.c revision 3f54fd611f536639ec30dd53c48e5ec1897cc7d9
/***********************************************************************
* *
* This software is part of the ast package *
* Copyright (c) 1989-2012 AT&T Intellectual Property *
* and is licensed under the *
* Eclipse Public License, Version 1.0 *
* by AT&T Intellectual Property *
* *
* A copy of the License is available at *
* http://www.eclipse.org/org/documents/epl-v10.html *
* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
* *
* Information and Software Systems Research *
* AT&T Research *
* Florham Park NJ *
* *
* Glenn Fowler <gsf@research.att.com> *
* *
***********************************************************************/
#pragma prototyped
/*
* Glenn Fowler
* AT&T Research
*/
static const char usage[] =
"[-?\n@(#)$Id: iconv (AT&T Research) 2011-01-11 $\n]"
USAGE_LICENSE
"[+NAME?iconv - codeset conversion]"
"[+DESCRIPTION?\biconv\b converts the encoding of characters in the \afile\a"
" operands from one codeset to another and writes the results to"
" the standard output. If \afile\a is \b-\b or omitted then the"
" standard input is read.]"
"[+?Character encodings in either codeset may include single-byte values"
" (for example, for the ISO 8859-1:1987 standard characters) or"
" multi-byte values (for example, for certain characters in the"
" ISO 6937:1983 standard). Invalid characters in the input stream"
" (either those that are not valid members of the input codeset or"
" those that have no corresponding value in the output codeset) are"
" output as the underscore character (\b_\b) in the output codeset.]"
"[+?The \bnative\b codeset is determined by the \bLANG\b, \bLC_ALL\b and"
" \bLC_CTYPE\b environment variables. The supported codesets"
" are matched by these left-anchored case-insensitive \bksh\b(1)"
" patterns:]{\fcodesets\f}"
"[+?Conversion between certain codesets may not be supported. Also, since the"
" standard(s) provide no support for listing the known codesets, the"
" above list may be incomplete.]"
"[a:all?List all conversion errors. By default (and \b--omit\b is not "
"specified) \biconv\b stops after the first error.]"
"[c:omit?Omit invalid input characters from the output. Invalid input "
"characters still affect the exit status.]"
"[e:errors?Do not ignore conversion errors.]"
"[f:from?The input codeset is set to \acodeset\a.]:[codeset:=native]"
"[i:ignore?Ignore conversion errors.]"
"[l:list?List all known codesets on the standard output.]"
"[s:silent?Suppress invalid character diagnostics. Invalid input "
"characters still affect the exit status. If \b--all\b is also specified "
"then non-zero invalid character counts are listed.]"
"[t:to?The output codeset is set to \acodeset\a.]:[codeset:=native]"
"\n"
"\n[ pid ... ]\n"
"\n"
"[+SEE ALSO?\bdd\b(1), \biconv\b(3), \bsetlocale\b(3)]"
;
#include <ast.h>
#include <error.h>
#include <iconv.h>
/*
* optget() info discipline function
*/
static int
optinfo(Opt_t* op, Sfio_t* sp, const char* s, Optdisc_t* dp)
{
register iconv_list_t* ic;
register const char* p;
register int c;
if (streq(s, "codesets"))
for (ic = iconv_list(NiL); ic; ic = iconv_list(ic))
{
sfputc(sp, '[');
sfputc(sp, '+');
sfputc(sp, '\b');
p = ic->match;
if (*p == '(')
p++;
while (c = *p++)
{
if (c == ')' && !*p)
break;
if (c == '?' || c == ']')
sfputc(sp, c);
sfputc(sp, c);
}
sfputc(sp, '?');
p = ic->desc;
while (c = *p++)
{
if (c == ']')
sfputc(sp, c);
sfputc(sp, c);
}
sfputc(sp, ']');
}
return 0;
}
static int
listall(void)
{
register iconv_list_t* ic;
register const char* p;
sfprintf(sfstdout, "Patterns:\n\n");
for (ic = iconv_list(NiL); ic; ic = iconv_list(ic))
sfprintf(sfstdout, " %s -- %s\n", ic->match, ic->desc);
p = "/usr/bin/iconv";
if (!access(p, X_OK) || !access(p += 4, X_OK))
{
sfprintf(sfstdout, "\n");
execl(p, "iconv", "-l", 0);
}
return 0;
}
int
main(int argc, register char** argv)
{
char* file;
char* from;
char* to;
iconv_t cvt;
int all;
int fail;
int ignore;
int list;
Sfio_t* ip;
Optdisc_t od;
Iconv_disc_t id;
NoP(argc);
error_info.id = "iconv";
from = to = "native";
all = ignore = list = 0;
setlocale(LC_ALL, "");
/*
* set up the disciplines
*/
optinit(&od, optinfo);
iconv_init(&id, errorf);
id.flags |= ICONV_FATAL;
/*
* grab the options
*/
for (;;)
{
switch (optget(argv, usage))
{
case 'a':
all = 1;
id.flags &= ~ICONV_FATAL;
continue;
case 'c':
id.flags |= ICONV_OMIT;
id.flags &= ~ICONV_FATAL;
continue;
case 'e':
ignore = 0;
continue;
case 'f':
from = opt_info.arg;
continue;
case 'i':
ignore = 1;
continue;
case 'l':
list = 1;
continue;
case 's':
id.errorf = 0;
continue;
case 't':
to = opt_info.arg;
continue;
case '?':
error(ERROR_USAGE|4, "%s", opt_info.arg);
break;
case ':':
error(2, "%s", opt_info.arg);
break;
}
break;
}
argv += opt_info.index;
if (error_info.errors)
error(ERROR_USAGE|4, "%s", optusage(NiL));
if (list)
return listall();
if ((cvt = iconv_open(to, from)) == (iconv_t)(-1))
{
if ((cvt = iconv_open(to, "utf-8")) == (iconv_t)(-1))
error(3, "%s: unknown destination codeset", to);
iconv_close(cvt);
if ((cvt = iconv_open("utf-8", from)) == (iconv_t)(-1))
error(3, "%s: unknown source codeset", from);
iconv_close(cvt);
error(3, "cannot convert from %s to %s", from, to);
}
fail = 0;
if (file = *argv)
argv++;
do
{
if (!file || streq(file, "-"))
{
file = "/dev/stdin";
ip = sfstdin;
}
else if (!(ip = sfopen(NiL, file, "r")))
{
error(ERROR_SYSTEM|2, "%s: cannot open", file);
continue;
}
id.errors = 0;
iconv_move(cvt, ip, sfstdout, SF_UNBOUND, &id);
if (!id.errors && (!sfeof(ip) || sferror(ip)))
error(ERROR_SYSTEM|2, "%s: conversion read error", file);
if (id.errors)
{
if (ignore || !id.errors)
fail = 1;
else if (!id.errorf && all)
{
if (id.errors == 1)
error(2, "%s: %d character conversion error", file, id.errors);
else if (id.errors)
error(2, "%s: %d character conversion errors", file, id.errors);
}
}
if (ip != sfstdin)
sfclose(ip);
} while (file = *argv++);
if (sfsync(sfstdout))
error(ERROR_SYSTEM|3, "conversion write error");
return error_info.errors != 0 || fail;
}