# cmd/msgcc/msgcc.sh

# msgcc.sh revision 3f54fd611f536639ec30dd53c48e5ec1897cc7d9
########################################################################
#                                                                      #
#               This software is part of the ast package               #
#          Copyright (c) 2000-2011 AT&T Intellectual Property          #
#                      and is licensed under the                       #
#                 Eclipse Public License, Version 1.0                  #
#                    by AT&T Intellectual Property                     #
#                                                                      #
#                A copy of the License is available at                 #
#          http://www.eclipse.org/org/documents/epl-v10.html           #
#         (with md5 checksum b35adb5213ca9657e911e9befb180842)         #
#                                                                      #
#              Information and Software Systems Research               #
#                            AT&T Research                             #
#                           Florham Park NJ                            #
#                                                                      #
#                 Glenn Fowler <gsf@research.att.com>                  #
#                                                                      #
########################################################################
: C language message catalog compiler

# NOTE: all variable names match __*__ to avoid clash with msgcpp def vars

# Command name; used in diagnostics, in the generated catalog header and
# (via ARGV0) as the name passed to getopts.
__command__=msgcc
# Default similarity threshold; overridden by -M-similar=number and
# interpolated into the usage text below.
integer __similar__=30

# Probe the shell's getopts in a subshell with a long-form option string.
# If the probe prints 0123 the self-documenting usage text is selected
# (presumably this identifies a ksh93/AST getopts -- confirm against the
# getopts documentation); otherwise a plain short-option string is used.
# $USAGE_LICENSE is not set in this file; it is presumably supplied by the
# environment or the build and expands to nothing when unset.
case `(getopts '[-][123:xyz]' opt --xyz; echo 0$opt) 2>/dev/null` in
0123)   ARGV0="-a $__command__"
    USAGE=$'
[-?
@(#)$Id: msgcc (AT&T Labs Research) 2010-10-20 $
]
'$USAGE_LICENSE$'
[+NAME?msgcc - C language message catalog compiler]
[+DESCRIPTION?\bmsgcc\b is a C language message catalog compiler. It accepts
    \bcc\b(1) style options and arguments. A \bmsgcpp\b(1) \b.mso\b file
    is generated for each input \b.c\b file. If the \b-c\b option is not
    specified then a \bgencat\b(1) format \b.msg\b file is generated from
    the input \b.mso\b and \b.msg\b files. If \b-c\b is not specified then
    a \b.msg\b suffix is appended to the \b-o\b \afile\a if it doesn\'t
    already have a suffix. The default output is \ba.out.msg\b if \b-c\b
    and \b-o\b are not specified.]
[+?If \b-M-new\b is not specified then messages are merged with those in the
    pre-existing \b-o\b file.]
[M?Set a \bmsgcc\b specific \aoption\a. \aoption\a may be:]:[-option]{
    [+mkmsgs?The \b-o\b file is assumed to be in \bmkmsgs\b(1) format.]
    [+new?Create a new \b-o\b file.]
    [+preserve?Messages in the \b-o\b file that are not in new
        \b.msg\b file arguments are preserved. The default is to
        either reuse the message numbers with new message text that
        is similar to the old or to delete the message text, leaving
        an unused message number.]
    [+set=\anumber\a?Set the message set number to \anumber\a. The default
        is \b1\b.]
    [+similar=\anumber\a?The message text similarity measure threshold.
        The similarity measure between \aold\a and \anew\a message
        text is 100*(2*gzip(\aold\a+\anew\a)/(gzip(\aold\a)+gzip(\anew\a))-1),
        where gzip(\ax\a) is the size of text \ax\a when compressed by
        \bgzip\b(1). The default threshold is '$__similar__$'. A
        threshold of \b0\b turns off message replacement, but unused
        old messages are still deleted. Use \b-M-preserve\b to preserve
        all old messages.]
    [+verbose?Trace similar message replacements on the standard error.]
}

file ...

[+SEE ALSO?\bcc\b(1), \bcpp\b(1), \bgencat\b(1), \bmsggen\b(1),
    \bmsgcpp\b(1), \bmsgcvt\b(1)]
'
    ;;
# Fallback: no extra getopts arguments and a short-form option string.
*)  ARGV0=""
    USAGE="M:[-option] [ cc-options ] file ..."
    ;;
esac

# Hand a lone '-?' argument to getopts together with the usage string, then
# exit with status 2.  (With a ksh93 getopts this presumably prints the
# usage message -- confirm against the getopts documentation.)
#
# OPTIND is reset first so that getopts starts from the beginning of the
# argument list given here.  $ARGV0 is deliberately unquoted: it is either
# "-a msgcc" (which must split into two words) or empty (which must vanish).
usage()
{
    OPTIND=0
    getopts $ARGV0 "$USAGE" OPT '-?'
    exit 2
}

# keys command
#
# Run "command --??keys --" and print only those output lines (stdout and
# stderr combined) that consist entirely of a double-quoted string.
# NOTE(review): --??keys is presumably the AST getopts query that lists a
# command's translatable option text -- confirm against the getopts docs.
#
# The option is quoted so that it always reaches the command literally;
# unquoted, --??keys is a shell pattern and would be replaced by any
# matching file name in the current directory (e.g. "--xxkeys").
keys()
{
    "$1" '--??keys' -- 2>&1 | grep '^".*"$'
}

# Message text -> message number lookup (associative).
typeset -A __index__

# Tables indexed by message number or drop slot, plus the scratch values
# used by the similarity pass.
typeset __text__ __keep__ __drop__
typeset __oz__ __nz__ __z__
typeset __hit__ __hit_i__

# Option flags: empty means off.  Merging with an existing catalog is the
# only one that starts out enabled.
typeset __compile__ __preprocess__
typeset __debug__ __mkmsgs__
typeset __preserve__ __verbose__
typeset __merge__=1

# Loop indices, counters and the exit code.
integer __i__=0 __num__=0 __max__=0 __skip__=0 __code__=0
integer __args__=0 __files__=0 __sources__=0 __cmds__=0
integer __ndrop__=0 __new__=0 __old__=0

# Message set number written to the catalog header.
integer __set__=1

# Default output catalog, and the -o operand exactly as given (if any).
__out__=a.out.msg
__OUT__=

# If any argument starts with "--" or with a literal "-?", run getopts once
# over the command line.  The loop body breaks on the first option returned,
# so no option is acted on here; presumably the call exists only so that
# getopts itself can service requests such as --help, --man or -? (and exit)
# before the cc-style scan below -- confirm against the getopts docs.
# $ARGV0 is intentionally unquoted (it is "-a msgcc" or empty).
case " $* " in
*" --"*|*" -?"*)
    while   getopts $ARGV0 "$USAGE" OPT
    do  case $OPT in
        *)  break ;;
        esac
    done
    ;;
esac
# Scan the command line cc(1)-style, consuming one argument per iteration
# (two for "-o file"):
#
#   -c, -E           set __compile__ / __preprocess__
#   -D*, -I*, -U*    collected in __argv__[] for msgcpp
#   -M-*             msgcc specific options (see the usage text)
#   -o file          __OUT__ is the operand as given; __out__ is the operand
#                    with its last suffix replaced by .msg
#   other -x / +x    ignored, as are operands matching *.[aAlLsS]*
#   *.[cCiI]*        a source: recorded in __srcv__[], and the matching
#                    base name with a .mso suffix is recorded in __objv__[]
#   *.[oO]*          only the matching .mso name is recorded in __objv__[]
#   *.msg, *.mso     recorded in __objv__[] unchanged
#   anything else    taken to be a command name and recorded in __cmdv__[]
#
# __srcv__[] and __objv__[] share the index __files__, so __srcv__[] has
# holes for operands that are not sources.
while   :
do  case $# in
    0)  break ;;
    esac
    __arg__=$1
    case $__arg__ in
    -c) __compile__=1
        ;;
    -[DIU]*)__argv__[__args__]=$__arg__
        (( __args__++ ))
        ;;
    -E) __preprocess__=1
        ;;
    -M-debug)
        __debug__=1
        ;;
    -M-mkmsgs)
        __mkmsgs__=1
        ;;
    -M-new) __merge__=
        ;;
    # The documented spelling is -M-preserve.  The label used to be the
    # misspelled -M-perserve only, so the documented option fell through to
    # the generic [-+]* arm and was silently ignored.  The old spelling is
    # still accepted for compatibility.
    -M-preserve|-M-perserve)
        __preserve__=1
        ;;
    -M-set=*)
        # The set number is whatever msggen -s prints for "<value>.1".
        __set__=$(msggen -s ${__arg__#*=}.1)
        ;;
    -M-similar=*)
        __similar__=${__arg__#*=}
        ;;
    -M-verbose)
        __verbose__=1
        ;;
    -o) case $# in
        1)  print -u2 $"$__command__: output argument expected"
            exit 1
            ;;
        esac
        shift
        __out__=${1%.*}.msg
        __OUT__=$1
        ;;
    [-+]*|*.[aAlLsS]*)
        # Remaining options and archive/library/assembler style operands
        # are of no interest here.
        ;;
    *.[cCiI]*|*.[oO]*)
        case $__arg__ in
        *.[oO]*);;
        *)  __srcv__[__files__]=$__arg__
            (( __sources__++ ))
            ;;
        esac
        # The .mso file is named after the base name only, so it is
        # created in the current directory.
        __arg__=${__arg__##*/}
        __arg__=${__arg__%.*}.mso
        __objv__[__files__]=$__arg__
        (( __files__++ ))
        ;;
    *.ms[go])
        __objv__[__files__]=$__arg__
        (( __files__++ ))
        ;;
    *)  __cmdv__[__cmds__]=$__arg__
        (( __cmds__++ ))
        ;;
    esac
    shift
done
# Take the output catalog name without its directory and .msg suffix; when
# a file of that name in the current directory is executable, append it to
# the list of commands whose keys are collected.
__arg__=${__out__##*/}
__arg__=${__arg__%.msg}
if [[ -x $__arg__ ]]; then
    __cmdv__[__cmds__]=$__arg__
    (( __cmds__ += 1 ))
fi

# generate the .mso files
#
# Each recorded source is run through msgcpp with the collected -D/-I/-U
# options.  With -E the msgcpp output goes to the standard output; otherwise
# it is written to the .mso name recorded at the same index.  With both -c
# and -o, the -o operand replaces the first recorded output name.

if  [[ $__OUT__ && $__compile__ ]]
then    __objv__[0]=$__OUT__
fi

if  (( __sources__ ))
then    # Valid indices are 0..__files__-1.  The bound used to be
    # __i__<=__files__, which also visited the never-assigned slot
    # __files__; __srcv__[] has holes for non-source operands, hence the
    # emptiness test below.
    for (( __i__=0; __i__<__files__; __i__++ ))
    do  if  [[ ${__srcv__[__i__]} ]]
        then    if  (( __sources__ > 1 ))
            then    # cc-style progress line when there are several sources
                print "${__srcv__[__i__]}:"
            fi
            if  [[ $__preprocess__ ]]
            then    msgcpp "${__argv__[@]}" "${__srcv__[__i__]}"
            else    msgcpp "${__argv__[@]}" "${__srcv__[__i__]}" > "${__objv__[__i__]}"
            fi
        fi
    done
fi

# combine the .mso and .msg files
#
# Skipped for -c and -E.  Otherwise:
#   1. when merging and the output catalog is readable, load its messages
#      into __text__[number] / __index__[text] and write the new catalog to
#      a temporary file; else write the output catalog directly;
#   2. stage one of the pipeline turns the sorted .mso/.msg records into
#      candidate quoted strings;
#   3. stage two filters the candidates, assigns message numbers, optionally
#      reuses the numbers of dropped old messages for similar new text, and
#      prints the numbered messages;
#   4. when a temporary file was used, it replaces the output catalog only
#      if the message lines differ.

if  [[ ! $__compile__ && ! $__preprocess__ ]]
then    if  [[ $__merge__ && -r $__out__ ]]
    then    __tmp__=$__out__.tmp
        # Remove $__tmp__ and its n/o companions on exit, HUP or INT while
        # keeping the exit status.  The name is quoted so that paths with
        # whitespace or pattern characters are handled.
        trap '__code__=$?; rm -f "${__tmp__}"*; exit $__code__' 0 1 2
        while   read -r __line__
        do  # mkmsgs format: skip everything from a %{ line through the
            # next %} line
            if  (( $__skip__ ))
            then    if  [[ $__line__ == '%}'* ]]
                then    __skip__=0
                fi
                continue
            fi
            if  [[ $__mkmsgs__ && $__line__ == '%{'* ]]
            then    __skip__=1
                continue
            fi
            if  [[ $__mkmsgs__ ]]
            then    # "%#<number>;;..." carries the number; the message
                # text is the following line.  Other % lines are
                # ignored; anything else is diagnosed.
                if  [[ $__line__ == '%#'*';;'* ]]
                then    __line__=${__line__#'%#'}
                    __num__=${__line__%';;'*}
                    read -r __line__
                elif    [[ $__line__ == %* ]]
                then    continue
                else    print -u2 $"$__command__: unrecognized line=$__line__"
                    __code__=1
                fi
            else    # gencat format: <number> "<text>"; other lines ignored
                case $__line__ in
                +([0-9])' '*)
                    __num__=${__line__%%' '*}
                    __line__=${__line__#*'"'}
                    __line__=${__line__%'"'}
                    ;;
                *)  continue
                    ;;
                esac
            fi
            __index__["$__line__"]=$__num__
            __text__[$__num__]=$__line__
            if  (( __max__ < __num__ ))
            then    (( __max__=__num__ ))
            fi
        done < "$__out__"
        # first number that cannot belong to an old message
        (( __new__=__max__+1 ))
    else    __tmp__=$__out__
        (( __new__=1 ))
    fi
    if  (( __code__ ))
    then    exit $__code__
    fi
    # From here on the standard output is the catalog being written.  Unit 9
    # is a second handle on the same file so that the first pipeline stage
    # can write to the catalog directly, bypassing the second stage.
    exec 1>"$__tmp__" 9>&1
    print -r -- '$'" ${__out__%.msg} message catalog"
    print -r -- '$translation'" $__command__ $(date +%Y-%m-%d)"
    print -r -- '$set'" $__set__"
    print -r -- '$quote "'
    sort -u "${__objv__[@]}" | {
        # Stage one.  Each record is "<op> <rest>":
        #   cmd name       print the keys of the named command
        #                  (dot_cmd stands for the . command)
        #   def name text  remember text in the shell variable name
        #   str text       print text
        #   raw text       collect text; printed at the end with a
        #                  $RAW$ prefix
        #   var ...        print the value of a remembered variable if
        #                  it is a quoted string
        #   <digits>...    printed only with -M-preserve
        #   $ ...          written to the catalog as "$ include ..."
        __raw__=
        while   read -r __line__
        do  __op__=${__line__%% *}
            __line__=${__line__#* }
            case $__op__ in
            cmd)    __a1__=${__line__%% *}
                case $__a1__ in
                dot_cmd)    __a1__=. ;;
                esac
                keys $__a1__
                ;;
            def)    __a1__=${__line__%% *}
                __a2__=${__line__#* }
                # NOTE(review): eval on input data; the variable
                # name comes from the .mso file
                eval $__a1__='$'__a2__
                ;;
            str)    print -r -- "$__line__"
                ;;
            raw)    __raw__=$__raw__$'\n'$__line__
                ;;
            var)    __a1__=${__line__%% *}
                __a2__=${__line__#* }
                case $__a1__ in
                [[:digit:]]*)
                    # "var <offset> <name>": the value of name
                    # from character offset+1 on, re-quoted
                    eval __v__='$'$__a2__
                    __v__='"'${__v__:__a1__+1}
                    ;;
                *)  eval __v__='$'$__a1__
                    ;;
                esac
                if  [[ $__v__ == '"'*'"' ]]
                then    print -r -- "$__v__"
                fi
                ;;
            [[:digit:]]*)
                [[ $__preserve__ ]] && print -r -- "$__line__"
                ;;
            '$')    print -r -u9 $__op__ include $__line__
                ;;
            esac
        done
        for (( __i__=0; __i__ < __cmds__; __i__++ ))
        do  keys ${__cmdv__[__i__]}
        done
        # Raw text: drop the leading newline, strip the outer quotes,
        # double backslashes, escape embedded quotes, then re-quote with
        # a $RAW$ marker so that stage two accepts it unconditionally.
        [[ $__raw__ ]] && print -r -- "${__raw__#?}" | sed -e 's/^"//' -e 's/"$//' -e 's/\\/&&/g' -e 's/"/\\"/g' -e 's/.*/$RAW$"&"/'
    } | {
        # Stage two.  Accept $RAW$ lines; pass "$ " directive lines
        # through; reject other $ lines, lines containing @(#), lines
        # ending in a <...@...>" form, lines starting with http://, and
        # lines without two adjacent alphabetic characters.
        __num__=1
        while   read -r __line__
        do  case $__line__ in
            '$RAW$'*)
                ;;
            '$'[\ \ ]*)
                print -r -- "$__line__"
                continue
                ;;
            '$'*|*"@(#)"*|*"<"*([[:word:] .-])"@"*([[:word:] .-])">"*([     ])'"'|"http://"*)
                continue
                ;;
            *[[:alpha:]][[:alpha:]]*)
                ;;
            *)  continue
                ;;
            esac
            # strip through the opening quote and the closing quote
            __line__=${__line__#*'"'}
            __line__=${__line__%'"'}
            if  [[ $__line__ ]]
            then    if  [[ ${__index__["$__line__"]} ]]
                then    # known text: keep its number and continue
                    # numbering from there
                    if [[ ! $__preserve__ ]]
                    then    __num__=${__index__["$__line__"]}
                        __keep__[$__num__]=1
                    fi
                else    # new text: take the next unused number
                    while    [[ ${__text__[$__num__]} ]]
                    do  (( __num__++ ))
                    done
                    if  (( __max__ < __num__ ))
                    then    (( __max__=__num__ ))
                    fi
                    if  [[ ! $__preserve__ ]]
                    then     __keep__[$__num__]=1
                    fi
                    __text__[$__num__]=$__line__
                    __index__["$__line__"]=$__num__
                    (( __num__++ ))
                fi
            fi
        done
        if  (( __max__ < __num__ ))
        then    (( __max__=__num__ ))
        fi
        if [[ $__debug__ ]]
        then    # -M-debug: classify every message on the standard error
            # and stop.  __new__ is itself the first new number (the
            # loops below treat 1..__new__-1 as old), so the test is
            # >=; it used to be >, which reported message __new__ as
            # old.
            for (( __num__=1; __num__<=__max__; __num__++ ))
            do  if  [[ ${__text__[$__num__]} ]]
                then    if  (( __num__ >= __new__ ))
                    then    if  [[ ! ${__keep__[$__num__]} ]]
                        then    print -r -u2 -- $__num__ HUH '"'"${__text__[$__num__]}"'"'
                        else    print -r -u2 -- $__num__ NEW '"'"${__text__[$__num__]}"'"'
                        fi
                    elif    [[ ${__keep__[$__num__]} ]]
                    then    print -r -u2 -- $__num__ OLD '"'"${__text__[$__num__]}"'"'
                    else    print -r -u2 -- $__num__ XXX '"'"${__text__[$__num__]}"'"'
                    fi
                fi
            done
            exit 0
        fi
        # check for replacements
        if  [[ ! $__preserve__ ]]
        then    # old messages that were not seen again are drop candidates
            for (( __num__=1; __num__<__new__; __num__++ ))
            do  if  [[ ${__text__[$__num__]} && ! ${__keep__[$__num__]} ]]
                then    (( __ndrop__++ ))
                    __drop__[__ndrop__]=$__num__
                fi
            done
            [[ $__verbose__ ]] && print -u2 $__command__: old:1-$((__new__-1)) new:$__new__-$__max__ drop $__ndrop__ add $((__max__-__new__+1))
            if  (( __ndrop__ ))
            then    # compressed size of each drop candidate
                for (( __i__=1; __i__<=__ndrop__; __i__++ ))
                do  (( __old__=${__drop__[$__i__]} ))
                    __oz__[__i__]=$(print -r -- "\"${__text__[$__old__]}\"" | gzip | wc -c)
                done
                # For each new message find the drop candidate with
                # the smallest similarity measure below the
                # threshold (see -M-similar in the usage text) and
                # move the new text onto the old number.
                for (( __num__=__new__; __num__<=__max__; __num__++ ))
                do  [[ ${__text__[$__num__]} ]] || continue
                    __nz__=$(print -r -- "\"${__text__[$__num__]}\"" | gzip | wc -c)
                    __hit__=0
                    (( __bz__=__similar__ ))
                    for (( __i__=1; __i__<=__ndrop__; __i__++ ))
                    do  # a slot set to 0 has already been reused
                        if  (( __old__=${__drop__[$__i__]} ))
                        then    __z__=$(print -r -- "\"${__text__[$__old__]}\"""\"${__text__[$__num__]}\"" | gzip | wc -c)
                            (( __z__ = (__z__ * 200 / (${__oz__[__i__]} + $__nz__)) - 100 ))
                            if  (( __z__ < __bz__ ))
                            then    (( __bz__=__z__ ))
                                (( __hit__=__old__ ))
                                (( __hit_i__=__i__ ))
                            fi
                        fi
                    done
                    if  (( __hit__ ))
                    then    [[ $__verbose__ ]] && print -u2 $__command__: $__hit__ $__num__ $__bz__
                        __text__[$__hit__]=${__text__[$__num__]}
                        __keep__[$__hit__]=1
                        __drop__[$__hit_i__]=0
                        __text__[$__num__]=
                        __keep__[$__num__]=
                    fi
                done
            fi
        fi
        # final output
        for (( __num__=1; __num__<=__max__; __num__++ ))
        do  if  [[ ${__text__[$__num__]} && ( $__preserve__ || ${__keep__[$__num__]} ) ]]
            then    print -r -- $__num__ "\"${__text__[$__num__]}\""
            fi
        done
    }
    # When a temporary file was written, compare old and new catalogs with
    # the $ directive lines removed (the $translation line carries the
    # date) and install the new one only if the messages differ, keeping
    # the previous catalog as .old.  File names are quoted, and the right
    # side of != is quoted so that it is compared as a string rather than
    # matched as a pattern.
    if [[ $__tmp__ != "$__out__" ]]
    then    grep -v '^\$' "$__tmp__" > "${__tmp__}n"
        [[ -f $__out__ ]] && grep -v '^\$' "$__out__" > "${__tmp__}o"
        cmp -s "${__tmp__}n" "${__tmp__}o" || {
            [[ -f $__out__ ]] && mv "$__out__" "$__out__.old"
            mv "$__tmp__" "$__out__"
        }
    fi
fi
exit $__code__