#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
#
#
# This test checks whether the AST "cut" utility's "-d" option
# works with multibyte characters
#
# This was reported as CR #6904575 ("cut -d with multibyte character no longer works"):
# ------------ snip ------------
# cut -d with multibyte char no longer work correctly.
#
# $ echo $LANG
# ja
# $ od -tx1 mb.eucjp
# 0000000 a4 a2 a4 a4 a4 a4 a4 a6 a4 a8 0a
# 0000013
# $ od -tx1 delim
# 0000000 a4 a4 0a
# 0000003
# $ wc -m mb.eucjp
# 6 mb.eucjp
#
# It has 5 characters (2byte each).
#
# $ cut -d `cat delim` -f1 mb.eucjp | od -tx1
# 0000000 0a
# 0000001
#
# correct output is
#
# 0000000 a4 a2 0a
# 0000003
#
# files are attached.
# ------------ snip ------------
#
# test setup
# Report one failed check on stderr and count it.
#
# Call sites are expected to go through the "err_exit" alias, which inserts
# ${LINENO} as the first argument ahead of the caller's message.
#
# Arguments: $1    - line number of the failing check
#            $2... - failure message (joined with single spaces)
# Globals:   Errors - incremented once per call
# Outputs:   one tab-indented line "[<line>]: <message>" on stderr
# Returns:   always 0, so "cmd || err_exit ..." never aborts the caller
function err_exit
{
	print -u2 -n "\t"
	# -r: emit the message verbatim, backslashes in it are not escapes
	print -u2 -r -- "[${1-}]: ${*:2}"
	(( Errors += 1 ))
	return 0
}
# Make every err_exit call pass the line it was invoked from as first argument.
alias err_exit='err_exit $LINENO'

set -o nounset

# count of failed checks
integer Errors=0

typeset ocwd
typeset tmpdir
typeset out

# remember the starting directory so the script can return to it when done
ocwd="${PWD}"

# create temporary test directory; nothing below can work without it
tmpdir="$(mktemp -t -d "test_sun_solaris_cr_6904575_cut_-d_with_multibyte_character_no_longer_works.XXXXXXXX")" || { err_exit "Cannot create temporary directory" ; exit 1 ; }

# work inside the scratch directory: the tests refer to their fixture
# files ("delim", "mb.eucjp") by relative name
cd "${tmpdir}" || { err_exit "cd ${tmpdir} failed." ; exit 1 ; }
# run tests
# Table-driven check that "cut -d<delim> -f<n>" splits on the right character
# in both single-byte and multibyte locales.
#
# Each test case gives a locale, a printf format for the input line, a list of
# printf formats for the cut arguments and a printf format for the expected
# output.  The formats are rendered *after* the locale is switched so that
# "\u[...]" escapes become that locale's multibyte encoding.
#
# Arguments: $1 - cut command to test (run via "${SHELL} -c")
# Globals:   LC_ALL is changed per test case and restored afterwards
# Returns:   always 0; failures are reported through err_exit
function test1
{
	typeset cut_cmd="$1"
	typeset testid
	typeset out
	typeset testname
	typeset output_format
	compound saved_locale

	# save locale information
	[[ -v LC_ALL   ]] && saved_locale.LC_ALL="${LC_ALL}"
	[[ -v LC_CTYPE ]] && saved_locale.LC_CTYPE="${LC_CTYPE}"
	[[ -v LANG     ]] && saved_locale.LANG="${LANG}"

	# NOTE(review): the cut_args_format entries were derived from each case's
	# input_format/output_format pair (delimiter = first character after the
	# expected output, field 1) - confirm against the intended test matrix.
	compound -r -a testcases=(
		(
			typeset name="ascii_plain"
			typeset locale="C"
			typeset input_format='abcdefg'
			typeset -a cut_args_format=( '-d' 'e' '-f' '1' )
			typeset output_format='abcd'
		)
		(
			typeset name="unicode_plain"
			typeset locale="<unicode>"
			typeset input_format='abcd\u[20ac]fg'
			typeset -a cut_args_format=( '-d' '\u[20ac]' '-f' '1' )
			typeset output_format='abcd'
		)
		(
			typeset name="unicode_plain2"
			typeset locale="<unicode>"
			typeset input_format='abcd\u[20ac]fg'
			typeset -a cut_args_format=( '-d' 'f' '-f' '1' )
			typeset output_format='abcd\u[20ac]'
		)
	)

	for testid in "${!testcases[@]}" ; do
		nameref tc=testcases[${testid}]
		testname="${cut_cmd}/${tc.name}"

		# "<unicode>" means "any UTF-8 locale": keep the current one if it
		# already is UTF-8, otherwise fall back to en_US.UTF-8
		if [[ "${tc.locale}" == "<unicode>" ]] ; then
			if [[ "${LC_ALL-}" != *.UTF-8 ]] ; then
				export LC_ALL='en_US.UTF-8'
			fi
		else
			export LC_ALL="${tc.locale}"
		fi

		# build "cut_args" array with multibyte characters in the current locale
		typeset -a cut_args
		integer arg_index
		for arg_index in "${!tc.cut_args_format[@]}" ; do
			# "--" keeps printf from treating formats like "-d" as options
			cut_args+=( "$(printf -- "${tc.cut_args_format[arg_index]}")" )
		done

		# render the expected output in the current locale as well
		output_format="$(printf -- "${tc.output_format}")"

		#printf "args=|%q|\n" "${cut_args[@]}"

		out="$(printf "${tc.input_format}" | ${SHELL} -c "${cut_cmd} \"\$@\"" dummy "${cut_args[@]}" 2>&1)" || err_exit "${testname}: Command returned exit code $?"
		# right-hand side is deliberately unquoted: it is matched as a pattern
		[[ "${out}" == ${output_format} ]] || err_exit "${testname}: Expected match for $(printf "%q\n" "${output_format}"), got $(printf "%q\n" "${out}")"

		# cleanup and restore locale settings
		unset cut_args arg_index
		if [[ -v saved_locale.LC_ALL ]] ; then
			LC_ALL="${saved_locale.LC_ALL}"
		else
			unset LC_ALL
		fi
		if [[ -v saved_locale.LC_CTYPE ]] ; then
			LC_CTYPE="${saved_locale.LC_CTYPE}"
		else
			unset LC_CTYPE
		fi
		if [[ -v saved_locale.LANG ]] ; then
			LANG="${saved_locale.LANG}"
		else
			unset LANG
		fi
	done

	return 0
}
# Reproduce the original CR #6904575 report: in the ja_JP.eucJP locale, cut
# with a two-byte EUC-JP delimiter must return the first field (bytes a4 a2)
# rather than an empty line.
#
# Creates the fixture files "mb.eucjp" and "delim" in the current directory,
# runs the command under test and removes the files again.
#
# Arguments: $1 - cut command to test (expanded unquoted inside "${SHELL} -c")
# Returns:   always 0; failures are reported through err_exit
function test2
{
	typeset cutcmd=$1
	typeset testname="${cutcmd}"
	typeset out

	# create files
	# mb.eucjp: five two-byte characters plus newline; delim: the second of
	# those characters (a4 a4) plus newline - same bytes as in the bug report
	printf "\xa4\xa2\xa4\xa4\xa4\xa4\xa4\xa6\xa4\xa8\x0a" >"mb.eucjp" || err_exit "${testname}: Cannot create file mb.eucjp"
	printf "\xa4\xa4\x0a" >"delim" || err_exit "${testname}: Cannot create file delim"

	# run test
	# pipefail+errexit make a failing cut visible even though od is last in the pipe
	out=$( LC_ALL=ja_JP.eucJP ${SHELL} -o pipefail -o errexit -c '$1 -d $(cat delim) -f1 "mb.eucjp" | od -tx1' dummy "${cutcmd}" 2>&1 ) || err_exit "${testname}: Test failed with exit code $?"
	[[ "${out}" == $'0000000 a4 a2 0a\n0000003' ]] || err_exit "${testname}: Expected \$'0000000 a4 a2 0a\n0000003', got $(printf "%q\n" "${out}")"

	# cleanup
	rm -f -- "mb.eucjp" "delim"

	return 0
}
# Run both tests once per cut command.
# NOTE(review): the command list is an assumption (whatever "cut" resolves to
# in the shell, plus the external /usr/bin/cut) - confirm which variants this
# test is meant to cover.
for cmd in "cut" "/usr/bin/cut" ; do
	test1 "${cmd}"
	test2 "${cmd}"
done

# go back to the starting directory (stay put if it was never recorded)
cd "${ocwd:-.}"

# remove the scratch directory created during setup
rmdir "${tmpdir}" || err_exit "Cannot remove temporary directory ${tmpdir}."
# tests done