# sun_solaris_cr_6904878_join_-t_no_longer_works_with_multibyte_char_separator.sh revision 3e14f97f673e8a630f076077de35afdd43dc1587
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
#
#
# This test checks whether the AST "join" utility works with
# multibyte characters as separator.
#
# This was reported as CR #6904878 ("join -t no longer works with multibyte char separator"):
# ------------ snip ------------
# join doesn't handle multibyte separator correctly.
#
# $ echo $LANG
# ja
# $ od -tx1 input1
# 0000000 66 31 a1 f7 66 32 0a
# 0000007
# $ od -tx1 input2
# 0000000 74 32 a1 f7 66 31 0a
# 0000007
# # 0xa1 0xf7 in the file is multibyte character.
# $ od -tx1 delim
# 0000000 a1 f7 0a
# 0000003
#
# $
#
# It should output "f1".
#
# files are attached.
# ------------ snip ------------
#
# test setup
# Report one failed check on stderr and count it.
#
# Normally reached through the "err_exit" alias, which inserts the caller's
# ${LINENO} as the first argument, so test code only writes
#     some_check || err_exit "what went wrong"
#
# Arguments: $1    - line number of the failing check
#            $2... - words of the failure message (joined with blanks)
# Globals:   Errors (incremented; capped at 127 so the counter stays usable
#            as a process exit status)
# Outputs:   "<TAB>[<line>]: <message>" on stderr
# Returns:   0, so a trailing "|| err_exit ..." never changes the status of
#            the surrounding code
function err_exit
{
	typeset lineno="${1-}"

	# drop the line number; whatever is left is the message
	(( $# > 0 )) && shift

	printf '\t[%s]: %s\n' "${lineno}" "$*" >&2

	# two separate tests so that a zero arithmetic result is never mistaken
	# for a failure of this function
	(( Errors < 127 )) && (( Errors += 1 ))
	return 0
}
# Route every later "err_exit ..." call through the function of the same
# name with the line number of the call site inserted as first argument.
alias err_exit='err_exit $LINENO'
set -o nounset
integer Errors=0	# number of failed checks
typeset ocwd		# directory to return to once the tests are done
typeset tmpdir		# scratch directory for the test input files
typeset out
# remember where we started; the end of the script changes back to it
ocwd="${PWD}"
# create temporary test directory; without it the tests cannot run, so give up
tmpdir="$(mktemp -t -d "test_sun_solaris_cr_6904878_join_-t_no_longer_works_with_multibyte_char_separator.XXXXXXXX")" || { err_exit "Cannot create temporary directory" ; exit 1 ; }
# run from inside the scratch directory so that the relative file names used
# by the tests ("input1", "input2", "delim") never touch the caller's cwd
# NOTE(review): nothing visible removes ${tmpdir} afterwards - confirm cleanup
cd "${tmpdir}" || { err_exit "Cannot cd to temporary directory ${tmpdir}" ; exit 1 ; }
# run tests
# Table-driven test: each entry of "testcases" names a locale, printf-style
# formats for the two join input files and the expected output.
#
# Arguments: $1 - join command to test
# Returns:   0 (failed checks are reported through err_exit)
#
# NOTE(review): as shown here the body does not parse - the "fi", "else" and
# "done" keywords below have no visible matching "if"/"for" lines, and the
# code that selects a testcase, switches the locale, fills "join_args", sets
# "output_format"/"testname" and writes "input1"/"input2" is not visible.
# Lines appear to be missing from this copy; confirm against the upstream file.
function test1
{
typeset join_cmd="$1"
typeset testid
typeset out
typeset testname
compound saved_locale
# save locale information (only the variables that are currently set)
[[ -v LC_ALL ]] && saved_locale.LC_ALL="${LC_ALL}"
[[ -v LC_CTYPE ]] && saved_locale.LC_CTYPE="${LC_CTYPE}"
[[ -v LANG ]] && saved_locale.LANG="${LANG}"
# read-only array of testcases; the second entry uses the same byte
# sequences as the "input1"/"input2" dumps quoted in the file header
compound -r -a testcases=(
(
typeset name="ascii_simple"
typeset locale="C"
typeset input1_format="fish 81 91\n"
typeset input2_format="fish B A\n"
typeset output_format="fish 81 91 B A"
)
(
typeset name="ja_JP.eucJP_multibyte_delimiter"
typeset locale="ja_JP.eucJP"
typeset input1_format="\x66\x31\xa1\xf7\x66\x32\x0a"
typeset input2_format="\x74\x32\xa1\xf7\x66\x31\x0a"
typeset output_format="f1"
)
)
# NOTE(review): orphaned "fi"/"else"/"fi" - the opening "if" lines are not visible
fi
else
fi
# build "join_args" array with multibyte characters in the current locale
typeset -a join_args
integer arg_index
# NOTE(review): orphaned "done" - the loop that fills "join_args" is not visible
done
#printf "args=|%q|\n" "${join_args[@]}"
# Run the join command in a fresh ${SHELL}: "join_cmd" is interpolated into
# the -c command string and "join_args" arrive as the child's "$@"; stdout and
# stderr are both captured in "out".
out="$(${SHELL} -c "${join_cmd} \"\$@\"" dummy "${join_args[@]}" 2>&1)" || err_exit "${testname}: Command returned exit code $?"
# The right-hand side is unquoted, so "output_format" is matched as a shell pattern.
[[ "${out}" == ${output_format} ]] || err_exit "${testname}: Expected match for $(printf "%q\n" "${output_format}"), got $(printf "%q\n" "${out}")"
rm "input1" "input2"
# cleanup and restore locale settings
# NOTE(review): no restore code is visible here although "saved_locale" is filled above
done
return 0
}
# Regression test for CR #6904878: replay the exact scenario from the bug
# report - join two files on the two-byte EUC-JP separator 0xa1 0xf7 in the
# ja_JP.eucJP locale - and expect the single output line "f1".
#
# Arguments: $1 - join command to test (expanded unquoted by the child
#                 shell, so it is subject to word splitting there)
# Globals:   SHELL (read) - shell used to run the command
# Side effects: creates and removes "input1", "input2" and "delim" in the
#            current directory
# Returns:   0 always; failed checks are reported through err_exit
function test2
{
	typeset joincmd=$1
	typeset testname="${joincmd}"
	typeset out

	# create files; the byte sequences are the "od -tx1" dumps of input1,
	# input2 and delim quoted in the bug report at the top of this file
	{
		printf '\x66\x31\xa1\xf7\x66\x32\x0a' >"input1" &&
		printf '\x74\x32\xa1\xf7\x66\x31\x0a' >"input2" &&
		printf '\xa1\xf7\x0a' >"delim"
	} || err_exit "${testname}: Cannot create input files"

	# run test; stderr is captured as well so that diagnostics show up in
	# the failure message below
	out=$( LC_ALL=ja_JP.eucJP ${SHELL} -o pipefail -o errexit -c '$1 -j1 1 -j2 2 -o 1.1 -t $(cat delim) input1 input2' dummy "${joincmd}" 2>&1 ) || err_exit "${testname}: Test failed with exit code $?"

	# the bug report states that the command should output "f1"
	[[ "${out}" == 'f1' ]] || err_exit "${testname}: Expected 'f1', got $(printf "%q\n" "${out}")"

	# cleanup
	rm -f "input1" "input2" "delim"
	return 0
}
# Run both test functions against the join command held in "cmd".
# NOTE(review): the "done" below has no visible matching loop header and
# "cmd" is not assigned anywhere in the visible source - presumably a
# "for cmd in ..." line is missing from this copy; confirm against the
# upstream file.
test1 "${cmd}"
test2 "${cmd}"
done
# go back to the directory named by "ocwd" (expected to be the directory the
# script was started from - confirm it is assigned before this point)
cd "${ocwd}"
# tests done