#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
#
# fatal_error - report an unrecoverable error and terminate the script
#
# Arguments: $* - words of the error message
# Globals:   progname (read) - used as the message prefix
# Outputs:   "<progname>: <message>" on standard error (fd 2)
# Exits:     always, with status 1
function fatal_error
{
	typeset msg="${progname}: $*"

	print -u2 "${msg}"
	exit 1
}
# attrstrtoattrarray - walk an attribute string one attribute at a time
#
# Arguments:
#   $1 - attribute string (copied into the local "s"); the pattern tests
#        below use inputs such as: foo="bar" huz=123
#   $2 - name of the caller's attribute array, bound here as nameref "aa"
#
# NOTE(review): this copy of the function looks truncated. The
# whitespace-skipping loop has no body, "break" sits outside any loop,
# the if/fi/else/fi and done keywords are unbalanced, and no visible line
# assigns "currattrlen" or stores anything into "aa". Restore the missing
# lines from the complete source before relying on this function.
function attrstrtoattrarray
{
#set -o xtrace
typeset s="$1"
nameref aa=$2 # attribute array
# NOTE(review): aa_count is declared twice; the second line is redundant
integer aa_count=0
integer aa_count=0
typeset nextattr
integer currattrlen=0
typeset tagstr
typeset tagval
# loop while unparsed input remains in "s"
while (( ${#s} > 0 )) ; do
# skip leading whitespace
# NOTE(review): the statements that advance past the whitespace are not present
done
# drop the first "currattrlen" characters from "s"
s="${s:currattrlen:${#s}}"
# anything left ?
(( ${#s} == 0 )) && break
# Pattern tests:
#x="foo=bar huz=123" ; print "${x##~(E)[[:alnum:]_-:]*=[^[:blank:]\"]*}"
#x='foo="ba=r o" huz=123' ; print "${x##~(E)[[:alnum:]_-:]*=\"[^\"]*\"}"
#x="foo='ba=r o' huz=123" ; print "${x##~(E)[[:alnum:]_-:]*=\'[^\"]*\'}"
#x="foox huz=123" ; print "${x##~(E)[[:alnum:]_-:]*}"
# All pattern combined via eregex (w|x|y|z):
#x='foo="bar=o" huz=123' ; print "${x##~(E)([[:alnum:]_-:]*=[^[:blank:]\"]*|[[:alnum:]_-:]*=\"[^\"]*\"|[[:alnum:]_-:]*=\'[^\"]*\'|[[:alnum:]_-:]*)}"
# "nextattr" is "s" with the longest leading match of the combined
# attribute pattern removed, i.e. whatever follows the first attribute
nextattr="${s##~(E)([[:alnum:]_-:]*=[^[:blank:]\"]*|[[:alnum:]_-:]*=\"[^\"]*\"|[[:alnum:]_-:]*=\'[^\"]*\'|[[:alnum:]_-:]*)}"
# add entry
# "tagstr" is the first "currattrlen" characters of "s"
# NOTE(review): presumably currattrlen should be ${#s} - ${#nextattr} here,
# but the assignment is not visible in this copy
tagstr="${s:0:currattrlen}"
if [[ "${tagstr}" == *=* ]] ; then
# normal case: attribute with value
# value is everything after the first "="
tagval="${tagstr#*=}"
# strip quotes ('' or "")
# NOTE(review): as written, the first and last character are dropped
# unconditionally, even when the value is unquoted - confirm whether a
# guarding test is missing
tagval="${tagval:1:${#tagval}-2}"
fi
else
# special case for HTML where you have something like <foo baz>
# (attribute name without a value)
fi
done
}
# handle_document - xml_tok callback that records tokens in a variable tree
#
# Arguments:
#   $1 - name of the callback array, bound as nameref "callbacks"
#   $2 - token type (xml_tok passes names such as "tag_begin",
#        "tag_comment", "document_start", "document_end")
#   $3 - token value (tag name, text or comment body)
#   $4 - raw attribute string of a begin tag (may be empty)
# Globals:
#   stack.items[], stack.pos - name of the node list currently being filled
#
# NOTE(review): this copy looks truncated. The "case" header, the pattern
# labels and the statements that open each "(" ... ")" node assignment are
# missing, leaving orphan ")" and ";;" lines. The branch comments below
# are inferred from the surviving lines only.
function handle_document
{
#set -o xtrace
nameref callbacks=${1}
typeset tag_type="${2}"
typeset tag_value="${3}"
typeset tag_attributes="${4}"
# bind "nodepath" to the variable named by the current stack entry and
# "nodesnum" to the variable with the same name plus the suffix "num"
nameref nodepath="${stack.items[stack.pos]}"
nameref nodesnum="${stack.items[stack.pos]}num"
# fields of an element node: type, name, attribute list, child list and
# child counter (the opening of this compound assignment is not visible)
typeset tagtype="element"
typeset tagname="${tag_value}"
compound -A tagattributes
compound -A nodes
integer nodesnum=0
)
# fill attributes
if [[ "${tag_attributes}" != "" ]] ; then
attrstrtoattrarray "${tag_attributes}" "nodepath[${nodesnum}].tagattributes"
fi
# point the current stack slot at the ".nodes" list of entry
# [nodesnum] under the path stored in the previous stack slot
stack.items[stack.pos]="${stack.items[stack.pos-1]}[${nodesnum}].nodes"
;;
;;
# fields of a text node (opening of the compound assignment not visible)
typeset tagtype="text"
typeset tagvalue="${tag_value}"
)
;;
# fields of a comment node (opening of the compound assignment not visible)
typeset tagtype="comment"
typeset tagvalue="${tag_value}"
)
;;
;;
;;
esac
# print "xmltok: '${tag_type}' = '${tag_value}'"
}
# xml_tok - XML tokenizer that reports tokens through callbacks
#
# Arguments:
#   $1 - name of an associative array of callback commands, bound as
#        nameref "callbacks"
#
# Each callback is run only when its array entry is non-empty, and is
# passed $1 (the array name) and the event name, followed by event data:
#   document_start - no extra arguments
#   tag_comment    - a substring of the buffered comment
#   tag_begin      - tag name and attribute string
#   document_end   - the literal "exit_success"
#
# NOTE(review): this copy looks truncated. The loop that reads characters
# into "c", the comment detection, the tests in front of several
# "else"/"fi" lines and the end-tag/text callbacks are missing, so the
# if/fi/done keywords below are unbalanced. The comments describe only
# the surviving lines.
function xml_tok
{
typeset buf=""
typeset namebuf=""
typeset attrbuf=""
typeset c=""
typeset issingletag # bool: true/false (used for tags like "<br />")
nameref callbacks=${1}
[[ ! -z "${callbacks["document_start"]}" ]] && ${callbacks["document_start"]} "${1}" "document_start"
# NOTE(review): "isendtag" is not declared with typeset in the visible code
isendtag=false
if [[ "$c" == "<" ]] ; then
# flush any text content
buf=""
fi
# a "/" marks an end tag; any other character starts the tag buffer
if [[ "$c" == "/" ]] ; then
isendtag=true
else
buf="$c"
fi
buf+="$c"
# handle comments
# did we read the comment completely ?
buf+=">"
buf+="$c"
done
fi
# pass the buffer minus its first 3 and last 2 characters to the callback
[[ ! -z "${callbacks["tag_comment"]}" ]] && ${callbacks["tag_comment"]} "${1}" "tag_comment" "${buf:3:${#buf}-5}"
buf=""
continue
fi
# check if the tag starts and ends at the same time (like "<br />")
issingletag=true
# remove the trailing "/"
buf="${buf%*/}"
else
issingletag=false
fi
# check if the tag has attributes (e.g. space after name)
# name: strip the first whitespace character and everything after it
namebuf="${buf%%~(E)[[:space:][:blank:]].*}"
# attributes: strip a leading run that ends in a whitespace character
attrbuf="${buf#~(E).*[[:space:][:blank:]]}"
else
attrbuf=""
fi
if ${isendtag} ; then
# NOTE(review): the end-tag branch is empty in this copy
else
[[ ! -z "${callbacks["tag_begin"]}" ]] && ${callbacks["tag_begin"]} "${1}" "tag_begin" "$namebuf" "$attrbuf"
# handle tags like <br/> (which are start- and end-tag in one piece)
if ${issingletag} ; then
# NOTE(review): the body of this branch is empty in this copy
fi
fi
buf=""
else
# any other character is appended to the buffer
buf+="$c"
fi
done
[[ ! -z "${callbacks["document_end"]}" ]] && ${callbacks["document_end"]} "${1}" "document_end" "exit_success"
print # final newline to make filters like "sed" happy
}
# print_sample1_xml - write the built-in sample XML document to stdout
#
# Arguments: none
# Outputs:   a fixed 15-line XML document, one printf argument per line
function print_sample1_xml
{
	printf '%s\n' \
		'<br />' \
		'<score-partwise instrument="flute1">' \
		'<identification>' \
		'<kaiman>nocrocodile</kaiman>' \
		'</identification>' \
		'<!-- a comment -->' \
		'<partlist>' \
		'<foo>myfootext</foo>' \
		'<bar>mybartext</bar>' \
		'<snap />' \
		'<!-- another' \
		'comment -->' \
		'<ttt>myttttext</ttt>' \
		'</partlist>' \
		'</score-partwise>'
}
# usage - print the usage message and terminate the script
#
# Arguments: none
# Globals:   progname (read)                - command name shown in the message
#            xmldocumenttree1_usage (read)  - ksh93 self-documenting optstring
#            OPTIND, OPT (written)          - getopts state
# Outputs:   usage text generated by getopts
# Exits:     always, with status 2
function usage
{
	# Reset the getopts state, then hand getopts the '-?' option so that
	# it renders the usage text from the optstring. Without this call
	# the function exited silently.
	OPTIND=0
	getopts -a "${progname}" "${xmldocumenttree1_usage}" OPT '-?'
	exit 2
}
# program start
#
# Usage: <script> file viewmode
#   file     - XML input file; the names "#sample1" and "#sample2" are
#              tested for specially below
#   viewmode - output format; "list" and "tree" are visible case labels,
#              and the usage text also names "namelist" and "compacttree"
#
# NOTE(review): this section looks truncated. The option-parsing loop
# header, the view-mode validation test, the callback assignments, the
# bodies of the input-selection branches and two case labels are missing,
# leaving orphan "esac"/"done"/"fi" lines and empty branches.

# load these commands as ksh93 builtins
builtin basename
builtin cat
builtin date
builtin uname
# read-only option/usage specification string
typeset -r xmldocumenttree1_usage=$'+
[-?\n@(#)\$Id: xmldocumenttree1 (Roland Mainz) 2009-05-09 \$\n]
[-author?Roland Mainz <roland.mainz@nrubsig.org>]
[+NAME?xmldocumenttree1 - XML tree demo]
[+DESCRIPTION?\bxmldocumenttree\b is a small ksh93 compound variable demo
which reads a XML input file, converts it into an internal
variable tree representation and outputs it in the format
specified by viewmode (either "list", "namelist", "tree" or "compacttree").]
file viewmode
[+SEE ALSO?\bksh93\b(1)]
'
# printmsg "## OPT=|${OPT}|, OPTARG=|${OPTARG}|"
# any option not handled above prints the usage message and exits
*) usage ;;
esac
done
# positional arguments: input file and view mode
typeset xmlfile="$1"
typeset viewmode="$2"
if [[ "${xmlfile}" == "" ]] ; then
fatal_error $"No file given."
fi
# NOTE(review): the test guarding this error is not visible in this copy
fatal_error $"Invalid view mode \"${viewmode}\"."
fi
# document tree: top-level node list and its node counter
compound xdoc
compound -A xdoc.nodes
integer xdoc.nodesnum=0
# stack used by handle_document (stack.items[], stack.pos)
compound stack
integer stack.pos=0
# setup callbacks for xml_tok
typeset -A document_cb # callbacks for xml_tok
# argument for "handle_document"
# choose the input source: a "#sample1"/"#sample2" name or a real file
# NOTE(review): all three branch bodies are empty in this copy
if [[ "${xmlfile}" == "#sample1" ]] ; then
elif [[ "${xmlfile}" == "#sample2" ]] ; then
else
fi
print -u2 "#parsing completed."
# output the tree in the requested view mode
# NOTE(review): the "case" header and two of the labels are missing
list)
;;
# list the xdoc variable names that hold tag data
typeset + | egrep "xdoc.*(tagname|tagtype|tagval|tagattributes)"
;;
tree)
# print xdoc with "print -v"
print -v xdoc
;;
# print xdoc with "print -C"
print -C xdoc
;;
*)
fatal_error $"Invalid view mode \"${viewmode}\"."
;;
esac
print -u2 "#done."
exit 0
# EOF.