#!/bin/bash

# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# See LICENSE.txt included in this distribution for the specific
# language governing permissions and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END

#
# Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
#

#
# A script that scans through the generated xrefs in an OpenGrok data
# directory and checks if they contain valid XHTML 1.0 data.
#
# The script should be run with one argument, which should be the path
# to the OpenGrok data directory (the parent of the xref directory).
# If no problems are found, the script will be silent. Otherwise, it
# will print which files are ill-formed and what the problems are.
#
# The script has only been tested on Debian systems with the
# libxml2-utils and w3c-sgml-lib packages installed.
#

# Validate the command line: exactly one argument is required, and it must
# name a directory that contains an xref subdirectory.
if [ $# -ne 1 ] || ! [ -d "$1"/xref ]; then
    # ${0##*/} strips everything up to the last slash, giving the script's
    # base name without running an external command.
    echo "Usage: ${0##*/} opengrok-data-dir" >&2
    exit 1
fi

# Local copy of the XHTML 1.0 Strict DTD that the xrefs are validated against.
dtd=/usr/share/xml/w3c-sgml-lib/schema/dtd/REC-xhtml1-20020801/xhtml1-strict.dtd

if ! [ -f "$dtd" ]; then
    echo "Cannot find $dtd." >&2
    echo "Please install w3c-sgml-lib." >&2
    exit 1
fi

# command -v is a shell builtin, so this lookup does not itself depend on
# the external which utility being installed.
if ! command -v xmllint > /dev/null 2>&1; then
    echo "Cannot find the xmllint executable." >&2
    echo "Please install libxml2-utils." >&2
    exit 1
fi

# Print the opening part of an XHTML 1.0 Strict wrapper document to stdout:
# the XML declaration, the DOCTYPE, an empty <head>, and the opening
# <body>, <div> and <pre> tags. The output is meant to be followed by the
# body content and then by the output of xml_end.
#
# Arguments: none
# Outputs:   seven lines of markup on stdout
xml_begin() {
    # The delimiter is quoted so the markup is emitted literally, with no
    # parameter or command expansion applied to it.
    cat <<'EOF'
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title></title></head>
<body>
<div><pre>
EOF
}

# Print the closing part of the XHTML wrapper document to stdout: the
# closing </pre>, </div>, </body> and </html> tags, in that order.
#
# Arguments: none
# Outputs:   three lines of markup on stdout
xml_end() {
    # The delimiter is quoted so the markup is emitted literally, with no
    # parameter or command expansion applied to it.
    cat <<'EOF'
</pre></div>
</body>
</html>
EOF
}

# Scratch file that receives xmllint's output for the xref currently being
# checked; its contents are shown only when validation fails.
if ! tmp=$(mktemp); then
    echo "Cannot create a temporary file." >&2
    exit 1
fi

# Remove the scratch file whenever the script exits, so it is not left
# behind if the run is cut short.
trap 'rm -f -- "$tmp"' EXIT

# globstar lets ** match across directory levels. nullglob makes the loop
# run zero times when no .gz file exists, instead of once with the
# unexpanded pattern as the file name.
shopt -s globstar nullglob

for xref in "$1"/xref/**/*.gz; do
    # Wrap the decompressed xref in a complete XHTML document and feed it to
    # xmllint on stdin. The status tested here is that of xmllint, the last
    # command in the pipeline. "--" keeps zcat from treating a path that
    # starts with "-" as an option.
    if ! { xml_begin; zcat -- "$xref"; xml_end; } \
        | xmllint --noout --valid --dtdvalid "$dtd" - > "$tmp" 2>&1
    then
        echo "*** ILL-FORMED DATA IN $xref ***"
        echo
        cat "$tmp"
        echo
    fi
done