check-markup revision 167be017296089bf57b904a7b060e6ad0354d201
# Try to detect markup errors in translations.
# Author: Peter Moulder <pmoulder@mail.csse.monash.edu.au>
# Copyright (C) 2004 Monash University
# License: GNU GPL v2 or (at your option) any later version.
# Initial egrep version:
#mydir=`dirname "$0"`
#egrep '<b>[^<>]*(>|<([^/]|/([^b"]|b[^>])))' "$mydir"/*.po
# Somewhat simplified by use of negative lookahead in perl.
# (The egrep version as written can't detect problems that span a line,
# e.g. unterminated `<b>'. One way of doing the s/"\n"//g thing would be with
# tr and sed, but that requires a sed that allows arbitrary line lengths, which
# many non-GNU seds don't.)
use strict;
my $attrsRE = qr/(?: +[^<>]*)?/;
my $span_attr = qr/(?:\ +(?:font_(?:desc|family)|face|size|style|weight|variant|stretch|(?:fore|back)ground|underline|rise|strikethrough|fallback|lang)\=\\\"[^\\\"]*\\\")/;
my $rc = 0;
sub po_error ($) {
my ($msg) = @_;
$name =~ s,.*/,,;
$rc = 1;
}
# Returns true iff successful.
sub check_str ($$) {
my ($certainly_pango_str, $str) = @_;
if ($str =~ /\AProject-Id-Version:.*POT-Creation-Date/
or $str =~ /\A<[^<>]*>\Z/) {
# Not a Pango string.
return 1;
}
my $is_xml = 0;
# Remove valid sequences.
while ($str =~ s{<([bisu]|big|su[bp]|small|tt|span)(${attrsRE})>[^<>]*</\1>}{}) {
$is_xml = 1;
if ($tag eq 'span') {
$attrs =~ s/${span_attr}*//g;
if ($attrs ne '') {
$attrs =~ s/\A *//;
return 0;
}
} else {
return 0;
}
}
}
return 0;
}
return 0;
}
return 0;
}
# Check for attributes etc. in non-<span> element.
return 0;
}
return 0;
}
return 0;
}
$str =~ s/<[ 01]//g;
$str =~ s/\A>+//;
$str =~ s/<+\Z//;
$str =~ s/\([<>][01]\)//g;
$str =~ s/ -> //g;
# Quoting.
$str =~ s/\[[<>]\]//g;
}
$str =~ s/\A[^<>]*//;
$str =~ s/[^<>]*\Z//;
return 0;
} else {
return 1;
}
}
if ($str ne '') {
po_error("parsing error for `$str'");
return 0;
}
return 1;
}
sub check_strs ($@) {
my $is_pango_str = shift(@_);
if ($#_ < 1) {
}
if ((($_[0] eq '""') && ($_[1] =~ /Project-Id-Version:.*POT-Creation-Date:/s))
or $is_pango_str == 0)
{
# Not a Pango string.
return 1;
}
foreach my $str (@_) {
$str eq '""' or check_str($is_pango_str - 1, $str) or return 0;
}
return 1;
}
$/ = '';
# Reference for the markup language:
# (though not all translation strings will be pango markup strings).
ENTRY: while(<>) {
if (m{\A${com}*\Z}) {
next ENTRY;
}
s/"\n"//g;
if (!m{\A${com}*msgid[^\n]*\n${com}*msgstr[^\n]*\n${com}*\Z} &&
!m{\A${com}*msgid[^\n]*\n${com}*msgid_plural[^\n]*\n${com}*(msgstr\[[^\n]*\n${com}*)+\Z}) {
po_error('Not in msg format');
next ENTRY;
}
if (!m{\A${com}*msgid ${str}\s*\n${com}*msgstr ${str}\s*\n${com}*\Z} &&
!m{\A${com}*msgid ${str}\s*\n${com}*msgid_plural ${str}\s*\n${com}*(msgstr\[\d+\] ${str}\s*\n${com}*)+\Z}) {
po_error('Mismatched quotes');
next ENTRY;
}
if (m{\n\#(?:,\ [-a-z0-9]+)*,\ fuzzy}) {
# Fuzzy entries aren't used, so ignore them.
# (This prevents warnings about mismatching <>/ pattern.)
next ENTRY;
}
# 0 for known not pango format, 2 for known pango format.
my $is_pango_format = 1;
if (m{\n\#\.\ .*\bxgettext:(no-)?pango-format\s}) {
$is_pango_format = ( defined($1) ? 0 : 2 );
}
if (m{\n\#(?:,\ [-a-z0-9+])*,\ (no-)?pango-format[,\n]}) {
$is_pango_format = ( defined($1) ? 0 : 2 );
}
if (m{\n\#:\ \.\./share/extensions/[-a-zA-Z0-9_]+\.inx(?:\.h)?:}) {
$is_pango_format = 0;
}
if (m{\A${com}*msgid\ (${str})\n
${com}*msgstr\ (${str})\n
${com}*\Z}x) {
check_strs($is_pango_format, $1, $2) or next ENTRY;
}
elsif (m{\A${com}*msgid\ (${str})\n
${com}*msgid_plural\ (${str})\n
((?:${com}*msgstr\[\d+\]\ ${str}\n${com}*)+)\Z}x) {
while ($rest =~ s/\A${com}*msgstr\[\d+\]\ (${str})\n${com}*//) {
push @strs, ($1);
}
}
next ENTRY;
} else {
}
}
# Some makefiles (currently the top-level Makefile.am) expect this script to
# exit 1 if any problems found.
exit $rc;
# vi: set autoindent shiftwidth=4 tabstop=8 encoding=utf-8 softtabstop=4 filetype=perl :