#!/usr/bin/perl -w

use strict;
use File::Basename;

# ##### old intention below
# read file "words.input" from current directory and produce
# "\wordline{<word>}\hline" lines, plus patterns written in lines
# starting with "%". The pattern should contain a variable called
# "$word". It functions as input.
# words.input contains words separated
# by space, newline or tab; creates or overwrites a file called
# "generated-words.tex"

# the fonts file can be created by this bash line:
# for f in `ls /usr/share/texmf/tex/latex/psnfss/*.sty` ; do f=`basename $f| sed 's/\.sty//'`;echo '\usepackage{'$f'} ::: '$f ; done > fonts.input

########
# conf #
########

# Command names handed to system() for the two external tools.
my $PDFLATEX_BIN  = 'pdflatex';
my $PDFTOTEXT_BIN = 'pdftotext';

# Switches for the external tool runs; set one to 0 to reuse the files a
# previous run left in the current directory.
my $DO_PDFLATEX  = 1; # 1 = do it
my $DO_PDFTOTEXT = 1; # 0 = don't do it

#$ENV{'TEXINPUTS'} =$ENV{'TEXINPUTS'}."::".dirname($0);

# Text written verbatim at the top of the generated Haskell module.
# It is single-quoted on purpose: "$Header$" and "$Id$" must reach the
# output unchanged and must not be interpolated by Perl.
# The author's name is spelled in plain ASCII so that the generated file
# does not depend on the encoding of this script.
my $haskell_header =
'{- |
Module : $Header$
Copyright : (c) Klaus Luettich, Uni Bremen 2002-2004
License : similar to LGPL, see HetCATS/LICENSE.txt or LIZENZ.txt

Maintainer : Christian.Maeder@dfki.de
Stability : provisional
Portability : portable

-}
{-
 HetCATS/Common/LaTeX_maps
 $Id$
 Author: Klaus Luettich
 Year: 2002

 Created by a Perl-script (HetCATS/utils/words.pl)!
 DO NOT MODIFY BY HAND!!

-}

module Common.LaTeX_maps where

import Common.Lib.Map(fromList,Map)

';

########
# main #
########

# Debug dumps on STDOUT: the per-font width lists are off, the final
# per-section word widths are on (same settings as before).
my $DEBUG_FONT_WIDTHS = 0;
my $DEBUG_WORD_WIDTHS = 1;

# the words, LaTeX makros, syllables (ligatures), letters widely used in
# (Het)CASL, split into sections that give the names of the Haskell maps.
my %used_words = process_the_words();

my @fonts  = read_fonts(); # adds an "empty font" for LaTeX-default
my %widths = ();           # a table of width from various fonts

foreach my $font (@fonts) {
    my $font_name = $font->[1];
    $widths{$font_name} = process_one_font($font);
    if ($DEBUG_FONT_WIDTHS) {
        foreach my $sec (keys %{ $widths{$font_name} }) {
            print "$sec :", join(",", @{ $widths{$font_name}{$sec} }), "\n";
        }
    }
}

# a table of sections to table of words per section to max width over
# all fonts
my %word_widths = calc_max_width(\%widths, \%used_words);

if ($DEBUG_WORD_WIDTHS) {
    foreach my $sec (keys %word_widths) {
        my @report = ();
        foreach my $word (sort keys %{ $word_widths{$sec} }) {
            push @report, "$word: " . $word_widths{$sec}{$word};
        }
        print "$sec :\n", join(", ", @report), "\n";
    }
}

generate_haskell_FM(\%word_widths); # uses %word_widths

########
# subs #
########

# generate_haskell_FM(\%word_widths)
#
# Writes the Haskell module "LaTeX_maps.hs" into the current directory.
#
# $word_widths maps a section name to a hash of word => width.  For every
# section (in sorted order, so the generated file is reproducible) two
# definitions are written:
#   <section>_map      all words of the section with their scaled width
#   key_<section>_map  the words longer than two characters, grouped by
#                      their first character
# A final "ligatures" map lists every two-character word of any section.
#
# Dies if the module cannot be created or written completely.
sub generate_haskell_FM {
    my ($word_widths) = @_;

    my $module_file = 'LaTeX_maps.hs';
    open my $haskell, '>', $module_file
        or die "cannot create Haskell module \"$module_file\": $!";
    print {$haskell} $haskell_header;

    my %is_ligature = ();
    foreach my $sec (sort keys %{$word_widths}) {
        my $width_of = $word_widths->{$sec};
        my @words    = sort keys %{$width_of};

        $is_ligature{$_} = 1 for grep { length($_) == 2 } @words;

        # a list of pairs for each section, named after the section
        my @pairs = ();
        foreach my $word (@words) {
            # presumably converts TeX points into thousandths of a
            # millimetre (1pt ~ 0.351mm) -- TODO confirm against the consumer
            my $scaled = int($width_of->{$word} * 0.351 * 1000);
            push @pairs, '("' . escape_String($word) . '",' . $scaled . ')';
        }
        print {$haskell} fm_header($sec), " [", join(",", @pairs), "]\n";

        # Group the long words by their raw first character.  The grouping
        # key is compared as a plain string; the escaped form is only used
        # for output, never as a regular expression.
        my %words_starting_with = ();
        foreach my $word (grep { length($_) > 2 } @words) {
            push @{ $words_starting_with{ substr($word, 0, 1) } }, $word;
        }

        my @groups = ();
        foreach my $first (sort keys %words_starting_with) {
            # a single quote has to be escaped inside a Haskell Char literal
            my $char_literal = $first eq "'" ? "\\'" : escape_String($first);
            my @quoted = map { sprintf('"%s"', escape_String($_)) }
                         @{ $words_starting_with{$first} };
            push @groups, "('" . $char_literal . "',[" . join(",", @quoted) . "])";
        }
        print {$haskell} key_fm_header($sec), " [", join(",", @groups), "]\n";
    }

    my @ligature_pairs = map { sprintf('("%s",True)', escape_String($_)) }
                         sort keys %is_ligature;
    print {$haskell} "\nligatures :: Map String Bool\n",
                     "ligatures = fromList [",
                     join(",", @ligature_pairs),
                     "]\n";

    # buffered write errors only show up when the handle is closed
    close $haskell
        or die "cannot finish writing Haskell module \"$module_file\": $!";
    return 1;
}

# escape_String($text)
#
# Returns $text prepared for use inside a Haskell string literal:
#   - a backslash and a double quote are prefixed with a backslash
#   - the German letters with the Latin-1 code points 196, 214, 220, 223,
#     228, 246 and 252 become decimal escapes such as "\196"
#   - every other character is copied unchanged
# If a decimal escape is directly followed by a digit, the empty escape
# "\&" is inserted so that Haskell does not read the digit as part of the
# character number.
# An undefined argument yields the empty string.
sub escape_String {
    my ($text) = @_;
    return '' unless defined $text;

    my @chars   = split //, $text;
    my $escaped = '';
    foreach my $i (0 .. $#chars) {
        my $ch = $chars[$i];
        if ($ch eq '\\' || $ch eq '"') {
            $escaped .= "\\$ch";
        }
        elsif ($ch =~ m/\A[\xC4\xD6\xDC\xDF\xE4\xF6\xFC]\z/) {
            $escaped .= '\\' . ord($ch);
            $escaped .= '\\&'
                if $i < $#chars && $chars[$i + 1] =~ m/\A[0-9]\z/;
        }
        else {
            $escaped .= $ch;
        }
    }
    return $escaped;
}

# fm_header($section)
#
# Returns the Haskell type signature and the start of the definition of
# the width map named "<section>_map"; the caller appends the pair list.
sub fm_header {
    my ($section) = @_;
    my $map_name = $section . "_map";
    return "\n$map_name :: Map String Int\n$map_name = fromList";
}

# key_fm_header($section)
#
# Returns the Haskell type signature and the start of the definition of
# the first-character index named "key_<section>_map"; the caller appends
# the list of (Char, [String]) pairs.
sub key_fm_header {
    my ($section) = @_;
    my $map_name = "key_" . $section . "_map";
    return "\n$map_name :: Map Char [String] \n$map_name = fromList";
}


# calc_max_width(\%font_widths, \%words)
#
# $font_widths: font name => { section => [ width, width, ... ] }
# $words:       section   => [ word, word, ... ]
#
# The n-th width of a section belongs to the n-th word of that section.
# Returns a hash (as a list) section => { word => largest width of that
# word over all fonts }.  A width that is missing for a font counts as 0.
#
# Side effects: prints each section name to STDOUT, prints the per-font
# scaled widths of the word "~" as a trace, and warns on STDERR about
# every word whose largest width is zero.
sub calc_max_width {
    my ($font_widths, $words) = @_;
    my @fonts      = keys %{$font_widths};
    my %max_widths = ();

    foreach my $sec (keys %{$words}) {
        my %word_max_width   = ();
        my $width_word_index = 0;
        print "$sec: ";
        foreach my $word (@{ $words->{$sec} }) {
            my $trace = $word eq "~";
            my $max   = 0;
            foreach my $font (@fonts) {
                my $cur = $font_widths->{$font}->{$sec}->[$width_word_index];
                $cur = 0 unless defined $cur;
                $max = $cur if $cur > $max;
                print "$font: " . int($cur * 0.351 * 1000) . " " if $trace;
            }
            print "\n" if $trace;
            print STDERR
                "\nWarning: max length of $word is undefined or zero\n"
                if $max == 0;
            $word_max_width{$word} = $max;
            $width_word_index++;
        }
        $max_widths{$sec} = \%word_max_width;
    }
    return %max_widths;
}

# max($left, $right)
#
# Returns the numerically larger of the two arguments; the first one when
# both are equal.
sub max {
    my ($left, $right) = @_;
    return $left >= $right ? $left : $right;
}

# process_one_font([$font_cmnd, $font_name])
#
# Takes one font descriptor as produced by read_fonts.  Generates two
# LaTeX documents from the template, one human readable and one for the
# machine, runs pdflatex on both when $DO_PDFLATEX is set, and returns
# what get_widths extracts from the machine variant's PDF (a reference to
# a hash of widths, separated in sections).
sub process_one_font {
    my ($font) = @_;
    # debugging: print "font: ".join(", ", @{$font})."\n";
    my ($font_cmnd, $font_name) = @{$font};

    # alternative template: 'width-table.tex.svmono.templ'
    my $template = 'width-table.tex.templ';

    my $computer_tex_filename =
        gen_tex($template, $font_name, 'computer', $font_cmnd);
    my $human_tex_filename =
        gen_tex($template, $font_name, 'human', $font_cmnd);

    if ($DO_PDFLATEX) {
        pdflatex($computer_tex_filename);
        pdflatex($human_tex_filename);
    }

    my $computer_pdf_filename =
        basename($computer_tex_filename, '.tex') . '.pdf';
    return get_widths($computer_pdf_filename);
}

# pdflatex($tex_filename)
#
# Runs $PDFLATEX_BIN on the given file and returns the status reported by
# system().  A non-zero status is reported on STDERR but is not fatal,
# so the remaining fonts are still processed.
sub pdflatex {
    my ($tex_filename) = @_;
    my $status = system($PDFLATEX_BIN, $tex_filename);
    warn "Warning: $PDFLATEX_BIN failed on \"$tex_filename\" (status $status)\n"
        if $status != 0;
    return $status;
}

# get_widths($pdf_filename)
#
# Converts the PDF to text with "$PDFTOTEXT_BIN -raw" (when $DO_PDFTOTEXT
# is set) and reads the text file named like the PDF with the extension
# ".txt" from the current directory -- presumably where pdftotext puts
# its output for a PDF in the current directory; TODO confirm.
#
# A line containing "section: <name>+++" starts a new section; every
# "wl: <number>pt" after it adds one width to that section.  Widths seen
# before the first section marker are ignored.
#
# Returns a reference to a hash section => [ widths in file order ].  As
# before, the section that is current at end of file is always stored,
# so a text without any section marker yields the key '' with an empty
# list.
#
# Dies if the text file cannot be read.
sub get_widths {
    my ($pdf_filename) = @_;
    my $txt_filename = basename($pdf_filename, '.pdf') . '.txt';

    if ($DO_PDFTOTEXT) {
        my $status = system($PDFTOTEXT_BIN, "-raw", $pdf_filename);
        warn "Warning: $PDFTOTEXT_BIN failed on \"$pdf_filename\" (status $status)\n"
            if $status != 0;
    }

    open my $width_fh, '<', $txt_filename
        or die "cannot read file \"$txt_filename\": $!";

    my %widths         = ();
    my $section        = '';
    my @section_widths = ();
    while (my $line = <$width_fh>) {
        if ($line =~ m/section: (\w+)\+\+\+/) {
            my $next_section = $1;
            $widths{$section} = [@section_widths] unless $section eq '';
            @section_widths = ();
            $section = $next_section;
        }
        # deliberately not an "elsif": a width on the marker line itself
        # already belongs to the new section
        if ($section ne '' && $line =~ m/wl: (\d+\.\d+)pt/) {
            push @section_widths, $1;
        }
    }
    close $width_fh;
    $widths{$section} = [@section_widths];

    # debugging:
    # foreach my $sec (keys %widths) {
    #     print "$sec :", join(",", @{$widths{$sec}}), "\n";
    # }
    return \%widths;
}
# gen_tex($input_filename, $font_name, $purpose, $font_cmnd)
#
# Instantiates a LaTeX template.  In every line the first "<set-font>" is
# replaced by $font_cmnd and the first "<no-columns>" by "% " when
# $purpose is 'computer' and by nothing otherwise.
#
# The result is written to the current directory as
# "<template name without .tex.templ>.<font name>.<first letter of
# purpose>.tex"; that file name is returned.
#
# Dies if the template cannot be read or the output cannot be written.
# The template is opened first, so a missing template no longer leaves an
# empty output file behind.
sub gen_tex {
    my ($input_filename, $font_name, $purpose, $font_cmnd) = @_;
    my $output_filename =
        basename($input_filename, '.tex.templ') . ".$font_name."
        . substr($purpose, 0, 1) . ".tex";
    my $no_cols = $purpose eq 'computer' ? '% ' : '';

    open my $templ_fh, '<', $input_filename
        or die "cannot read template \"$input_filename\": $!";
    open my $out_fh, '>', $output_filename
        or die "cannot write to file \"$output_filename\": $!";

    while (my $line = <$templ_fh>) {
        $line =~ s/<set-font>/$font_cmnd/;
        $line =~ s/<no-columns>/$no_cols/;
        print {$out_fh} $line;
    }

    close $templ_fh;
    # buffered write errors only show up when the handle is closed
    close $out_fh
        or die "cannot finish writing \"$output_filename\": $!";
    return $output_filename;
}
# read_fonts()
#
# Reads "fonts.input" from the current directory.  Every line has the form
#   <LaTeX command selecting the font> ::: <font name>
# and becomes one descriptor [ command, name ].
#
# Returns the list of descriptors, preceded by [ '', 'default' ] which
# stands for the LaTeX default font.  Blank lines are skipped; lines
# without the " ::: " separator are skipped with a warning, because a
# descriptor without a name cannot be processed.  Trailing whitespace
# (including a carriage return) is removed from each line.
#
# Dies if the file cannot be opened.
sub read_fonts {
    open my $fonts_fh, '<', 'fonts.input'
        or die "cannot open file \"fonts.input\" for reading: $!";

    my @read_fonts = (['', 'default']);
    while (my $line = <$fonts_fh>) {
        $line =~ s/\s+\z//;
        next if $line eq '';

        my @fnt_descrp = split /\s+:::\s+/, $line;
        if (@fnt_descrp < 2) {
            warn "Warning: ignoring malformed line $. of \"fonts.input\": $line\n";
            next;
        }
        push @read_fonts, \@fnt_descrp;
    }
    close $fonts_fh;
    return @read_fonts;
}
# Number of \wordline rows written since the last table break.  Shared
# with sep_tabular, which resets it.
my $count_words = 0;

# process_the_words()
#
# Reads "words.input" from the current directory and writes
# "generated_words.tex".  The input is processed line by line:
#   blank line         starts a new table
#   %pattern: <fmt>    sprintf format applied to every following word
#   %section: <name>   starts a new section (new page, heading, table)
#   &<text>            <text> is copied to the output unchanged
#   ~<text>            starts a new table with <text> in front of it
#   anything else      whitespace separated words; each one becomes a
#                      "\wordline{...}" row followed by "\hline"
# A new table is also started after 37 rows.
#
# Returns a hash (as a list) section => [ words in input order ].  Words
# in front of the first "%section:" line are dropped at that line; if the
# input has no "%section:" line at all they are returned under the key ''.
#
# Dies if either file cannot be opened or the output cannot be written
# completely.
sub process_the_words {
    my $max_rows  = 37;
    my $pat       = '%s';
    my @sec_words = ();
    my %all_words = ();
    my $section   = '';

    # The initialiser above has not run yet when this sub is called from
    # code that precedes it in the file, so start from a known value.
    $count_words = 0;

    open my $words_fh, '<', 'words.input'
        or die "no file named \"words.input\" found: $!";
    # GENWORDS stays a package filehandle because sep_tabular prints to it.
    open GENWORDS, '>', 'generated_words.tex'
        or die "cannot write to file \"generated_words.tex\"!: $!";

    while (my $line = <$words_fh>) {
        if ($line =~ m/^\s*$/) {
            sep_tabular();
            next;
        }

        if ($line =~ m/^%/) {
            chomp $line;
            if ($line =~ s/^%pattern:\s*//) {
                $pat = $line;
            }
            elsif ($line =~ s/^%section:\s*//) {
                $all_words{$section} = [@sec_words] unless $section eq '';
                @sec_words = ();
                $section   = $line;
                sep_tabular("\\newpage\n\\section*{section: $section+++}\n");
            }
            else {
                print STDERR "unknown directive: $line\n";
            }
        }
        elsif ($line =~ s/^&//) {
            print GENWORDS $line;
        }
        elsif ($line =~ m/^~(.*)$/) {
            my $fill_in = $1;
            sep_tabular($fill_in);
        }
        else {
            # split ' ' ignores leading whitespace, so an indented line
            # does not produce an empty word
            my @words = split ' ', $line;
            push @sec_words, @words;
            foreach my $word (@words) {
                my $fpat = sprintf($pat, $word);
                print GENWORDS "\\wordline{$fpat}\n\\hline\n";
                sep_tabular() if ++$count_words >= $max_rows;
            }
        }
    }

    close $words_fh;
    # buffered write errors only show up when the handle is closed
    close GENWORDS
        or die "cannot finish writing \"generated_words.tex\": $!";

    $all_words{$section} = [@sec_words];
    return %all_words;
}
# sep_tabular([$fill_in])
#
# Closes the current tabular in the GENWORDS output, writes the optional
# text $fill_in on its own line (nothing but the line break when it is
# omitted or undefined) and opens a new two-column tabular.  The row
# counter $count_words is reset.
sub sep_tabular {
    my ($fill_in) = @_;
    $fill_in = '' unless defined $fill_in;

    $count_words = 0;
    print GENWORDS "\\end{tabular}\n$fill_in\n\\begin{tabular}{l|l}\n\\hline\n";
}