#!/usr/bin/perl -w

use strict;
use warnings;
use File::Basename;

# ##### old intention below
# Read the file "words.input" from the current directory and produce
# "\wordline{<word>}\hline" lines, plus patterns written in lines
# starting with "%". The pattern should contain a variable called
# "$word"; it functions as input.
# words.input contains words separated by space, newline or tab.
# Creates or overwrites a file called "generated-words.tex".

# The fonts file can be created by this bash line:
# for f in `ls /usr/share/texmf/tex/latex/psnfss/*.sty` ; do f=`basename $f| sed 's/\.sty//'`;echo '\usepackage{'$f'} ::: '$f ; done > fonts.input

########
# conf #
########

# Names of the external programs that are started with system().
my $PDFLATEX_BIN = 'pdflatex';
my $PDFTOTEXT_BIN = 'pdftotext';

# Switches for the two external steps; set one to 0 to reuse the files
# left behind by an earlier run instead of regenerating them.
my $DO_PDFLATEX = 1; # 1 = do it
my $DO_PDFTOTEXT = 1; # 0 = don't do it

#$ENV{'TEXINPUTS'} =$ENV{'TEXINPUTS'}."::".dirname($0);

# Text written verbatim at the top of the generated Haskell module
# (see generate_haskell_FM).  The here-document is single-quoted, so
# "$Header$" is emitted literally and nothing is interpolated.
my $haskell_header = <<'END_OF_HASKELL_HEADER';
{- |
Module      :  $Header$
Copyright   :  (c) Klaus Lüttich, Uni Bremen 2002-2004
License     :  similar to LGPL, see HetCATS/LICENSE.txt or LIZENZ.txt

Maintainer  :  Christian.Maeder@dfki.de
Stability   :  provisional
Portability :  portable

-}

{-

 Created by a Perl-script (utils/words.pl)!
 DO NOT MODIFY BY HAND!!

-}

module Common.LaTeX_maps where

import Data.Map (fromList, Map)

END_OF_HASKELL_HEADER

########
# main #
########

# The words, LaTeX macros, syllables (ligatures) and letters widely used in
# (Het)CASL, split into sections; the section names give the names of the
# generated Haskell maps.
my %used_words = process_the_words();

my @fonts = read_fonts(); # includes an "empty font" for the LaTeX default
my %widths = ();          # font name => { section => [ widths ] }

foreach my $font (@fonts) {
    my $font_name = $font->[1];
    $widths{$font_name} = process_one_font($font);

    # debugging (disabled): dump the raw widths measured with this font
    0 && do {
        foreach my $sec (sort keys %{ $widths{$font_name} }) {
            print "$sec :", join(",", @{ $widths{$font_name}{$sec} }), "\n";
        }
    };
}

# a table of sections to a table of words per section to the maximal
# width over all fonts
my %word_widths = calc_max_width(\%widths, \%used_words);

# debugging (enabled): print every word with its maximal width, section
# by section; sorted so that two runs produce comparable output
foreach my $sec (sort keys %word_widths) {
    my $width_of = $word_widths{$sec};
    print "$sec :\n",
        join(", ", map { "$_: " . $width_of->{$_} } sort keys %{$width_of}),
        "\n";
}

generate_haskell_FM(\%word_widths);

########
# subs #
########

# Write the Haskell module "LaTeX_maps.hs" into the current directory.
#
# Parameter: reference to a hash  section => { word => width }.
#
# For every section two definitions are written:
#   <section>_map     :: Map String Int      -- word => scaled width
#   key_<section>_map :: Map Char [String]   -- first character => all
#                                               words longer than two
#                                               characters starting with it
# and finally "ligatures", built from the two-character words of all
# sections.  Sections and words are emitted in sorted order so that the
# generated file does not change between runs on the same input.
# Dies if the file cannot be created or written.
sub generate_haskell_FM {
    my ($word_widths) = @_;
    my $module_file = 'LaTeX_maps.hs';

    open my $haskell, '>', $module_file
        or die "cannot create Haskell module \"$module_file\": $!";
    print {$haskell} $haskell_header;

    my %is_ligature = ();
    foreach my $sec (sort keys %{$word_widths}) {
        my $width_of = $word_widths->{$sec};
        my @words = sort keys %{$width_of};
        $is_ligature{$_} = 1 foreach grep { length($_) == 2 } @words;

        # A list of (word, width) pairs named after the section.  The
        # factor 0.351 * 1000 presumably converts TeX points into
        # thousandths of a millimetre -- TODO confirm.
        print {$haskell} fm_header($sec), " [",
            join(",", map {
                     '("' . escape_String($_) . '",'
                         . int($width_of->{$_} * 0.351 * 1000) . ')'
                 } @words),
            "]\n";

        # Group the longer words by their first character.  The character
        # is compared literally; it must never be used as a regular
        # expression because it may be "(", "[", "*", "\" and the like.
        my %words_starting_with = ();
        foreach my $word (grep { length($_) > 2 } @words) {
            push @{ $words_starting_with{ substr($word, 0, 1) } }, $word;
        }
        print {$haskell} key_fm_header($sec), " [",
            join(",", map {
                     my $letter = $_;
                     # inside a Haskell character literal the single
                     # quote itself has to be escaped
                     my $char = $letter eq "'" ? "\\'"
                                               : escape_String($letter);
                     "('" . $char . "',["
                         . join(",", map { '"' . escape_String($_) . '"' }
                                    @{ $words_starting_with{$letter} })
                         . "])";
                 } sort keys %words_starting_with),
            "]\n";
    }

    print {$haskell} "\nligatures :: Map String Bool\n",
        "ligatures = fromList [",
        join(",", map { '("' . escape_String($_) . '",True)' }
                 sort keys %is_ligature),
        "]\n";

    # buffered write errors only show up when the handle is closed
    close $haskell
        or die "cannot write Haskell module \"$module_file\": $!";
    return;
}

# Escape a word so that it can be placed between the double quotes of a
# Haskell string literal.
#
# Parameter: the raw word (may be undefined or empty).
# Returns:   the escaped text; "" for an undefined or empty argument.
#
#  * backslash and double quote get a leading backslash;
#  * the Latin-1 characters with the codes 196, 214, 220, 223, 228, 246
#    and 252 (upper and lower case German umlauts and sharp s) become
#    decimal escapes such as \228.  They are written as \xNN here so that
#    the script does not depend on the encoding of its own source file;
#  * if a digit follows such an escape, the empty escape \& is inserted,
#    because Haskell would otherwise read the digit as part of the number.
# All other characters are passed through unchanged.
sub escape_String {
    my ($text) = @_;
    return '' unless defined $text;

    $text =~ s/([\\"])/\\$1/g;
    $text =~ s{([\xC4\xD6\xDC\xDF\xE4\xF6\xFC])(\d?)}
              {'\\' . ord($1) . ($2 ne '' ? '\\&' . $2 : '')}ge;
    return $text;
}

# Return the Haskell type signature and the start of the definition of
# the width map of one section: "<section>_map = fromList".  The caller
# appends the list of pairs.
sub fm_header {
    my ($section) = @_;
    my $map_name = $section . "_map";
    return "\n$map_name :: Map String Int\n$map_name = fromList";
}

# Return the Haskell type signature and the start of the definition of
# the first-character map of one section: "key_<section>_map = fromList".
# The caller appends the list of pairs.
sub key_fm_header {
    my ($section) = @_;
    my $map_name = "key_" . $section . "_map";
    return "\n$map_name :: Map Char [String] \n$map_name = fromList";
}


# Compute for every word the maximal width over all fonts.
#
# Parameters:
#   $_[0]  reference to  font name => { section => [ widths ] }
#   $_[1]  reference to  section   => [ words ]
# The n-th width of a section belongs to the n-th word of that section.
#
# Returns (as a list) a hash  section => { word => maximal width }.
# A width that is missing for a font counts as 0; a word whose maximum
# is 0 is reported on STDERR.  The tables passed in are only read.
# Progress/debug output: the name of every section and, for the word "~",
# the scaled width per font are printed to STDOUT.
sub calc_max_width {
    my ($font_widths, $words) = @_;
    my @fonts = sort keys %{$font_widths};
    my %max_widths = ();

    foreach my $sec (sort keys %{$words}) {
        my %word_max_width = ();
        my $width_word_index = 0;
        print "$sec: ";
        foreach my $word (@{ $words->{$sec} }) {
            my $max = 0;
            foreach my $font (@fonts) {
                # step down one level at a time so that a missing font or
                # section is not created in the caller's table
                my $widths_of_font = $font_widths->{$font} || {};
                my $section_widths = $widths_of_font->{$sec} || [];
                my $cur = $section_widths->[$width_word_index];
                $cur = 0 unless defined $cur;
                $max = $cur if $cur > $max;
                print "$font: " . int($cur * 0.351 * 1000) . " "
                    if $word eq "~";
            }
            print "\n" if $word eq "~";
            print STDERR
                "\nWarning: max length of $word is undefined or zero\n"
                if $max == 0;
            $word_max_width{$word} = $max;
            $width_word_index++;
        }
        $max_widths{$sec} = \%word_max_width;
    }
    return %max_widths;
}

# Return the numerically larger of the two arguments (the first one if
# both are equal).
sub max {
    my ($left, $right) = @_;
    return $left >= $right ? $left : $right;
}

# Measure all words with one font.
#
# Parameter: reference to a font description [ LaTeX command, font name ].
# Two LaTeX documents are generated from the template, one for the
# machine ("computer") and one human readable ("human"); both are run
# through pdflatex unless $DO_PDFLATEX is switched off.
# Returns what get_widths extracts from the "computer" PDF: a reference
# to a hash of widths, separated in sections.
sub process_one_font {
    my ($font) = @_;
    # debugging: print "font: ".join(", ", @{$font})."\n";
    my ($font_cmnd, $font_name) = @{$font};

    # alternative template: 'width-table.tex.svmono.templ'
    my $template = 'width-table.tex.templ';

    my $computer_tex_filename =
        gen_tex($template, $font_name, 'computer', $font_cmnd);
    my $human_tex_filename =
        gen_tex($template, $font_name, 'human', $font_cmnd);

    if ($DO_PDFLATEX) {
        pdflatex($computer_tex_filename);
        pdflatex($human_tex_filename);
    }

    my $computer_pdf_filename =
        basename($computer_tex_filename, '.tex') . '.pdf';
    return get_widths($computer_pdf_filename);
}

# Run $PDFLATEX_BIN on one LaTeX file.
#
# Parameter: name of the .tex file.
# Returns the value of system() (0 on success).  A failure is reported
# on STDERR but is not fatal, so the run continues as before.
sub pdflatex {
    my ($tex_filename) = @_;
    my $status = system($PDFLATEX_BIN, $tex_filename);
    if ($status == -1) {
        print STDERR "Warning: cannot run $PDFLATEX_BIN: $!\n";
    } elsif ($status != 0) {
        print STDERR "Warning: $PDFLATEX_BIN failed for \"$tex_filename\"",
            " (exit status ", $status >> 8, ")\n";
    }
    return $status;
}

# Extract the measured widths from a PDF generated by pdflatex.
#
# Parameter: name of the PDF file.
# Unless $DO_PDFTOTEXT is switched off the PDF is converted with
# "$PDFTOTEXT_BIN -raw" first; then the text file with the same base name
# (in the current directory) is read:
#   "section: <name>+++"  starts a new section,
#   "wl: <number>pt"      contributes one width to the current section
#                         (at most one per line; ignored before the first
#                         section).
# Returns a reference to a hash  section => [ widths in reading order ].
# Dies if the text file cannot be read; a failing conversion is only
# reported on STDERR because an older text file may still be usable.
sub get_widths {
    my ($pdf_filename) = @_;
    my $txt_filename = basename($pdf_filename, '.pdf') . '.txt';

    if ($DO_PDFTOTEXT) {
        my $status = system($PDFTOTEXT_BIN, "-raw", $pdf_filename);
        print STDERR "Warning: $PDFTOTEXT_BIN failed for \"$pdf_filename\"",
            " (status $status); \"$txt_filename\" may be stale\n"
            if $status != 0;
    }

    open my $width_fh, '<', $txt_filename
        or die "cannot read file \"$txt_filename\": $!";

    my %widths = ();
    my $section = '';
    my @section_widths = ();
    while (my $line = <$width_fh>) {
        if ($line =~ m/section: (\w+)\+\+\+/) {
            my $next_section = $1;
            # store what was collected for the section that ends here
            $widths{$section} = [@section_widths] unless $section eq '';
            @section_widths = ();
            $section = $next_section;
        }
        if ($section ne '' && $line =~ m/wl: (\d+\.\d+)pt/) {
            push @section_widths, $1;
        }
    }
    close $width_fh;
    $widths{$section} = [@section_widths];

    # debugging
    0 && do {
        foreach my $sec (keys %widths) {
            print "$sec :", join(",", @{ $widths{$sec} }), "\n";
        }
    };
    return \%widths;
}

# Generate a LaTeX document from a template.
#
# Parameters:
#   $input_filename  the template, e.g. "width-table.tex.templ"
#   $font_name       becomes part of the name of the generated file
#   $purpose         'human' or 'computer'
#   $font_cmnd       LaTeX command that selects the font
# In every line of the template the first "<set-font>" is replaced by
# $font_cmnd and the first "<no-columns>" by "% " for the purpose
# 'computer' and by nothing otherwise.
# The result is written to the current directory as
#   <template base name>.<font name>.<first letter of purpose>.tex
# Returns the name of the generated file.  Dies if the template cannot
# be read or the output cannot be written.
sub gen_tex {
    my ($input_filename, $font_name, $purpose, $font_cmnd) = @_;
    my $output_filename =
        basename($input_filename, '.tex.templ') . ".$font_name."
        . substr($purpose, 0, 1) . ".tex";
    my $no_cols = $purpose eq 'computer' ? '% ' : '';

    # open the template first: a missing template must not leave an
    # empty output file behind
    open my $templ_fh, '<', $input_filename
        or die "cannot read template \"$input_filename\": $!";
    open my $out_fh, '>', $output_filename
        or die "cannot create file \"$output_filename\": $!";

    while (my $line = <$templ_fh>) {
        $line =~ s/<set-font>/$font_cmnd/;
        $line =~ s/<no-columns>/$no_cols/;
        print {$out_fh} $line;
    }
    close $templ_fh;
    close $out_fh
        or die "cannot write file \"$output_filename\": $!";
    return $output_filename;
}

# Read the font descriptions from "fonts.input" in the current directory.
#
# Every line has the form  <LaTeX command> ::: <font name>.
# Returns a list of array references [ command, name, ... ]; the first
# entry is always [ '', 'default' ], the "empty font" that stands for the
# LaTeX default.  Empty lines are skipped; a line without " ::: " is
# reported on STDERR and skipped, because it would yield a font without
# a name.  Dies if the file cannot be opened.
sub read_fonts {
    my $fonts_file = 'fonts.input';
    open my $fonts_fh, '<', $fonts_file
        or die "cannot open file \"$fonts_file\" for reading: $!";

    my @read_fonts = (['', 'default']);
    while (my $line = <$fonts_fh>) {
        $line =~ s/\s+\z//;    # line end and trailing white space
        next if $line eq '';
        my @fnt_descrp = split /\s+:::\s+/, $line;
        if (@fnt_descrp < 2) {
            print STDERR "Warning: ignoring line $. of \"$fonts_file\": ",
                "$line\n";
            next;
        }
        push @read_fonts, \@fnt_descrp;
    }
    close $fonts_fh;
    return @read_fonts;
}

# Number of \wordline rows written to the current tabular; shared with
# sep_tabular, which resets it.
my $count_words = 0;

# Read "words.input" from the current directory and write
# "generated_words.tex".
#
# Lines of words.input:
#   empty line          start a new tabular
#   %pattern: <fmt>     sprintf format applied to the following words
#                       (initially "%s")
#   %section: <name>    start a new section; the words that follow belong
#                       to it
#   &<text>             <text> is copied verbatim to the output
#   ~<text>             start a new tabular with <text> in between
#   anything else       words separated by white space; each one becomes
#                       a "\wordline{...}\hline" row, and after 37 rows a
#                       new tabular is started
# Other lines starting with "%" are reported on STDERR.
#
# Returns (as a list) a hash  section => [ words in input order ].
# Words read before the first "%section:" are dropped as soon as a
# section starts.  Dies if either file cannot be opened or written.
sub process_the_words {
    my $pat = '%s';
    my @sec_words = ();
    my %all_words = ();
    my $section = '';

    # The initialiser of $count_words above runs only when execution
    # reaches that line, which is after the main section has called this
    # sub -- so start from a defined value here.
    $count_words = 0;

    open my $words_fh, '<', 'words.input'
        or die "no file named \"words.input\" found: $!";
    # GENWORDS stays a global handle because sep_tabular prints to it.
    open GENWORDS, '>', 'generated_words.tex'
        or die "cannot write to file \"generated_words.tex\": $!";

    while (my $line = <$words_fh>) {
        if ($line =~ m/^\s*$/) {
            sep_tabular();
            next;
        }
        if ($line =~ m/^%/) {
            chomp $line;
            if ($line =~ s/^%pattern:\s*//) {
                $pat = $line;
            } elsif ($line =~ s/^%section:\s*//) {
                unless ($section eq '') {
                    $all_words{$section} = [@sec_words];
                }
                @sec_words = ();
                $section = $line;
                sep_tabular("\\newpage\n\\section*{section: $section+++}\n");
            } else {
                print STDERR "unknown directive: $line\n";
            }
        } elsif ($line =~ s/^&//) {
            print GENWORDS $line;
        } elsif ($line =~ m/^~(.*)$/) {
            sep_tabular($1);
        } else {
            # split ' ' ignores leading white space, so an indented line
            # does not produce an empty first word
            my @words = split ' ', $line;
            push @sec_words, @words;
            foreach my $word (@words) {
                my $fpat = sprintf($pat, $word);
                print GENWORDS "\\wordline{$fpat}\n\\hline\n";
                sep_tabular() if ++$count_words >= 37;
            }
        }
    }
    close $words_fh;
    # buffered write errors only show up when the handle is closed
    close(GENWORDS)
        or die "cannot write to file \"generated_words.tex\": $!";

    $all_words{$section} = [@sec_words];
    return %all_words;
}

# End the current tabular in the file behind the global handle GENWORDS
# and begin a new one.
#
# Parameter (optional): text that is placed between the two tabulars;
# nothing if it is omitted or undefined.
# Resets the row counter $count_words.
sub sep_tabular {
    my ($fill_in) = @_;
    $fill_in = '' unless defined $fill_in;
    $count_words = 0;
    print GENWORDS
        "\\end{tabular}\n$fill_in\n\\begin{tabular}{l|l}\n\\hline\n";
    return;
}
