words.pl revision a9a5ac69d1d35b1a8f4288f7de005b16589cc0c5
f3a6f2c8fb78045b95ce0f280a212cdea61b2a1fKlaus Luettich# ##### old intention below
89ad40d3e38192dffaed9c10fad391a60fb5f273Klaus Luettich# read file "words.input" from current directory and produce
f3a6f2c8fb78045b95ce0f280a212cdea61b2a1fKlaus Luettich# "\wordline{<word>}\hline" lines. plus patterns written in lines
f3a6f2c8fb78045b95ce0f280a212cdea61b2a1fKlaus Luettich# starting with "%". The pattern should contain a variable called
f3a6f2c8fb78045b95ce0f280a212cdea61b2a1fKlaus Luettich# "$word". It functions as input.
c96e48d62b116e61c0f25523f223d0dab08272c2Christian Maeder# words.input contains words separated
f3a6f2c8fb78045b95ce0f280a212cdea61b2a1fKlaus Luettich# by space, newline or tab. Creates or overwrites a file called
f3a6f2c8fb78045b95ce0f280a212cdea61b2a1fKlaus Luettich# "generated-words.tex"
589f2b9281243158d1cce8551241a25f485f5eb2Klaus Luettich# the fonts file can be created by this bash line:
589f2b9281243158d1cce8551241a25f485f5eb2Klaus Luettich# for f in `ls /usr/share/texmf/tex/latex/psnfss/*.sty` ; do f=`basename $f| sed 's/\.sty//'`;echo '\usepackage{'$f'} ::: '$f ; done > fonts.input
f3a6f2c8fb78045b95ce0f280a212cdea61b2a1fKlaus Luettich#$ENV{'TEXINPUTS'} =$ENV{'TEXINPUTS'}."::".dirname($0);
f3a6f2c8fb78045b95ce0f280a212cdea61b2a1fKlaus LuettichCopyright : (c) Klaus Lüttich, Uni Bremen 2002-2004
f3a6f2c8fb78045b95ce0f280a212cdea61b2a1fKlaus LuettichLicense : similar to LGPL, see HetCATS/LICENSE.txt or LIZENZ.txt
792df0347edab377785d98c63e2be8e2ce0a8bdeChristian Maedermy %used_words = &process_the_words; # the words, LaTeX makros,
e00f5b4d89ac027e883461aab6248e33ad10ae8eChristian Maeder # syllables (ligatures), letters widely
0eb106d6adeaced34e54123ff962ae91bb198709Christian Maeder # used in (Het)CASL
0eb106d6adeaced34e54123ff962ae91bb198709Christian Maeder# Split into sections that give the names of the Haskell maps.
c96e48d62b116e61c0f25523f223d0dab08272c2Christian Maedermy @fonts = &read_fonts; # adds an "empty font" for LaTeX-default
e00f5b4d89ac027e883461aab6248e33ad10ae8eChristian Maedermy %widths = (); # a table of width from various fonts
c827fed2bb995225b7b19eb673a40e207adf4bf6Klaus Luettich $widths{$font->[1]} = &process_one_font($font);
66f22d1887bb451915295fb454aa1baab88129fdChristian Maeder foreach my $sec (keys %{$widths{$font->[1]}}) {
c827fed2bb995225b7b19eb673a40e207adf4bf6Klaus Luettich print "$sec :", join(",", @{${$widths{$font->[1]}}{$sec}}),"\n";
b0739ea1290697a0a4e7b52f20801b1790f99aaaDominik Lueckemy %word_widths = &calc_max_width(\%widths,\%used_words);
b0739ea1290697a0a4e7b52f20801b1790f99aaaDominik Luecke # a table of sections to table of
b0739ea1290697a0a4e7b52f20801b1790f99aaaDominik Luecke # words per section to max width over all fonts
c827fed2bb995225b7b19eb673a40e207adf4bf6Klaus Luettich print "$sec :\n",
66f22d1887bb451915295fb454aa1baab88129fdChristian Maeder join(", ", (map {"$_: ".$word_widths{$sec}{$_}; }
c827fed2bb995225b7b19eb673a40e207adf4bf6Klaus Luettich&generate_haskell_FM(\%word_widths); # uses %word_widths
e00f5b4d89ac027e883461aab6248e33ad10ae8eChristian Maeder die "cannot create Haskell module \"LaTeX_maps.hs\"";
66f22d1887bb451915295fb454aa1baab88129fdChristian Maeder # generate a list of pairs for each section named after the section
c827fed2bb995225b7b19eb673a40e207adf4bf6Klaus Luettich my @words = sort (keys %{$word_widths->{$sec}});
0eb106d6adeaced34e54123ff962ae91bb198709Christian Maeder push @two_letter_words, (grep {length($_) == 2;} @words);
c827fed2bb995225b7b19eb673a40e207adf4bf6Klaus Luettich join(",", (map {"(\"".&escape_String($_)."\",".
0eb106d6adeaced34e54123ff962ae91bb198709Christian Maeder int($word_widths->{$sec}{$_} * 0.351 * 1000).
c827fed2bb995225b7b19eb673a40e207adf4bf6Klaus Luettich my @long_words = sort (grep {length($_) > 2;} @words);
c827fed2bb995225b7b19eb673a40e207adf4bf6Klaus Luettich grep {my $ret = $_ ne $last_word;$last_word = $_;$ret; }
c827fed2bb995225b7b19eb673a40e207adf4bf6Klaus Luettich (grep {m/^$c/}
792df0347edab377785d98c63e2be8e2ce0a8bdeChristian Maeder print HASKELL "\nligatures :: Map String Bool\n",
792df0347edab377785d98c63e2be8e2ce0a8bdeChristian Maeder "ligatures = fromList [",
792df0347edab377785d98c63e2be8e2ce0a8bdeChristian Maeder join(",", map { "(\"".&escape_String($_)."\",True)";}
792df0347edab377785d98c63e2be8e2ce0a8bdeChristian Maeder grep {my $ret = $_ ne $last_word;$last_word = $_;$ret; }
0eb106d6adeaced34e54123ff962ae91bb198709Christian Maeder return join("", map {
c827fed2bb995225b7b19eb673a40e207adf4bf6Klaus Luettich if (m/^\\$/o) {
0eb106d6adeaced34e54123ff962ae91bb198709Christian Maeder } elsif(m/^"$/o) { # "
589f2b9281243158d1cce8551241a25f485f5eb2Klaus Luettich # substitute ÄÖÜßäöü with \196\214\220\223\228\246\252
589f2b9281243158d1cce8551241a25f485f5eb2Klaus Luettich $_ =~ s/�/\\196/o; $_ =~ s/�/\\214/o; $_ =~ s/�/\\220/o;
c96e48d62b116e61c0f25523f223d0dab08272c2Christian Maeder $_ =~ s/�/\\223/o;
589f2b9281243158d1cce8551241a25f485f5eb2Klaus Luettich $_ =~ s/�/\\228/o; $_ =~ s/�/\\246/o; $_ =~ s/�/\\252/o;
589f2b9281243158d1cce8551241a25f485f5eb2Klaus Luettich } split(//o,$_[0]));
589f2b9281243158d1cce8551241a25f485f5eb2Klaus Luettich return "\n$map_name :: Map String Int\n$map_name = fromList";
c96e48d62b116e61c0f25523f223d0dab08272c2Christian Maeder return "\n$map_name :: Map Char [String] \n$map_name = fromList";
0eb106d6adeaced34e54123ff962ae91bb198709Christian Maeder print "$sec: ";
0eb106d6adeaced34e54123ff962ae91bb198709Christian Maeder #print "$word: ";
c96e48d62b116e61c0f25523f223d0dab08272c2Christian Maeder my $cur = $font_widths->{$font}->{$sec}->[$width_word_index];
0eb106d6adeaced34e54123ff962ae91bb198709Christian Maeder print "$font: ".int($cur * 0.351 * 1000)." " if $word eq "~";
0eb106d6adeaced34e54123ff962ae91bb198709Christian Maeder "\nWarning: max length of $word is undefined or zero\n"
0eb106d6adeaced34e54123ff962ae91bb198709Christian Maeder #exit if $width_word_index >= 5;
e00f5b4d89ac027e883461aab6248e33ad10ae8eChristian Maeder # debugging: print "font: ".join(", ", @{$_[0]})."\n";
0eb106d6adeaced34e54123ff962ae91bb198709Christian Maeder # generate two documents one human readable and one for the machine
c96e48d62b116e61c0f25523f223d0dab08272c2Christian Maeder # &gen_tex('width-table.tex.svmono.templ',
c96e48d62b116e61c0f25523f223d0dab08272c2Christian Maeder # &gen_tex('width-table.tex.svmono.templ',
f3a6f2c8fb78045b95ce0f280a212cdea61b2a1fKlaus Luettich basename($computer_tex_filename,'.tex').'.pdf';
f3a6f2c8fb78045b95ce0f280a212cdea61b2a1fKlaus Luettich return &get_widths($computer_pdf_filename); # seperated in sections
f3a6f2c8fb78045b95ce0f280a212cdea61b2a1fKlaus Luettich my $txt_filename = basename($pdf_filename,'.pdf').'.txt';
f3a6f2c8fb78045b95ce0f280a212cdea61b2a1fKlaus Luettich #open WIDTH, "pdftotext $pdf_filename | egrep 'section: |wl: ' |"
d9062d0570e060fc81cf0d1c06c33223eb99a578Dominik Luecke# or die "cannot call pdftotext or egrep or cannot fork";
f3a6f2c8fb78045b95ce0f280a212cdea61b2a1fKlaus Luettich or die "cannot read file \"$txt_filename\"";
f3a6f2c8fb78045b95ce0f280a212cdea61b2a1fKlaus Luettich m/section: (\w+)\+\+\+/o && do {
c96e48d62b116e61c0f25523f223d0dab08272c2Christian Maeder m/wl: (\d+\.\d+)pt/o && $section ne '' && do {
2a5b885d9350ec6dd8bc4992ee91d2f68aa592f4Christian Maeder print "$sec :", join(",", @{$widths{$sec}}),"\n";
6a50fa6b0d93a521d8e52c61a3ceb71d9f878cebChristian Maeder my ($input_filename,$font_name,$purpose,$font_cmnd) = @_;
6a50fa6b0d93a521d8e52c61a3ceb71d9f878cebChristian Maeder basename($input_filename,'.tex.templ').".$font_name.".
c96e48d62b116e61c0f25523f223d0dab08272c2Christian Maeder $purpose eq 'human' && do {$no_cols = '';};
c96e48d62b116e61c0f25523f223d0dab08272c2Christian Maeder $purpose eq 'computer' && do {$no_cols = '% ';};
db43a648cdf1c1cab95853d46b89034812156419Klaus Luettich or die "cannot open file \"fonts.input\" for reading";
c96e48d62b116e61c0f25523f223d0dab08272c2Christian Maeder my @fnt_descrp = split /\s+:::\s+/;
e00f5b4d89ac027e883461aab6248e33ad10ae8eChristian Maeder open WORDS, "< words.input" or die("no file named \"words.input\" found");
db43a648cdf1c1cab95853d46b89034812156419Klaus Luettich or die "cannot write to file \"generated_words.tex\"!";
f3a6f2c8fb78045b95ce0f280a212cdea61b2a1fKlaus Luettich if ($line =~ m/^%/o) {
db43a648cdf1c1cab95853d46b89034812156419Klaus Luettich #print STDERR "$section : ",join(", ",@sec_words),"\n";
f3a6f2c8fb78045b95ce0f280a212cdea61b2a1fKlaus Luettich #print STDERR "pushed: $section : ",join(", ",@sec_words),"\n";
db43a648cdf1c1cab95853d46b89034812156419Klaus Luettich &sep_tabular("\\newpage\n\\section*{section: $section+++}\n");
e00f5b4d89ac027e883461aab6248e33ad10ae8eChristian Maeder } elsif ($line =~ m/^&/o) {
c96e48d62b116e61c0f25523f223d0dab08272c2Christian Maeder } elsif ($line =~ m/^~(.*)$/o) {
c96e48d62b116e61c0f25523f223d0dab08272c2Christian Maeder print GENWORDS "\\wordline{$fpat}\n\\hline\n";
342033b90ee6ffc0dd13f6cb9b1e95d649b4dd17Christian Maeder print GENWORDS "\\end{tabular}\n$fill_in\n\\begin{tabular}{l|l}\n\\hline\n";