# This file is part of reLyX # Copyright (c) 1998-9 Amir Karger karger@post.harvard.edu # You are free to use and modify this code under the terms of # the GNU General Public Licence version 2 or later. package CleanTeX; # This package prepares a LaTeX file for translation to LyX # - Translates some local commands (e.g., {\em blah} to {\emph{blah}}) # - Prepares math mode stuff for LyX. LyX reads LaTeX math mode directly, # so reLyX can basically copy all math mode exactly, but LyX is a # bit stricter than LaTeX. E.g., translate 'x^2' -> 'x^{2} # - Removes optional arguments if LyX doesn't understand them, e.g. \\ use strict; use Verbatim; ###### # Global variables my $last_eaten; # last token we ate # List of commands for which LyX doesn't support the optional argument my @DeleteOptArg = map {"\\$_"} qw(\\ \\* chapter section subsection subsubsection paragraph subparagraph ); my $debug_on; # was -d option given? ######################### PARSER INVOCATION ################################ sub call_parser { # This subroutine opens the TeX parser and processes the file. # Arg0 is the name of the input TeX file # Arg1 is the name of the output "clean" file my ($InFileName, $OutFileName) = (shift,shift); $debug_on = (defined($main::opt_d) && $main::opt_d); my $zzz=$debug_on ? " TeX file ($InFileName --> $OutFileName)\n" :"... "; print STDERR "Cleaning$zzz"; open (OUTFILE, ">$OutFileName") or die "problem opening $OutFileName: $!\n"; # Create the list of tokens for the parser # Parts of the token list are swiped from TeX.pm my %MyTokens = ( '{' => $Text::TeX::Tokens{'{'}, '}' => $Text::TeX::Tokens{'}'}, '$' => $Text::TeX::Tokens{'$'}, '$$' => $Text::TeX::Tokens{'$$'}, '\begin' => $Text::TeX::Tokens{'\begin'}, '\end' => $Text::TeX::Tokens{'\end'}, ); # Put local tokens, like \em, into %MyTokens #Note: \cal is "local", although it's found in math mode # (The "map" just puts a backslash in front of each word in the list) my @LocalTokens = qw (em rm bf tt sf sc sl it rmfamily ttfamily sffamily mdseries bfseries upshape itshape slshape scshape cal ); foreach (@LocalTokens) { $MyTokens{"\\$_"} = $Text::TeX::Tokens{'\em'} } # Now add any commands &ReadCommands::Merge(\%MyTokens); # Create the fileobject my $file = new Text::TeX::OpenFile $InFileName, 'defaultact' => \&clean_tex, 'tokens' => \%MyTokens; # Now actually process the file $file->process; close OUTFILE; #warn "Done cleaning TeX file\n"; } # end sub call_parser ####################### MAIN TRANSLATING SUBROUTINE ######################## # Routine called by the TeX-parser to perform token-processing. sub clean_tex { my($eaten,$txt) = (shift,shift); my ($outstr, $type); # Sub translate is given a string and one of the translation tables below. # It returns the translation, or just the string if there's no translation # Translation table for TT::Begin::Group tokens my %begtranstbl = ( '$' => '\(', # LyX math mode doesn't '$$' => '\[', # understand \$ or $$ ); # Translation table for TT::End::Group tokens my %endtranstbl = ( '$' => '\)', '$$' => '\]', ); # Translation table for TT::Token tokens whose translations should # NOT have whitespace after them! See sub translate... # Note that tokens of type TT::EndLocal are always translated to '}'. So, # any token defined as a local token *must* be translated to something # with a '{' (e.g., '\em' -> '\emph{') or we'll have mismatched braces my %no_ws_transtbl = ( '\em' => '\emph{', '\rm' => '\textrm{', '\bf' => '\textbf{', '\tt' => '\texttt{', '\sf' => '\textsf{', '\sc' => '\textsc{', '\sl' => '\textsl{', '\it' => '\textit{', '\rmfamily' => '\textrm{', '\ttfamily' => '\texttt{', '\sffamily' => '\textsf{', '\mdseries' => '\textmd{', '\bfseries' => '\textbf{', '\upshape' => '\textup{', '\itshape' => '\textit{', '\slshape' => '\textsl{', '\scshape' => '\textsc{', '\cal' => '\mathcal{', ); # a faux "switch" statement. sets $_ for later use in pattern # matching. $type = ref($eaten); $type =~ s/^Text::TeX::// or die "Non-Text::TeX object"; my $printstr = ""; # default for undefined printstrs etc. SWITCH: for ($type) { # Handle blank lines. if (/Paragraph/) { last SWITCH; } # Handle the end of a local font command - insert a '}' if (/EndLocal/) { # we could just say $printstr='}' $printstr = &translate('}', \%endtranstbl); last SWITCH; } # $eaten->exact_print is undefined for previous environments $outstr = $eaten->exact_print; if (! defined $outstr) { # comment at end of paragraph warn "Weird undefined token $eaten!" unless $eaten->comment; last SWITCH; } # Handle LaTeX tokens if (/^Token$/) { my $realtok = $eaten->print; # w/out whitespace # If a comment is its own paragraph, print nothing last SWITCH unless defined($realtok); # Special handling for \verb and \verb* if ($realtok =~ /^\\verb\*?/) { $printstr = &Verbatim::copy_verb($txt,$eaten); last SWITCH; } # Translate token if necessary, or just print it # "no_ws" is HACK to remove whitespace, so '\em ' -> '\emph{' $printstr = &translate($outstr, \%no_ws_transtbl, "no_ws"); # Ignore optional argument(s) if necessary $printstr .= &handle_opt_args($eaten,$txt); last SWITCH; } # Tokens taking arguments, like '^' # ADD '{' if there isn't one before the argument! if (/^BegArgsToken$/) { $printstr = $outstr; # Ignore optional argument(s) if necessary $printstr .= &handle_opt_args($eaten,$txt); # Add beginning brace before the 1st argument if there isn't one my $tok = $txt->lookAheadToken; $printstr .= '{' unless ($tok =~ /\{/); last SWITCH; } # End of one argument, beginning of next # Note: by default ArgToken,EndArgsToken print nothing # ADD '}' if there isn't one after the last argument # Then read and print any optional arguments which may exist # between this argument the next (we must do this here or we would # add a '{' before an optional argument!) # ADD '{' if there isn't one before the next argument! # (just like we do in BegArgsToken and EndArgsToken) if (/^ArgToken$/) { $printstr = $outstr; # = '' # Add '}' after the argument that ended if necessary $printstr .= '}' unless $last_eaten->print eq "\}"; # Eat and print any optional arguments $printstr .= &handle_opt_args($eaten,$txt); # Add '{' before the next argument if necessary my $tok = $txt->lookAheadToken; $printstr .= '{' unless ($tok =~ /\{/); last SWITCH; } # End of tokens taking arguments, like '^' # ADD '}' if there isn't one after the last argument, i.e., # if the previous token *wasn't* a '}' # Kludge: for TeX style \input command ("\input foo" with no # braces) we need to read the whole filename, but parser will have # read only one char. So read in the rest of the filename before # printing the '}'. if (/^EndArgsToken$/) { $printstr = $outstr; # = '' unless ($last_eaten->print eq "\}") { my $s = $eaten->base_token; if ($s->print eq "\\input") { my $t = $txt->lookAheadToken; # For one-char filename (a.tex) do nothing if ($t =~ /^[\w.\-]/) { my $u = $txt->eatMultiToken; $t = $u->print; $t =~ s/\s+//g; $printstr .= $t; # TeX \input always adds .tex ending $printstr .= ".tex"; } $printstr .= '}'; # Don't bother eating optional args coming after the last # required arg: they'll just be copied as text last SWITCH; } # Handle opening groups, like '{' and '$'. if (/Begin::Group$/) { $printstr = &translate($outstr,\%begtranstbl); last SWITCH; } # Handle closing groups, like '}' and '$'. if (/End::Group$/) { $printstr = &translate($outstr, \%endtranstbl); last SWITCH; } if (/Begin::Group::Args/) { my $env = $eaten->environment; $printstr = $outstr; if ($env eq "verbatim" || $env eq "reLyXskip") { # copy everything up to "\end{foo}" $printstr .= &Verbatim::copy_verbatim($txt, $eaten); } last SWITCH; } if (/End::Group::Args/) { $printstr = $outstr; last SWITCH; } if (/Text/) { $printstr = $outstr; last SWITCH; } # The default action - print the string. $printstr = $outstr; } # end SWITCH:for ($type) # Actually print the string if (defined $printstr) { print OUTFILE $printstr; $last_eaten = $eaten; #save for next time } else {warn "Undefined printstr";} } # end sub clean_tex #################### TRANSLATOR SUBROUTINES ############################### sub translate { # Replace a string (possibly with whitespace around it) with another # Arg0 is a string, Arg1 is a reference to a hash containing translations # If a token not in the table is passed in, do nothing # If Arg2 is defined AND the token is known, then remove whitespace from # the end of the translated token. This is a HACK to do '\em ' -> '\emph{' # Return the string, possibly modified my ($tokstr, $transref) = (shift, shift); my $remove_ws = shift; my %transtable = %$transref; # remove whitespace from the string (since transtable doesn't have it) my $stripstr = $tokstr; $stripstr =~ s/^\s*(\S+)\s*$/$1/ or warn "couldn't strip token"; if ( exists $transtable{$stripstr} ) { # use \Q or \, (, $, and [ will be misinterpreted $tokstr =~ s/\Q$stripstr\E/$transtable{$stripstr}/; # remove whitespace? if (defined $remove_ws) { $tokstr =~ s/\s*$//; } return $tokstr; sub handle_opt_args { # read and concatenate OR IGNORE optional arguments # Arg0 is a BegArgsToken or ArgToken my ($eaten,$fileobject) = (shift,shift); my $outstr = ""; # If at end of paragraph, don't bother looking for optArgs return "" unless $fileobject->lookAheadToken; # Get the next argument(s) expected for this token == /^o*[rR]?$/ # If there are no args expected, just return my $curr_args = $eaten->next_args($fileobject) or return ""; # Now print or ignore any optional arguments # If there's an 'r' in curr_args, we're done for now my $foo; my $token_name = $eaten->token_name; # (needed for EndArgsToken, e.g.) while ($curr_args =~ s/^o//) { my $opt = $fileobject->eatOptionalArgument; # Print any initial space before the optional argument if ($foo = $opt->exact_print) { if ($foo =~ /^(\s+)/) { $outstr .= $1; } # Print the argument or ignore it if ($opt->print) { if (grep /^\Q$token_name\E$/, @DeleteOptArg) { print "Optional argument '",$opt->print, "' to macro $token_name ignored\n"; } else { $outstr .= "[" . $opt->print . "]"; } } # Was an optional argument found? } return $outstr; } # end sub handle_opt_args 1; # return true value to calling program