#! /usr/bin/perl

# $Log: tex2blog,v $
# Revision 1.20  2009/11/12 02:03:37  Peeter
# *** empty log message ***
#
# Revision 1.19  2009/11/07 22:11:58  Peeter
# hack in some wikipedia and physicsforums support.
#
# Revision 1.17  2009/09/18 04:43:41  Peeter
# got it working again mostly (at least in latexmathml mode.
# )
#
# Revision 1.16  2009/09/18 02:25:52  Peeter
# restructure totally, should now be broken and do nothing.  Now fix it up.
#
# Revision 1.15  2009/09/18 01:42:56  Peeter
# got some of the latex2mathml changes made.  Going to revamp the
# main loop to slurp it all in and use multiline patterns.
#
# BUGS:
#------
# mathml: equation numbering requires that section headings be suppressed.
# mathml: eqnarray requires more than one equation line, and &= shows up as
#         is otherwise.  Not doing that in the original .tex is required.
# wordpress: not tested since rewrite.
#
# NOTE(review): several string literals below (section headings, the
# bibliography heading, the wiki math delimiters, the html prologue and
# epilogue) look as if markup tags were stripped from them at some point,
# leaving only whitespace.  They are kept exactly as found; restore the
# intended markup from a pristine copy of this script if one is available.

use strict ;
use warnings ;
use Getopt::Long;

my $filebase ;
my $showUsage = 0 ;

# output for LaTeXMathML.js or wordpress?
my $mathml = 0 ;
my $wordpress = 0 ;

# for use of LaTeXMathML.js in a standalone html file:
my $mathmlHtml = 0 ;

# for use with physics Forums.
my $pf = 0 ;

# wikipedia output.
my $wiki = 0 ;

GetOptions(
   'mathml!'      => \$mathml,
   'wordpress!'   => \$wordpress,
   'pf!'          => \$pf,
   'wikipedia!'   => \$wiki,
   'html!'        => \$mathmlHtml,
   'file=s'       => \$filebase,
   'help!'        => \$showUsage,
) or $showUsage = 1 ;

# Exactly one output mode must be chosen, and a file base name is mandatory.
if ( $showUsage
     or !defined $filebase
     or (1 != ( $mathml + $wordpress + $mathmlHtml + $pf + $wiki )) )
{
   die "usage: ~/bin/tex2blog -f filenameWithoutTexSuffix {-pf | -html | -mathml | -wordpress | -wiki} [-help]\n" ;
}

# -html is -mathml plus a standalone html wrapper.
if ( $mathmlHtml )
{
   $mathml = 1 ;
}

# Placeholders wrapped around already-converted math so that later passes do
# not process it a second time.  processRest() swaps them for the delimiters
# of the selected output format.
# NOTE(review): the marker names are reconstructed; only their pairing
# between processOneEquation() and processRest() matters.
my $MARK_INLINE_OPEN  = '<inlinemath>' ;
my $MARK_INLINE_CLOSE = '</inlinemath>' ;
my $MARK_SEP_OPEN     = '<sepmath>' ;
my $MARK_SEP_CLOSE    = '</sepmath>' ;

# Next sequential equation number handed out in mathml mode.  This has to be
# initialized before the equation pass below runs (it used to sit after the
# 'exit', so the first equation was numbered 0).
my $g_EquationSeqNo = 1 ;

# it is assumed that the .tex or .ltx file has been pdflatex'ed already
# generating a .aux file for the equation numbers.
my %equations = processAuxFile( "$filebase.aux" ) ;

my %refnumbers ;

# side effect.  creates the %refnumbers hash.
my $bibString = processBib() ;

# most of the file can be treated as single lines but we need
# to handle \begin{align}, \begin{equation} ... as multiline
# patterns.  For this reason, slurp in the whole file, then handle
# all these types of patterns first, then do the rest.
#
# first the slurp:
#
my $allInput = slurpLatex( $filebase ) ;

# pre-filter out all the latex comments (but leave an escaped \% alone).
$allInput =~ s/(?<!\\)%.*//mg ;

# now do the tricky bits.
$allInput =~ s/
      \\begin\{
      (?:equation|align|eqnarray)
      (\**)       # capture 1: a star means this equation is not numbered
      \}
      (.*?)       # capture 2: the latex text including all whitespace
      \\end\{
      (?:equation|align|eqnarray)
      \**         # the matching star if any
      \}
   /processOneEquation(0, $1, $2)/sgemx ;

my $g_title = 'no title' ;

# side effect: sets $g_title
my $allOut = processRest( $allInput ) ;
my $head = generatePrologue( $g_title ) ;

generateEpilogueAndPrintIt( $head . $allOut ) ;

exit ;

# Convert the body of one math fragment.
#
#   $bInline         - true for $...$ inline math, false for a display
#                      environment (equation, align, eqnarray).
#   $suppressNumbers - '*' if the fragment is unnumbered, '' if numbered.
#   $eq              - the latex between the delimiters.
#
# Returns the converted latex wrapped in the inline or separate-line
# placeholder markers.  Dies if a labelled equation is starred, if an
# unlabelled one is not, or (outside mathml mode) if the label has no number
# in the .aux data.  In mathml mode the label is assigned the next sequential
# number in %equations.
sub processOneEquation
{
   my ($bInline, $suppressNumbers, $eq) = @_ ;
   my $oeq = $eq ;

   my $curEquationLabel = '' ;
   my $eqnArray = "eqnarray" ;

   if ( $eq =~ s/\\label\{(eqn:.*?)\}//s )
   {
      my $label = $1 ;

      if ( $mathml )
      {
         $equations{$label} = $g_EquationSeqNo++ ;
      }
      else
      {
         die "no equation number for '$label'\n" if ( !exists $equations{$label} ) ;

         $curEquationLabel = "\\quad\\quad\\quad($equations{$label})" ;
      }

      die "s: $suppressNumbers ; no label for equation '$oeq'" if ( $suppressNumbers ne '' ) ;
   }
   else
   {
      $eqnArray .= '*' ;

      die "expected * for non-labelled equation '$eq'" if ( $suppressNumbers ne '*' ) ;
   }

   # everything downstream is line oriented, so flatten the fragment.
   $eq =~ s/\n//g ;

   $eq = expandPeetersMathModeMacros( $eq ) ;

   # wrap in placeholder markers so that this equation isn't processed again.
   # In the final pass, when everything is split by lines, these are changed
   # to the real delimiters (e.g. '$' characters) for final output.
   if ( $bInline )
   {
      return "$MARK_INLINE_OPEN$eq$MARK_INLINE_CLOSE" ;
   }

   if ( $mathml )
   {
      $eq = "\\begin{$eqnArray}$eq \\end{$eqnArray}" ;
   }
   elsif ( $wordpress )
   {
      # sample output from latex2wp.  centers things ... looks a bit nicer.
      #    $latex \displaystyle \forall g \in {\cal F}. g^2 = \eta \ \ \ \ \ (1)&fg=000000$
      #
      # latex2wp used the &fg=000000 suffix, but I don't see a difference:
      my $fgString = '' ;
      # $fgString = "&fg=000000" ; # omit.

      $eq = "\\begin{aligned}$eq\\end{aligned} $curEquationLabel$fgString" ;
   }
   else # wiki, and pf
   {
      $eq = "\\begin{align}$eq\\end{align} $curEquationLabel" ;
   }

   # NOTE(review): the separate-line wrapping is reconstructed from the
   # marker substitutions in processRest(); confirm against a pristine copy.
   return "$MARK_SEP_OPEN$eq$MARK_SEP_CLOSE" ;
}

# Read "$filebase.bbl" (if it exists) and turn it into a plain text reference
# list, numbering the entries [1], [2], ...
#
# Side effect: fills %refnumbers with citation key => number.
# Returns the reference list text, or '' when there is no .bbl file.
sub processBib
{
   my $bibOut = '' ;

   # a missing bibliography is not an error.
   if ( open my $fh, '<', "$filebase.bbl" )
   {
      my $curNum = 1 ;

      while (<$fh>)
      {
         chomp ;

         s,\\begin\{thebibliography\}.*,\n\nReferences\n\n, ;

         #\bibitem[Joot({\natexlab{a}})]{PJLorentzWave}
         s/\\bibitem.*\]\{/\\bibitem{/ ;

         if ( s/\\bibitem\{(.*?)\}// )
         {
            $bibOut .= "\n\n" ;

            $refnumbers{$1} = $curNum ;
            $bibOut .= "[$curNum] " ;
            $curNum++ ;
         }
         else
         {
            next if ( /Available from:/
                      or /\\providecommand/
                      or /\\expandafter/ ) ;

            if ( /\\end\{thebibliography\}/ )
            {
               $bibOut =~ s,\\url\{(.*?)\},$1,;
               $bibOut .= "\n" ;
               last ;
            }

            s/~/ /g ;
            s,\{ *\\em\s+(.*?)\},$1,g;
            s,\\em *\{(.*?)\},$1,g ;
            s,\\emph *\{(.*?)\},$1,g ;
            s/\\newblock// ;

            # \url can be split across lines, as in the following:
            # \url{http://en.wikipedia.org/w/index.php?title=Relativistic_Doppler_effect&o%
            #ldid=298724264}.
            #
            # In case it isn't, do a first check here before stripping out {}'s
            #
            s,\\url\{(.*?)\},$1,sg;
            s/\{(.*?)\}/$1/g ;

            $bibOut .= "$_" ;
         }
      }

      close $fh ;
   }

   return $bibOut ;
}

# Build the text that precedes the converted document.
#
#   $title - document title (only used for standalone html output).
#
# Returns '' unless a mathml mode is active.
sub generatePrologue
{
   my ($title) = @_ ;

   my $prologue = '' ;

   if ( $mathmlHtml )
   {
      $prologue .= qq( $title ) ;
   }

   if ( $mathml )
   {
      $prologue .= "\n\n" ;
   }

   return $prologue ;
}

# Append the bibliography and the mode specific trailer to $outText, drop
# carriage returns, and print the result to stdout.
sub generateEpilogueAndPrintIt
{
   my ($outText) = @_ ;

   $outText .= $bibString ;

   # The original class was [\r\l]; \l is a case-modifier escape rather than
   # a character, so only carriage returns were ever removed.
   $outText =~ s/\r+//g ;

   if ( $mathml )
   {
      $outText .= "\n\n" ;
   }

   if ( $mathmlHtml )
   {
      $outText .= "\n " ;
   }

   print $outText ;
}

# Collect label => equation number pairs from the \newlabel lines of a
# pdflatex .aux file.
#
#   $auxFileName - path of the .aux file.
#
# Returns the pairs as a flat list (assigned to a hash by the caller).  In
# mathml mode the file is not read and the list is empty, since numbers are
# handed out sequentially by processOneEquation() instead.  Dies if the file
# is needed but cannot be opened.
sub processAuxFile
{
   my ($auxFileName) = @_ ;
   my %eq ;

   if ( !$mathml )
   {
      open my $fh, '<', $auxFileName or die "could not open '$auxFileName'\n" ;

      while (<$fh>)
      {
         chomp ;

         if ( /^\\newlabel\{(.*?)\}.*equation\.(.*?)\}/ )
         {
            $eq{$1} = $2 ;
         }
      }

      close $fh ;
   }

   # debug.  turns out that equation labels don't work quite right when they
   # are also in a \enumerate \item ... context.
   # foreach (keys %eq)
   # {
   #    warn "label: '$_'\n" ;
   # }

   return %eq ;
}

# Read the whole latex source into one string.
#
#   $base - file name without suffix (defaults to the -file option value).
#
# Tries "$base.tex" first and then "$base.ltx"; dies if neither opens.
sub slurpLatex
{
   my ($base) = @_ ;
   $base = $filebase if ( !defined $base ) ;

   my $fh ;

   if ( !open $fh, '<', "$base.tex" )
   {
      open $fh, '<', "$base.ltx" or die "could not open '$base.tex' nor '$base.ltx'\n" ;
   }

   my $rawLatex = '' ;

   while (<$fh>)
   {
      $rawLatex .= $_ ;
   }

   close $fh ;

   return $rawLatex ;
}

# Line oriented pass over the document after display equations have been
# converted: drops preamble and bookkeeping lines, converts inline math,
# swaps the placeholder markers for the delimiters of the selected output
# format, and resolves citations, equation references and headings.
#
#   $allOneLine - the whole (pre-processed) document as one string.
#
# Side effect: in mathml mode sets $g_title from \title{} or \chapter{}.
# Returns the converted text.
sub processRest
{
   my ($allOneLine) = @_ ;

   my @allLines = split( /\n/, $allOneLine ) ;

   # alternative wording for a multi part post:
   # "Click here for a PDF of this sequence of posts with nicer formatting"
   my $urlMessage = "Click here for a PDF of this post with nicer formatting" ;

   my $latexInlineStart ;
   my $latexInlineEnd ;
   my $latexSepLineStart ;
   my $latexSepLineEnd ;

   if ( $pf )
   {
      $latexInlineStart = '[itex]' ;
      $latexInlineEnd = '[/itex]' ;
      $latexSepLineStart = '[tex]' ;
      $latexSepLineEnd = '[/tex]' ;
   }
   elsif ( $wiki )
   {
      # NOTE(review): these look tag-stripped (empty inline delimiters).
      $latexInlineStart = '' ;
      $latexInlineEnd = '' ;
      $latexSepLineStart = ":\n" ;
      $latexSepLineEnd = "\n\n" ;
   }
   elsif ( $wordpress )
   {
      $latexInlineStart = '$latex ' ;
      $latexInlineEnd = '$' ;
      $latexSepLineStart = "\n\n\$latex " ;
      $latexSepLineEnd = "\$\n\n\n" ;
   }
   elsif ( $mathml )
   {
      $latexInlineStart = '$' ;
      $latexInlineEnd = '$' ;
      $latexSepLineStart = "\n\$" ;
      $latexSepLineEnd = "\$\n\n" ;
   }
   else
   {
      die "no output mode selected\n" ;
   }

   my $out = '' ;

   foreach (@allLines)
   {
      next if ( /\\documentclass/
                or /\\usepackage/
                or /\\newcommand/
                or /\\input/
                or /\\revisionInfo/
                or /\\bibliography/
                or /\\beginArtWithToc/
                or /\\EndArticle/
                or /\\EndNoBibArticle/
                or /\\beginArtNoToc/ ) ;

      if ( $mathml )
      {
         if ( /\\(?:title|chapter)\{(.*)\}/ )
         {
            $g_title = $1 ;
         }
      }
      else
      {
         next if ( /\\begin\{document\}/
                   or /\\end\{document\}/
                   or /\\maketitle/
                   or /\\tableofcontents/
                   or /\\title/
                   or /\\email/
                   or /\\date/
                   or /\\author/ ) ;
      }

      # process inline math $$ delimited.  Assume that it is all in one line.
      s/\$(.*?)\$/processOneEquation(1, '*', $1)/eg ;

      # all math is now wrapped in placeholder markers: emit real delimiters.
      s/\Q$MARK_SEP_OPEN\E/$latexSepLineStart/g ;
      s/\Q$MARK_SEP_CLOSE\E/$latexSepLineEnd/g ;
      s/\Q$MARK_INLINE_OPEN\E/$latexInlineStart/g ;
      s/\Q$MARK_INLINE_CLOSE\E/$latexInlineEnd/g ;

      s/\\chapcite\{(.*?)\}/[$refnumbers{$1}]/g;
      s/\\cite\{(.*?)\}/[$refnumbers{$1}]/g;
      s,\\blogpage\{(.*)\},[$urlMessage],;

      s/\\C\{(.*?)\}/$latexInlineStart\\mathbb{C}^{$1}$latexInlineEnd/g;
      s/\\R\{(.*?)\}/$latexInlineStart\\mathbb{R}^{$1}$latexInlineEnd/g;

      s/\\ref\{(eqn:.*?)\}/$equations{$1}/g ;

      # latexMathMl can do section's but this makes the equation number
      # matching harder (since you get 1.1 1.2 ...)
      # could deal with that by processing things section by section, and
      # then within each section doing all the equations.
      s,\\section\{(.*)\},\n\n$1\n\n,;
      s,\\subsection\{(.*)\},\n\n$1\n\n,;
      s,\\subsubsection\{(.*)\},\n\n$1\n\n,;

      s,\\href\{(.*?)\}\{(.*?)\},$2,g ;

      # remove any label's not associated with specific formulas
      s/\\label\{(.*?)\}//g;

      $out .= "$_\n" ;
   }

   return $out ;
}

# Expand the author's private math mode macros (\inv, \Abs, \gpgrade, \PD,
# \Bx, ...) into plain latex that the target renderers understand.
#
# Takes the math text and returns the expanded text.  The order of the
# substitutions matters: longer or more specific macro names are handled
# before the generic single letter rules that would otherwise match them.
sub expandPeetersMathModeMacros
{
   my ($out) = @_ ;

   # one balanced, possibly nested, {...} argument.
   my $r1 = qr/
      (                 # capture 1
         \{
            (?:
               [^{}]++  # run of non-brace characters, no backtracking
            |
               (?1)     # nested braces: recurse into capture 1
            )*
         \}
      )
   /x;

   # two consecutive balanced {...}{...} arguments.
   my $r2 = qr/
      (                 # capture 1
         \{
            (?:
               [^{}]++
            |
               (?1)
            )*
         \}
      )
      (                 # capture 2
         \{
            (?:
               [^{}]++
            |
               (?1)     # same sub-pattern as capture 1
            )*
         \}
      )
   /x;

   $out =~ s/\\\\/\\\\ /g;

   # convert some of my macros:

   # special case this one.  bivector.tex shows that there is trouble with
   # this expansion.
   $out =~ s/\\inv${r1}/\\frac{1}{$1}/g;

   if ( $mathml )
   {
      # LaTeXMathML: use \vert instead of \lvert and \rvert.
      $out =~ s/\\Abs${r1}/{\\left\\vert$1\\right\\vert}/g;
      $out =~ s/\\abs${r1}/{\\left\\vert$1\\right\\vert}/g;
   }
   else
   {
      $out =~ s/\\Abs${r1}/{\\left\\lvert$1\\right\\rvert}/g;
      $out =~ s/\\abs${r1}/{\\left\\lvert$1\\right\\rvert}/g;
   }

   $out =~ s/\\gpgrade${r2}/{\\left\\langle{$1}\\right\\rangle}_{$2}/g;
   $out =~ s/\\gpgradezero${r1}/\\left\\langle{$1}\\right\\rangle/g;
   $out =~ s/\\gpgradeone${r1}/{\\left\\langle{$1}\\right\\rangle}_{1}/g;
   $out =~ s/\\gpgradetwo${r1}/{\\left\\langle{$1}\\right\\rangle}_{2}/g;
   $out =~ s/\\gpgradethree${r1}/{\\left\\langle{$1}\\right\\rangle}_{3}/g;
   $out =~ s/\\gpgradefour${r1}/{\\left\\langle{$1}\\right\\rangle}_{4}/g;

   $out =~ s/\\rgrad/\\stackrel{ \\rightarrow }\\grad/g;
   $out =~ s/\\lgrad/\\stackrel{ \\leftarrow }\\grad/g;
   $out =~ s/\\lrgrad/\\stackrel{ \\leftrightarrow }\\grad/g;
   $out =~ s/\\lrpartial/\\stackrel{ \\leftrightarrow }\\partial/g;
   $out =~ s/\\rspacegrad/\\stackrel{ \\rightarrow }\\spacegrad/g;
   $out =~ s/\\lspacegrad/\\stackrel{ \\leftarrow }\\spacegrad/g;

   $out =~ s/\\BCB/\\boldsymbol{\\mathcal{B}}/g;
   $out =~ s/\\EE/\\boldsymbol{\\mathcal{E}}/g;
   $out =~ s/\\kcap/\\hat{\\Bk}/g;
   $out =~ s/\\xcap/\\hat{\\Bx}/g;
   $out =~ s/\\ncap/\\hat{\\Bn}/g;
   $out =~ s/\\Bomega/\\boldsymbol{\\omega}/g;
   $out =~ s/\\Bsigma/\\boldsymbol{\\sigma}/g;
   $out =~ s/\\Brho/\\boldsymbol{\\rho}/g;
   $out =~ s/\\BTheta/\\boldsymbol{\\Theta}/g;

   # generic single letter forms; must come after the named ones above.
   $out =~ s/\\(.)cap/\\hat{\\B$1}/g;
   $out =~ s/\\B(.)/\\mathbf{$1}/g;

   $out =~ s/\\LL/\\mathcal{L}/g;
   $out =~ s/\\FF/\\mathcal{F}/g;
   $out =~ s/\\cross/\\times/g;
   $out =~ s/\\grad/\\nabla/g;
   $out =~ s/\\spacegrad/\\boldsymbol{\\nabla}/g;
   $out =~ s/\\delambertian/\\square/g;
   $out =~ s/\\conj/{*}/g;

   # \PDi must be tried before \PD, which is a prefix of it.
   $out =~ s,\\PDi${r2},{\\partial $2}/{\\partial $1},g ;
   $out =~ s/\\PD${r2}/\\frac{\\partial $2}{\\partial $1}/g ;

   $out =~ s/\\PauliI/\\begin{bmatrix} 1 & 0 \\\\ 0 & 1 \\\\ \\end{bmatrix}/g;
   $out =~ s/\\PauliX/\\begin{bmatrix} 0 & 1 \\\\ 1 & 0 \\\\ \\end{bmatrix}/g;
   $out =~ s/\\PauliYNoI/\\begin{bmatrix} 0 & -1 \\\\ 1 & 0 \\\\ \\end{bmatrix}/g;
   $out =~ s/\\PauliY/\\begin{bmatrix} 0 & -i \\\\ i & 0 \\\\ \\end{bmatrix}/g;
   $out =~ s/\\PauliZ/\\begin{bmatrix} 1 & 0 \\\\ 0 & -1 \\\\ \\end{bmatrix}/g;

   $out =~ s/\\Clifford\{(.*?)\}\{(.*?)\}/\\mathcal{C}_{\\{{$1},{$2}\\}}/g;
   $out =~ s/\\scalarProduct\{(.*?)\}\{(.*?)\}/{$1} \\bullet {$2}/g;

   # run twice, so that one level of nesting is also expanded.
   for ( 1 .. 2 )
   {
      $out =~ s/\\antisymmetric${r2}/\\left[$1,$2\\right]/g;
      $out =~ s/\\symmetric${r2}/\\left\\{$1,$2\\right\\}/g;
   }

   $out =~ s/\\DETuvwijk\{(.*?)\}\{(.*?)\}\{(.*?)\}\{(.*?)\}\{(.*?)\}\{(.*?)\}/\\begin{vmatrix} {$1}_{$4} & {$1}_{$5} & {$1}_{$6} \\\\ {$2}_{$4} & {$2}_{$5} & {$2}_{$6} \\\\ {$3}_{$4} & {$3}_{$5} & {$3}_{$6} \\end{vmatrix}/g;

   $out =~ s/\\traceB${r1}/\\tr\\left({$1}\\right)/g;
   $out =~ s/\\trace${r1}/\\tr{$1}/g;

   # \b keeps standard commands such as \triangle intact.
   $out =~ s/\\tr\b/\\text{Tr}/g;

   $out =~ s/\\RejName/\\text{Rej}/g;
   $out =~ s/\\Proj/\\text{Proj}/g;
   $out =~ s/\\Rot/\\text{Rot}/g;
   $out =~ s/\\Scalar/\\text{Scalar}/g;
   $out =~ s/\\Real/\\text{Real}/g;
   $out =~ s/\\Imag/\\text{Imag}/g;

   $out =~ s/\\symmetricVecBladePauli\{(.*?)\}\{(.*?)\}\{(.*?)\}/\\left\\{{$1},\\left[{$2},{$3}\\right]\\right\\}/g;
   $out =~ s/\\symmetricBladeVecPauli\{(.*?)\}\{(.*?)\}\{(.*?)\}/\\left\\{\\left[{$1},{$2}\\right],{$3}\\right\\}/g;

   $out =~ s/\\(.)hat/\\hat{$1}/g;

   # \xdot => \dot{x}, but leave the standard \cdot, \ddot and \odot alone.
   $out =~ s/\\([^cdo])dot\b/\\dot{$1}/g;

   $out =~ s/\\thetadot/\\dot{\\theta}/g;
   $out =~ s/\\phidot/\\dot{\\phi}/g;
   $out =~ s/\\thetacap/\\hat{\\boldsymbol{\\theta}}/g;
   $out =~ s/\\phicap/\\hat{\\boldsymbol{\\phi}}/g;

   $out =~ s/\\questionEquals/\\stackrel{?}{=}/g;
   $out =~ s/\\T\b/\\text{T}/g;

   $out =~ s/\\evalbar${r2}/{\\left.{$1}\\right\\vert}_{$2}/g ;
   $out =~ s/\\evalnobar${r2}/{$1}_{$2}/g ;

   $out =~ s/\\DETuvij\{(.)\}\{(.)\}\{(.)\}\{(.)\}/\\begin{vmatrix} ${1}_$3 & ${1}_$4 \\\\ ${2}_$3 & ${2}_$4 \\end{vmatrix}/g;

   # need a general way of dealing with this.  wordpress latex doesn't like
   # the multiply nested {{...}} content.
   # $out =~ s/{{\\mathbf{(.)}}/{\\mathbf{$1}/g;

   # bivector.tex.  multiple passes of this required: ??
   $out =~ s/\\gpgradetwo${r1}/{\\left\\langle{$1}\\right\\rangle}_{2}/g;

   if ( $mathml )
   {
      $out =~ s/\\text/\\textrm/g;
      $out =~ s/\\lvert/\\left\\vert/g ;
      $out =~ s/\\rvert/\\right\\vert/g ;

      # hack.  Not supported by the script.  should use arrows over.
      $out =~ s/\\boldsymbol//g ;
   }

   return $out ;
}