#! /usr/bin/perl # /usr/bin/pl2html # copyright 2007, Martin Rinehart # This project used by the author to learn Perl. # Hereby licensed to all for non-commercial use. # Version 1.0 #looks great on Konq, good on Firefox Linux, dunno MSIE # Version 1+ wishlist #want to have hyperlinks back to caller(s) for each sub # report bugs to MartinRinehart at gmail dot com use strict; #
Syntax: pl2html source.pl
Example: pl2html pl2html.pl
(Example generates this file)
This shows HTML in a comment.
use Switch; use warnings; my $FALSE=0; my $TRUE=1; # readability constants my @code; # list of the input file's lines my @subs; # list of names of subroutines my $bgcolor = "#f8f4f0"; # beige surround my $numcolor = "#e4e4e4"; # grey numbers my $toccolor="#fffcf8"; # table of contents my @rowcolors = ( "#f0f8ff", "f0fff8" ); # alternating code line stripes my $num_width="5%"; my $code_width="55%"; my $comment_width="40%"; # space for output pieces my $tab = 4; # width in spaces, set to suit yourself my $tab_spaces = " " x $tab; # following vars are used by hard_way_split_on_comment() and its callees. my ( $In_quote_or_pattern, $In_quote, $In_single_quote, $In_pattern, $In_single_pattern ); my ( $Line, $Char, $Ptr, $Eos, $Pattern_char, $Was_dollar ); &main; sub main { &help_requested(); # dies after displaying help if ( $#ARGV == -1 ) { # if no args, display syntax and exit push( @ARGV, "-?" ); &help_requested; } foreach my $file ( @ARGV ) { write_html( $file ); } # this will work for l*.pl or an explicit list } # end of main() sub add_anchors { # finds subroutine calls in code and adds hyperlinks to sub my $code = $_[0]; my $anchor = ""; my $match = $FALSE; foreach my $sname ( @subs ) { if ( ($code =~ /^$sname\W/) # "func(..." or ($code =~ /\W$sname\W/) # ...+func(..." or ($code =~ /\W$sname$/) ) { # ...+func" $anchor = "$sname"; # func $code =~ s/$sname/$anchor/; # replace the name with the hyperlinked name } } # end of foreach return $code; } # end of add_anchors() # to split on comment there's an easy way: #
   $line =~ /(^[^#]*)#(.*$)/;
#
   $code = $1;
#
   $comment = $2;
# # but that will split on "#" embedded in # # to get it right, you have to do it the hard way: sub hard_way_before_pattern { # pointer is at "m" in m/pattern/, at "s" in s/srch/rep/, etc. if ( $Ptr < $Eos ) { $Ptr++; $Char = substr( $Line, $Ptr, 1 ); } hard_way_start_pattern( $_[0] ); # note: this is harmless at EOS } # end of hard_way_before_pattern sub hard_way_check_for_close_pattern { # clear all flags on close pattern if ( $Char eq $Pattern_char ) { if ( $In_single_pattern ) { hard_way_clear(); } # clear flags else { $In_single_pattern = $TRUE; } # at end of first pattern, process the next pattern } # end if } # end of hard_way_check_for_close_pattern sub hard_way_check_for_close_quote { # clear all flags on close quote # note: don't clear if a single quote is inside a double or vice-versa if ( $In_single_quote and ($Char eq "'") ) { hard_way_clear(); } elsif ( (not $In_single_quote) and $Char eq '"' ) { hard_way_clear(); } } # end of hard_way_check_for_close_quote sub hard_way_check_for_func { # for finding patterns in calls to grep() and split() my $func = $_[0]; my $tail = substr( $Line, $Ptr ); my $is_func = ( $tail =~ /(^$func\s*\(\s*)\S/ ); # "func" [opt_white] "(" [opt_white] one non-white character if ( $is_func ) { $Ptr += length( $1 ); # $1 is all chars up to, but not including, start of pattern $Char = substr( $Line, $Ptr, 1 ); # pointer points to pattern start char hard_way_start_pattern( $TRUE ); # start single pattern } } # end of hard_way_check_for_func() sub hard_way_check_for_tilde { # found "=" or "!", checks for "=~" or "!~" if ( ($Eos - $Ptr) < 3 ) { return; } #

returns if not enough chars for pattern

if ( substr($Line, $Ptr+1, 1) ne "~" ) { return; } #

returns if not tilde

$Ptr += 2; # point to char after "=~" (or "!~") hard_way_skip_white_space(); if ( $Char eq "m" ) { hard_way_before_pattern( $TRUE ); # =~ m/ is single pattern } elsif ( $Char eq "s" ) { hard_way_before_pattern( $FALSE ); # =~ s/ is NOT single pattern } elsif ( $Char eq "t" ) { # could be =~ tr/... or "t" could be a pattern delimiter if ( $Ptr < $Eos ) { my $next = substr( $Line, $Ptr+1, 1 ); if ( $next eq "r" ) { $Ptr++; hard_way_before_pattern( $FALSE ); # =~ tr/ is NOT single pattern } else { hard_way_start_pattern( $TRUE ); }# =~ tpatternt is single } } else { hard_way_start_pattern( $TRUE ); } # !~ / is single pattern } # end of hard_way_check_for_tilde() sub hard_way_check_special { # look for open quotes, start of patterns - these checks are made when outside any pattern or quote switch ( $Char ) { case ( "'" ) { hard_way_set_single_quote( $TRUE ); } # quote starting with single case ( '"' ) { hard_way_set_single_quote( $FALSE ); } # quote starting with double case ( "=" ) { hard_way_check_for_tilde(); } # binding? "=~" case ( "!" ) { hard_way_check_for_tilde(); } # not binding? "!~" case ( "g" ) { hard_way_check_for_func( "grep" ) ; } # calling grep( /pattern/ ... case ( "s" ) { hard_way_check_for_func( "split" ); } # calling split( /pattern/ ... } # end of switch } # end of sub hard_way_check_special sub hard_way_clear { # called to initialize and when leaving a pattern or quote $In_quote_or_pattern = $FALSE; $In_quote = $FALSE; $In_single_quote = $FALSE; $In_pattern = $FALSE; $In_single_pattern = $FALSE; $Pattern_char = ""; $Was_dollar = $FALSE; } # end of hard_way_clear sub hard_way_crawl { # crawls down code line, one char at a time while ( $Ptr < $Eos ) { $Char = substr( $Line, $Ptr, 1 ); if ( ($Char eq "#") and # the basic check for a comment-starting "#" (not $In_quote_or_pattern) and (not $Was_dollar) ) { return $Ptr; } #

returns here on finding comment-starting "#"

if ( $In_quote_or_pattern and ($Char eq "\\") ) { $Ptr++; } # skip escaped chars elsif( $In_quote ) { hard_way_check_for_close_quote(); } elsif( $In_pattern ) { hard_way_check_for_close_pattern(); } else { hard_way_check_special(); } # check for quote start, pattern start, etc. $Ptr++; $Was_dollar = $Char eq "\$"; # used to tell "#" comment start from "$#name" } # end of while loop return $Ptr; } # end of hard_way_crawl sub hard_way_set_single_pattern { # pattern-starting char has been found $In_single_pattern = $_[0]; $In_pattern = $TRUE; $In_quote_or_pattern = $TRUE; } # end of hard_way_set_single_pattern() sub hard_way_set_single_quote{ # quote-starting char has been found $In_single_quote = $_[0]; # may be $TRUE or $FALSE $In_quote = $TRUE; $In_quote_or_pattern = $TRUE; } # end of hard_way_set_single_quote() sub hard_way_skip_white_space { # note that this is the same a C, but uglier and slower my $space = ord( " " ); # 32 "white space" is anything from 0 to 32 while ( $Ptr < $Eos ) { # stop at EOS $Char = substr( $Line, $Ptr, 1 ); if ( ord($Char) > $space ) { last; } # stop if non-white $Ptr++; # keep going } } # end of hard_way_skip_white_space() sub hard_way_split_on_comment { # this is the main routine in the "hard_way" group hard_way_clear(); # resets all $In_xxx flags $Line = $_[0]; # the line of code $Char = ""; # the current character $Ptr = 0; # current position $Eos = length( $Line ); $Pattern_char = ""; # character that starts a regex pattern chomp $Line; my $hash_pos = hard_way_crawl(); # return is 1 plus index of comment-starting "#" # there are three cases to handle if ( $hash_pos == 0 ) # this is why it's 1 plus { return ( "", substr($Line,1) ); } # whole line is comment elsif ( $hash_pos < $Eos ) # code and comment present { return ( substr($Line, 0, $hash_pos), substr($Line, $hash_pos+1) ); } else { return ( $Line, "" ); } # line is all code } # end of hard_way_split_on_comment() sub hard_way_start_pattern { # pattern starts at the current $Ptr and $Char $Pattern_char = $Char; hard_way_set_single_pattern( $_[0] ); } # end of hard_way_start_pattern #

end of "hard_way" code

sub has_sub_calls { # are there function/subroutine calls in this line of code? my $line = $_[0]; my ( $code, my $comment ) = split_on_comment( $line ); # break into code and comment my $found = $FALSE; foreach my $sname ( @subs ) { # check against every name in the list of sub names $found = ( $code =~ /\W$sname\W/ ); if ( $found ) { last; } # break when a sub name is found } return $found; } # end of has_sub_calls sub html_filename { # given "foo/bar.pl" return "foo/bar.html" # given "foo/bar" return "foo/bar.html" my $ret = $_[0]; if ( $ret =~ /\.pl$/ ) { # arg is "xxxxx.pl" $ret =~ s/\.pl$/\.html/; # find ".pl" replace with ".html" } else { $ret .= ".html" } # arg is "xxxxx", append ".html" return $ret; } # end of html_filename sub is_readable_text_file { # check and, if needed, die with informative message my $pathname = $_[0]; my $msg_no_file = "File \'$pathname\' does not exist.\n"; my $msg_not_readable = "File \'$pathname\' is not readable.\n"; my $msg_not_text = "File \'$pathname\' is not a text file.\n"; my $good = 0; # guilty until proven innocent if (not (-e $pathname) ) { print $msg_no_file; } elsif (not (-r $pathname) ) { print $msg_not_readable; } elsif (not (-T $pathname) ) { print $msg_not_text; } else { $good = 1; } # proven innocent! return $good; } # end of is_readable_text_file sub is_sub { # is line start of a sub? "sub" possibly preceded by whitespace my $line = $_[0]; my $ret = ( $line =~ /^\s*sub\s/ ); return $ret; } # end of is_sub() sub list_subs { # prepare the list of subroutine names @subs=(); #empty list foreach my $line ( @code ) { chomp( $line ); if ( is_sub($line) ) { push( @subs, sub_name($line) ); } # if the line starts "sub", push the name of the sub onto the list } # end of foreach } # end of list_subs() sub properly_indent { # puts the appropriate number of non-breaking spaces into the front of the line my $code = $_[0]; $code =~ /(\s*?)\S/; # find leading spaces if ( $1 ) { my $prefix = $1; my $spaces = length( $prefix ); my $new_prefix = " " x $spaces; # " " into the HTML $code =~ s/$prefix/$new_prefix/; # replace spaces with "&nbsp;"s (remember, comments are html, even when that's not convenient - the source reads "&amp;nbsp;") } # end if has leading blanks return $code; } # end of properly_indent() sub split_on_comment { # comment lines (start w/hash) left on left my $line = $_[0]; my $code = $line; my $comment = ""; if ( $code !~ /^#/ ) { # if first char is #, leave comment in code area if ( $code !~ /.#/ ) # no hashes in the line? { $code = $line; } # no comments, code is whole line else { ( $code, $comment ) = hard_way_split_on_comment( $code ); } } # end of if line doesn't start with a "#" $code =~ s/&/&/g; # preserve ampersands $code =~ s/\n"; print "\n"; } # end of write_footer() sub write_html { # the mainline for handling an input file my $pathname = $_[0]; if ( not is_readable_text_file($pathname) ) { return; } open( IN, "<", $pathname ); # read the input file into @code @code = ; close( IN ); list_subs(); # create the list of sub names my $out = html_filename( $pathname ); # change "xxx.pl" to "xxx.html" open( OUT, ">", $out ); select( OUT ); # redirect "print" statements to the .html file write_header( $pathname, $out ); write_rows(); write_footer( $out ); select( STDOUT ); close( OUT ); print "\'$out\' written.\n"; } # end of write_html() sub write_header { # write the part of the .html file above the actual listing my $pathname = $_[0]; # input file pathname my $out = $_[1]; # output file pathname print "\n"; # start with a comment my ($sec, $min, $hr, $day, $mon, $yr, $wd, $doy, $dst) = localtime( time() ); $mon++; $yr += 1900; if ( $min < 10 ) { $min = "0".$min; } if ( $day < 10 ) { $day = "0".$day; } my $timestamp = "$hr:$min:$sec, $mon\/$day\/$yr"; print "\n\n"; # and another comment print "\n\n"; print "\n\n"; print " $out\n\n"; print "\n\n"; print "\n\n"; # note, you don't see much of this color, it's a surround print "

$pathname

\n"; # big title print "

Formatted and hyperlinked by pl2html, $timestamp

\n"; # subtitle print "
\n\n"; # draw a line print "\n"; # anchor for "toc" links print "\n"; #this starts the toc print " # names go down first col, then down second, etc. # order is the order the programmer uses for the subs print " $sname
\n"; # link toc name to source location $i++; $j++; if ( $i == $subs_per_col ) { # on to next
\n"; my $cols = 4; my $subs_per_col = int( ($#subs + $cols)/$cols ); my $i = 0; my $j = 0; foreach my $sname ( @subs ) { # list names in four cols. Each col is one at end of column unless ( $j == @subs ) { # don't do it if we've finished print "\n"; $i=0; } } } # end of foreach print "
\n"; # close the table } # end of write_header() sub write_row_html { # writes the html for non "sub" code lines my ( $color, $code, $comment, $i ) = @_; print "\n"; # set the color for the row print " $i \n"; # write the line number (overrides row color) if ( $code =~ /^\#/ ) { # if line starts with hash, use both columns print " $code \n"; # write the code across two columns } else { print " $code \n"; # write the code print " $comment \n"; # write the comment } print "\n\n"; } # end of write_row_html() sub write_row_plain { # write row that does not include sub calls my $line = $_[0]; # input line my $i = $_[1]; # current line number my $color = $_[2]; #color (alternates) my ($code, $comment) = split_on_comment( $line ); # divide line into code and comment $code = properly_indent( $code ); # replace " " with "&nbsp" write_row_html( $color, $code, $comment, $i ); # write the html } # end of write_row_plain() sub write_row_sub { # write a row that starts a sub my $line = $_[0]; # input line my $i = $_[1]; # current line number my $color = $_[2]; #color my $sname = sub_name( $line ); # get the name from the "sub" line my ( $code, $comment) = split_on_comment( $line ); # split to code and comment $code = properly_indent( $code ); # replace " " with "&nbsp;" print "\n"; # start a row and set the color print " "; print " toc \n"; # write a link to "toc" instead of line number print " $code "; # write the code print " \n"; print "

$comment

\n"; # write the comment print "\n\n"; } # end of write_row_sub sub write_row_sub_calls { # write a row that includes sub calls my $line = $_[0]; # input line my $i = $_[1]; # line number my $color = $_[2]; # color my ($code, $comment) = split_on_comment( $line ); # split to code and comment $code = add_anchors( $code ); # replace sub calls with hyperlinks $code = properly_indent( $code ); # replace " "s with "&nbsp;"s write_row_html( $color, $code, $comment, $i ); # write the html } # end of write_row_sub_call() sub write_rows { # writes the reformatted, hyperlinked code my $i=1; # first line is 1, most editors print "\n"; # open a table foreach my $line ( @code ) { my $color = $rowcolors[ $i%2 ]; # alternate colors chomp( $line ); # eat EOL if ( is_sub($line) ) { write_sub_end(); # end the current table (you can't get anchors within a table) write_sub_begin( $line ); # add anchor and open a new table write_row_sub( $line, $i, $color ); } # write the "sub" line elsif( has_sub_calls($line) ) { write_row_sub_calls( $line, $i, $color ); } # or write a line with sub calls else { write_row_plain( $line, $i, $color ); } # or write a line w/o sub calls $i++; } # end of foreach } # end of write_rows() sub write_sub_begin { # each sub gets an anchor and starts a new table my $line = $_[0]; my $sname = sub_name( $line ); print "\n"; # the anchor print "
\n"; # the new table } # end of write_sub_begin() sub write_sub_end { # start of each sub ends previous table print "
\n\n"; } # end of write_sub_end() sub help_requested { # look for and respond to help switch "-?", "/h", etc. my $msg_help = <<'ENDHELP'; syntax: perl perldoc source_file_path_name ENDHELP my $arg = $ARGV[0]; # help must be first arg my $help = ( $arg =~ /(^-\?$)|(^\/\?$)|(^-h$)|(^\/h$)|(^-help$)|(^\/help$)/ ); # anything reasonable if ( $help ) { print $msg_help; die "\n"; # assumption, user is not asking for help AND providing other specs } return $FALSE; } # end of help_requested() # end of /usr/bin/pl2html