#! /usr/bin/perl
# /usr/bin/pl2html
# copyright 2007, Martin Rinehart
# This project used by the author to learn Perl.
# Hereby licensed to all for non-commercial use.
# Version 1.0
#looks great on Konq, good on Firefox Linux, dunno MSIE
# Version 1+ wishlist
#want to have hyperlinks back to caller(s) for each sub
# report bugs to MartinRinehart at gmail dot com
use strict; #
Syntax: pl2html source.pl Example: pl2html pl2html.pl (Example generates this file) This shows HTML in a comment. |
use Switch;
use warnings;
my $FALSE=0; my $TRUE=1; # readability constants
my @code; # list of the input file's lines
my @subs; # list of names of subroutines
my $bgcolor = "#f8f4f0"; # beige surround
my $numcolor = "#e4e4e4"; # grey numbers
my $toccolor="#fffcf8"; # table of contents
my @rowcolors = ( "#f0f8ff", "f0fff8" ); # alternating code line stripes
my $num_width="5%"; my $code_width="55%"; my $comment_width="40%"; # space for output pieces
my $tab = 4; # width in spaces, set to suit yourself
my $tab_spaces = " " x $tab;
# following vars are used by hard_way_split_on_comment() and its callees.
my ( $In_quote_or_pattern,
$In_quote, $In_single_quote,
$In_pattern, $In_single_pattern );
my ( $Line, $Char, $Ptr, $Eos, $Pattern_char, $Was_dollar );
&main;
sub main {
&help_requested(); # dies after displaying help
if ( $#ARGV == -1 ) { # if no args, display syntax and exit
push( @ARGV, "-?" );
&help_requested;
}
foreach my $file ( @ARGV ) { write_html( $file ); } # this will work for l*.pl or an explicit list
} # end of main()
sub add_anchors { # finds subroutine calls in code and adds hyperlinks to sub
my $code = $_[0];
my $anchor = "";
my $match = $FALSE;
foreach my $sname ( @subs ) {
if ( ($code =~ /^$sname\W/) # "func(..."
or ($code =~ /\W$sname\W/) # ...+func(..."
or ($code =~ /\W$sname$/) ) { # ...+func"
$anchor = "$sname"; # func
$code =~ s/$sname/$anchor/; # replace the name with the hyperlinked name
}
} # end of foreach
return $code;
} # end of add_anchors()
# to split on comment there's an easy way:
# $line =~ /(^[^#]*)#(.*$)/;
# $code = $1;
# $comment = $2;
#
# but that will split on "#" embedded in - quotes
- regex patterns
- $#list_names
#
# to get it right, you have to do it the hard way:
sub hard_way_before_pattern { # pointer is at "m" in m/pattern/, at "s" in s/srch/rep/, etc.
if ( $Ptr < $Eos ) {
$Ptr++;
$Char = substr( $Line, $Ptr, 1 );
}
hard_way_start_pattern( $_[0] ); # note: this is harmless at EOS
} # end of hard_way_before_pattern
sub hard_way_check_for_close_pattern { # clear all flags on close pattern
if ( $Char eq $Pattern_char ) {
if ( $In_single_pattern ) { hard_way_clear(); } # clear flags
else { $In_single_pattern = $TRUE; } # at end of first pattern, process the next pattern
} # end if
} # end of hard_way_check_for_close_pattern
sub hard_way_check_for_close_quote { # clear all flags on close quote
# note: don't clear if a single quote is inside a double or vice-versa
if ( $In_single_quote and ($Char eq "'") ) { hard_way_clear(); }
elsif ( (not $In_single_quote) and $Char eq '"' ) { hard_way_clear(); }
} # end of hard_way_check_for_close_quote
sub hard_way_check_for_func { # for finding patterns in calls to grep() and split()
my $func = $_[0];
my $tail = substr( $Line, $Ptr );
my $is_func = ( $tail =~ /(^$func\s*\(\s*)\S/ ); # "func" [opt_white] "(" [opt_white] one non-white character
if ( $is_func ) {
$Ptr += length( $1 ); # $1 is all chars up to, but not including, start of pattern
$Char = substr( $Line, $Ptr, 1 ); # pointer points to pattern start char
hard_way_start_pattern( $TRUE ); # start single pattern
}
} # end of hard_way_check_for_func()
sub hard_way_check_for_tilde { # found "=" or "!", checks for "=~" or "!~"
if ( ($Eos - $Ptr) < 3 ) { return; } # returns if not enough chars for pattern
if ( substr($Line, $Ptr+1, 1) ne "~" ) { return; } # returns if not tilde
$Ptr += 2; # point to char after "=~" (or "!~")
hard_way_skip_white_space();
if ( $Char eq "m" ) {
hard_way_before_pattern( $TRUE ); # =~ m/ is single pattern
}
elsif ( $Char eq "s" ) {
hard_way_before_pattern( $FALSE ); # =~ s/ is NOT single pattern
}
elsif ( $Char eq "t" ) { # could be =~ tr/... or "t" could be a pattern delimiter
if ( $Ptr < $Eos ) {
my $next = substr( $Line, $Ptr+1, 1 );
if ( $next eq "r" ) {
$Ptr++;
hard_way_before_pattern( $FALSE ); # =~ tr/ is NOT single pattern
}
else { hard_way_start_pattern( $TRUE ); }# =~ tpatternt is single
}
}
else { hard_way_start_pattern( $TRUE ); } # !~ / is single pattern
} # end of hard_way_check_for_tilde()
sub hard_way_check_special { # look for open quotes, start of patterns - these checks are made when outside any pattern or quote
switch ( $Char ) {
case ( "'" ) { hard_way_set_single_quote( $TRUE ); } # quote starting with single
case ( '"' ) { hard_way_set_single_quote( $FALSE ); } # quote starting with double
case ( "=" ) { hard_way_check_for_tilde(); } # binding? "=~"
case ( "!" ) { hard_way_check_for_tilde(); } # not binding? "!~"
case ( "g" ) { hard_way_check_for_func( "grep" ) ; } # calling grep( /pattern/ ...
case ( "s" ) { hard_way_check_for_func( "split" ); } # calling split( /pattern/ ...
} # end of switch
} # end of sub hard_way_check_special
sub hard_way_clear { # called to initialize and when leaving a pattern or quote
$In_quote_or_pattern = $FALSE;
$In_quote = $FALSE;
$In_single_quote = $FALSE;
$In_pattern = $FALSE;
$In_single_pattern = $FALSE;
$Pattern_char = "";
$Was_dollar = $FALSE;
} # end of hard_way_clear
sub hard_way_crawl { # crawls down code line, one char at a time
while ( $Ptr < $Eos ) {
$Char = substr( $Line, $Ptr, 1 );
if ( ($Char eq "#") and # the basic check for a comment-starting "#"
(not $In_quote_or_pattern) and
(not $Was_dollar) ) { return $Ptr; } # returns here on finding comment-starting "#"
if ( $In_quote_or_pattern and ($Char eq "\\") ) { $Ptr++; } # skip escaped chars
elsif( $In_quote ) { hard_way_check_for_close_quote(); }
elsif( $In_pattern ) { hard_way_check_for_close_pattern(); }
else { hard_way_check_special(); } # check for quote start, pattern start, etc.
$Ptr++;
$Was_dollar = $Char eq "\$"; # used to tell "#" comment start from "$#name"
} # end of while loop
return $Ptr;
} # end of hard_way_crawl
sub hard_way_set_single_pattern { # pattern-starting char has been found
$In_single_pattern = $_[0];
$In_pattern = $TRUE;
$In_quote_or_pattern = $TRUE;
} # end of hard_way_set_single_pattern()
sub hard_way_set_single_quote{ # quote-starting char has been found
$In_single_quote = $_[0]; # may be $TRUE or $FALSE
$In_quote = $TRUE;
$In_quote_or_pattern = $TRUE;
} # end of hard_way_set_single_quote()
sub hard_way_skip_white_space { # note that this is the same a C, but uglier and slower
my $space = ord( " " ); # 32 "white space" is anything from 0 to 32
while ( $Ptr < $Eos ) { # stop at EOS
$Char = substr( $Line, $Ptr, 1 );
if ( ord($Char) > $space ) { last; } # stop if non-white
$Ptr++; # keep going
}
} # end of hard_way_skip_white_space()
sub hard_way_split_on_comment { # this is the main routine in the "hard_way" group
hard_way_clear(); # resets all $In_xxx flags
$Line = $_[0]; # the line of code
$Char = ""; # the current character
$Ptr = 0; # current position
$Eos = length( $Line );
$Pattern_char = ""; # character that starts a regex pattern
chomp $Line;
my $hash_pos = hard_way_crawl(); # return is 1 plus index of comment-starting "#"
# there are three cases to handle - $hash_pos == 0, line is all code
- 0 < $hash_pos < $Eos - split into code and comment portions
- $hash_pos == $Eos - line is all code
if ( $hash_pos == 0 ) # this is why it's 1 plus
{ return ( "", substr($Line,1) ); } # whole line is comment
elsif ( $hash_pos < $Eos ) # code and comment present
{ return (
substr($Line, 0, $hash_pos),
substr($Line, $hash_pos+1) ); }
else { return ( $Line, "" ); } # line is all code
} # end of hard_way_split_on_comment()
sub hard_way_start_pattern { # pattern starts at the current $Ptr and $Char
$Pattern_char = $Char;
hard_way_set_single_pattern( $_[0] );
} # end of hard_way_start_pattern
# end of "hard_way" code
sub has_sub_calls { # are there function/subroutine calls in this line of code?
my $line = $_[0];
my ( $code, my $comment ) = split_on_comment( $line ); # break into code and comment
my $found = $FALSE;
foreach my $sname ( @subs ) { # check against every name in the list of sub names
$found = ( $code =~ /\W$sname\W/ );
if ( $found ) { last; } # break when a sub name is found
}
return $found;
} # end of has_sub_calls
sub html_filename { # given "foo/bar.pl" return "foo/bar.html"
# given "foo/bar" return "foo/bar.html"
my $ret = $_[0];
if ( $ret =~ /\.pl$/ ) { # arg is "xxxxx.pl"
$ret =~ s/\.pl$/\.html/; # find ".pl" replace with ".html"
}
else { $ret .= ".html" } # arg is "xxxxx", append ".html"
return $ret;
} # end of html_filename
sub is_readable_text_file { # check and, if needed, die with informative message
my $pathname = $_[0];
my $msg_no_file = "File \'$pathname\' does not exist.\n";
my $msg_not_readable = "File \'$pathname\' is not readable.\n";
my $msg_not_text = "File \'$pathname\' is not a text file.\n";
my $good = 0; # guilty until proven innocent
if (not (-e $pathname) ) { print $msg_no_file; }
elsif (not (-r $pathname) ) { print $msg_not_readable; }
elsif (not (-T $pathname) ) { print $msg_not_text; }
else { $good = 1; } # proven innocent!
return $good;
} # end of is_readable_text_file
sub is_sub { # is line start of a sub? "sub" possibly preceded by whitespace
my $line = $_[0];
my $ret = ( $line =~ /^\s*sub\s/ );
return $ret;
} # end of is_sub()
sub list_subs { # prepare the list of subroutine names
@subs=(); #empty list
foreach my $line ( @code ) {
chomp( $line );
if ( is_sub($line) ) { push( @subs, sub_name($line) ); } # if the line starts "sub", push the name of the sub onto the list
} # end of foreach
} # end of list_subs()
sub properly_indent { # puts the appropriate number of non-breaking spaces into the front of the line
my $code = $_[0];
$code =~ /(\s*?)\S/; # find leading spaces
if ( $1 ) {
my $prefix = $1;
my $spaces = length( $prefix );
my $new_prefix = " " x $spaces; # " " into the HTML
$code =~ s/$prefix/$new_prefix/; # replace spaces with " "s (remember, comments are html, even when that's not convenient - the source reads "&nbsp;")
} # end if has leading blanks
return $code;
} # end of properly_indent()
sub split_on_comment { # comment lines (start w/hash) left on left
my $line = $_[0];
my $code = $line;
my $comment = "";
if ( $code !~ /^#/ ) { # if first char is #, leave comment in code area
if ( $code !~ /.#/ ) # no hashes in the line?
{ $code = $line; } # no comments, code is whole line
else {
( $code, $comment ) = hard_way_split_on_comment( $code );
}
} # end of if line doesn't start with a "#"
$code =~ s/&/&/g; # preserve ampersands
$code =~ s/</g; # no html tags in code
$code =~ s/\t/$tab_spaces/g; # expand tabs to spaces
return ( $code, $comment );
} # end of split_on_comment()
sub sub_name { # pick the sub name from the line that starts the sub
my $line = $_[0];
$line =~ /sub\s+(\w+)/; # after the "sub" and optional whitespace, a string of word chars
return $1;
} # end of sub_name
sub write_footer { # the end of the .html file
print "