| 1 |
#! /usr/bin/perl |
| 2 |
# /usr/bin/pl2html |
| 3 |
# copyright 2007, Martin Rinehart |
| 4 |
|
|
| 5 |
# This project used by the author to learn Perl. |
| 6 |
# Hereby licensed to all for non-commercial use. |
| 7 |
|
|
| 8 |
# Version 1.0 |
| 9 |
|
looks great on Konq, good on Firefox Linux, dunno MSIE |
| 10 |
|
|
| 11 |
# Version 1+ wishlist |
| 12 |
|
want to have hyperlinks back to caller(s) for each sub |
| 13 |
|
|
| 14 |
# report bugs to MartinRinehart at gmail dot com |
| 15 |
|
|
| 16 |
use strict; |
Syntax: pl2html source.pl Example: pl2html pl2html.pl (Example generates this file) This shows HTML in a comment. | |
| 17 |
use Switch; |
|
| 18 |
use warnings; |
|
| 19 |
|
|
| 20 |
my $FALSE=0; my $TRUE=1; |
readability constants |
| 21 |
|
|
| 22 |
my @code; |
list of the input file's lines |
| 23 |
my @subs; |
list of names of subroutines |
| 24 |
|
|
| 25 |
my $bgcolor = "#f8f4f0"; |
beige surround |
| 26 |
my $numcolor = "#e4e4e4"; |
grey numbers |
| 27 |
my $toccolor="#fffcf8"; |
table of contents |
| 28 |
my @rowcolors = ( "#f0f8ff", "f0fff8" ); |
alternating code line stripes |
| 29 |
|
|
| 30 |
my $num_width="5%"; my $code_width="55%"; my $comment_width="40%"; |
space for output pieces |
| 31 |
|
|
| 32 |
my $tab = 4; |
width in spaces, set to suit yourself |
| 33 |
my $tab_spaces = " " x $tab; |
|
| 34 |
|
|
| 35 |
|
following vars are used by hard_way_split_on_comment() and its callees. |
| 36 |
my ( $In_quote_or_pattern, |
|
| 37 |
$In_quote, $In_single_quote, |
|
| 38 |
$In_pattern, $In_single_pattern ); |
|
| 39 |
my ( $Line, $Char, $Ptr, $Eos, $Pattern_char, $Was_dollar ); |
|
| 40 |
|
|
| 41 |
&main; |
|
| 42 |
|
|
| toc |
sub add_anchors { |
finds subroutine calls in code and adds hyperlinks to sub |
| 57 |
|
|
| 58 |
my $code = $_[0]; |
|
| 59 |
my $anchor = ""; |
|
| 60 |
my $match = $FALSE; |
|
| 61 |
|
|
| 62 |
foreach my $sname ( @subs ) { |
|
| 63 |
if ( ($code =~ /^$sname\W/) |
"func(..." |
| 64 |
or ($code =~ /\W$sname\W/) |
...+func(..." |
| 65 |
or ($code =~ /\W$sname$/) ) { |
...+func" |
| 66 |
$anchor = "<a href=\"#$sname\">$sname</a>"; |
func |
| 67 |
$code =~ s/$sname/$anchor/; |
replace the name with the hyperlinked name |
| 68 |
} |
|
| 69 |
} |
end of foreach |
| 70 |
|
|
| 71 |
return $code; |
|
| 72 |
|
|
| 73 |
} |
end of add_anchors() |
| 74 |
|
|
| 75 |
|
to split on comment there's an easy way: |
| 76 |
|
$line =~ /(^[^#]*)#(.*$)/; |
| 77 |
|
$code = $1; |
| 78 |
|
$comment = $2; |
| 79 |
|
|
| 80 |
|
but that will split on "#" embedded in - quotes
- regex patterns
- $#list_names
|
| 81 |
|
|
| 82 |
|
to get it right, you have to do it the hard way: |
| 83 |
|
|
| toc |
sub hard_way_check_for_func { |
for finding patterns in calls to grep() and split() |
| 114 |
|
|
| 115 |
my $func = $_[0]; |
|
| 116 |
|
|
| 117 |
my $tail = substr( $Line, $Ptr ); |
|
| 118 |
my $is_func = ( $tail =~ /(^$func\s*\(\s*)\S/ ); |
"func" [opt_white] "(" [opt_white] one non-white character |
| 119 |
if ( $is_func ) { |
|
| 120 |
$Ptr += length( $1 ); |
$1 is all chars up to, but not including, start of pattern |
| 121 |
$Char = substr( $Line, $Ptr, 1 ); |
pointer points to pattern start char |
| 122 |
hard_way_start_pattern( $TRUE ); |
start single pattern |
| 123 |
} |
|
| 124 |
|
|
| 125 |
} |
end of hard_way_check_for_func() |
| 126 |
|
|
| toc |
sub hard_way_check_for_tilde { |
found "=" or "!", checks for "=~" or "!~" |
| 128 |
|
|
| 129 |
if ( ($Eos - $Ptr) < 3 ) { return; } |
returns if not enough chars for pattern |
| 130 |
|
|
| 131 |
if ( substr($Line, $Ptr+1, 1) ne "~" ) { return; } |
returns if not tilde |
| 132 |
|
|
| 133 |
$Ptr += 2; |
point to char after "=~" (or "!~") |
| 134 |
|
|
| 135 |
hard_way_skip_white_space(); |
|
| 136 |
|
|
| 137 |
if ( $Char eq "m" ) { |
|
| 138 |
hard_way_before_pattern( $TRUE ); |
=~ m/ is single pattern |
| 139 |
} |
|
| 140 |
elsif ( $Char eq "s" ) { |
|
| 141 |
hard_way_before_pattern( $FALSE ); |
=~ s/ is NOT single pattern |
| 142 |
} |
|
| 143 |
elsif ( $Char eq "t" ) { |
could be =~ tr/... or "t" could be a pattern delimiter |
| 144 |
if ( $Ptr < $Eos ) { |
|
| 145 |
my $next = substr( $Line, $Ptr+1, 1 ); |
|
| 146 |
if ( $next eq "r" ) { |
|
| 147 |
$Ptr++; |
|
| 148 |
hard_way_before_pattern( $FALSE ); |
=~ tr/ is NOT single pattern |
| 149 |
} |
|
| 150 |
else { hard_way_start_pattern( $TRUE ); } |
=~ tpatternt is single |
| 151 |
} |
|
| 152 |
} |
|
| 153 |
else { hard_way_start_pattern( $TRUE ); } |
!~ / is single pattern |
| 154 |
|
|
| 155 |
} |
end of hard_way_check_for_tilde() |
| 156 |
|
|
| toc |
sub hard_way_crawl { |
crawls down code line, one char at a time |
| 185 |
|
|
| 186 |
while ( $Ptr < $Eos ) { |
|
| 187 |
|
|
| 188 |
$Char = substr( $Line, $Ptr, 1 ); |
|
| 189 |
if ( ($Char eq "#") and |
the basic check for a comment-starting "#" |
| 190 |
(not $In_quote_or_pattern) and |
|
| 191 |
(not $Was_dollar) ) { return $Ptr; } |
returns here on finding comment-starting "#" |
| 192 |
|
|
| 193 |
if ( $In_quote_or_pattern and ($Char eq "\\") ) { $Ptr++; } |
skip escaped chars |
| 194 |
|
|
| 195 |
elsif( $In_quote ) { hard_way_check_for_close_quote(); } |
|
| 196 |
|
|
| 197 |
elsif( $In_pattern ) { hard_way_check_for_close_pattern(); } |
|
| 198 |
|
|
| 199 |
else { hard_way_check_special(); } |
check for quote start, pattern start, etc. |
| 200 |
|
|
| 201 |
$Ptr++; |
|
| 202 |
$Was_dollar = $Char eq "\$"; |
used to tell "#" comment start from "$#name" |
| 203 |
|
|
| 204 |
} |
end of while loop |
| 205 |
|
|
| 206 |
return $Ptr; |
|
| 207 |
|
|
| 208 |
} |
end of hard_way_crawl |
| 209 |
|
|
| toc |
sub hard_way_split_on_comment { |
this is the main routine in the "hard_way" group |
| 239 |
|
|
| 240 |
hard_way_clear(); |
resets all $In_xxx flags |
| 241 |
$Line = $_[0]; |
the line of code |
| 242 |
$Char = ""; |
the current character |
| 243 |
$Ptr = 0; |
current position |
| 244 |
$Eos = length( $Line ); |
|
| 245 |
|
|
| 246 |
$Pattern_char = ""; |
character that starts a regex pattern |
| 247 |
|
|
| 248 |
chomp $Line; |
|
| 249 |
|
|
| 250 |
my $hash_pos = hard_way_crawl(); |
return is 1 plus index of comment-starting "#" |
| 251 |
|
|
| 252 |
|
there are three cases to handle - $hash_pos == 0, line is all code
- 0 < $hash_pos < $Eos - split into code and comment portions
- $hash_pos == $Eos - line is all code
|
| 253 |
|
|
| 254 |
|
|
| 255 |
if ( $hash_pos == 0 ) |
this is why it's 1 plus |
| 256 |
{ return ( "", substr($Line,1) ); } |
whole line is comment |
| 257 |
elsif ( $hash_pos < $Eos ) |
code and comment present |
| 258 |
{ return ( |
|
| 259 |
substr($Line, 0, $hash_pos), |
|
| 260 |
substr($Line, $hash_pos+1) ); } |
|
| 261 |
else { return ( $Line, "" ); } |
line is all code |
| 262 |
|
|
| 263 |
} |
end of hard_way_split_on_comment() |
| 264 |
|
|
| toc |
sub is_readable_text_file { |
check and, if needed, die with informative message |
| 305 |
|
|
| 306 |
my $pathname = $_[0]; |
|
| 307 |
|
|
| 308 |
my $msg_no_file = "File \'$pathname\' does not exist.\n"; |
|
| 309 |
my $msg_not_readable = "File \'$pathname\' is not readable.\n"; |
|
| 310 |
my $msg_not_text = "File \'$pathname\' is not a text file.\n"; |
|
| 311 |
|
|
| 312 |
my $good = 0; |
guilty until proven innocent |
| 313 |
|
|
| 314 |
if (not (-e $pathname) ) { print $msg_no_file; } |
|
| 315 |
elsif (not (-r $pathname) ) { print $msg_not_readable; } |
|
| 316 |
elsif (not (-T $pathname) ) { print $msg_not_text; } |
|
| 317 |
else { $good = 1; } |
proven innocent! |
| 318 |
|
|
| 319 |
return $good; |
|
| 320 |
|
|
| 321 |
} |
end of is_readable_text_file |
| 322 |
|
|
| toc |
sub split_on_comment { |
comment lines (start w/hash) left on left |
| 366 |
|
|
| 367 |
my $line = $_[0]; |
|
| 368 |
|
|
| 369 |
my $code = $line; |
|
| 370 |
my $comment = ""; |
|
| 371 |
|
|
| 372 |
if ( $code !~ /^#/ ) { |
if first char is #, leave comment in code area |
| 373 |
|
|
| 374 |
if ( $code !~ /.#/ ) |
no hashes in the line? |
| 375 |
{ $code = $line; } |
no comments, code is whole line |
| 376 |
else { |
|
| 377 |
( $code, $comment ) = hard_way_split_on_comment( $code ); |
|
| 378 |
} |
|
| 379 |
|
|
| 380 |
} |
end of if line doesn't start with a "#" |
| 381 |
|
|
| 382 |
$code =~ s/&/&/g; |
preserve ampersands |
| 383 |
$code =~ s/</</g; |
no html tags in code |
| 384 |
$code =~ s/\t/$tab_spaces/g; |
expand tabs to spaces |
| 385 |
|
|
| 386 |
return ( $code, $comment ); |
|
| 387 |
|
|
| 388 |
} |
end of split_on_comment() |
| 389 |
|
|
| toc |
sub write_html { |
the mainline for handling an input file |
| 407 |
|
|
| 408 |
my $pathname = $_[0]; |
|
| 409 |
|
|
| 410 |
if ( not is_readable_text_file($pathname) ) { return; } |
|
| 411 |
|
|
| 412 |
open( IN, "<", $pathname ); |
read the input file into @code |
| 413 |
@code = <IN>; |
|
| 414 |
close( IN ); |
|
| 415 |
|
|
| 416 |
list_subs(); |
create the list of sub names |
| 417 |
|
|
| 418 |
my $out = html_filename( $pathname ); |
change "xxx.pl" to "xxx.html" |
| 419 |
|
|
| 420 |
open( OUT, ">", $out ); |
|
| 421 |
select( OUT ); |
redirect "print" statements to the .html file |
| 422 |
|
|
| 423 |
write_header( $pathname, $out ); |
|
| 424 |
write_rows(); |
|
| 425 |
write_footer( $out ); |
|
| 426 |
|
|
| 427 |
select( STDOUT ); |
|
| 428 |
close( OUT ); |
|
| 429 |
|
|
| 430 |
print "\'$out\' written.\n"; |
|
| 431 |
|
|
| 432 |
} |
end of write_html() |
| 433 |
|
|
| toc |
sub write_header { |
write the part of the .html file above the actual listing |
| 435 |
|
|
| 436 |
my $pathname = $_[0]; |
input file pathname |
| 437 |
my $out = $_[1]; |
output file pathname |
| 438 |
|
|
| 439 |
print "<! $out, documenting $pathname >\n"; |
start with a comment |
| 440 |
my ($sec, $min, $hr, $day, $mon, $yr, $wd, $doy, $dst) |
|
| 441 |
= localtime( time() ); |
|
| 442 |
$mon++; |
|
| 443 |
$yr += 1900; |
|
| 444 |
|
|
| 445 |
if ( $min < 10 ) { $min = "0".$min; } |
|
| 446 |
if ( $day < 10 ) { $day = "0".$day; } |
|
| 447 |
|
|
| 448 |
my $timestamp = "$hr:$min:$sec, $mon\/$day\/$yr"; |
|
| 449 |
|
|
| 450 |
print "<! written by pl2html, $timestamp >\n\n"; |
and another comment |
| 451 |
print "<html>\n\n"; |
|
| 452 |
print "<head>\n\n"; |
|
| 453 |
print " <title>$out</title>\n\n"; |
|
| 454 |
print "</head>\n\n"; |
|
| 455 |
|
|
| 456 |
print "<body bgcolor=$bgcolor>\n\n"; |
note, you don't see much of this color, it's a surround |
| 457 |
|
|
| 458 |
print "<h1 align=\"center\">$pathname</h1>\n"; |
big title |
| 459 |
print "<h4 align=\"right\">Formatted and hyperlinked by pl2html, $timestamp</h4>\n"; |
subtitle |
| 460 |
print "<hr align=\"center\" width=90%>\n\n"; |
draw a line |
| 461 |
print "<a name=\"toc\"></a>\n"; |
anchor for "toc" links |
| 462 |
|
|
| 463 |
print "<table align=\"center\" bgcolor=$toccolor border=1 cellpadding=10>\n"; |
this starts the toc |
| 464 |
print "<tr><td>\n"; |
|
| 465 |
|
|
| 466 |
my $cols = 4; |
|
| 467 |
my $subs_per_col = int( ($#subs + $cols)/$cols ); |
|
| 468 |
my $i = 0; |
|
| 469 |
my $j = 0; |
|
| 470 |
|
|
| 471 |
foreach my $sname ( @subs ) { |
list names in four cols. Each col is one | |
| 472 |
|
|
| 473 |
|
names go down first col, then down second, etc. |
| 474 |
|
order is the order the programmer uses for the subs |
| 475 |
|
|
| 476 |
print "<a href=\"#$sname\"> $sname </a> <br>\n"; |
link toc name to source location |
| 477 |
$i++; $j++; |
|
| 478 |
|
|
| 479 |
if ( $i == $subs_per_col ) { |
on to next | at end of column |
| 480 |
unless ( $j == @subs ) { |
don't do it if we've finished |
| 481 |
print "</td><td>\n"; |
|
| 482 |
$i=0; |
|
| 483 |
} |
|
| 484 |
} |
|
| 485 |
} |
end of foreach |
| 486 |
|
|
| 487 |
print "</td></tr></table>\n"; |
close the table |
| 488 |
|
|
| 489 |
} |
end of write_header() |
| 490 |
|
|
| toc |
sub write_row_html { |
writes the html for non "sub" code lines |
| 492 |
|
|
| 493 |
my ( $color, $code, $comment, $i ) = @_; |
|
| 494 |
|
|
| 495 |
print "<tr bgcolor=$color>\n"; |
set the color for the row |
| 496 |
print " <td align=\"right\" bgcolor=$numcolor width=$num_width> $i </td>\n"; |
write the line number (overrides row color) |
| 497 |
|
|
| 498 |
if ( $code =~ /^\#/ ) { |
if line starts with hash, use both columns |
| 499 |
print " <td colspan=2> <font face=\"FreeMono,Monospace,Courier,Courier New\"> $code </font> </td>\n"; |
write the code across two columns |
| 500 |
} |
|
| 501 |
else { |
|
| 502 |
print " <td width=$code_width> <font face=\"FreeMono,Monospace,Courier,Courier New\"> $code </font> </td>\n"; |
write the code |
| 503 |
print " <td width=$comment_width> $comment </td>\n"; |
write the comment |
| 504 |
} |
|
| 505 |
print "</tr>\n\n"; |
|
| 506 |
|
|
| 507 |
} |
end of write_row_html() |
| 508 |
|
|
| toc |
sub write_row_sub { |
write a row that starts a sub |
| 523 |
|
|
| 524 |
my $line = $_[0]; |
input line |
| 525 |
my $i = $_[1]; |
current line number |
| 526 |
my $color = $_[2]; |
color |
| 527 |
|
|
| 528 |
my $sname = sub_name( $line ); |
get the name from the "sub" line |
| 529 |
my ( $code, $comment) = split_on_comment( $line ); |
split to code and comment |
| 530 |
$code = properly_indent( $code ); |
replace " " with " " |
| 531 |
|
|
| 532 |
print "<tr bgcolor=$color>\n"; |
start a row and set the color |
| 533 |
print " <td align=\"right\" bgcolor=$numcolor width=$num_width> "; |
|
| 534 |
print " <a href=\"#toc\"> toc </a> </td>\n"; |
write a link to "toc" instead of line number |
| 535 |
print " <td width=$code_width> <font face=\"FreeMono,Monospace,Courier,Courier New\" size=+1><b> $code "; |
write the code |
| 536 |
print "</b></font> </td>\n"; |
|
| 537 |
print " <td width=$comment_width> <h3> $comment </h3> </td>\n"; |
write the comment |
| 538 |
print "</tr>\n\n"; |
|
| 539 |
|
|
| 540 |
} |
end of write_row_sub |
| 541 |
|
|
| toc |
sub write_row_sub_calls { |
write a row that includes sub calls |
| 543 |
|
|
| 544 |
my $line = $_[0]; |
input line |
| 545 |
my $i = $_[1]; |
line number |
| 546 |
my $color = $_[2]; |
color |
| 547 |
|
|
| 548 |
my ($code, $comment) = split_on_comment( $line ); |
split to code and comment |
| 549 |
$code = add_anchors( $code ); |
replace sub calls with hyperlinks |
| 550 |
$code = properly_indent( $code ); |
replace " "s with " "s |
| 551 |
|
|
| 552 |
write_row_html( $color, $code, $comment, $i ); |
write the html |
| 553 |
|
|
| 554 |
} |
end of write_row_sub_call() |
| 555 |
|
|
| toc |
sub write_rows { |
writes the reformatted, hyperlinked code |
| 557 |
|
|
| 558 |
my $i=1; |
first line is 1, most editors |
| 559 |
|
|
| 560 |
print "<table align=\"center\" border=0\n"; |
|
| 561 |
print " cellpadding=1 cellspacing=0 width=95%>\n"; |
open a table |
| 562 |
|
|
| 563 |
foreach my $line ( @code ) { |
|
| 564 |
|
|
| 565 |
my $color = $rowcolors[ $i%2 ]; |
alternate colors |
| 566 |
|
|
| 567 |
chomp( $line ); |
eat EOL |
| 568 |
|
|
| 569 |
if ( is_sub($line) ) { |
|
| 570 |
write_sub_end(); |
end the current table (you can't get anchors within a table) |
| 571 |
write_sub_begin( $line ); |
add anchor and open a new table |
| 572 |
write_row_sub( $line, $i, $color ); } |
write the "sub" line |
| 573 |
elsif( has_sub_calls($line) ) { write_row_sub_calls( $line, $i, $color ); } |
or write a line with sub calls |
| 574 |
else { write_row_plain( $line, $i, $color ); } |
or write a line w/o sub calls |
| 575 |
|
|
| 576 |
$i++; |
|
| 577 |
|
|
| 578 |
} |
end of foreach |
| 579 |
|
|
| 580 |
} |
end of write_rows() |
| 581 |
|
|