/usr/bin/pl2html

Formatted and hyperlinked by pl2html, 6:23:40, 3/02/2007


main
add_anchors
hard_way_before_pattern
hard_way_check_for_close_pattern
hard_way_check_for_close_quote
hard_way_check_for_func
hard_way_check_for_tilde
hard_way_check_special
hard_way_clear
hard_way_crawl
hard_way_set_single_pattern
hard_way_set_single_quote
hard_way_skip_white_space
hard_way_split_on_comment
hard_way_start_pattern
has_sub_calls
html_filename
is_readable_text_file
is_sub
list_subs
properly_indent
split_on_comment
sub_name
write_footer
write_html
write_header
write_row_html
write_row_plain
write_row_sub
write_row_sub_calls
write_rows
write_sub_begin
write_sub_end
help_requested
1 #! /usr/bin/perl
2 # /usr/bin/pl2html
3 # copyright 2007, Martin Rinehart
4
5 # This project used by the author to learn Perl.
6 # Hereby licensed to all for non-commercial use.
7
8 # Version 1.0
9 looks great on Konq, good on Firefox Linux, dunno MSIE
10
11 # Version 1+ wishlist
12 want to have hyperlinks back to caller(s) for each sub
13
14 # report bugs to MartinRinehart at gmail dot com
15
16 use strict;
Syntax: pl2html source.pl
Example: pl2html pl2html.pl
(Example generates this file)
This shows HTML in a comment.
17 use Switch;
18 use warnings;
19
20 my $FALSE=0; my $TRUE=1; readability constants
21
22 my @code; list of the input file's lines
23 my @subs; list of names of subroutines
24
25 my $bgcolor = "#f8f4f0"; beige surround
26 my $numcolor = "#e4e4e4"; grey numbers
27 my $toccolor="#fffcf8"; table of contents
28 my @rowcolors = ( "#f0f8ff", "f0fff8" ); alternating code line stripes
29
30 my $num_width="5%"; my $code_width="55%"; my $comment_width="40%"; space for output pieces
31
32 my $tab = 4; width in spaces, set to suit yourself
33 my $tab_spaces = " " x $tab;
34
35 following vars are used by hard_way_split_on_comment() and its callees.
36     my ( $In_quote_or_pattern,
37             $In_quote, $In_single_quote,
38             $In_pattern, $In_single_pattern );
39     my ( $Line, $Char, $Ptr, $Eos, $Pattern_char, $Was_dollar );
40
41 &main;
42
toc sub main {

44
45 &help_requested(); dies after displaying help
46
47 if ( $#ARGV == -1 ) { if no args, display syntax and exit
48   push( @ARGV, "-?" );
49   &help_requested;
50 }
51
52 foreach my $file ( @ARGV ) { write_html( $file ); } this will work for l*.pl or an explicit list
53
54 } end of main()
55
toc sub add_anchors {

finds subroutine calls in code and adds hyperlinks to sub

57
58     my $code = $_[0];
59     my $anchor = "";
60     my $match = $FALSE;
61
62     foreach my $sname ( @subs ) {
63         if ( ($code =~ /^$sname\W/) "func(..."
64             or ($code =~ /\W$sname\W/) ...+func(..."
65             or ($code =~ /\W$sname$/) ) { ...+func"
66             $anchor = "<a href=\"#$sname\">$sname</a>"; func
67             $code =~ s/$sname/$anchor/; replace the name with the hyperlinked name
68         }
69     } end of foreach
70
71     return $code;
72
73 } end of add_anchors()
74
75 to split on comment there's an easy way:
76
   $line =~ /(^[^#]*)#(.*$)/;
77
   $code = $1;
78
   $comment = $2;
79
80 but that will split on "#" embedded in
  • quotes
  • regex patterns
  • $#list_names
81
82 to get it right, you have to do it the hard way:
83
toc sub hard_way_before_pattern {

pointer is at "m" in m/pattern/, at "s" in s/srch/rep/, etc.

85
86     if ( $Ptr < $Eos ) {
87         $Ptr++;
88         $Char = substr( $Line, $Ptr, 1 );
89     }
90     hard_way_start_pattern( $_[0] ); note: this is harmless at EOS
91
92 } end of hard_way_before_pattern
93
toc sub hard_way_check_for_close_pattern {

clear all flags on close pattern

95
96     if ( $Char eq $Pattern_char ) {
97
98         if ( $In_single_pattern ) { hard_way_clear(); } clear flags
99         else { $In_single_pattern = $TRUE; } at end of first pattern, process the next pattern
100
101     } end if
102
103 } end of hard_way_check_for_close_pattern
104
toc sub hard_way_check_for_close_quote {

clear all flags on close quote

106 note: don't clear if a single quote is inside a double or vice-versa
107
108     if ( $In_single_quote and ($Char eq "'") ) { hard_way_clear(); }
109     elsif ( (not $In_single_quote) and $Char eq '"' ) { hard_way_clear(); }
110
111 } end of hard_way_check_for_close_quote
112
toc sub hard_way_check_for_func {

for finding patterns in calls to grep() and split()

114
115     my $func = $_[0];
116
117     my $tail = substr( $Line, $Ptr );
118     my $is_func = ( $tail =~ /(^$func\s*\(\s*)\S/ ); "func" [opt_white] "(" [opt_white] one non-white character
119     if ( $is_func ) {
120         $Ptr += length( $1 ); $1 is all chars up to, but not including, start of pattern
121         $Char = substr( $Line, $Ptr, 1 ); pointer points to pattern start char
122         hard_way_start_pattern( $TRUE ); start single pattern
123     }
124
125 } end of hard_way_check_for_func()
126
toc sub hard_way_check_for_tilde {

found "=" or "!", checks for "=~" or "!~"

128
129     if ( ($Eos - $Ptr) < 3 ) { return; }

returns if not enough chars for pattern

130
131     if ( substr($Line, $Ptr+1, 1) ne "~" ) { return; }

returns if not tilde

132
133     $Ptr += 2; point to char after "=~" (or "!~")
134
135     hard_way_skip_white_space();
136
137     if ( $Char eq "m" ) {
138         hard_way_before_pattern( $TRUE ); =~ m/ is single pattern
139     }
140     elsif ( $Char eq "s" ) {
141         hard_way_before_pattern( $FALSE ); =~ s/ is NOT single pattern
142     }
143     elsif ( $Char eq "t" ) { could be =~ tr/... or "t" could be a pattern delimiter
144         if ( $Ptr < $Eos ) {
145             my $next = substr( $Line, $Ptr+1, 1 );
146             if ( $next eq "r" ) {
147                 $Ptr++;
148                 hard_way_before_pattern( $FALSE ); =~ tr/ is NOT single pattern
149             }
150             else { hard_way_start_pattern( $TRUE ); } =~ tpatternt is single
151         }
152     }
153     else { hard_way_start_pattern( $TRUE ); } !~ / is single pattern
154
155 } end of hard_way_check_for_tilde()
156
toc sub hard_way_check_special {

look for open quotes, start of patterns - these checks are made when outside any pattern or quote

158
159     switch ( $Char ) {
160
161         case ( "'" ) { hard_way_set_single_quote( $TRUE ); } quote starting with single
162         case ( '"' ) { hard_way_set_single_quote( $FALSE ); } quote starting with double
163         case ( "=" ) { hard_way_check_for_tilde(); } binding? "=~"
164         case ( "!" ) { hard_way_check_for_tilde(); } not binding? "!~"
165         case ( "g" ) { hard_way_check_for_func( "grep" ) ; } calling grep( /pattern/ ...
166         case ( "s" ) { hard_way_check_for_func( "split" ); } calling split( /pattern/ ...
167
168     } end of switch
169
170 } end of sub hard_way_check_special
171
toc sub hard_way_clear {

called to initialize and when leaving a pattern or quote

173
174     $In_quote_or_pattern = $FALSE;
175     $In_quote = $FALSE;
176     $In_single_quote = $FALSE;
177     $In_pattern = $FALSE;
178     $In_single_pattern = $FALSE;
179     $Pattern_char = "";
180     $Was_dollar = $FALSE;
181
182 } end of hard_way_clear
183
toc sub hard_way_crawl {

crawls down code line, one char at a time

185
186     while ( $Ptr < $Eos ) {
187
188         $Char = substr( $Line, $Ptr, 1 );
189         if ( ($Char eq "#") and the basic check for a comment-starting "#"
190              (not $In_quote_or_pattern) and
191              (not $Was_dollar) ) { return $Ptr; }

returns here on finding comment-starting "#"

192
193         if ( $In_quote_or_pattern and ($Char eq "\\") ) { $Ptr++; } skip escaped chars
194
195         elsif( $In_quote ) { hard_way_check_for_close_quote(); }
196
197         elsif( $In_pattern ) { hard_way_check_for_close_pattern(); }
198
199         else { hard_way_check_special(); } check for quote start, pattern start, etc.
200
201         $Ptr++;
202         $Was_dollar = $Char eq "\$"; used to tell "#" comment start from "$#name"
203
204     } end of while loop
205
206     return $Ptr;
207
208 } end of hard_way_crawl
209
toc sub hard_way_set_single_pattern {

pattern-starting char has been found

211
212     $In_single_pattern = $_[0];
213     $In_pattern = $TRUE;
214     $In_quote_or_pattern = $TRUE;
215
216 } end of hard_way_set_single_pattern()
217
toc sub hard_way_set_single_quote{

quote-starting char has been found

219
220     $In_single_quote = $_[0]; may be $TRUE or $FALSE
221     $In_quote = $TRUE;
222     $In_quote_or_pattern = $TRUE;
223
224 } end of hard_way_set_single_quote()
225
toc sub hard_way_skip_white_space {

note that this is the same a C, but uglier and slower

227
228     my $space = ord( " " ); 32 "white space" is anything from 0 to 32
229
230     while ( $Ptr < $Eos ) { stop at EOS
231         $Char = substr( $Line, $Ptr, 1 );
232         if ( ord($Char) > $space ) { last; } stop if non-white
233         $Ptr++; keep going
234     }
235
236 } end of hard_way_skip_white_space()
237
toc sub hard_way_split_on_comment {

this is the main routine in the "hard_way" group

239
240     hard_way_clear(); resets all $In_xxx flags
241     $Line = $_[0]; the line of code
242     $Char = ""; the current character
243     $Ptr = 0; current position
244     $Eos = length( $Line );
245
246     $Pattern_char = ""; character that starts a regex pattern
247
248     chomp $Line;
249
250     my $hash_pos = hard_way_crawl(); return is 1 plus index of comment-starting "#"
251
252 there are three cases to handle
  • $hash_pos == 0, line is all code
  • 0 < $hash_pos < $Eos - split into code and comment portions
  • $hash_pos == $Eos - line is all code
253
254
255     if ( $hash_pos == 0 ) this is why it's 1 plus
256         { return ( "", substr($Line,1) ); } whole line is comment
257     elsif ( $hash_pos < $Eos ) code and comment present
258         { return (
259             substr($Line, 0, $hash_pos),
260             substr($Line, $hash_pos+1) ); }
261     else { return ( $Line, "" ); } line is all code
262
263 } end of hard_way_split_on_comment()
264
toc sub hard_way_start_pattern {

pattern starts at the current $Ptr and $Char

266
267     $Pattern_char = $Char;
268     hard_way_set_single_pattern( $_[0] );
269
270 } end of hard_way_start_pattern
271
272

end of "hard_way" code

273
toc sub has_sub_calls {

are there function/subroutine calls in this line of code?

275
276     my $line = $_[0];
277
278     my ( $code, my $comment ) = split_on_comment( $line ); break into code and comment
279
280     my $found = $FALSE;
281
282     foreach my $sname ( @subs ) { check against every name in the list of sub names
283         $found = ( $code =~ /\W$sname\W/ );
284         if ( $found ) { last; } break when a sub name is found
285     }
286
287     return $found;
288
289 } end of has_sub_calls
290
toc sub html_filename {

given "foo/bar.pl" return "foo/bar.html"

292 given "foo/bar" return "foo/bar.html"
293
294     my $ret = $_[0];
295
296     if ( $ret =~ /\.pl$/ ) { arg is "xxxxx.pl"
297         $ret =~ s/\.pl$/\.html/; find ".pl" replace with ".html"
298     }
299     else { $ret .= ".html" } arg is "xxxxx", append ".html"
300     return $ret;
301
302 } end of html_filename
303
toc sub is_readable_text_file {

check and, if needed, die with informative message

305
306     my $pathname = $_[0];
307
308     my $msg_no_file = "File \'$pathname\' does not exist.\n";
309     my $msg_not_readable = "File \'$pathname\' is not readable.\n";
310     my $msg_not_text = "File \'$pathname\' is not a text file.\n";
311
312     my $good = 0; guilty until proven innocent
313
314     if (not (-e $pathname) ) { print $msg_no_file; }
315     elsif (not (-r $pathname) ) { print $msg_not_readable; }
316     elsif (not (-T $pathname) ) { print $msg_not_text; }
317     else { $good = 1; } proven innocent!
318
319     return $good;
320
321 } end of is_readable_text_file
322
toc sub is_sub {

is line start of a sub? "sub" possibly preceded by whitespace

324
325     my $line = $_[0];
326
327     my $ret = ( $line =~ /^\s*sub\s/ );
328
329     return $ret;
330
331 } end of is_sub()
332
toc sub list_subs {

prepare the list of subroutine names

334
335     @subs=(); empty list
336
337     foreach my $line ( @code ) {
338
339         chomp( $line );
340         if ( is_sub($line) ) { push( @subs, sub_name($line) ); } if the line starts "sub", push the name of the sub onto the list
341
342     } end of foreach
343
344 } end of list_subs()
345
toc sub properly_indent {

puts the appropriate number of non-breaking spaces into the front of the line

347
348     my $code = $_[0];
349     $code =~ /(\s*?)\S/; find leading spaces
350
351     if ( $1 ) {
352
353         my $prefix = $1;
354         my $spaces = length( $prefix );
355         my $new_prefix = "&nbsp;" x $spaces; " " into the HTML
356
357         $code =~ s/$prefix/$new_prefix/; replace spaces with "&nbsp;"s (remember, comments are html, even when that's not convenient - the source reads "&amp;nbsp;")
358
359     } end if has leading blanks
360
361     return $code;
362
363 } end of properly_indent()
364
toc sub split_on_comment {

comment lines (start w/hash) left on left

366
367     my $line = $_[0];
368
369     my $code = $line;
370     my $comment = "";
371
372     if ( $code !~ /^#/ ) { if first char is #, leave comment in code area
373
374         if ( $code !~ /.#/ ) no hashes in the line?
375             { $code = $line; } no comments, code is whole line
376         else {
377             ( $code, $comment ) = hard_way_split_on_comment( $code );
378         }
379
380     } end of if line doesn't start with a "#"
381
382     $code =~ s/&/&amp;/g; preserve ampersands
383     $code =~ s/</&lt;/g; no html tags in code
384     $code =~ s/\t/$tab_spaces/g; expand tabs to spaces
385
386     return ( $code, $comment );
387
388 } end of split_on_comment()
389
toc sub sub_name {

pick the sub name from the line that starts the sub

391
392     my $line = $_[0];
393     $line =~ /sub\s+(\w+)/; after the "sub" and optional whitespace, a string of word chars
394
395     return $1;
396
397 } end of sub_name
398
toc sub write_footer {

the end of the .html file

400
401     print "</body></html>\n";
402     print "<! end of $_[0]>\n";
403
404 } end of write_footer()
405
toc sub write_html {

the mainline for handling an input file

407
408     my $pathname = $_[0];
409
410     if ( not is_readable_text_file($pathname) ) { return; }
411
412     open( IN, "<", $pathname ); read the input file into @code
413     @code = <IN>;
414     close( IN );
415
416     list_subs(); create the list of sub names
417
418     my $out = html_filename( $pathname ); change "xxx.pl" to "xxx.html"
419
420     open( OUT, ">", $out );
421     select( OUT ); redirect "print" statements to the .html file
422
423     write_header( $pathname, $out );
424     write_rows();
425     write_footer( $out );
426
427     select( STDOUT );
428     close( OUT );
429
430     print "\'$out\' written.\n";
431
432 } end of write_html()
433
toc sub write_header {

write the part of the .html file above the actual listing

435
436     my $pathname = $_[0]; input file pathname
437     my $out = $_[1]; output file pathname
438
439     print "<! $out, documenting $pathname >\n"; start with a comment
440     my ($sec, $min, $hr, $day, $mon, $yr, $wd, $doy, $dst)
441         = localtime( time() );
442     $mon++;
443     $yr += 1900;
444
445     if ( $min < 10 ) { $min = "0".$min; }
446     if ( $day < 10 ) { $day = "0".$day; }
447
448     my $timestamp = "$hr:$min:$sec, $mon\/$day\/$yr";
449
450     print "<! written by pl2html, $timestamp >\n\n"; and another comment
451     print "<html>\n\n";
452     print "<head>\n\n";
453     print " <title>$out</title>\n\n";
454     print "</head>\n\n";
455
456     print "<body bgcolor=$bgcolor>\n\n"; note, you don't see much of this color, it's a surround
457
458     print "<h1 align=\"center\">$pathname</h1>\n"; big title
459     print "<h4 align=\"right\">Formatted and hyperlinked by pl2html, $timestamp</h4>\n"; subtitle
460     print "<hr align=\"center\" width=90%>\n\n"; draw a line
461     print "<a name=\"toc\"></a>\n"; anchor for "toc" links
462
463     print "<table align=\"center\" bgcolor=$toccolor border=1 cellpadding=10>\n"; this starts the toc
464     print "<tr><td>\n";
465
466     my $cols = 4;
467     my $subs_per_col = int( ($#subs + $cols)/$cols );
468     my $i = 0;
469         my $j = 0;
470
471     foreach my $sname ( @subs ) { list names in four cols. Each col is one
472
473 names go down first col, then down second, etc.
474 order is the order the programmer uses for the subs
475
476         print "<a href=\"#$sname\"> $sname </a> <br>\n"; link toc name to source location
477         $i++; $j++;
478
479         if ( $i == $subs_per_col ) { on to next at end of column
480                     unless ( $j == @subs ) { don't do it if we've finished
481             print "</td><td>\n";
482             $i=0;
483                     }
484         }
485     } end of foreach
486
487     print "</td></tr></table>\n"; close the table
488
489 } end of write_header()
490
toc sub write_row_html {

writes the html for non "sub" code lines

492
493     my ( $color, $code, $comment, $i ) = @_;
494
495     print "<tr bgcolor=$color>\n"; set the color for the row
496     print " <td align=\"right\" bgcolor=$numcolor width=$num_width> $i </td>\n"; write the line number (overrides row color)
497
498     if ( $code =~ /^\#/ ) { if line starts with hash, use both columns
499         print " <td colspan=2> <font face=\"FreeMono,Monospace,Courier,Courier New\"> $code </font> </td>\n"; write the code across two columns
500     }
501     else {
502         print " <td width=$code_width> <font face=\"FreeMono,Monospace,Courier,Courier New\"> $code </font> </td>\n"; write the code
503         print " <td width=$comment_width> $comment </td>\n"; write the comment
504     }
505     print "</tr>\n\n";
506
507 } end of write_row_html()
508
toc sub write_row_plain {

write row that does not include sub calls

510
511     my $line = $_[0]; input line
512     my $i = $_[1]; current line number
513     my $color = $_[2]; color (alternates)
514
515     my ($code, $comment) = split_on_comment( $line ); divide line into code and comment
516     $code = properly_indent( $code ); replace " " with "&nbsp"
517
518     write_row_html( $color, $code, $comment, $i ); write the html
519
520 } end of write_row_plain()
521
toc sub write_row_sub {

write a row that starts a sub

523
524     my $line = $_[0]; input line
525     my $i = $_[1]; current line number
526     my $color = $_[2]; color
527
528     my $sname = sub_name( $line ); get the name from the "sub" line
529     my ( $code, $comment) = split_on_comment( $line ); split to code and comment
530     $code = properly_indent( $code ); replace " " with "&nbsp;"
531
532     print "<tr bgcolor=$color>\n"; start a row and set the color
533     print " <td align=\"right\" bgcolor=$numcolor width=$num_width> ";
534     print " <a href=\"#toc\"> toc </a> </td>\n"; write a link to "toc" instead of line number
535     print " <td width=$code_width> <font face=\"FreeMono,Monospace,Courier,Courier New\" size=+1><b> $code "; write the code
536     print "</b></font> </td>\n";
537     print " <td width=$comment_width> <h3> $comment </h3> </td>\n"; write the comment
538     print "</tr>\n\n";
539
540 } end of write_row_sub
541
toc sub write_row_sub_calls {

write a row that includes sub calls

543
544     my $line = $_[0]; input line
545     my $i = $_[1]; line number
546     my $color = $_[2]; color
547
548     my ($code, $comment) = split_on_comment( $line ); split to code and comment
549     $code = add_anchors( $code ); replace sub calls with hyperlinks
550     $code = properly_indent( $code ); replace " "s with "&nbsp;"s
551
552     write_row_html( $color, $code, $comment, $i ); write the html
553
554 } end of write_row_sub_call()
555
toc sub write_rows {

writes the reformatted, hyperlinked code

557
558     my $i=1; first line is 1, most editors
559
560     print "<table align=\"center\" border=0\n";
561     print " cellpadding=1 cellspacing=0 width=95%>\n"; open a table
562
563     foreach my $line ( @code ) {
564
565         my $color = $rowcolors[ $i%2 ]; alternate colors
566
567         chomp( $line ); eat EOL
568
569         if ( is_sub($line) ) {
570             write_sub_end(); end the current table (you can't get anchors within a table)
571             write_sub_begin( $line ); add anchor and open a new table
572             write_row_sub( $line, $i, $color ); } write the "sub" line
573         elsif( has_sub_calls($line) ) { write_row_sub_calls( $line, $i, $color ); } or write a line with sub calls
574         else { write_row_plain( $line, $i, $color ); } or write a line w/o sub calls
575
576         $i++;
577
578     } end of foreach
579
580 } end of write_rows()
581
toc sub write_sub_begin {

each sub gets an anchor and starts a new table

583
584     my $line = $_[0];
585     my $sname = sub_name( $line );
586
587     print "<a name=\"$sname\"></a>\n"; the anchor
588     print "<table align=\"center\" border=0\n";
589     print " cellpadding=1 cellspacing=0 width=95%>\n"; the new table
590
591 } end of write_sub_begin()
592
toc sub write_sub_end {

start of each sub ends previous table

594
595     print "</table>\n\n";
596
597 } end of write_sub_end()
598
toc sub help_requested {

look for and respond to help switch "-?", "/h", etc.

600
601 my $msg_help = <<'ENDHELP';
602   syntax: perl perldoc source_file_path_name
603 ENDHELP
604
605 my $arg = $ARGV[0]; help must be first arg
606 my $help =
607   ( $arg =~ /(^-\?$)|(^\/\?$)|(^-h$)|(^\/h$)|(^-help$)|(^\/help$)/ ); anything reasonable
608
609 if ( $help ) {
610     print $msg_help;
611     die "\n"; assumption, user is not asking for help AND providing other specs
612 }
613
614 return $FALSE;
615
616 } end of help_requested()
617
618 # end of /usr/bin/pl2html