# bnf2html.py - convert the parser portion of Python's BNF to HTML
# copyright 2008, Martin Rinehart
''' Prepare hyperlinked HTML of the EBNF Python definitions from python.org.
The 'download entire grammar as text' link in the language reference section is
followed to get the EBNF. Manually, the single blank line at the top of this
file is deleted. The remainder is saved as 'grammar.txt' exactly as it was sent
from python.org.
The result file is read here and converted into a dictionary in which the
production name is the key and the definition(s) is (are) the value. From this
dictionary, hyperlinked HTML is generated. There is a plain page showing
all definitions and a One-At-A-Time page that lets the user mouse click
from production to production.
The writeOaatXxxx() methods write the One-At-A-Time page's body section.
Special thanks to Gabriel Genellina for helping make this more Pythonic,
for suggesting the 'title=' tooltip and for encouraging me to make the final
design improvements.
Contents here are:
imports
color and pathname "constants"
fileRead()
prepareDict()
HTML writing routines, alphabetically
utility functions
mainline
Structure, omitting utility functions:
Main
    fileRead
    prepareDict
    writeHTML
        writeHTML0 - frameset page
            writeTopComments
            writeHead
            writeBottomComment
        writeHTML1 - main page
            writeTopComments
            writeHead
            writeBody
                writeBodyEntries
                    writeBodyEntry
            writeBottomComment
        writeHTML2 - Oaat page
            writeTopComments
            writeHead
            writeOaatBody
                writeOaatEntry
            writeBottomComment
Tip of the hat to Opera's 'Error Console' which, set to Messages, is very,
very crabby about HTML and JavaScript.
'''
# My Decaf language tokenizer, helps hyperlink() definition names.
from tok import *
# Two colors handed out alternately (index = row counter % 2) by
# writeBodyEntries() to the entries it writes.
ROW_COLORS=[ '#f8f8d0', '#e0f8f8' ] # rows alternate these colors
# NOTE(review): not referenced by any code visible in this file; presumably
# it was used inside the generated page markup - confirm before removing.
BACKGROUND_COLOR = '#f8f8f4'
# Grammar text file read by fileRead().
_INPUT_PATHNAME = 'grammar.txt'
# Output files, opened in turn by writeHTML():
#   _OUTPUT_PATHNAME  - frameset page      (writeHTML0)
#   _OUTPUT1_PATHNAME - main page          (writeHTML1)
#   _OUTPUT2_PATHNAME - One-At-A-Time page (writeHTML2)
_OUTPUT_PATHNAME = 'python-grammar.html'
_OUTPUT1_PATHNAME = 'python-grammar1.html'
_OUTPUT2_PATHNAME = 'python-grammar2.html'
def fileRead():
    ''' Returns bnf as list of strings.

    Reads every line of the file named by _INPUT_PATHNAME; each string
    keeps its line ending, exactly as readlines() delivers it. Any error
    raised by open() or readlines() propagates to the caller. '''
    infile = open( _INPUT_PATHNAME, 'r' )
    try:
        # try/finally: the handle is released even when the read fails
        return infile.readlines()
    finally:
        infile.close()
def prepareDict( bnf ):
    ''' Converts list of strings to dictionary, definition values keyed by
    production names.

    Expected input: productions separated by blank lines. The first
    non-blank line of a production holds 'name ::=' (optionally followed by
    definition text on the same line); every following non-blank line is
    stripped and appended to the definition; a blank line ends the
    production.

    bnf     - list of lines, e.g. as returned by fileRead()
    returns - dict mapping production name to its definition string
    raises  - ValueError if a production's first line contains no '::='
    '''
    prods = {}
    key = None  # production being collected; None while between productions
    val = ''
    for lineno, s in enumerate( bnf ):
        text = s.strip()
        if key is None:
            if not text:
                continue # blank lines between productions are skipped
            # find() returns -1 when '::=' is absent (index() would raise
            # before the damaged-input check could ever run)
            loc = s.find( '::=' )
            if loc < 0:
                # Got here? Input file is damaged.
                raise ValueError( 'start error at line %d: %r' %
                    ( lineno + 1, s ) )
            key = s[:loc].strip()
            # keep a definition written on the same line as the name,
            # e.g. 'try_stmt ::= try1_stmt | try2_stmt'
            val = s[loc + len( '::=' ):].strip()
        elif text:
            # NOTE(review): continuation lines are joined with no separator,
            # as before - confirm that is what the tokenizer expects.
            val += text
        else:
            prods[key] = val
            key = None
            val = ''
    # end of for loop
    if key is not None:
        # input ended without a trailing blank line: keep the last production
        prods[key] = val
    return prods
# end of prepareDict()
# HTML writing routines, alphabetically
def writeBody():
    ''' Write the body of the main page to _OFILE: the heading text, then
    all production entries (delegated to writeBodyEntries()), then the
    closing text. '''
    emit = _OFILE.write
    heading = "\nPython's EBNF Grammar\n"
    emit( heading )
    writeBodyEntries()
    # closing text first, then the final newline, as two separate writes
    for chunk in ( '\nEnd of Productions top\neof\n', '\n' ):
        emit( chunk )
# end of writeBody()
# Writes every production of _PRODUCTIONS to _OFILE in two passes over the
# sorted names: first those for which isStatement() is true, then the rest.
# Each entry is written by writeBodyEntry() with a color from ROW_COLORS.
# NOTE(review): the two section-heading literals below are split across
# lines and cannot be valid as shown; presumably markup was lost from them -
# restore from the original file before running.
def writeBodyEntries():
''' Write the production entries. '''
# sorted() over the dictionary yields the production names in order
plist = sorted( _PRODUCTIONS )
line = 0 # counter for alternating colors
_OFILE.write( ' Statement Productions
\n\n' )
# first pass: statement productions only
for pname in plist:
if isStatement( pname ):
writeBodyEntry( pname, ROW_COLORS[line % 2] )
line += 1
_OFILE.write( '' )
_OFILE.write( ' Other Productions
\n\n' )
# second pass: everything else; 'line' is not reset, so the color
# alternation simply continues from the first pass
for pname in plist:
if not isStatement( pname ):
writeBodyEntry( pname, ROW_COLORS[line % 2] )
line += 1
# Writes one production to _OFILE: the production name, followed by its
# definition as produced by define( pname, True ) (True selects the main-page
# form of the hyperlinks).
# NOTE(review): 'color' is not used by any code visible here, and the last
# literal is split across lines; presumably markup (including the use of
# 'color') was lost from these strings - restore from the original file.
def writeBodyEntry( pname, color ):
''' Write an individual entry in the
section. '''
_OFILE.write( '\n' )
_OFILE.write( ' \n' )
# the production name
_OFILE.write( '\t| ' + \
'' + pname + \
' | \n' )
# the definition, with production names hyperlinked by define()
_OFILE.write( '\t ' + define( pname, True ) + ' \n\t | \n' )
_OFILE.write( '
\n\n' )
def writeBottomComment( pathname ):
    ''' Finish the page being written to _OFILE with its end-of-file text.
    'pathname' is supplied by every caller but is not used by the code
    here. '''
    closing = '\n'
    _OFILE.write( closing )
def writeHead( main ):
    ''' Write the head portion of a page to _OFILE.
    When 'main' is true one extra chunk is written between the title text
    and the closing chunk (callers describe it as the JavaScript). '''
    chunks = [ "\nPython's EBNF Grammar\n" ]
    if main:
        chunks.append( '\n' )
    chunks.append( '\n' )
    # one write per chunk, in order
    for chunk in chunks:
        _OFILE.write( chunk )
# end of writeHead()
def writeHTML():
    ''' Organizer for the HTML writing phase.

    For each output page, binds the module-level _OFILE to the freshly
    opened file, runs that page's writer, and closes the file. Pages are
    written in the order frameset, main, One-At-A-Time. Errors from open()
    or from a writer propagate to the caller. '''
    global _OFILE
    pages = (
        ( _OUTPUT_PATHNAME, writeHTML0 ),
        ( _OUTPUT1_PATHNAME, writeHTML1 ),
        ( _OUTPUT2_PATHNAME, writeHTML2 ),
    )
    for pathname, writer in pages:
        _OFILE = open( pathname, 'w' )
        try:
            writer()
        finally:
            # close even if the writer raised, so no handle is left open
            _OFILE.close()
# end of writeHTML()
def writeHTML0():
    ''' Writes the frameset page.

    Output goes to _OFILE, which writeHTML() has opened on
    _OUTPUT_PATHNAME. '''
    # description corrected: this tool documents Python's grammar, not Java's
    writeTopComments( _OUTPUT_PATHNAME,
        "Python EBNF grammar's frameset page." )
    writeHead( False ) # False == don't write JavaScript
    _OFILE.write('''
''' )
    writeBottomComment( _OUTPUT_PATHNAME )
def writeHTML1():
    ''' Write the main page, which lists every production: top comments,
    head (with the JavaScript), body, bottom comment. '''
    target = _OUTPUT1_PATHNAME
    description = 'page listing all Python productions (BNF grammar)'
    writeTopComments( target, description )
    writeHead( True ) # True == write the JavaScript
    writeBody()
    writeBottomComment( target )
# end of writeHTML1()
def writeHTML2():
    ''' Write the One-At-A-Time page: top comments, head (without the
    JavaScript), One-At-A-Time body, bottom comment. '''
    target = _OUTPUT2_PATHNAME
    description = \
        'page listing Python productions (BNF grammar) one at a time'
    writeTopComments( target, description )
    writeHead( False ) # False == don't write JavaScript
    writeOaatBody()
    writeBottomComment( target )
def writeOaatBody():
    ''' Write the body of the One-At-A-Time page to _OFILE: opening text,
    one entry per production in sorted name order, closing text. '''
    emit = _OFILE.write
    emit( '\n\n' )
    emit( '\n' )
    names = list( _PRODUCTIONS )
    names.sort()
    for name in names:
        writeOaatEntry( name )
    emit( '\n\n' )
# end of writeOaatBody()
def writeOaatEntry( pname ):
    ''' Write a single production's entry on the One-At-A-Time page. '''
    emit = _OFILE.write
    # in order: the separating table, the anchor link, the header
    for chunk in ( '\n', ' \n', '\n' ):
        emit( chunk )
    # the definition comes last; False = this is One-At-A-Time
    emit( define( pname, False ) )
# end of writeOaatEntry()
def writeTopComments( pathname, comment ):
    ''' Write the beginning-of-file and copyright text of a page to _OFILE.
    Both parameters are supplied by every caller; the code here does not
    use either of them. '''
    # two writes, in this order
    for chunk in ( '', '\n' ):
        _OFILE.write( chunk )
# end of writeTopComments
# utility functions
# Builds the text written out as the definition of production 'pname':
# looks the definition up in _PRODUCTIONS, applies three string
# replacements, tokenizes the result and re-joins the token texts, with
# each NAME token passed through hyperlink().
# Raises KeyError if 'pname' is not a key of _PRODUCTIONS.
# NOTE(review): as shown, the replacement strings in the three replace()
# calls are (nearly) identical to the text they replace; going by the
# trailing comments they were presumably HTML entities / a line break -
# confirm against the original file.
# NOTE(review): tokenize, NAME and END_OF_LINE are not defined in this file;
# presumably they come from 'from tok import *' - confirm.
def define( pname, main ):
''' Create definition portion of production.
'main' is true for the main page, false for the Oaat page.
'''
defi = _PRODUCTIONS[ pname ]
defi = defi.replace( ' ', ' ' ) # don't break on spaces
defi = defi.replace( '|', ' |' ) # do break before 'or's
defi = defi.replace( '<', '<' ) # eliminate false HTML tags
tokens = tokenize( [defi] ) # tokenize() needs a list of strings
ret = ''
# Rebuild the definition token by token: NAME tokens have their text
# replaced by whatever hyperlink() returns, the first END_OF_LINE token
# stops the loop (its text is not appended), every other token's text is
# appended unchanged.
for tok in tokens:
if ( tok.type == NAME ):
tok.textValue = hyperlink( tok.textValue, main )
if ( tok.type == END_OF_LINE ):
break
ret += tok.textValue
return ret
# end of define()
# Called by define() for every NAME token. The lookup _PRODUCTIONS[p] is
# what decides whether 'p' is a production name: a KeyError means it is not,
# and 'p' is returned unchanged.
# NOTE(review): the assignment under 'if main:' is an unterminated literal
# as shown, and no branch for the One-At-A-Time page is visible; presumably
# the link markup (and the use of 'defi') was lost - restore from the
# original file.
def hyperlink( p, main ):
''' Returns 'p' or, if 'p' is a production name, an appropriate hyperlink.
The hyperlink is a function call in the main page, a direct jump in the
One-At-A-Time page.'''
ret = p # default, if not a production name
try:
# raises KeyError when 'p' is not a production name
defi = unquote( _PRODUCTIONS[p] )
if main:
ret = "\n\t\t ' + p + ' '
except KeyError:
pass # some text (e.g. NEWLINE ) is not a production name
return ret
# end of hyperlink()
# Same production-name test as hyperlink(): the lookup _PRODUCTIONS[p]
# raises KeyError for text that is not a production name, in which case 'p'
# is returned unchanged.
# NOTE(review): no call to this function is visible in this file.
# NOTE(review): the assignment to 'ret' in the try block is an unterminated
# literal as shown; presumably the link markup (and the use of 'defi') was
# lost - restore from the original file.
def hyperlinkOaat( p ):
''' Turns 'prod_name' into
" prod_name ". '''
try:
defi = unquote( _PRODUCTIONS[p] )
ret = "\n\t\t ' + p + ' '
except KeyError:
ret = p # some text (e.g. NEWLINE ) is not a production name
return ret
# end of hyperlinkOaat()
def isStatement( name ):
    ''' True when 'name' is a statement production, i.e. ends in '_stmt'.
    Statement productions are written ahead of all the others. '''
    suffix = '_stmt'
    return name[-len( suffix ):] == suffix
# Applies four replace() calls to 'defi' in sequence and returns the result;
# called by hyperlink() and hyperlinkOaat() on a production's definition.
# NOTE(review): as shown, the replacement strings are the same characters
# as the text they replace and the single-quote ones are not valid literals;
# presumably they were HTML character entities - restore from the original
# file before running.
def unquote( defi ):
''' Replace quotes in definitions with appropriate HTML. '''
defi = defi.replace( '"', '"' )
defi = defi.replace( "'", ''' )
defi = defi.replace( '"\'\'\'"', '"'''"' )
defi = defi.replace( "'\"\"\"'", ''"""'' )
return defi
# mainline
if __name__ == '__main__':
    # _ALLCAPS names are module-level "constants": assigned here, read by
    # the functions above.
    _OFILE = '' # placeholder; writeHTML() rebinds it for each output file
    _PRODUCTIONS = prepareDict( fileRead() )
    # Workaround: the value of this one production is reported to get lost
    # on the way into the dictionary, so it is supplied by hand.
    _PRODUCTIONS['try_stmt'] = ' try1_stmt | try2_stmt '
    writeHTML()
    _OFILE.close()
    message = 'HTML written to "' + _OUTPUT_PATHNAME + '", "' + \
        _OUTPUT1_PATHNAME + '" and "' + _OUTPUT2_PATHNAME + '".'
    # a single parenthesised expression prints the same text as the bare
    # print statement
    print( message )
# end of bnf2html.py