# bnf2html.py - convert the parser portion of Python's BNF to HTML
# copyright 2008, Martin Rinehart

''' Prepare hyperlinked HTML of the EBNF Python definitions from python.org.

The 'download entire grammar as text' link in the language reference
section is followed to get the EBNF. Manually, the single blank line at the
top of this file is deleted. The remainder is saved as 'grammar.txt' exactly
as it was sent from python.org.

The result file is read here and converted into a dictionary in which the
production name is the key and the definition(s) is (are) the value. From
this dictionary, hyperlinked HTML is generated. There is a plain page showing
all definitions and a One-At-A-Time page that lets the user mouse click from
production to production. The writeOaatXxxx() methods write the
One-At-A-Time page's body section.

Special thanks to Gabriel Genellina for helping make this more Pythonic, for
suggesting the 'title=' tooltip and for encouraging me to make the final
design improvements.

Contents here are:
    imports
    color and pathname "constants"
    fileRead()
    prepareDict()
    HTML writing routines, alphabetically
    utility functions
    mainline

Structure, omitting utility functions:
    Main
        fileRead
        prepareDict
        writeHTML
            writeHTML0 - frameset page
                writeTopComments
                writeHead
                writeBottomComment
            writeHTML1 - main page
                writeTopComments
                writeHead
                writeBody
                    writeBodyEntries
                        writeBodyEntry
                writeBottomComment
            writeHTML2 - Oaat page
                writeTopComments
                writeHead
                writeOaatBody
                    writeOaatEntry
                writeBottomComment

Tip of the hat to Opera's 'Error Console' which, set to Messages, is very,
very crabby about HTML and JavaScript.
'''

# My Decaf language tokenizer, helps hyperlink() definition names.
# (Supplies tokenize(), NAME and END_OF_LINE used by define().)
from tok import *

ROW_COLORS = [ '#f8f8d0', '#e0f8f8' ] # rows alternate these colors
BACKGROUND_COLOR = '#f8f8f4'

_INPUT_PATHNAME = 'grammar.txt'
_OUTPUT_PATHNAME = 'python-grammar.html'
_OUTPUT1_PATHNAME = 'python-grammar1.html'
_OUTPUT2_PATHNAME = 'python-grammar2.html'


def fileRead():
    ''' Returns bnf as list of strings (the lines of _INPUT_PATHNAME,
    line endings included). The file is closed even if reading fails. '''

    with open( _INPUT_PATHNAME, 'r' ) as infile:
        return infile.readlines()


def prepareDict( bnf ):
    ''' Converts list of strings to dictionary, definition values keyed by
    production names.

    A production starts on a line containing '::=' and runs until the next
    blank line (or the end of the input). Any definition text that follows
    '::=' on the header line is kept, and continuation lines are joined with
    a single space so that adjacent tokens cannot fuse together.

    Raises ValueError if a production is expected but the line has no '::='.
    '''

    prods = {}
    key = None      # None means "waiting for the next production header"
    parts = []

    for lineno, s in enumerate( bnf, 1 ):
        text = s.strip()

        if key is None:
            if not text:
                continue # skip blank lines between productions

            loc = text.find( '::=' ) # find() returns -1; index() would raise
            if loc < 0:
                # Got here? Input file is damaged.
                raise ValueError(
                    'start error at line %d: %r' % ( lineno, s ) )

            key = text[:loc].strip()
            rest = text[loc + 3:].strip()
            parts = [rest] if rest else []

        elif text:
            parts.append( text )

        else: # a blank line ends the current production
            prods[key] = ' '.join( parts )
            key = None

    # the last production need not be followed by a blank line
    if key is not None:
        prods[key] = ' '.join( parts )

    return prods

# end of prepareDict()


# HTML writing routines, alphabetically
#
# NOTE(review): in this copy of the file the markup inside the string
# literals below appears to have been stripped (tags are missing, so several
# parameters such as 'color', 'pathname' and 'comment' are no longer used).
# The literals are reproduced as found; restore them from the original
# before relying on the generated pages.

def writeBody():
    ''' Clerk and controller for the body section of the main page. '''

    _OFILE.write('''

Python's EBNF Grammar


''')

    writeBodyEntries()

    _OFILE.write( '''

End of Productions top

 
eof ''' )

    _OFILE.write('\n' )

# end of writeBody()


def writeBodyEntries():
    ''' Write the production entries: the '_stmt' productions first, then
    all the others, each group in sorted order. Row colors keep alternating
    across both groups. '''

    plist = sorted( _PRODUCTIONS )
    line = 0 # counter for alternating colors

    _OFILE.write( ' Statement Productions\n\n\n' )

    for pname in plist:
        if isStatement( pname ):
            writeBodyEntry( pname, ROW_COLORS[line % 2] )
            line += 1

    _OFILE.write( '\n\n' )
    _OFILE.write( ' Other Productions\n\n\n' )

    for pname in plist:
        if not isStatement( pname ):
            writeBodyEntry( pname, ROW_COLORS[line % 2] )
            line += 1


def writeBodyEntry( pname, color ):
    ''' Write an individual entry (production name and hyperlinked
    definition) in the body section. 'color' is the row color. '''

    _OFILE.write( '\n' )
    _OFILE.write( ' \n' )
    _OFILE.write( '\t\n' )
    _OFILE.write( '\t\n' )
    _OFILE.write( '\n' + '' + pname + ' ' + define( pname, True ) +
        ' \n\t\n\n\n' )


def writeBottomComment( pathname ):
    ''' Add EOF comment. '''

    _OFILE.write( '\n' )


def writeHead( main ):
    ''' Writes the HTML head section. 'main' is true when the page also
    needs the JavaScript block. '''

    _OFILE.write( ''' Python's EBNF Grammar ''' )

    if main:
        _OFILE.write( ''' ''' )

    _OFILE.write(''' ''' )

# end of writeHead()


def _writePage( pathname, writer ):
    ''' Open 'pathname' as the current output file, run 'writer' and close
    the file again, even when 'writer' raises. '''

    global _OFILE

    _OFILE = open( pathname, 'w' )
    try:
        writer()
    finally:
        _OFILE.close()


def writeHTML():
    ''' Organizer for the HTML writing phase: frameset page, main page and
    One-At-A-Time page, in that order. '''

    _writePage( _OUTPUT_PATHNAME, writeHTML0 )
    _writePage( _OUTPUT1_PATHNAME, writeHTML1 )
    _writePage( _OUTPUT2_PATHNAME, writeHTML2 )

# end of writeHTML()


def writeHTML0():
    ''' Writes the frameset page. '''

    writeTopComments( _OUTPUT_PATHNAME,
        "Python EBNF grammar's frameset page." )
    writeHead( False ) # False == don't write JavaScript

    _OFILE.write(''' ''' )

    writeBottomComment( _OUTPUT_PATHNAME )


def writeHTML1():
    ''' Writes the main page listing all productions. '''

    writeTopComments( _OUTPUT1_PATHNAME,
        'page listing all Python productions (BNF grammar)')
    writeHead( True ) # True == write the JavaScript
    writeBody()
    writeBottomComment( _OUTPUT1_PATHNAME )

# end of writeHTML1()


def writeHTML2():
    ''' Writes the One-At-A-Time page. '''

    writeTopComments( _OUTPUT2_PATHNAME,
        'page listing Python productions (BNF grammar) one at a time' )
    writeHead( False ) # False == don't write JavaScript
    writeOaatBody()
    writeBottomComment( _OUTPUT2_PATHNAME )


def writeOaatBody():
    ''' Write One-At-A-Time body section: a short introduction followed by
    one entry per production, in sorted order. '''

    _OFILE.write( '\n\n' )

    # '&copy;' keeps this source file pure ASCII.
    _OFILE.write( '''
Adjust this frame
to fit: import_stmt
EBNF from docs.python.org/ref/grammar.txt.

HTML written by bnf2html.py , &copy; 2008, ''' +
        " Martin Rinehart." + '''

''' )

    for pname in sorted( _PRODUCTIONS ):
        writeOaatEntry( pname )

    _OFILE.write( '\n\n' )

# end of writeOaatBody()


def writeOaatEntry( pname ):
    ''' Write One-At-A-Time production entry. '''

    # write a table to vertically separate the entries
    _OFILE.write( '''
 
''' )

    # write an anchor link
    _OFILE.write( " \n" )

    # write the production name as a header
    _OFILE.write( '''
prev   next
''' + pname + '''
''' )

    # write the definition
    defi = define( pname, False ) # False = this is One-At-A-Time
    _OFILE.write( defi )

# end of writeOaatEntry()


def writeTopComments( pathname, comment ):
    ''' Writes BOF and copyright comments. '''

    _OFILE.write( '' )
    _OFILE.write( ''' ''' )

# end of writeTopComments


# utility functions

def define( pname, main ):
    ''' Create definition portion of production. 'main' is true for the
    main page, false for the Oaat page.

    Returns the definition as HTML text in which every NAME token has been
    passed through hyperlink(). Output stops at the first END_OF_LINE token.
    '''

    defi = _PRODUCTIONS[ pname ]
    defi = defi.replace( ' ', '&nbsp;' ) # don't break on spaces
    defi = defi.replace( '|', ' |' ) # do break before 'or's
    defi = defi.replace( '<', '&lt;' ) # eliminate false HTML tags

    pieces = []
    for tok in tokenize( [defi] ): # tokenize() needs a list of strings
        if tok.type == END_OF_LINE:
            break
        if tok.type == NAME:
            pieces.append( hyperlink( tok.textValue, main ) )
        else:
            pieces.append( tok.textValue )

    return ''.join( pieces )

# end of define()


def hyperlink( p, main ):
    ''' Returns 'p' or, if 'p' is a production name, an appropriate
    hyperlink whose 'title=' tooltip shows the production's definition.

    NOTE(review): the anchor markup was unreadable in this copy of the file
    and has been rebuilt. The original docstring said the main page used a
    (JavaScript) function call and the One-At-A-Time page a direct jump;
    here the main page links to the production's anchor in the
    One-At-A-Time page instead. Confirm against the original. '''

    try:
        defi = unquote( _PRODUCTIONS[p] )
    except KeyError:
        return p # some text (e.g. NEWLINE ) is not a production name

    if main:
        target = _OUTPUT2_PATHNAME + '#' + p
    else:
        target = '#' + p

    return "\n\t\t<a href='" + target + "' title='" + defi + "'>" + \
        p + '</a>'

# end of hyperlink()


def hyperlinkOaat( p ):
    ''' Turns 'prod_name' into a direct-jump hyperlink to that production;
    text that is not a production name is returned unchanged.

    NOTE(review): anchor markup rebuilt, see hyperlink(). '''

    try:
        defi = unquote( _PRODUCTIONS[p] )
    except KeyError:
        return p # some text (e.g. NEWLINE ) is not a production name

    return "\n\t\t<a href='#" + p + "' title='" + defi + "'>" + p + '</a>'

# end of hyperlinkOaat()


def isStatement( name ):
    ''' The _stmt _PRODUCTIONS are all written before the others. '''

    return name.endswith( '_stmt' )


def unquote( defi ):
    ''' Replace quotes in definitions with appropriate HTML entities, so
    that a definition can be used inside an HTML attribute value.

    Quoted triple-quote sequences need no special handling: replacing each
    quote character individually already covers them. '''

    defi = defi.replace( '"', '&quot;' )
    defi = defi.replace( "'", '&#39;' )

    return defi


# mainline

if __name__ == '__main__':

    # _ALLCAPS is a modular "constant" (written here, read everywhere)
    _OFILE = '' # managed by writeHTML()
    _PRODUCTIONS = prepareDict( fileRead() )

    # This definition used to get lost, presumably because it sits on the
    # same line as its '::=', which prepareDict() once ignored. The fallback
    # is kept only for the case where the definition is still empty.
    if not _PRODUCTIONS.get( 'try_stmt' ):
        _PRODUCTIONS['try_stmt'] = ' try1_stmt | try2_stmt '

    writeHTML() # opens and closes each output file itself

    print( 'HTML written to "' + _OUTPUT_PATHNAME + '", "' +
        _OUTPUT1_PATHNAME + '" and "' + _OUTPUT2_PATHNAME + '".' )

# end of bnf2html.py