Please login or register. Welcome to the Studio, guest!


Quick Links:


newBookmarkLockedFalling

Eric

Eric Avatar



1,442


November 2005
Version 0.2:
from html.parser import HTMLParser

class HTMLFormatter(HTMLParser):
    """Re-indent HTML so that every tag and every text run sits on its own line.

    Feed markup with ``feed()`` (inherited from ``HTMLParser``) and collect
    the result with ``render()``.  Each nesting level is indented by one tab.
    Leading and trailing whitespace of a text run is stripped; whitespace
    inside the run is kept as written.

    Attributes:
        tabbed: Current nesting depth, i.e. the number of tabs written in
            front of the next line.
        formatted: List of output fragments; ``render()`` joins them.
    """

    # Elements that never have content or an end tag (the void elements of
    # the HTML standard, plus the obsolete ``param``).  A start tag for one
    # of these must not deepen the indentation, because no matching end tag
    # will ever arrive to undo it.
    _VOID_ELEMENTS = frozenset((
        'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
        'link', 'meta', 'param', 'source', 'track', 'wbr',
    ))

    def __init__(self):
        # convert_charrefs=False keeps "&amp;", "&#169;" etc. as separate
        # parser events.  With the default (True) the parser decodes them
        # into the text, handle_charref/handle_entityref are never called,
        # and e.g. "&lt;" would be written back as a raw "<".
        super().__init__(convert_charrefs=False)
        self.tabbed = 0
        self.formatted = []
        # Pieces of the text run currently being collected (character data
        # interleaved with entity/character references).
        self._pending_text = []

    def append(self, data):
        """Append ``str(data)`` to the output buffer."""
        self.formatted.append(str(data))

    def _write_tabs(self):
        """Append the indentation for the current nesting depth."""
        self.append('\t' * self.tabbed)

    def _flush_text(self):
        """Write the collected text run, if any, as one indented line."""
        text = ''.join(self._pending_text).strip()
        self._pending_text = []
        if text:
            self._write_tabs()
            self.append(text + '\n')

    def _write_line(self, markup):
        """Flush pending text, then write ``markup`` on its own line."""
        self._flush_text()
        self._write_tabs()
        self.append(markup + '\n')

    def _format_attrs(self, attrs):
        """Return the attribute list as it should appear inside a tag.

        Args:
            attrs: Iterable of ``(name, value)`` pairs as passed to
                ``handle_starttag``.  ``value`` is ``None`` for an attribute
                written without a value (``<input disabled>``).

        Returns:
            A string in which every attribute is preceded by one space, or
            an empty string when there are no attributes.
        """
        parts = []
        for name, value in attrs:
            if value is None:
                # Valueless attribute: keep it bare.
                parts.append(' ' + name)
            else:
                # The parser hands over attribute values with references
                # already decoded, so '&' and the quote character have to
                # be turned back into references to stay valid HTML.
                escaped = value.replace('&', '&amp;').replace('"', '&quot;')
                parts.append(' ' + name + '="' + escaped + '"')
        return ''.join(parts)

    def _format_tag(self, tag, ttype='start', ats=None):
        """Build the text of a single tag.

        Args:
            tag: Tag name.
            ttype: ``'start'`` for ``<tag>``, ``'end'`` for ``</tag>`` or
                ``'self'`` for ``<tag />``.
            ats: Optional iterable of ``(name, value)`` attribute pairs.

        Returns:
            The tag as a string, without indentation or trailing newline.
        """
        ftag = '</' if ttype == 'end' else '<'
        ftag = ftag + tag
        if ats:
            ftag = ftag + self._format_attrs(ats)
        if ttype == 'self':
            ftag = ftag + ' /'
        return ftag + '>'

    def handle_starttag(self, tag, attrs):
        """Write a start tag and indent whatever follows it."""
        self._write_line(self._format_tag(tag, ats=attrs))
        if tag not in self._VOID_ELEMENTS:
            self.tabbed = self.tabbed + 1

    def handle_endtag(self, tag):
        """Write an end tag one level shallower than its content."""
        # The text belongs to the element being closed, so it has to be
        # written before the indentation is reduced.
        self._flush_text()
        if tag not in self._VOID_ELEMENTS:
            # Never go below zero: a stray end tag would otherwise skew the
            # indentation of everything after it.
            self.tabbed = max(self.tabbed - 1, 0)
        self._write_tabs()
        self.append(self._format_tag(tag, ttype='end') + '\n')

    def handle_startendtag(self, tag, attrs):
        """Write a self-closing tag (``<tag />``) at the current depth."""
        self._write_line(self._format_tag(tag, ttype='self', ats=attrs))

    def handle_data(self, data):
        """Collect character data; it is written when the text run ends."""
        self._pending_text.append(data)

    def handle_charref(self, name):
        """Collect a numeric character reference as part of the text run."""
        self._pending_text.append('&#' + name + ';')

    def handle_entityref(self, name):
        """Collect a named entity reference as part of the text run.

        The parser passes only the name, so the terminating ';' is always
        written, whether or not the input had one.
        """
        self._pending_text.append('&' + name + ';')

    def handle_comment(self, data):
        """Write a comment on its own line."""
        self._write_line('<!--' + data + '-->')

    def handle_decl(self, decl):
        """Write a declaration such as ``<!DOCTYPE html>`` on its own line."""
        self._write_line('<!' + decl + '>')

    def handle_pi(self, data):
        """Write a processing instruction on its own line."""
        self._write_line('<?' + data + '>')

    def render(self):
        """Return everything formatted so far as a single string."""
        # Text after the last tag has not been written yet.
        self._flush_text()
        return "".join(self.formatted)

if __name__ == "__main__":
    # Command-line use: python htmlformatter.py INPUT.html OUTPUT.html
    import sys

    if len(sys.argv) != 3:
        print("Wrong number of arguments", file=sys.stderr)
        sys.exit(2)

    try:
        n = HTMLFormatter()
        # "with" closes each file even when reading, parsing or writing fails.
        with open(sys.argv[1], 'r') as f:
            n.feed(f.read())
        # Tell the parser the input is complete so that anything it is still
        # buffering (e.g. text after the last tag) gets processed.
        n.close()
        with open(sys.argv[2], 'w') as f:
            f.write(n.render())
    except IOError:
        print("Failed opening or writing to files '{0}', '{1}'".format(sys.argv[1], sys.argv[2]),
              file=sys.stderr)
        # Non-zero status lets calling scripts detect the failure.
        sys.exit(1)



To run it, save this as htmlformatter.py (or any name you choose), then run:
python htmlformatter.py file1.html file2.html

Or you can use it as a library.
from htmlformatter import HTMLFormatter
n = HTMLFormatter()
n.feed(some_html)
print(n.render())



To Do:
  • Fix the way it handles data, comments and entity refs
  • Make it auto fix tags that should be self closing but aren't
  • Close tags that only have space or small data in them on the same line instead of 2-3 separate lines



I'm open to ideas and suggestions for improvements.


Last Edit: Jun 21, 2009 16:42:57 GMT by Eric

newBookmarkLockedFalling