equal
deleted
inserted
replaced
|
1 #!/usr/bin/env python |
|
2 |
|
3 import os |
|
4 import sys |
|
5 import shutil |
|
6 import htmlentitydefs |
|
7 |
|
def replace_entities(line):
    """Return *line* with named HTML entities rewritten as numeric ones.

    Every ``&name;`` whose ``name`` is a key of
    ``htmlentitydefs.name2codepoint`` becomes ``&#<codepoint>;`` (for
    example ``&amp;`` -> ``&#38;``).  Anything else -- unknown names,
    numeric references such as ``&#38;``, or a bare ``&`` -- is copied
    through unchanged.

    Args:
        line: The text to convert.

    Returns:
        A new string with the known named entities replaced.
    """
    name2codepoint = htmlentitydefs.name2codepoint
    pieces = []
    pos = 0
    while True:
        estart = line.find("&", pos)
        if estart == -1:
            break
        eend = line.find(";", estart + 1)
        if eend == -1:
            # No terminator anywhere after this '&', so no later entity
            # can be complete either.
            break
        entitystr = line[estart + 1:eend]
        if entitystr in name2codepoint:
            pieces.append(line[pos:estart])
            pieces.append("&#%d;" % name2codepoint[entitystr])
            pos = eend + 1
        else:
            # Not an entity: keep the '&' and resume right after it, so
            # that a real entity between this '&' and the ';' (as in
            # "a & b &amp; c") is still found on the next pass.
            pieces.append(line[pos:estart + 1])
            pos = estart + 1
    pieces.append(line[pos:])
    return "".join(pieces)
|
27 |
|
# Script body: read the file named by the first command-line argument and
# write each line to standard output with its named entities converted.
infile = sys.argv[1]
# The context manager closes the file even if conversion or output fails.
with open(infile, "r") as source:
    for line in source:
        # Each line still carries its own line ending, so it is written
        # verbatim rather than through print.
        sys.stdout.write(replace_entities(line))