# doc_postprocess.py [-h|--help] <pattern>...

# Post-process the Doxygen-generated HTML files matching pattern.
# Substitutions with regular expressions are somewhat slow in Python 3.9.5.
# Use str.replace() rather than re.sub() where possible.
# Substitution table applied to lines containing '<a class="el"'.
#
# Each entry is
#   [search string, compiled regular expression or None, substitution string, count]
#
# * search string: cheap substring pre-filter; the entry is only applied to
#   lines that contain it.
# * regular expression: when not None it performs the substitution via
#   re.sub(); when None the search string is replaced via str.replace().
# * count: maximum number of replacements. re.sub() treats 0 as "all", but
#   str.replace() treats 0 as "none", so string-only entries use -1 for "all".
#
# The patterns operate on HTML *source* text, where '&' and '<' appear as the
# entities '&amp;' and '&lt;'.
# NOTE(review): the entity spellings and the "one or more spaces" quantifiers
# were reconstructed from a copy in which the entities had been decoded and
# runs of spaces collapsed -- confirm against real Doxygen output.
class_el_patterns = [
    # return value
    [' &amp; ', re.compile(r' &amp; +'), '&amp;&#160;', 1],
    [' * ', re.compile(r' \* +'), '*&#160;', 1],
    # parameters
    [' &amp;', None, '&amp;', -1],
    ['&amp;', re.compile(r'&amp;\b'), '&amp; ', 0],
    [' *', None, '*', -1],
    ['*', re.compile(r'\*\b'), '* ', 0],
    # templates
    ['template&lt;', re.compile(r'\btemplate&lt;'), 'template &lt;', 1],
]
# Substitution table applied to lines containing '<td class="md"' or
# '<td class="mdname"'.
#
# Each entry is
#   [search string, compiled regular expression or None, substitution string, count]
# The search string is a substring pre-filter. A non-None regular expression
# is applied with re.sub() (count 0 = all occurrences); otherwise the search
# string is replaced with str.replace() (count -1 = all occurrences, because
# str.replace() treats 0 as "replace nothing").
#
# NOTE(review): the '&nbsp;' / '&amp;' spellings and the "one or more spaces"
# quantifiers were reconstructed from a copy in which entities had been
# decoded and runs of spaces collapsed -- confirm against real Doxygen output.
class_md_patterns = [
    # left parenthesis
    ['(&nbsp; ', re.compile(r'\(&nbsp; +'), '(', 1],
    # return value
    [' &amp; ', None, '&amp;&#160;', -1],
    [' * ', None, '*&#160;', -1],
    # parameters
    [' &amp;&nbsp; ', re.compile(r' &amp;&nbsp; +'), '&amp;&#160;', 0],
    [' *&nbsp; ', re.compile(r' \*&nbsp; +'), '*&#160;', 0],
    # templates
    ['template&lt;', re.compile(r'\btemplate&lt;'), 'template &lt;', 1],
]
# Substitution table applied to every line that matches neither the
# '<a class="el"' nor the '<td class="md"' / '<td class="mdname"' case.
#
# Each entry is
#   [search string, compiled regular expression or None, substitution string, count]
#
# NOTE(review): '&lt;' was reconstructed from a copy that showed a raw '<';
# in HTML source text the character appears as the entity -- confirm.
else_patterns = [
    # template declarations at the start of a line, optionally preceded by a
    # heading tag such as <h2>, which is kept via the \1 back-reference
    ['template&lt;', re.compile(r'^(<h\d>|)template&lt;'), '\\1template &lt;', 1],
]
# Substitution table applied to every line, after the line-type specific
# tables.
#
# Each entry is
#   [search string, compiled regular expression or None, substitution string, count]
# A non-None regular expression is applied with re.sub() (count 0 = all
# occurrences); otherwise the search string is replaced with str.replace()
# (count -1 = all occurrences, because str.replace() treats 0 as "none").
all_lines_patterns = [
    # For some reason, some versions of Doxygen output the full path to
    # referenced tag files. This is bad since it breaks doc_install.py,
    # and also because it leaks local path names into source tarballs.
    # Thus, strip the directory prefix here.
    [' doxygen="', re.compile(r' doxygen="[^":]*/([^":]+\.tag):'), ' doxygen="\\1:', 0],

    # Replace named character entities by numeric character references.
    # NOTE(review): the entity names were reconstructed from a copy in which
    # they had been decoded to the characters themselves (which made these
    # entries no-ops, and made the last regex match the empty string) --
    # confirm against real Doxygen output.
    ['&copy;', None, '&#169;', -1],
    ['&mdash;', None, '&#8212;', -1],
    ['&ndash;', None, '&#8211;', -1],
    # Also swallow ordinary spaces on either side of a non-breaking space.
    ['&nbsp;', re.compile(r' *&nbsp; *'), '&#160;', 0],
]
def _apply_substitutions(line, substitutions):
    """Return *line* after applying every applicable entry of a table.

    Each entry is ``[search, regex_or_None, replacement, count]``. An entry
    is skipped unless ``search`` occurs in the line. If a compiled regular
    expression is given it performs the substitution, otherwise the search
    string itself is replaced.
    """
    for search, regex, replacement, count in substitutions:
        if search not in line:
            continue
        if regex is not None:
            # re.sub(): count=0 replaces all occurrences.
            line = regex.sub(replacement, line, count=count)
        else:
            # str.replace() would replace *nothing* for count=0, so map
            # "no limit" (0 or negative) to -1, which means all occurrences.
            line = line.replace(search, replacement, count if count > 0 else -1)
    return line


def doc_postprocess(patterns):
    """Post-process, in place, the HTML files matching the given patterns.

    patterns: a glob pattern string, a list or tuple of such strings, or
              None (treated as no patterns).

    Every matching file is read, each line is run through the substitution
    table selected by the line's content (class_el_patterns,
    class_md_patterns or else_patterns) and then through all_lines_patterns,
    and the result is written back to the same file.

    Returns 0. Errors raised while opening, reading or writing a file are
    not caught here.
    """
    if patterns is None:
        patterns = []
    elif not isinstance(patterns, (list, tuple)):
        patterns = [patterns]

    filepaths = []
    for pattern in patterns:
        filepaths += glob.glob(pattern)

    for filepath in filepaths:
        # Assume that the file is UTF-8 encoded.
        # If illegal UTF-8 bytes in the range 0x80..0xff are encountered, they are
        # replaced by Unicode Private Use characters in the range 0xdc80..0xdcff
        # and restored to their original values when the file is rewritten.
        with open(filepath, mode='r', encoding='utf-8', errors='surrogateescape') as file:
            # Read the whole file into a buffer, a list with one line per element.
            buf = file.readlines()

        for line_number, line in enumerate(buf):
            # Exactly one of the three line-type specific tables is used.
            if '<a class="el"' in line:
                line = _apply_substitutions(line, class_el_patterns)
            elif '<td class="md"' in line or '<td class="mdname"' in line:
                line = _apply_substitutions(line, class_md_patterns)
            else:
                line = _apply_substitutions(line, else_patterns)

            # These substitutions apply to every line.
            line = _apply_substitutions(line, all_lines_patterns)

            buf[line_number] = line

        with open(filepath, mode='w', encoding='utf-8', errors='surrogateescape') as file:
            # Write the whole buffer back into the file.
            file.writelines(buf)

    return 0
# ----- Main -----
if __name__ == '__main__':
    # Imported locally so that the command-line entry point is self-contained;
    # they are only needed when the file is run as a script.
    import argparse
    import sys

    parser = argparse.ArgumentParser(
        description='Post-process the Doxygen-generated HTML files matching pattern.')
    # Zero or more filename patterns; with none given, nothing is processed.
    parser.add_argument('patterns', nargs='*', metavar='pattern', help='filename pattern')
    args = parser.parse_args()

    # The return value of doc_postprocess() becomes the process exit status.
    sys.exit(doc_postprocess(args.patterns))