-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathgentags.py
executable file
·74 lines (63 loc) · 2.61 KB
/
gentags.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#!/usr/bin/env python
# vim:fileencoding=utf-8
# License: Apache 2.0 Copyright: 2017, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import absolute_import, division, print_function, unicode_literals
import os
import re
import subprocess
# Absolute path of this script; main() changes into the directory that
# contains it before generating anything.
self_path = os.path.abspath(__file__)
# Banner written at the top of the generated C headers. The ``{}``
# placeholder is filled with the name of the ``.in`` source file, minus its
# extension.
HEADER = '''\
// Do not edit
// Generated from {}.in (see gentags.py)
'''
def generate_tag_headers():
    """Regenerate the tag tables derived from ``src/tag.in``.

    Each line of ``src/tag.in`` is treated as one tag name. The following
    files, all relative to the current working directory, are rewritten:

    * ``src/tag_strings.h`` -- every tag as a quoted string plus a comma
    * ``src/tag_enum.h`` -- one ``GUMBO_TAG_<NAME>`` entry per tag
    * ``src/tag_sizes.h`` -- the length of every tag, all on one line
    * ``python/gumbo/gumboc_tags.py`` -- a ``TagNames`` list of ``<NAME>``

    ``<NAME>`` is the tag upper-cased with ``-`` replaced by ``_``. All
    files are handled in binary mode and the text is UTF-8.
    """
    def emit(stream, text):
        # The outputs are binary streams, so encode every chunk explicitly.
        stream.write(text.encode('utf-8'))

    with open("src/tag_strings.h", "wb") as strings_out, \
            open("src/tag_enum.h", "wb") as enum_out, \
            open("src/tag_sizes.h", "wb") as sizes_out, \
            open("python/gumbo/gumboc_tags.py", "wb") as py_out, \
            open('src/tag.in', 'rb') as tag_source:
        emit(py_out, 'TagNames = [\n')

        # The three C headers all start with the same banner.
        banner = HEADER.format('tag')
        for c_header in (strings_out, enum_out, sizes_out):
            emit(c_header, banner)

        for raw_line in tag_source:
            name = raw_line.decode('utf-8').strip()
            # Identifier-safe form of the tag used for enum/Python names.
            ident = name.upper().replace('-', '_')
            emit(py_out, '\t"%s",\n' % ident)
            emit(strings_out, '"%s",\n' % name)
            emit(enum_out, 'GUMBO_TAG_%s,\n' % ident)
            emit(sizes_out, '%d, ' % len(name))

        # Terminate the single line of sizes and close the Python list.
        emit(sizes_out, '\n')
        emit(py_out, ']\n')
def generate_tag_perfect_hash(repetitions=200):
    """Generate ``src/tag_perf.h`` from ``src/tag.in`` using ``gperf``.

    Runs ``gperf`` on ``src/tag.in`` (relative to the current working
    directory) and post-processes its output:

    * the output lines before ``in_word_set`` (minus the single line
      directly above it, presumably that function's return type) are kept;
    * everything in that kept text up to the ``tag_hash`` definition is
      replaced by the ``HEADER`` banner and a ``static inline unsigned int``
      declaration;
    * gperf's ``wordlist[]`` is turned into a ``kGumboTagMap`` array in
      which each keyword becomes ``GUMBO_TAG_<NAME>`` (upper-cased, ``-``
      replaced by ``_``) and each empty slot becomes ``GUMBO_TAG_LAST``.

    :param repetitions: value passed to gperf's ``-m`` option.
    :raises SystemExit: if ``in_word_set`` or ``wordlist[]`` cannot be
        located in the gperf output.
    :raises subprocess.CalledProcessError: if gperf exits with a non-zero
        status.
    """
    command = [
        'gperf', '-LANSI-C', '--ignore-case', '-H', 'tag_hash',
        '-m{}'.format(repetitions), 'src/tag.in',
    ]
    raw = subprocess.check_output(command).decode('utf-8').splitlines()

    for i, line in enumerate(raw):
        if line.startswith('in_word_set'):
            break
    else:
        raise SystemExit('Failed to find in_word_set()')

    # Split one line above the match so that line goes with the discarded
    # part. Clamp to 0: a match on the very first line would otherwise give
    # a negative index and slice from the wrong end.
    split_at = max(i - 1, 0)
    lines = raw[:split_at]
    tail = '\n'.join(raw[split_at:])

    # Raw string: the pattern contains regex escapes (\[, \], \s) that are
    # invalid escape sequences in a normal string literal.
    wordlist = re.search(r"wordlist\[\]\s+=\s+{(.*?)}", tail, re.DOTALL)
    if wordlist is None:
        raise SystemExit('Failed to find wordlist')
    wordlist = [w.strip().replace('"', '') for w in wordlist.group(1).split(',')]
    # Empty wordlist slots map to the GUMBO_TAG_LAST sentinel.
    taglist = ["\tGUMBO_TAG_" + (w.upper().replace('-', '_') if w else 'LAST') for w in wordlist]

    processed = '\n'.join(lines) + '\n\n'
    processed += 'static const GumboTag kGumboTagMap[] = {\n%s\n};' % '\n,'.join(taglist)
    # Greedy '.+' with DOTALL swallows all text before the line that starts
    # with 'tag_hash'; that text is replaced by the banner and declaration.
    processed = re.sub(
        r'.+^tag_hash',
        HEADER.format('tag') + 'static inline unsigned int\ntag_hash',
        processed,
        flags=re.DOTALL | re.MULTILINE)

    with open('src/tag_perf.h', 'wb') as f:
        f.write(processed.encode('utf-8'))
        f.write(b'\n')
def main():
    """Change into this script's directory and regenerate the tag headers."""
    script_dir = os.path.dirname(self_path)
    # The generator opens its files via paths relative to the working
    # directory, so move there first.
    os.chdir(script_dir)
    generate_tag_headers()
# Script entry point.
if __name__ == '__main__':
    # main() must run first: it changes the working directory, and
    # generate_tag_perfect_hash() uses the relative paths 'src/tag.in' and
    # 'src/tag_perf.h'.
    main()
    generate_tag_perfect_hash()