-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmake_site.py
executable file
·543 lines (462 loc) · 18.6 KB
/
make_site.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
#!/usr/bin/env python3
from pathlib import Path
import sys
import subprocess
from dataclasses import dataclass, field
from typing import List, Mapping, Optional
from datetime import datetime
import yaml
from staticjinja import Site
import jinja2.ext
from jinja2 import Environment, FileSystemLoader
import pybtex.database
from mistletoe import Document, block_token
from mistletoe_renderer import CustomHTMLRenderer
from pylatexenc.latex2text import LatexNodes2Text
import urllib.request
import json
import gzip
import os
from github import Github
from slugify import slugify
class MarkdownExtension(jinja2.ext.Extension):
    """Jinja2 extension adding a ``{% markdown %}`` ... ``{% endmarkdown %}`` block.

    The text produced by the block body is wrapped in a mistletoe
    ``Document`` and rendered with the ``CustomHTMLRenderer`` that this
    extension attaches to the environment as ``markdowner``.
    """
    tags = {'markdown'}

    def __init__(self, environment):
        super().__init__(environment)
        # Expose a renderer on the environment so `_markdown_support` can use it.
        environment.extend(markdowner=CustomHTMLRenderer())

    def parse(self, parser):
        """Parse the block up to ``endmarkdown`` and wrap it in a ``CallBlock``."""
        # The first token of the stream is the `markdown` tag itself.
        opening_line = next(parser.stream).lineno
        block_body = parser.parse_statements(
            ['name:endmarkdown'], drop_needle=True)
        support_call = self.call_method('_markdown_support')
        call_block = jinja2.nodes.CallBlock(support_call, [], [], block_body)
        return call_block.set_lineno(opening_line)

    def _markdown_support(self, caller):
        """Render the block body (returned by ``caller()``) and strip the result."""
        rendered = self.environment.markdowner.render(Document(caller()))
        return rendered.strip()
# Module-wide renderer shared by every call to `render_markdown`.
markdown_renderer = CustomHTMLRenderer()


def render_markdown(src: str) -> str:
    """Return the result of ``markdown_renderer.render_md`` applied to ``src``."""
    rendered = markdown_renderer.render_md(src)
    return rendered
# Directory containing this script; data and templates live next to it.
ROOT = Path(__file__).parent
DATA = ROOT.joinpath('data')
# Kept as a plain string (not a Path) for the consumers of this constant.
TEMPLATE_SRC = str(ROOT.joinpath('templates'))
@dataclass
class MenuItem:
    """A single menu entry: a title and the url it points to."""
    title: str
    url: str

    @classmethod
    def from_dict(cls, dic):
        """Build an item from a mapping; its first ``title: url`` pair is used."""
        title, url = next(iter(dic.items()))
        return cls(title, url)
@dataclass
class Menu:
    """A titled menu holding a list of ``MenuItem`` entries."""
    title: str
    items: List[MenuItem]

    @classmethod
    def from_dict(cls, dic):
        """Build a menu from a mapping with ``'title'`` and ``'items'`` keys."""
        entries = [MenuItem.from_dict(raw_item) for raw_item in dic['items']]
        return cls(dic['title'], entries)
# Site menus, one `Menu` per top-level entry of data/menus.yaml.
with (DATA / 'menus.yaml').open('r', encoding='utf-8') as menu_file:
    menus = [Menu.from_dict(menu) for menu in yaml.safe_load(menu_file)]

# Raw markdown snippets read from the data directory.
presentation = (DATA / 'presentation.md').read_text(encoding='utf-8')
what_is = (DATA / 'what_is.md').read_text(encoding='utf-8')
@dataclass
class Formalization:
    """One entry of data/formalizations.yaml (all fields are required strings)."""
    title: str
    authors: str
    abstract: str
    url: str
# Each YAML entry's keys must match the `Formalization` fields exactly.
with (DATA / 'formalizations.yaml').open('r', encoding='utf-8') as f_file:
    formalizations = [
        Formalization(**form)
        for form in yaml.safe_load(f_file)
    ]
@dataclass
class People:
    """A person entry; only ``name`` is required, the rest default to ''."""
    name: str
    descr: str = ''
    img: str = ''
# Index the people of data/people.yaml by name.
with (DATA / 'people.yaml').open('r', encoding='utf-8') as m_file:
    peoples = {
        mtr['name']: People(**mtr)
        for mtr in yaml.safe_load(m_file)
    }
@dataclass
class Team:
    """A team with its descriptions, url and member list."""
    name: str
    short_description: str
    description: str
    url: str
    members: List[People]
    # Default when the constructor is called without this argument.
    use_biography: bool = False
# Build the teams from data/teams.yaml. Members are listed by name, in sorted
# order; a name missing from `peoples` becomes a bare `People(name)`.
# Note that a missing `use_biography` key is read as True here.
with (DATA / 'teams.yaml').open('r', encoding='utf-8') as t_file:
    teams = [
        Team(
            team['name'],
            team['short_description'],
            team['description'],
            team['url'],
            [peoples.get(name, People(name))
             for name in sorted(team['members'])],
            use_biography=team.get('use_biography', True),
        )
        for team in yaml.safe_load(t_file)
    ]
@dataclass
class DocDecl:
    """A declaration name together with its header HTML and doc/source links."""
    name: str
    decl_header_html: str
    docs_link: str
    src_link: str
@dataclass
class HundredTheorem:
    """One entry of the "100 theorems" list.

    Only ``number`` and ``title`` are required; every other field defaults
    to ``None``.
    """
    number: str
    title: str
    # A single declaration name, or several; both are optional.
    decl: Optional[str] = None
    decls: Optional[List[str]] = None
    doc_decls: Optional[List[DocDecl]] = None
    author: Optional[str] = None
    links: Optional[Mapping[str, str]] = None
    note: Optional[str] = None
@dataclass
class Event:
    """An event entry; title, location and type are required."""
    title: str
    location: str
    type: str
    url: str = 'TBA'
    # Dates are stored as strings; empty means "not given".
    start_date: str = ''
    end_date: str = ''
    date_range: str = 'TBA'
@dataclass
class Course:
    """A course entry; name, instructor, institution and lean_version are required."""
    name: str
    instructor: str
    institution: str
    lean_version: int
    website: Optional[str] = None
    repo: Optional[str] = None
    material: Optional[str] = None
    notes: Optional[str] = None
    # Each instance gets its own fresh list.
    tags: List[str] = field(default_factory=list)
    year: int = 2023
    summary: Optional[str] = None
    experiences: Optional[str] = None
# Fetch the declaration index of the mathlib4 docs. The remote file has a
# `.bmp` extension but is stored locally as header-data.json and parsed as JSON.
HEADER_DATA_PATH = DATA / 'header-data.json'
urllib.request.urlretrieve(
    'https://leanprover-community.github.io/mathlib4_docs/declarations/header-data.bmp',
    HEADER_DATA_PATH,
)
with HEADER_DATA_PATH.open('r', encoding='utf-8') as h_file:
    header_data = json.load(h_file)
@dataclass
class HeaderDataEntry:
    """One declaration record: an ``info`` sub-record plus a ``header`` string."""

    @dataclass
    class InfoEntry:
        """The ``info`` part of a declaration record."""
        sourceLink: str
        name: str
        line: int
        kind: str
        docLink: str
        doc: str

    info: InfoEntry
    header: str
# Typed view of `header_data`, keyed exactly like the raw mapping.
declarations = {
    k: HeaderDataEntry(
        info=HeaderDataEntry.InfoEntry(**d['info']),
        header=d['header'],
    )
    for k, d in header_data.items()
}

# Everything whose kind is not 'theorem' is counted as a definition.
num_thms = sum(1 for entry in declarations.values()
               if entry.info.kind == 'theorem')
num_defns = len(declarations) - num_thms
urllib.request.urlretrieve(
    'https://leanprover-community.github.io/mathlib4_docs/100.yaml',
    DATA / '100.yaml',
)
with (DATA / '100.yaml').open('r', encoding='utf-8') as h_file:
    hundred_theorems = [
        HundredTheorem(thm, **content)
        for thm, content in yaml.safe_load(h_file).items()
    ]

# Resolve every declaration named by a theorem entry into a `DocDecl`.
for h in hundred_theorems:
    if h.decl:
        # An entry gives either `decl` or `decls`, never both.
        assert not h.decls
        h.decls = [h.decl]
    if not h.decls:
        h.doc_decls = []
        continue
    doc_decls = []
    for decl in h.decls:
        decl_info = declarations.get(decl)
        if decl_info is None:
            # Unknown declarations are reported and skipped, not fatal.
            print(f'Error: 100 theorems entry {h.number} refers to a nonexistent declaration {decl}')
            continue
        doc_decls.append(DocDecl(
            name=decl,
            # TODO: add missing `/mathlib4_docs/` prefix to links within this header
            decl_header_html=decl_info.header,
            # note: the `.bmp` data files use doc-relative links
            docs_link='/mathlib4_docs/' + decl_info.info.docLink,
            src_link=decl_info.info.sourceLink,
        ))
    h.doc_decls = doc_decls
def replace_link(name, id):
    """Turn an overview item's target into a link under ``/mathlib4_docs/``.

    An empty ``name`` is returned unchanged. A ``name`` containing ``/`` is
    treated as a doc-relative path. Anything else is looked up in
    ``declarations``.

    Raises:
        KeyError: if ``name`` is not a known declaration; ``id`` is included
            in the message to identify the overview item.
    """
    if name == '':
        return name
    if '/' in name:
        return '/mathlib4_docs/' + name
    try:
        entry = declarations[name]
    except KeyError:
        raise KeyError(f'Error: overview item {id} refers to a nonexistent declaration {name}')
    # note: the `.bmp` data files use doc-relative links
    return '/mathlib4_docs/' + entry.info.docLink
@dataclass
class Overview:
    """A node of an overview tree.

    A node either has ``children`` or is a leaf carrying a ``decl`` link,
    a ``url``, or neither.
    """
    id: str
    depth: int
    title: str
    decl: Optional[str] = None
    url: Optional[str] = None
    parent: Optional['Overview'] = None
    children: List['Overview'] = field(default_factory=list)

    @property
    def has_missing_child(self) -> bool:
        """True if some leaf below (or this leaf itself) has no ``decl``."""
        if not self.children:
            return not bool(self.decl)
        return any(child.has_missing_child for child in self.children)

    @property
    def is_nonempty(self) -> bool:
        """True if some leaf below (or this leaf itself) has a ``decl``."""
        if not self.children:
            return bool(self.decl)
        return any(child.is_nonempty for child in self.children)

    @property
    def nonempty_children(self) -> List['Overview']:
        """Direct children for which ``is_nonempty`` holds."""
        return [child for child in self.children if child.is_nonempty]

    @property
    def missing_children(self) -> List['Overview']:
        """Direct children for which ``has_missing_child`` holds."""
        return [child for child in self.children if child.has_missing_child]

    @property
    def slug(self) -> str:
        """The slugified title."""
        return slugify(self.title)

    @classmethod
    def from_node(cls, identifier: str, title: str, children, depth: int, parent: 'Overview' = None) -> 'Overview':
        """Recursively build a node from parsed YAML content.

        ``children`` is a dict for an inner node. Any other value makes a
        leaf: a truthy value containing ``'http'`` is kept as ``url``, and
        anything else is stripped and passed to ``replace_link``.
        """
        decl = url = None
        has_subtree = isinstance(children, dict)
        if not has_subtree:
            if children and 'http' in children:
                url = children
            else:
                decl = replace_link((children or '').strip(), identifier)
        node = cls(id=identifier, depth=depth, title=title,
                   decl=decl, url=url, parent=parent, children=[])
        if has_subtree:
            # Child ids extend the parent id with the child's position.
            node.children = [
                cls.from_node(f"{identifier}-{position}", child_title,
                              child_content, depth + 1, parent=node)
                for position, (child_title, child_content)
                in enumerate(children.items())
            ]
        return node

    @classmethod
    def from_top_level(cls, index: int, title: str, children) -> 'Overview':
        """Build a depth-0 node whose id is the stringified ``index``."""
        return cls.from_node(f"{index}", title, children, 0)
# Download the mathlib overview and turn each top-level entry into a tree.
urllib.request.urlretrieve(
    'https://leanprover-community.github.io/mathlib4_docs/overview.yaml',
    DATA / 'overview.yaml',
)
with (DATA / 'overview.yaml').open('r', encoding='utf-8') as h_file:
    overviews = [
        Overview.from_top_level(index, title, elements)
        for index, (title, elements) in enumerate(yaml.safe_load(h_file).items())
    ]

# Same treatment for the undergraduate overview.
urllib.request.urlretrieve(
    'https://leanprover-community.github.io/mathlib4_docs/undergrad.yaml',
    DATA / 'undergrad.yaml',
)
with (DATA / 'undergrad.yaml').open('r', encoding='utf-8') as h_file:
    undergrad_overviews = [
        Overview.from_top_level(index, title, elements)
        for index, (title, elements) in enumerate(yaml.safe_load(h_file).items())
    ]

# Local data files.
with (DATA / 'theories_index.yaml').open('r', encoding='utf-8') as h_file:
    theories = yaml.safe_load(h_file)

with (DATA / 'events.yaml').open('r', encoding='utf-8') as h_file:
    events = [Event(**e) for e in yaml.safe_load(h_file)]

with (DATA / 'courses.yaml').open('r', encoding='utf-8') as h_file:
    courses = [Course(**e) for e in yaml.safe_load(h_file)]
# Post-process the courses: sort them, collect the set of user-supplied tags,
# and render the free-text attributes from markdown to HTML.
courses_tags = set()
# Newest Lean version first, then newest year, then alphabetical by name.
courses.sort(key=lambda c: (-c.lean_version, -c.year, c.name))
for course in courses:
    # Collect tags before the synthetic `lean<N>` tag is appended, so the
    # version tags can be placed first in `courses_tags` below.
    courses_tags.update(course.tags)
    course.tags.sort()
    course.tags.append(f'lean{course.lean_version}')
    # Each attribute is rendered exactly once: rendering an attribute twice
    # would feed already-generated HTML back through the markdown renderer.
    # The loop variable is deliberately not called `field`, which would
    # rebind the `dataclasses.field` import at module level.
    for attr in ('experiences', 'notes', 'summary'):
        val = getattr(course, attr)
        if isinstance(val, str):
            setattr(course, attr, render_markdown(val))
        elif isinstance(val, list):
            # A list is rendered as a markdown bullet list.
            setattr(course, attr,
                    render_markdown("\n".join("* " + v for v in val)))
courses_tags = ['lean4', 'lean3'] + sorted(courses_tags)
def format_date_range(event):
    """Return a human-readable date range for ``event``.

    ``event.start_date`` and ``event.end_date`` are strings in
    ``'%B %d %Y'`` format (e.g. ``'March 3 2023'``). If either is empty,
    ``'TBA'`` is returned. Parts shared by both dates are not repeated:

    * different years:  ``March 3, 2023–January 2, 2024``
    * different months: ``March 30–April 2, 2023``
    * different days:   ``March 3–7, 2023``
    * same day:         ``March 3, 2023``

    Raises:
        ValueError: if a non-empty date does not match the expected format.
    """
    if not (event.start_date and event.end_date):
        return 'TBA'
    start = datetime.strptime(event.start_date, '%B %d %Y').date()
    end = datetime.strptime(event.end_date, '%B %d %Y').date()
    # The day number is taken from `.day` rather than the `%-d` strftime
    # directive, which is a platform-specific extension.
    start_month_day = f'{start:%B} {start.day}'
    end_full = f'{end:%B} {end.day}, {end:%Y}'
    if start.year != end.year:
        return f'{start_month_day}, {start:%Y}–{end_full}'
    if start.month != end.month:
        return f'{start_month_day}–{end_full}'
    if start.day != end.day:
        return f'{start_month_day}–{end.day}, {end:%Y}'
    return f'{start_month_day}, {start:%Y}'
# Split the events into past and upcoming ones, relative to today.
present = datetime.now().date()


def _event_end_date(event):
    """Return the parsed end date of ``event``, or None if it has none yet."""
    if not event.end_date:
        return None
    return datetime.strptime(event.end_date, '%B %d %Y').date()


# Past events, most recent first.
old_events = sorted(
    (e for e in events
     if _event_end_date(e) is not None and _event_end_date(e) < present),
    key=_event_end_date,
    reverse=True)
# Upcoming events, soonest first. Events without an end date count as
# upcoming; they have no date to parse, so they are sorted after all dated
# events instead of being handed to `strptime`.
new_events = sorted(
    (e for e in events
     if _event_end_date(e) is None or _event_end_date(e) >= present),
    key=lambda e: _event_end_date(e) or datetime.max.date())
for e in old_events + new_events:
    e.date_range = format_date_range(e)
@dataclass
class Project:
    """A project entry with its description, maintainers and star count."""
    name: str
    organization: str
    description: str
    maintainers: List[str]
    stars: int
# GitHub client; the token is read from GITHUB_TOKEN and may be absent (None).
github = Github(os.environ.get('GITHUB_TOKEN'))

urllib.request.urlretrieve(
    'https://leanprover-contrib.github.io/leanprover-contrib/projects/projects.yml',
    DATA / 'projects.yaml',
)
with (DATA / 'projects.yaml').open('r', encoding='utf-8') as h_file:
    oprojects = yaml.safe_load(h_file)

# Collect every project not explicitly hidden with `display: false`,
# together with its current star count.
projects = []
for name, project in oprojects.items():
    if not project.get('display', True):
        continue
    github_repo = github.get_repo(project['organization'] + '/' + name)
    stars = github_repo.stargazers_count
    descr = render_markdown(project['description'])
    projects.append(Project(
        name,
        project['organization'],
        descr,
        project['maintainers'],
        stars,
    ))

mathlib_repo = github.get_repo('leanprover-community/mathlib')
num_contrib = mathlib_repo.get_contributors(anon=True).totalCount
# Most-starred projects first.
projects.sort(key=lambda p: p.stars, reverse=True)
# Download the projects' version history and keep the parsed YAML as-is.
PROJECT_HISTORY_PATH = DATA / 'project_history.yaml'
urllib.request.urlretrieve(
    'https://leanprover-contrib.github.io/leanprover-contrib/version_history.yml',
    PROJECT_HISTORY_PATH,
)
with PROJECT_HISTORY_PATH.open('r', encoding='utf-8') as h_file:
    project_history = yaml.safe_load(h_file)
# Parse the bibliography and sort the tagged entries into three groups.
bib = pybtex.database.parse_file('lean.bib')
about_lean_dic = {}
about_mathlib_dic = {}
formalization_papers_dic = {}
for key, data in bib.entries.items():
    # Entries without a `tags` field are skipped entirely.
    if 'tags' not in data.fields:
        continue
    tags = [tag.strip() for tag in data.fields['tags'].split(',')]
    data.fields['tags'] = tags
    # A grouping tag files the entry under its group and is then removed
    # from the entry's tag list.
    for group_tag, group in (('about-lean', about_lean_dic),
                             ('about-mathlib', about_mathlib_dic),
                             ('formalization', formalization_papers_dic)):
        if group_tag in tags:
            data.fields['tags'].remove(group_tag)
            group[key] = data

    # Pick a url from the first available of: link, url, eprint.
    if 'link' in data.fields:
        # Drop the first five characters and the last one of the field.
        url = data.fields['link'][5:-1]
    elif 'url' in data.fields:
        url = data.fields['url']
    elif 'eprint' in data.fields:
        eprint = data.fields['eprint']
        declared_arxiv = (data.fields.get('archivePrefix') == 'arXiv'
                          or data.fields.get('eprinttype') == 'arXiv')
        if eprint.startswith('arXiv:'):
            url = 'https://arxiv.org/abs/' + eprint[6:]
        elif declared_arxiv:
            url = 'https://arxiv.org/abs/' + eprint
        else:
            url = eprint
    else:
        raise ValueError(f"Couldn't find a url for bib item {key}")
    # Unwrap a LaTeX `\url{...}` and unescape underscores.
    if url.startswith(r'\url'):
        url = url[4:].strip('{}')
    url = url.replace(r'\_', '_')

    # The venue is the journal (unless it is 'CoRR'), else the booktitle.
    if 'journal' in data.fields and data.fields['journal'] != 'CoRR':
        journal = data.fields['journal']
    elif 'booktitle' in data.fields:
        journal = data.fields['booktitle']
    else:
        journal = None
    data.fields['url'] = url
    data.fields['journal'] = journal

# One (heading, entries) pair per group, entries sorted by year, newest first.
paper_lists = [
    (heading, sorted(papers.values(),
                     key=lambda e: e.fields['year'],
                     reverse=True))
    for heading, papers in (
        ('Papers about Lean', about_lean_dic),
        ('Papers about mathlib', about_mathlib_dic),
        ('Formalization papers using Lean', formalization_papers_dic),
    )
]
def render_site(target: Path, base_url: str, reloader=False):
    """Render the whole site into ``target``.

    Args:
        target: output directory; created if it does not exist.
        base_url: prefix put in front of site-relative urls and passed to
            every template as ``base_url``.
        reloader: forwarded to ``site.render`` as ``use_reloader``.

    Besides rendering the templates this runs ``bibtool`` to write
    ``lean.bib`` into ``target``, renders one page per team, and copies the
    static folders and files into ``target`` with ``rsync``.
    """
    # `bibtool` and the team pages write into the output tree before
    # `site.render` runs, so make sure it exists.
    target.mkdir(parents=True, exist_ok=True)

    def default_context():
        """Context shared by every template."""
        return {
            'base_url': base_url,
            'menus': menus,
        }

    def render_content(env, template, **kwargs):
        """Render a markdown template."""
        content_template = env.get_template("_markdown.html")
        path = Path(template.name)
        title = path.with_suffix('').name
        (target/path.parent).mkdir(parents=True, exist_ok=True)
        content_template.stream(**kwargs).dump(str(target/path.parent/title)+'.html')

    def get_contents(template):
        """Build the context of a markdown page: rendered content, name, title."""
        src = Path(template.filename).read_text(encoding='utf-8').replace(
            'img/', base_url+'/img/')
        doc = Document(src)
        content = render_markdown(src).strip()
        # The page title is the content of the first heading, if there is one.
        title = ''
        for child in doc.children:
            if isinstance(child, block_token.Heading):
                title = child.children[0].content
                break
        return {'content': content, 'name': template.name,
                'title': title}

    def url(raw: str):
        """Prefix ``raw`` with ``base_url`` unless it starts with 'http'."""
        return raw if raw.startswith('http') else base_url + raw

    latexnodes2text = LatexNodes2Text()

    def clean_tex(src: str) -> str:
        """Convert LaTeX markup in ``src`` to plain text."""
        return latexnodes2text.latex_to_text(src)

    # The same custom filters are used by the site and by the team pages.
    custom_filters = {'url': url, 'md': render_markdown, 'tex': clean_tex}

    # The exit status of bibtool is not checked.
    subprocess.run(['bibtool', '--preserve.key.case=on', '--preserve.keys=on',
                    '--delete.field={website}', '--delete.field={tags}', '-s', '-i', 'lean.bib', '-o',
                    str(target/'lean.bib')])

    site = Site.make_site(
        searchpath=TEMPLATE_SRC,
        outpath=str(target),
        extensions=[MarkdownExtension],
        rules=[
            ('.*.md', render_content)],
        contexts=[
            ('.*', default_context),
            ('index.html', {'presentation': presentation,
                            'what_is': what_is,
                            'formalizations': formalizations}),
            ('papers.html', {'paper_lists': paper_lists}),
            ('100.html', {'hundred_theorems': hundred_theorems}),
            ('100-missing.html', {'hundred_theorems': hundred_theorems}),
            ('meet.html', {'community': (DATA/'community.md').read_text(encoding='utf-8')}),
            ('mathlib-overview.html', {'overviews': overviews, 'theories': theories}),
            ('undergrad.html', {'overviews': undergrad_overviews}),
            ('undergrad_todo.html', {'overviews': undergrad_overviews}),
            ('mathlib_stats.html', {'num_defns': num_defns, 'num_thms': num_thms, 'num_contrib': num_contrib}),
            ('lean_projects.html', {'projects': projects}),
            ('events.html', {'old_events': old_events, 'new_events': new_events}),
            ('teaching/courses.html', {'courses': courses, 'tags': courses_tags}),
            ('teams.html', {'introduction': (DATA/'teams_intro.md').read_text(encoding='utf-8'), 'teams': teams}),
            ('.*.md', get_contents)
        ],
        filters=custom_filters,
        mergecontexts=True)

    # Now build the individual team pages
    (target/'teams').mkdir(parents=True, exist_ok=True)
    # Load from the same template directory as the site above.
    env = Environment(loader=FileSystemLoader(TEMPLATE_SRC))
    # Add the custom filters to the built-in ones; assigning a new dict to
    # `env.filters` would make every built-in filter unavailable.
    env.filters.update(custom_filters)
    team_tpl = env.get_template('_team.html')
    for team in teams:
        team_page = target/'teams'/(team.url + '.html')
        # Explicit encoding so the output does not depend on the platform default.
        with team_page.open('w', encoding='utf-8') as tgt_file:
            team_tpl.stream(team=team, menus=menus, base_url=base_url).dump(tgt_file)

    # Copy static assets next to the rendered pages.
    destination = str(target).rstrip('/')
    for folder in ['css', 'js', 'img', 'papers', str(target/'teams')]:
        subprocess.call(['rsync', '-a', folder, destination])
    subprocess.call(['rsync', '-a', 'googlef0c00cb4d31b246f.html', destination])
    subprocess.call(['rsync', '-a', 'robots.txt', destination])

    site.render(use_reloader=reloader)
if __name__ == '__main__':
    # `--local` builds a site browsable straight from the build directory;
    # otherwise urls point at the published site.
    build_locally = '--local' in sys.argv
    base_url = (
        f"file://{(Path(__file__).parent/'build').absolute()}/"
        if build_locally
        else 'https://leanprover-community.github.io/'
    )
    render_site(ROOT/'build', base_url, reloader='--reload' in sys.argv)