forked from github/docs
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsync-search-indices.js
executable file
·149 lines (130 loc) · 4.71 KB
/
sync-search-indices.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
#!/usr/bin/env node
// [start-readme]
//
// This script is run automatically via GitHub Actions on every push to `main` to generate searchable data.
// It can also be run manually. For more info see [contributing/search.md](contributing/search.md)
//
// [end-readme]
import assert from 'assert'
import path from 'path'
import { program, Option } from 'commander'
import { languageKeys } from '../../lib/languages.js'
import { allVersions } from '../../lib/all-versions.js'
import searchSync from './sync.js'
const shortNames = Object.fromEntries(
Object.values(allVersions).map((info) => {
const shortName = info.hasNumberedReleases
? info.miscBaseName + info.currentRelease
: info.miscBaseName
return [shortName, info]
})
)
const allVersionKeys = [...Object.keys(shortNames), ...Object.keys(allVersions)]
const DEFAULT_OUT_DIRECTORY = path.join('lib', 'search', 'indexes')
program
.description('Creates search records (and Lunr indexes) by scraping')
.option('-v, --verbose', 'Verbose outputs')
.addOption(new Option('-V, --version <VERSION>', 'Specific versions').choices(allVersionKeys))
.addOption(
new Option('-l, --language <LANGUAGE>', 'Which languages to focus on').choices(languageKeys)
)
.addOption(
new Option('--not-language <LANGUAGE>', 'Specific language to omit').choices(languageKeys)
)
.option('-d, --dry-run', 'Does not write to disk')
.option(
'-o, --out-directory <DIRECTORY>',
`Where to dump the created files (default ${DEFAULT_OUT_DIRECTORY})`
)
.option('--no-compression', `Do not Brotli compress the created .json files (default false)`)
// Once we've fully removed all Lunr indexing code, we can remove this option
// and change where it's used to be that the default is to not generate
// any Lunr indexes.
.option('--no-lunr-index', `Do not generate a Lunr index, just the records file (default false)`)
.option('--no-markers', 'Do not print a marker for each parsed document')
.option('--filter <MATCH>', 'Filter to only do pages that match this string')
.parse(process.argv)
main(program.opts())
async function main(opts) {
let language
if ('language' in opts) {
language = opts.language
if (process.env.LANGUAGE) {
console.warn(
`'language' specified as argument ('${language}') AND environment variable ('${process.env.LANGUAGE}')`
)
}
} else {
if (process.env.LANGUAGE && process.env.LANGUAGE !== 'all') {
language = process.env.LANGUAGE
if (!languageKeys.includes(language)) {
throw new Error(
`Environment variable 'VERSION' (${language}) is not recognized. Must be one of ${languageKeys}`
)
}
}
}
const notLanguage = opts.notLanguage
if (notLanguage && language) {
throw new Error("Can't specify --language *and* --not-language")
}
let version
if ('version' in opts) {
version = opts.version
if (process.env.VERSION) {
console.warn(
`'version' specified as argument ('${version}') AND environment variable ('${process.env.VERSION}')`
)
}
} else {
if (process.env.VERSION && process.env.VERSION !== 'all') {
version = process.env.VERSION
if (!allVersionKeys.includes(version)) {
throw new Error(
`Environment variable 'VERSION' (${version}) is not recognized. Must be one of ${allVersionKeys}`
)
}
}
}
// A `--version` or `process.env.VERSION` was specified, we need to convert
// it to the long name. I.e. `free-pro-team@latest`. Not `dotcom`.
// But it could also have beeb specified as `all` which means that `version`
// here ill be `undefined` which is also OK.
// const indexVersion = shortNames[version].hasNumberedReleases
// ? shortNames[version].currentRelease
// : shortNames[version].miscBaseName
let indexVersion
if (version && version !== 'all') {
// If it has been specified, it needs to be in the "long-form".
// I.e. `[email protected]` not `ghes-3.5`.
indexVersion = version in shortNames ? shortNames[version].version : version
}
assert(
!indexVersion || indexVersion in allVersions,
`version must be undefined or one of ${Object.keys(allVersions)}`
)
let dryRun = false
if ('dryRun' in opts) {
dryRun = opts.dryRun
} else {
dryRun = Boolean(JSON.parse(process.env.DRY_RUN || 'false'))
}
const outDirectory = opts.outDirectory || DEFAULT_OUT_DIRECTORY
const compressFiles = !!opts.compression
const generateLunrIndex = !!opts.lunrIndex
const config = {
noMarkers: !opts.markers,
filter: opts.filter,
}
const options = {
dryRun,
language,
notLanguage,
version: indexVersion,
outDirectory,
compressFiles,
generateLunrIndex,
config,
}
await searchSync(options)
}