consul/website/scripts/index_search_content.js

126 lines
3.1 KiB
JavaScript
Raw Normal View History

2020-06-29 12:14:43 -04:00
require('dotenv').config()
const algoliasearch = require('algoliasearch')
const glob = require('glob')
const matter = require('gray-matter')
const path = require('path')
2020-07-06 10:05:26 -04:00
const remark = require('remark')
const visit = require('unist-util-visit')
2020-06-29 12:14:43 -04:00
// Front matter attributes that are copied into every search record,
// making them search-indexable alongside the page's headings.
const SEARCH_DIMENSIONS = ['page_title', 'description']
// Run the script. `main` is a function declaration further down (hoisted),
// and this call must stay below SEARCH_DIMENSIONS: `main` reads that constant
// as soon as it starts processing pages.
main()
/**
 * Script entry point: builds one search record per `.mdx` page under
 * `../pages` (relative to this script) and hands the records to
 * `indexSearchContent`.
 *
 * Each record contains the front matter attributes listed in
 * SEARCH_DIMENSIONS, the page's headings, and an `objectID` derived from the
 * page's path relative to the pages folder with the `.mdx` extension removed.
 *
 * Any failure -- reading or parsing a page as well as indexing -- is logged
 * and terminates the process with exit code 1.
 *
 * @returns {Promise<void>}
 */
async function main() {
  try {
    const pagesFolder = path.join(__dirname, '../pages')

    // Grab all search-indexable content and format for Algolia
    const searchObjects = await Promise.all(
      glob.sync(path.join(pagesFolder, '**/*.mdx')).map(async (fullPath) => {
        const { content, data } = matter.read(fullPath)

        const searchableDimensions = {}
        for (const dimension of SEARCH_DIMENSIONS) {
          searchableDimensions[dimension] = data[dimension]
        }

        const headings = await collectHeadings(content)

        // Get path relative to `pages`
        const __resourcePath = fullPath.replace(`${pagesFolder}/`, '')

        // Use clean URL for Algolia id. Anchor the match to the end of the
        // path so only the file extension is removed, never an earlier
        // ".mdx" that happens to appear inside a folder or file name.
        const objectID = __resourcePath.replace(/\.mdx$/, '')

        return {
          ...searchableDimensions,
          headings,
          objectID,
        }
      })
    )

    await indexSearchContent(searchObjects)
  } catch (e) {
    // Covers page collection too: without this, a page that fails to read or
    // parse would surface as an unhandled promise rejection instead of a
    // clean, non-zero exit.
    console.error(e)
    process.exit(1)
  }
}
/**
 * Pushes the given records to the configured Algolia index and removes any
 * record in the index that is not part of this update.
 *
 * Configuration is read from the environment:
 * NEXT_PUBLIC_ALGOLIA_APP_ID, NEXT_PUBLIC_ALGOLIA_INDEX and ALGOLIA_API_KEY.
 *
 * On success the process is terminated with exit code 0, so this function
 * does not return to its caller in that case.
 *
 * @param {Array<Object>} objects - Search records; each carries an `objectID`.
 * @returns {Promise<void>}
 * @throws {Error} If any of the required environment variables is missing.
 *   Errors raised by the Algolia client are propagated unchanged.
 */
async function indexSearchContent(objects) {
  const {
    NEXT_PUBLIC_ALGOLIA_APP_ID: appId,
    NEXT_PUBLIC_ALGOLIA_INDEX: index,
    ALGOLIA_API_KEY: apiKey,
  } = process.env

  if (!apiKey || !appId || !index) {
    // The API key is a secret: report only whether it is present so it never
    // ends up in CI logs when one of the other variables is the missing one.
    const apiKeyStatus = apiKey ? '[set]' : '[missing]'
    throw new Error(
      `[*** Algolia Search Indexing Error ***] Received: ALGOLIA_API_KEY=${apiKeyStatus} NEXT_PUBLIC_ALGOLIA_APP_ID=${appId} NEXT_PUBLIC_ALGOLIA_INDEX=${index} \n Please ensure all Algolia Search-related environment vars are set in CI settings.`
    )
  }

  console.log(`updating ${objects.length} indices...`)

  // Client errors are intentionally not caught and re-wrapped here: wrapping
  // them in `new Error(error)` would stringify them and discard the original
  // stack trace and error type.
  const searchClient = algoliasearch(appId, apiKey)
  const searchIndex = searchClient.initIndex(index)

  const { objectIDs } = await searchIndex.partialUpdateObjects(objects, {
    createIfNotExists: true,
  })

  // Set lookup keeps the stale check linear in the size of the index.
  const currentIds = new Set(objectIDs)
  const staleIds = []

  await searchIndex.browseObjects({
    query: '',
    batch: (batch) => {
      for (const { objectID } of batch) {
        if (!currentIds.has(objectID)) {
          staleIds.push(objectID)
        }
      }
    },
  })

  if (staleIds.length > 0) {
    console.log(`deleting ${staleIds.length} stale indices:`)
    console.log(staleIds)
    await searchIndex.deleteObjects(staleIds)
  }

  console.log('done')
  process.exit(0)
}
2020-07-06 10:05:26 -04:00
/**
 * Collects the text of every level 1 and level 2 heading in a document.
 *
 * The content is run through remark with a small plugin that visits each
 * `heading` node of the syntax tree and records its text.
 *
 * @param {string} mdxContent - Markdown/MDX source text of a page.
 * @returns {Promise<string[]>} Heading titles, in the order they were visited.
 */
async function collectHeadings(mdxContent) {
  const headings = []

  // Text of a node: its own `value` when it has one, otherwise the
  // concatenated text of its children. Recursing is what keeps words wrapped
  // in emphasis, strong or link nodes, whose text lives on nested children
  // rather than on the node itself.
  const textOf = (node) => {
    if (node.value) return node.value
    if (node.children) return node.children.map(textOf).join('')
    return ''
  }

  const headingMapper = () => (tree) => {
    visit(tree, 'heading', (node) => {
      // Only include level 1 or level 2 headings
      if (node.depth < 3) {
        headings.push(node.children.map(textOf).join(''))
      }
    })
  }

  await remark().use(headingMapper).process(mdxContent)
  return headings
}