Re-work and simplify search plugin

This commit is contained in:
Ben Edgington
2023-01-09 09:48:01 +00:00
parent 297d69e4a3
commit 1d8d0cf9fa
5 changed files with 140 additions and 142 deletions

View File

@@ -1,7 +1,7 @@
const cheerio = require('cheerio')
/*
* Creates a GraphQL node containing data for the local search
* Creates GraphQL nodes containing data for the local search
*/
// Concatenate all text in child nodes while respecting exclusions
@@ -9,15 +9,11 @@ const getText = ($, node, exclude) => {
let text = ''
if ($(node).is(exclude.ignore)) {
return text
}
if (node.type === 'text') {
text += node.data
}
$(node).contents().each(function (i, e) {
$(node).contents().not(exclude.ignore).each(function (i, e) {
text += getText($, e, exclude)
})
@@ -25,44 +21,65 @@ const getText = ($, node, exclude) => {
}
// Recurse until we find an element we want to treat as a chunk, then get all its text content.
const getChunks = ($, node, chunkTypes, exclude) => {
const getChunks = ($, node, chunkTypes, exclude, counts) => {
if (counts === undefined) {
counts = Array(chunkTypes.length).fill(0)
}
const chunks = []
if ($(node).is(exclude.ignore) || $(node).is(exclude.dedup)) {
return chunks
}
for (let idx = 0; idx < chunkTypes.length; idx++) {
chunkTypes.every( (type) => {
const type = chunkTypes[idx]
if ($(node).is(type.query)) {
const tagName = $(node).get(0).tagName
let id = $(node).attr('id')
if ( id === undefined) {
id = tagName + '_' + counts[idx]
$(node).attr('id', id)
++counts[idx]
}
const text = getText($, node, exclude)
if (text !== '') {
chunks.push(
{
type: $(node).get(0).tagName,
type: tagName,
label: type.label,
id: $(node).attr('id'),
id: id,
text: text,
})
}
// Add a node only once
return false
break
}
return true
}
$(node).children().not(exclude.ignore).not(exclude.dedup).each(function (i, e) {
chunks.push(...getChunks($, e, chunkTypes, exclude, counts))
})
$(node).children().each(function (i, e) {
chunks.push(getChunks($, e, chunkTypes, exclude))
})
return chunks
}
return chunks.flat()
const isExcludedFrontmatter = (frontmatter, exclude) => {
for (let i = 0; i < exclude.frontmatter.length; i++) {
const test = exclude.frontmatter[i]
const [key, ...rest] = Object.keys(test)
if (Object.prototype.hasOwnProperty.call(frontmatter, key)
&& frontmatter[key] == test[key]) {
return true
}
}
return false
}
exports.createPages = async (
{
actions,
actions: { createNode },
graphql,
reporter,
createNodeId,
createContentDigest,
}, pluginOptions,
@@ -70,77 +87,83 @@ exports.createPages = async (
const {
enabled = true,
root = '',
chunkTypes = [],
pageFilter = '{}',
exclude = {pages: [], ignore: '', dedup: ''},
exclude = {frontmatter: [], pages: [], ignore: '', dedup: ''},
} = pluginOptions
const mySearchData = []
if (enabled) {
const result = await graphql(`
{
allMarkdownRemark(filter: ${pageFilter}) {
edges {
node {
html
frontmatter {
path
titles
}
const result = await graphql(`
{
allMarkdownRemark {
edges {
node {
html
frontmatter {
path
index
sequence
titles
hide
}
}
}
}
`)
const pages = result.data.allMarkdownRemark.edges
await Promise.all(pages.map(async (page) => {
const frontmatter = page.node.frontmatter
if (frontmatter !== undefined && exclude.pages.indexOf(frontmatter.path) === -1) {
// Get the HTML. This is the contents of `dangerouslySetInnerHTML={{ __html: html }}`
// in the page template.
const $ = cheerio.load(page.node.html, null, false)
// Changes to the HTML AST made here will not persist, but we need to do
// exactly the same as in gatsby-ssr so that our ids end up consistent.
chunkTypes.forEach( (type) => {
$(type.query).not(exclude.ignore).not(exclude.dedup).not('[id]').each( function (i, e) {
$(this).attr('id', $(this).get(0).tagName + '_' + i)
})
})
const chunks = getChunks($, $.root(), chunkTypes, exclude)
mySearchData.push({
path: frontmatter.path,
title: frontmatter.titles.filter(x => x !== '').join(' | '),
chunks: chunks,
})
}
}))
}
name = 'mySearchData'
actions.createNode({
id: createNodeId(name),
data: mySearchData,
internal: {
type: name,
contentDigest: createContentDigest(mySearchData)
}
})
`)
const pages = result.data.allMarkdownRemark.edges
await Promise.all(pages.map(async (page) => {
const $ = cheerio.load(page.node.html, null, false)
const frontmatter = page.node.frontmatter
let chunks = []
if (enabled
&& frontmatter !== undefined
&& isExcludedFrontmatter(frontmatter, exclude) === false
&& exclude.pages.indexOf(frontmatter.path) === -1) {
chunks = getChunks($, $.root(), chunkTypes, exclude)
}
// It seems to be hard to modify the underlying MarkdownRemark node's HTML, so we add
// the modified HTML to a new node and deal with it in the page template.
const nodeData = {
frontmatter: {
path: frontmatter.path,
index: frontmatter.index,
titles: frontmatter.titles,
sequence: frontmatter.sequence,
},
chunks: chunks,
html: $.html(),
}
createNode({
...nodeData,
id: createNodeId(nodeData.frontmatter.path),
internal: {
type: 'mySearchData',
contentDigest: createContentDigest(nodeData)
}
})
}))
}
exports.createSchemaCustomization = ({ actions: { createTypes } }) => {
createTypes(`
type Frontmatter {
path: String!
index: [Int]
titles: [String]
sequence: Int
}
type mySearchData implements Node {
data: JSON
frontmatter: Frontmatter!
chunks: JSON
html: String
}
`)
}