diff --git a/gatsby-config.js b/gatsby-config.js
index 60c69b6..9e83aa5 100644
--- a/gatsby-config.js
+++ b/gatsby-config.js
@@ -91,8 +91,6 @@ module.exports = {
       resolve: 'my-search-index',
       options: {
         enabled: true,
-        // Query that matches the element via which the HTML is included in the page template.
-        root: 'main',
         // Matching elements have their text added to the index. First match wins.
         chunkTypes: [
           {query: 'figcaption', label: 'Figure caption'},
@@ -103,14 +101,14 @@ module.exports = {
           {query: 'h3, h4, h5, h6', label: 'Heading'},
           {query: 'p', label: 'Paragraph'},
         ],
-        // Note, only pages under src/md/pages have a "hide" property.
-        pageFilter: '{frontmatter: {hide: {eq: false}}}',
         exclude: {
-          // Speed up the build (these are excluded from the index by pageFilter, anyway).
-          pages: ['/404.html', '/annotated-spec/', '/contact/', '/contents/', '/search/', '/'],
-          // Elements matching this query are ignored completely, including their text.
+          // Note, only pages under src/md/pages have a "hide" property.
+          frontmatter: [{hide: true}, {hide: null}],
+          // The frontmatter filter takes care of excluding a good set of pages for now.
+          pages: [],
+          // Elements matching this query are ignored completely, including their text:
           ignore: 'svg *, details *, mtable *, mrow *, [aria-hidden="true"] *, .footnote-ref',
-          // Chunks matching this query are excluded as duplicates (to handle nested matches).
+          // Chunks matching this query are excluded as duplicates (to handle nested matches):
           dedup: '[id^="fn-"] *, figcaption *, li *',
         }
       },
diff --git a/plugins/my-search-index/gatsby-node.js b/plugins/my-search-index/gatsby-node.js
index 23c2ffd..cffbb04 100644
--- a/plugins/my-search-index/gatsby-node.js
+++ b/plugins/my-search-index/gatsby-node.js
@@ -1,7 +1,7 @@
 const cheerio = require('cheerio')
 
 /*
- * Creates a GraphQL node containing data for the local search
+ * Creates GraphQL nodes containing data for the local search
  */
 
 // Concatenate all text in child nodes while respecting exclusions
@@ -9,15 +9,11 @@ const getText = ($, node, exclude) => {
 
   let text = ''
 
-  if ($(node).is(exclude.ignore)) {
-    return text
-  }
-
   if (node.type === 'text') {
     text += node.data
   }
 
-  $(node).contents().each(function (i, e) {
+  $(node).contents().not(exclude.ignore).each(function (i, e) {
     text += getText($, e, exclude)
   })
 
@@ -25,44 +21,65 @@ const getText = ($, node, exclude) => {
 }
 
 // Recurse until we find an element we want to treat as a chunk, then get all its text content.
-const getChunks = ($, node, chunkTypes, exclude) => {
+const getChunks = ($, node, chunkTypes, exclude, counts) => {
+
+  if (counts === undefined) {
+    counts = Array(chunkTypes.length).fill(0)
+  }
 
   const chunks = []
 
-  if ($(node).is(exclude.ignore) || $(node).is(exclude.dedup)) {
-    return chunks
-  }
+  for (let idx = 0; idx < chunkTypes.length; idx++) {
 
-  chunkTypes.every( (type) => {
+    const type = chunkTypes[idx]
 
     if ($(node).is(type.query)) {
+
+      const tagName = $(node).get(0).tagName
+      let id = $(node).attr('id')
+      if ( id === undefined) {
+        id = tagName + '_' + counts[idx]
+        $(node).attr('id', id)
+        ++counts[idx]
+      }
+
       const text = getText($, node, exclude)
       if (text !== '') {
         chunks.push( {
-          type: $(node).get(0).tagName,
+          type: tagName,
           label: type.label,
-          id: $(node).attr('id'),
+          id: id,
           text: text,
         })
       }
-      // Add a node only once
-      return false
+      break
     }
-    return true
+  }
+
+  $(node).children().not(exclude.ignore).not(exclude.dedup).each(function (i, e) {
+    chunks.push(...getChunks($, e, chunkTypes, exclude, counts))
   })
 
-  $(node).children().each(function (i, e) {
-    chunks.push(getChunks($, e, chunkTypes, exclude))
-  })
+  return chunks
+}
 
-  return chunks.flat()
+const isExcludedFrontmatter = (frontmatter, exclude) => {
+
+  for (let i = 0; i < exclude.frontmatter.length; i++) {
+    const test = exclude.frontmatter[i]
+    const [key, ...rest] = Object.keys(test)
+    if (Object.prototype.hasOwnProperty.call(frontmatter, key)
+        && frontmatter[key] == test[key]) {
+      return true
+    }
+  }
+  return false
 }
 
 exports.createPages = async (
   {
-    actions,
+    actions: { createNode },
     graphql,
-    reporter,
     createNodeId,
     createContentDigest,
   },
   pluginOptions,
@@ -70,77 +87,83 @@ exports.createPages = async (
 
   const {
     enabled = true,
-    root = '',
     chunkTypes = [],
-    pageFilter = '{}',
-    exclude = {pages: [], ignore: '', dedup: ''},
+    exclude = {frontmatter: [], pages: [], ignore: '', dedup: ''},
   } = pluginOptions
 
-  const mySearchData = []
-
-  if (enabled) {
-
-    const result = await graphql(`
-      {
-        allMarkdownRemark(filter: ${pageFilter}) {
-          edges {
-            node {
-              html
-              frontmatter {
-                path
-                titles
-              }
+  const result = await graphql(`
+    {
+      allMarkdownRemark {
+        edges {
+          node {
+            html
+            frontmatter {
+              path
+              index
+              sequence
+              titles
+              hide
+            }
           }
         }
       }
-    `)
-
-    const pages = result.data.allMarkdownRemark.edges
-
-    await Promise.all(pages.map(async (page) => {
-
-      const frontmatter = page.node.frontmatter
-      if (frontmatter !== undefined && exclude.pages.indexOf(frontmatter.path) === -1) {
-
-        // Get the HTML. This is the contents of `dangerouslySetInnerHTML={{ __html: html }}`
-        // in the page template.
-        const $ = cheerio.load(page.node.html, null, false)
-
-        // Changes to the HTML AST made here will not persist, but we need to do
-        // exactly the same as in gatsby-ssr so that our ids end up consistent.
-        chunkTypes.forEach( (type) => {
-          $(type.query).not(exclude.ignore).not(exclude.dedup).not('[id]').each( function (i, e) {
-            $(this).attr('id', $(this).get(0).tagName + '_' + i)
-          })
-        })
-
-        const chunks = getChunks($, $.root(), chunkTypes, exclude)
-
-        mySearchData.push({
-          path: frontmatter.path,
-          title: frontmatter.titles.filter(x => x !== '').join(' | '),
-          chunks: chunks,
-        })
-      }
-    }))
-  }
-
-  name = 'mySearchData'
-  actions.createNode({
-    id: createNodeId(name),
-    data: mySearchData,
-    internal: {
-      type: name,
-      contentDigest: createContentDigest(mySearchData) }
-  })
+  `)
+
+  const pages = result.data.allMarkdownRemark.edges
+
+  await Promise.all(pages.map(async (page) => {
+
+    const $ = cheerio.load(page.node.html, null, false)
+
+    const frontmatter = page.node.frontmatter
+    let chunks = []
+
+    if (enabled
+        && frontmatter !== undefined
+        && isExcludedFrontmatter(frontmatter, exclude) === false
+        && exclude.pages.indexOf(frontmatter.path) === -1) {
+
+      chunks = getChunks($, $.root(), chunkTypes, exclude)
+    }
+
+    // It seems to be hard to modify the underlying MarkdownRemark node's HTML, so we add
+    // the modified HTML to a new node and deal with it in the page template.
+    const nodeData = {
+      frontmatter: {
+        path: frontmatter.path,
+        index: frontmatter.index,
+        titles: frontmatter.titles,
+        sequence: frontmatter.sequence,
+      },
+      chunks: chunks,
+      html: $.html(),
+    }
+
+    createNode({
+      ...nodeData,
+      id: createNodeId(nodeData.frontmatter.path),
+      internal: {
+        type: 'mySearchData',
+        contentDigest: createContentDigest(nodeData)
+      }
+    })
+  }))
 }
 
 exports.createSchemaCustomization = ({ actions: { createTypes } }) => {
   createTypes(`
+    type Frontmatter {
+      path: String!
+      index: [Int]
+      titles: [String]
+      sequence: Int
+    }
+
     type mySearchData implements Node {
-      data: JSON
+      frontmatter: Frontmatter!
+      chunks: JSON
+      html: String
     }
   `)
 }
diff --git a/plugins/my-search-index/gatsby-ssr.js b/plugins/my-search-index/gatsby-ssr.js
deleted file mode 100644
index 505d404..0000000
--- a/plugins/my-search-index/gatsby-ssr.js
+++ /dev/null
@@ -1,33 +0,0 @@
-const { renderToString } = require('react-dom/server')
-const cheerio = require('cheerio')
-
-/*
- * Adds ID anchors to all elements that might appear in the local search
- */
-
-exports.replaceRenderer = ({ pathname, bodyComponent, replaceBodyHTMLString }, pluginOptions) => {
-
-  const {
-    enabled = true,
-    root = 'body',
-    chunkTypes = [],
-    exclude = {pages: [], ignore: '', dedup: ''},
-  } = pluginOptions
-
-  if (enabled && exclude.pages.indexOf(pathname) == -1) {
-
-    // Get the HTML
-    const html = renderToString(bodyComponent)
-    const $ = cheerio.load(html, null, false)
-
-    // Modify the HTML - add id attributes where required.
-    chunkTypes.forEach( (type) => {
-      $(root + ' *').filter(type.query).not(exclude.ignore).not(exclude.dedup).not('[id]').each( function (i, e) {
-        $(this).attr('id', $(this).get(0).tagName + '_' + i)
-      })
-    })
-
-    // Replace the HTML
-    replaceBodyHTMLString($.html())
-  }
-}
diff --git a/src/components/search.js b/src/components/search.js
index 62ab513..a35b6a8 100644
--- a/src/components/search.js
+++ b/src/components/search.js
@@ -14,15 +14,16 @@ const getSearchResults = (query, data) => {
   }
 
   // Match the starts of words only. The "d" flag gives us the matching indices.
-  const regex = RegExp('(^|\\W|_)' + escapeRegExp(query.searchText), 'gd' + (query.isCaseSensitive ? '' : 'i'))
+  const regex = RegExp('(^|\\W|_)' + escapeRegExp(query.searchText),
+    'gd' + (query.isCaseSensitive ? '' : 'i'))
 
-  const result = data.map( (page) => {
+  const result = data.map( ({ node }) => {
 
     let score = 0
     const matches = []
 
-    for (let i = 0; i < page.chunks?.length; i++) {
+    for (let i = 0; i < node.chunks?.length; i++) {
 
-      let chunk = page.chunks[i]
+      let chunk = node.chunks[i]
       let match
      const indices = []
      while ((match = regex.exec(chunk.text)) !== null) {
@@ -44,27 +45,35 @@ const getSearchResults = (query, data) => {
     }
 
     return matches.length === 0 ? null : {
-      url: page.path,
-      title: page.title,
+      url: node.frontmatter.path,
+      title: node.frontmatter.titles.filter(x => x).join(' | '),
       matches: matches,
       score: score,
     }
   })
 
-  return result.filter(x => x !== null).sort((a, b) => (b.score - a.score))
+  return result.filter(x => x).sort((a, b) => (b.score - a.score))
 }
 
 const Search = () => {
 
   const queryData = useStaticQuery(graphql`
     query {
-      mySearchData {
-        data
+      allMySearchData {
+        edges {
+          node {
+            frontmatter {
+              path
+              titles
+            }
+            chunks
+          }
+        }
       }
     }
   `)
 
-  const searchData = queryData.mySearchData.data
+  const searchData = queryData.allMySearchData.edges
 
   const [searchQuery, setQuery] = React.useState({
     searchText: '',
@@ -82,7 +91,7 @@ const Search = () => {
       return { ...previousState, isCaseSensitive: !previousState.isCaseSensitive }
     });
   }
-  
+
   const results = getSearchResults(searchQuery, searchData)
 
   const pages = results.map((result) => {
@@ -93,7 +102,7 @@ const Search = () => {
            {match.text.substring(indices[0], indices[1])}
          ,
-          (i === match.indices.length -1) ? match.text.substring(indices[1]) : '',
+          (i === match.indices.length - 1) ? match.text.substring(indices[1]) : '',
         ]
       })
       return (
@@ -141,7 +150,7 @@ const Search = () => {
-
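
For reference, a minimal sketch (not part of the diff) of how the new frontmatter-based exclusion behaves with the exclude.frontmatter rules configured in gatsby-config.js above. It uses the same matching logic as isExcludedFrontmatter in gatsby-node.js and runs in plain Node; the example frontmatter objects are hypothetical stand-ins for what the allMarkdownRemark query would return.

// Rules taken from gatsby-config.js: exclude hidden pages, and pages that never set "hide".
const exclude = { frontmatter: [{hide: true}, {hide: null}] }

// Same matching as isExcludedFrontmatter above. The loose == is what lets {hide: null}
// match pages whose frontmatter reports hide as null because the field is not set.
const isExcludedFrontmatter = (frontmatter, exclude) => {
  for (let i = 0; i < exclude.frontmatter.length; i++) {
    const test = exclude.frontmatter[i]
    const [key] = Object.keys(test)
    if (Object.prototype.hasOwnProperty.call(frontmatter, key)
        && frontmatter[key] == test[key]) {
      return true
    }
  }
  return false
}

// Hypothetical frontmatter values as returned by the GraphQL query:
console.log(isExcludedFrontmatter({ path: '/some-page/', hide: false }, exclude))      // false -> indexed
console.log(isExcludedFrontmatter({ path: '/annotated-spec/', hide: true }, exclude))  // true  -> skipped
console.log(isExcludedFrontmatter({ path: '/404.html', hide: null }, exclude))         // true  -> skipped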