Re-work and simplify search plugin

This commit is contained in:
Ben Edgington
2023-01-09 09:48:01 +00:00
parent 297d69e4a3
commit 1d8d0cf9fa
5 changed files with 140 additions and 142 deletions

View File

@@ -91,8 +91,6 @@ module.exports = {
resolve: 'my-search-index',
options: {
enabled: true,
// Query that matches the element via which the HTML is included in the page template.
root: 'main',
// Matching elements have their text added to the index. First match wins.
chunkTypes: [
{query: 'figcaption', label: 'Figure caption'},
@@ -103,14 +101,14 @@ module.exports = {
{query: 'h3, h4, h5, h6', label: 'Heading'},
{query: 'p', label: 'Paragraph'},
],
// Note, only pages under src/md/pages have a "hide" property.
pageFilter: '{frontmatter: {hide: {eq: false}}}',
exclude: {
// Speed up the build (these are excluded from the index by pageFilter, anyway).
pages: ['/404.html', '/annotated-spec/', '/contact/', '/contents/', '/search/', '/'],
// Elements matching this query are ignored completely, including their text.
// Note, only pages under src/md/pages have a "hide" property.
frontmatter: [{hide: true}, {hide: null}],
// The frontmatter filter takes care of excluding a good set of pages for now.
pages: [],
// Elements matching this query are ignored completely, including their text:
ignore: 'svg *, details *, mtable *, mrow *, [aria-hidden="true"] *, .footnote-ref',
// Chunks matching this query are excluded as duplicates (to handle nested matches).
// Chunks matching this query are excluded as duplicates (to handle nested matches):
dedup: '[id^="fn-"] *, figcaption *, li *',
}
},

View File

@@ -1,7 +1,7 @@
const cheerio = require('cheerio')
/*
* Creates a GraphQL node containing data for the local search
* Creates GraphQL nodes containing data for the local search
*/
// Concatenate all text in child nodes while respecting exclusions
@@ -9,15 +9,11 @@ const getText = ($, node, exclude) => {
let text = ''
if ($(node).is(exclude.ignore)) {
return text
}
if (node.type === 'text') {
text += node.data
}
$(node).contents().each(function (i, e) {
$(node).contents().not(exclude.ignore).each(function (i, e) {
text += getText($, e, exclude)
})
@@ -25,44 +21,65 @@ const getText = ($, node, exclude) => {
}
// Recurse until we find an element we want to treat as a chunk, then get all its text content.
const getChunks = ($, node, chunkTypes, exclude) => {
const getChunks = ($, node, chunkTypes, exclude, counts) => {
if (counts === undefined) {
counts = Array(chunkTypes.length).fill(0)
}
const chunks = []
if ($(node).is(exclude.ignore) || $(node).is(exclude.dedup)) {
return chunks
}
for (let idx = 0; idx < chunkTypes.length; idx++) {
chunkTypes.every( (type) => {
const type = chunkTypes[idx]
if ($(node).is(type.query)) {
const tagName = $(node).get(0).tagName
let id = $(node).attr('id')
if ( id === undefined) {
id = tagName + '_' + counts[idx]
$(node).attr('id', id)
++counts[idx]
}
const text = getText($, node, exclude)
if (text !== '') {
chunks.push(
{
type: $(node).get(0).tagName,
type: tagName,
label: type.label,
id: $(node).attr('id'),
id: id,
text: text,
})
}
// Add a node only once
return false
break
}
return true
}
$(node).children().not(exclude.ignore).not(exclude.dedup).each(function (i, e) {
chunks.push(...getChunks($, e, chunkTypes, exclude, counts))
})
$(node).children().each(function (i, e) {
chunks.push(getChunks($, e, chunkTypes, exclude))
})
return chunks
}
return chunks.flat()
// Decide from its frontmatter whether a page is to be left out of the search index.
//
// `exclude.frontmatter` is a list of test objects such as `{hide: true}`. A test matches when
// the page's frontmatter has the test's key as an own property and the two values are loosely
// equal. Only the first key of each test object is examined. The first matching test wins.
//
// Returns true if any test matches. Returns false if none matches, if `exclude` carries no
// `frontmatter` list, or if the page has no frontmatter at all.
const isExcludedFrontmatter = (frontmatter, exclude) => {
  // The plugin's default for `exclude` only applies when the option is missing entirely, so a
  // user-supplied `exclude` may well lack the `frontmatter` list. Treat that as "no tests".
  const tests = (exclude && Array.isArray(exclude.frontmatter)) ? exclude.frontmatter : []

  // hasOwnProperty.call() throws on null and undefined, so bail out early for such pages.
  if (frontmatter === null || frontmatter === undefined) {
    return false
  }

  return tests.some((test) => {
    const [key] = Object.keys(test)
    // Loose equality is kept from the existing behaviour: it means that a `null` test value
    // also matches a property that is present but `undefined`.
    return Object.prototype.hasOwnProperty.call(frontmatter, key)
      && frontmatter[key] == test[key]
  })
}
exports.createPages = async (
{
actions,
actions: { createNode },
graphql,
reporter,
createNodeId,
createContentDigest,
}, pluginOptions,
@@ -70,77 +87,83 @@ exports.createPages = async (
const {
enabled = true,
root = '',
chunkTypes = [],
pageFilter = '{}',
exclude = {pages: [], ignore: '', dedup: ''},
exclude = {frontmatter: [], pages: [], ignore: '', dedup: ''},
} = pluginOptions
const mySearchData = []
if (enabled) {
const result = await graphql(`
{
allMarkdownRemark(filter: ${pageFilter}) {
edges {
node {
html
frontmatter {
path
titles
}
const result = await graphql(`
{
allMarkdownRemark {
edges {
node {
html
frontmatter {
path
index
sequence
titles
hide
}
}
}
}
`)
const pages = result.data.allMarkdownRemark.edges
await Promise.all(pages.map(async (page) => {
const frontmatter = page.node.frontmatter
if (frontmatter !== undefined && exclude.pages.indexOf(frontmatter.path) === -1) {
// Get the HTML. This is the contents of `dangerouslySetInnerHTML={{ __html: html }}`
// in the page template.
const $ = cheerio.load(page.node.html, null, false)
// Changes to the HTML AST made here will not persist, but we need to do
// exactly the same as in gatsby-ssr so that our ids end up consistent.
chunkTypes.forEach( (type) => {
$(type.query).not(exclude.ignore).not(exclude.dedup).not('[id]').each( function (i, e) {
$(this).attr('id', $(this).get(0).tagName + '_' + i)
})
})
const chunks = getChunks($, $.root(), chunkTypes, exclude)
mySearchData.push({
path: frontmatter.path,
title: frontmatter.titles.filter(x => x !== '').join(' | '),
chunks: chunks,
})
}
}))
}
name = 'mySearchData'
actions.createNode({
id: createNodeId(name),
data: mySearchData,
internal: {
type: name,
contentDigest: createContentDigest(mySearchData)
}
})
`)
const pages = result.data.allMarkdownRemark.edges
await Promise.all(pages.map(async (page) => {
const $ = cheerio.load(page.node.html, null, false)
const frontmatter = page.node.frontmatter
let chunks = []
if (enabled
&& frontmatter !== undefined
&& isExcludedFrontmatter(frontmatter, exclude) === false
&& exclude.pages.indexOf(frontmatter.path) === -1) {
chunks = getChunks($, $.root(), chunkTypes, exclude)
}
// It seems to be hard to modify the underlying MarkdownRemark node's HTML, so we add
// the modified HTML to a new node and deal with it in the page template.
const nodeData = {
frontmatter: {
path: frontmatter.path,
index: frontmatter.index,
titles: frontmatter.titles,
sequence: frontmatter.sequence,
},
chunks: chunks,
html: $.html(),
}
createNode({
...nodeData,
id: createNodeId(nodeData.frontmatter.path),
internal: {
type: 'mySearchData',
contentDigest: createContentDigest(nodeData)
}
})
}))
}
exports.createSchemaCustomization = ({ actions: { createTypes } }) => {
createTypes(`
type Frontmatter {
path: String!
index: [Int]
titles: [String]
sequence: Int
}
type mySearchData implements Node {
data: JSON
frontmatter: Frontmatter!
chunks: JSON
html: String
}
`)
}

View File

@@ -1,33 +0,0 @@
const { renderToString } = require('react-dom/server')
const cheerio = require('cheerio')
/*
* Adds ID anchors to all elements that might appear in the local search
*/
exports.replaceRenderer = ({ pathname, bodyComponent, replaceBodyHTMLString }, pluginOptions) => {
const {
enabled = true,
root = 'body',
chunkTypes = [],
exclude = {pages: [], ignore: '', dedup: ''},
} = pluginOptions
if (enabled && exclude.pages.indexOf(pathname) == -1) {
// Get the HTML
const html = renderToString(bodyComponent)
const $ = cheerio.load(html, null, false)
// Modify the HTML - add id attributes where required.
chunkTypes.forEach( (type) => {
$(root + ' *').filter(type.query).not(exclude.ignore).not(exclude.dedup).not('[id]').each( function (i, e) {
$(this).attr('id', $(this).get(0).tagName + '_' + i)
})
})
// Replace the HTML
replaceBodyHTMLString($.html())
}
}

View File

@@ -14,15 +14,16 @@ const getSearchResults = (query, data) => {
}
// Match the starts of words only. The "d" flag gives us the matching indices.
const regex = RegExp('(^|\\W|_)' + escapeRegExp(query.searchText), 'gd' + (query.isCaseSensitive ? '' : 'i'))
const regex = RegExp('(^|\\W|_)' + escapeRegExp(query.searchText),
'gd' + (query.isCaseSensitive ? '' : 'i'))
const result = data.map( (page) => {
const result = data.map( ({ node }) => {
let score = 0
const matches = []
for (let i = 0; i < page.chunks?.length; i++) {
for (let i = 0; i < node.chunks?.length; i++) {
let chunk = page.chunks[i]
let chunk = node.chunks[i]
let match
const indices = []
while ((match = regex.exec(chunk.text)) !== null) {
@@ -44,27 +45,35 @@ const getSearchResults = (query, data) => {
}
return matches.length === 0 ? null : {
url: page.path,
title: page.title,
url: node.frontmatter.path,
title: node.frontmatter.titles.filter(x => x).join(' | '),
matches: matches,
score: score,
}
})
return result.filter(x => x !== null).sort((a, b) => (b.score - a.score))
return result.filter(x => x).sort((a, b) => (b.score - a.score))
}
const Search = () => {
const queryData = useStaticQuery(graphql`
query {
mySearchData {
data
allMySearchData {
edges {
node {
frontmatter {
path
titles
}
chunks
}
}
}
}
`)
const searchData = queryData.mySearchData.data
const searchData = queryData.allMySearchData.edges
const [searchQuery, setQuery] = React.useState({
searchText: '',
@@ -82,7 +91,7 @@ const Search = () => {
return { ...previousState, isCaseSensitive: !previousState.isCaseSensitive }
});
}
const results = getSearchResults(searchQuery, searchData)
const pages = results.map((result) => {
@@ -93,7 +102,7 @@ const Search = () => {
<span className='match-text'>
{match.text.substring(indices[0], indices[1])}
</span>,
(i === match.indices.length -1) ? match.text.substring(indices[1]) : '',
(i === match.indices.length - 1) ? match.text.substring(indices[1]) : '',
]
})
return (
@@ -141,7 +150,7 @@ const Search = () => {
<label htmlFor="is-case-sensitive">Case sensitive</label>
</span>
</div>
<div id="search-results">
<div id="search-results">
{results.length > 0 ? (
<ul>
{pages}

View File

@@ -17,14 +17,14 @@ import "../css/page.css"
export function Head({ data }) {
const { markdownRemark, site } = data
const { frontmatter } = markdownRemark
const { mySearchData, site } = data
const frontmatter = mySearchData.frontmatter
const indexArray = frontmatter.index
var pageTitle = site.siteMetadata.title
if (frontmatter.titles !== null) {
const titles = frontmatter.titles.filter(x => x !== "")
const titles = frontmatter.titles.filter(x => x !== '')
const number = (indexArray.length >= 2) ? indexArray.join('.') : ''
pageTitle += ' | ' + number + ' ' + titles[titles.length - 1]
}
@@ -36,16 +36,17 @@ export function Head({ data }) {
export default function Template({ data }) {
const { html, frontmatter } = data.markdownRemark
const { html, frontmatter } = data.mySearchData
const indexArray = frontmatter.index
const path = frontmatter.path
const pageExtras = frontmatter.path.startsWith('/search')
const pageExtras = path.startsWith('/search')
? <Search />
: <Subsections indexArray={indexArray} />
return (
<>
<Banner path={frontmatter.path} />
<Banner path={path} />
<div id="page">
<Sidebar index={frontmatter.index} />
<div id="main-content">
@@ -58,7 +59,7 @@ export default function Template({ data }) {
<Footer />
<PrevNext seq={frontmatter.sequence} />
</div>
<PageNavi path={frontmatter.path} />
<PageNavi path={path} />
<FootnoteTooltips />
</div>
<PrintScripts />
@@ -68,7 +69,7 @@ export default function Template({ data }) {
export const pageQuery = graphql`
query($path: String!) {
markdownRemark(frontmatter: { path: { eq: $path } }) {
mySearchData(frontmatter: { path: { eq: $path } }) {
frontmatter {
index
path