mirror of
https://github.com/meteor/meteor.git
synced 2026-05-02 03:01:46 -04:00
117 lines
3.8 KiB
JavaScript
117 lines
3.8 KiB
JavaScript
// Algolia crawler configuration. Go to the Algolia
// [crawler dashboard](https://crawler.algolia.com/) if you want to apply any
// changes to the crawler.
// This copy is maintained here so that we have a backup of the crawler
// configuration and a git history of the changes made to it.

// This was based on https://v3-docs.meteor.com/
/**
 * Algolia Crawler definition for the Meteor v3 documentation site.
 *
 * Everything is scoped to https://v3-docs.meteor.com/ and every matched page
 * is indexed into `meteor_docs_v3` through the `helpers.docsearch` extractor.
 * `appId` and `apiKey` are intentionally redacted placeholders in this copy.
 */
new Crawler({
  appId: "....",
  apiKey: "....",
  rateLimit: 8,
  maxDepth: 10,
  startUrls: ["https://v3-docs.meteor.com/"],
  renderJavaScript: false,
  sitemaps: ["https://v3-docs.meteor.com/sitemap.xml"],
  exclusionPatterns: [],
  ignoreCanonicalTo: false,
  // Only links under the v3 docs host are followed.
  discoveryPatterns: ["https://v3-docs.meteor.com/**"],
  schedule: "at 05:10 on Saturday",
  actions: [
    {
      indexName: "meteor_docs_v3",
      pathsToMatch: ["https://v3-docs.meteor.com/**"],
      /**
       * Builds the records for one crawled page by delegating to the
       * DocSearch helper.
       *
       * @param {object} context - Extractor context supplied by the crawler.
       * @param {*} context.$ - Page handle provided by the crawler (unused here).
       * @param {object} context.helpers - Crawler helpers; only `docsearch` is used.
       * @returns {*} Whatever `helpers.docsearch` returns for this page.
       */
      recordExtractor: ({ $, helpers }) =>
        helpers.docsearch({
          recordProps: {
            // No selector is given for lvl0; a fixed default value is supplied instead.
            lvl0: {
              selectors: "",
              defaultValue: "Documentation",
            },
            // Heading levels and body text are all read from inside `.content`.
            lvl1: ".content h1",
            lvl2: ".content h2",
            lvl3: ".content h3",
            lvl4: ".content h4",
            lvl5: ".content h5",
            content: ".content p, .content li",
            // The same language values are emitted under both `lang` and `language`.
            lang: {
              defaultValue: ["en", "en-US"],
            },
            language: {
              defaultValue: ["en", "en-US"],
            },
          },
          indexHeadings: true,
          aggregateContent: true,
          recordVersion: "v3",
        }),
    },
  ],
  // Settings keyed by index name; the key matches `indexName` in `actions` above.
  initialIndexSettings: {
    meteor_docs_v3: {
      attributesForFaceting: ["type", "lang"],
      attributesToRetrieve: ["hierarchy", "content", "anchor", "url"],
      attributesToHighlight: ["hierarchy", "hierarchy_camel", "content"],
      attributesToSnippet: ["content:10"],
      camelCaseAttributes: ["hierarchy", "hierarchy_radio", "content"],
      // Listed as: hierarchy_radio (lvl0..lvl6), then hierarchy (lvl0..lvl6),
      // each preceded by its `_camel` variant, with `content` last.
      // NOTE(review): recordProps above defines selectors only up to lvl5;
      // the lvl6 entries are kept exactly as they were.
      searchableAttributes: [
        "unordered(hierarchy_radio_camel.lvl0)",
        "unordered(hierarchy_radio.lvl0)",
        "unordered(hierarchy_radio_camel.lvl1)",
        "unordered(hierarchy_radio.lvl1)",
        "unordered(hierarchy_radio_camel.lvl2)",
        "unordered(hierarchy_radio.lvl2)",
        "unordered(hierarchy_radio_camel.lvl3)",
        "unordered(hierarchy_radio.lvl3)",
        "unordered(hierarchy_radio_camel.lvl4)",
        "unordered(hierarchy_radio.lvl4)",
        "unordered(hierarchy_radio_camel.lvl5)",
        "unordered(hierarchy_radio.lvl5)",
        "unordered(hierarchy_radio_camel.lvl6)",
        "unordered(hierarchy_radio.lvl6)",
        "unordered(hierarchy_camel.lvl0)",
        "unordered(hierarchy.lvl0)",
        "unordered(hierarchy_camel.lvl1)",
        "unordered(hierarchy.lvl1)",
        "unordered(hierarchy_camel.lvl2)",
        "unordered(hierarchy.lvl2)",
        "unordered(hierarchy_camel.lvl3)",
        "unordered(hierarchy.lvl3)",
        "unordered(hierarchy_camel.lvl4)",
        "unordered(hierarchy.lvl4)",
        "unordered(hierarchy_camel.lvl5)",
        "unordered(hierarchy.lvl5)",
        "unordered(hierarchy_camel.lvl6)",
        "unordered(hierarchy.lvl6)",
        "content",
      ],
      distinct: true,
      attributeForDistinct: "url",
      customRanking: [
        "desc(weight.pageRank)",
        "desc(weight.level)",
        "asc(weight.position)",
      ],
      ranking: [
        "words",
        "filters",
        "typo",
        "attribute",
        "proximity",
        "exact",
        "custom",
      ],
      // Single-quoted because the tag itself contains double quotes.
      highlightPreTag: '<span class="algolia-docsearch-suggestion--highlight">',
      highlightPostTag: "</span>",
      minWordSizefor1Typo: 3,
      minWordSizefor2Typos: 7,
      allowTyposOnNumericTokens: false,
      minProximity: 1,
      ignorePlurals: true,
      advancedSyntax: true,
      attributeCriteriaComputedByMinProximity: true,
      removeWordsIfNoResults: "allOptional",
    },
  },
});