chore: Routing Refactor (#3)

* scrape with 1:1 mapping to origin repo

* exclude .md extension from file path in URLs inside MDs

* removed legacy static files

* remove image path manipulation

* move scrapper to new folder

* sidebar custom ordering implemented
This commit is contained in:
Filip Pajic
2024-04-25 09:29:22 +02:00
committed by GitHub
parent ac48b3aa6f
commit 32f0947064
14 changed files with 178 additions and 20 deletions

View File

@@ -0,0 +1,13 @@
import 'dotenv/config';
// GitHub API token, read once from the process environment when this module is loaded.
const GITHUB_TOKEN = process.env.GITHUB_TOKEN;

// Fail fast at import time: every consumer of this module needs the token.
if (!GITHUB_TOKEN) {
  throw new Error("Please provide the GITHUB_TOKEN")
}

export { GITHUB_TOKEN }

View File

@@ -0,0 +1,58 @@
import path from "path";
import { fetchFromGitHub } from "./github.mjs";
import axios from 'axios'
import { createDirectory, readFile, writeFile, writeLargeFile } from './file.mjs'
import { adjustPathForMarkdown, vacMarkdownToDocusaurusMarkdown } from './markdown-convertor.mjs'
/**
 * Downloads `url` as a stream and writes it to `fullFilePath`.
 *
 * The parent directory of `fullFilePath` is created (via `createDirectory`) after the
 * request has been issued and before the response body is piped to disk.
 *
 * @param {string} url - Location of the resource to download.
 * @param {string} fullFilePath - Destination path of the downloaded file.
 * @returns {Promise<void>} Settles once `writeLargeFile` has finished.
 */
async function downloadFile(url, fullFilePath) {
  const { data: responseStream } = await axios.get(url, { responseType: "stream" });

  const targetDirectory = path.dirname(fullFilePath)
  await createDirectory(targetDirectory)

  await writeLargeFile(fullFilePath, responseStream)
}
/**
 * Downloads a file below the current working directory and, when it is Markdown,
 * rewrites it in place with `vacMarkdownToDocusaurusMarkdown`.
 *
 * @param {string} url - Download URL of the file.
 * @param {string} filePath - Destination path, relative to `process.cwd()`.
 * @returns {Promise<void>}
 */
async function downloadAndModifyFile(url, filePath) {
  const absolutePath = path.join(process.cwd(), filePath)
  await downloadFile(url, absolutePath);

  // Only `.md` / `.mdx` files are post-processed; everything else is kept as downloaded.
  const isMarkdown = ['.md', '.mdx'].includes(path.extname(filePath))
  if (!isMarkdown) {
    return
  }

  const rawContent = (await readFile(absolutePath)).toString();
  const converted = vacMarkdownToDocusaurusMarkdown(rawContent, filePath);
  await writeFile(absolutePath, converted);
}
/**
 * Recursively mirrors a directory listing fetched through `fetchFromGitHub` into `basePath`.
 *
 * Entries of type `file` are downloaded (and post-processed) with `downloadAndModifyFile`;
 * entries of type `dir` are traversed recursively through their `url`.
 *
 * Errors are logged and swallowed on purpose (best effort): a failure inside one directory
 * does not abort the listing that is being processed by the caller.
 *
 * @param {string} dirUrl - URL whose response is the list of entries of the directory.
 * @param {string} basePath - Local directory the entries are written below.
 * @param {string} prefixToRemove - Leading part of each entry's `path` that is dropped
 *   before the path is appended to `basePath`.
 * @returns {Promise<void>}
 */
export async function fetchDirectoryContents(dirUrl, basePath, prefixToRemove) {
  try {
    const files = await fetchFromGitHub(dirUrl);
    if (!files) {
      console.log('No files found', files)
      return
    }

    // The prefix is compared literally. Building a RegExp from it would treat characters
    // such as "." or "+" in a directory name as pattern syntax and strip the wrong paths.
    const prefix = prefixToRemove ?? ''

    for (const file of files) {
      const relativePath = file.path.startsWith(prefix)
        ? file.path.slice(prefix.length)
        : file.path
      const filePath = path.join(basePath, relativePath)

      if (file.type === 'file') {
        await downloadAndModifyFile(file.download_url, filePath)
      } else if (file.type === 'dir') {
        await fetchDirectoryContents(file.url, basePath, prefixToRemove)
      }
    }
  } catch (e) {
    console.error('Error fetching files:', e)
  }
}

View File

@@ -0,0 +1,62 @@
import { mkdirp } from 'mkdirp'
import fs from 'fs'
import util from 'util'
import stream from 'stream'
/**
 * Promise wrapper around the callback-based `fs.readFile`.
 *
 * @param {string} path - File to read.
 * @returns {Promise<Buffer>} Resolves with the raw file contents; rejects with the fs error.
 */
export function readFile(path) {
  return new Promise((resolve, reject) => {
    fs.readFile(path, (error, contents) => (error ? reject(error) : resolve(contents)))
  });
}
/**
 * Promise wrapper around the callback-based `fs.writeFile`.
 *
 * @param {string} path - File to write.
 * @param {string|Buffer} content - Data handed to `fs.writeFile` unchanged.
 * @returns {Promise<void>} Resolves without a value; rejects with the fs error.
 */
export function writeFile(path, content) {
  return new Promise((resolve, reject) => {
    fs.writeFile(path, content, error => (error ? reject(error) : resolve()))
  })
}
/**
 * Pipes a readable stream into a file.
 *
 * `stream.pipeline` is used so that backpressure is honoured and larger files are not
 * corrupted, see https://nodejs.org/en/learn/modules/backpressuring-in-streams
 *
 * @param {string} path - Destination file.
 * @param {import('stream').Readable} data - Source stream.
 * @returns {Promise<void>} Settles when the pipeline has finished or failed.
 */
export async function writeLargeFile(path, data) {
  const destination = fs.createWriteStream(path)
  const pipelineAsync = util.promisify(stream.pipeline)
  return pipelineAsync(data, destination)
}
/**
 * Removes `path` together with everything below it.
 *
 * Uses `fs.rm` because the `recursive` option of `fs.rmdir` is deprecated in Node.js.
 * `force` is intentionally not set, so a missing path still rejects with `ENOENT`.
 *
 * @param {string} path - Directory to delete.
 * @returns {Promise<void>} Resolves without a value; rejects with the fs error.
 */
export function removeDirectory(path) {
  return fs.promises.rm(path, { recursive: true })
}
/**
 * Creates `path`, delegating to `mkdirp`.
 *
 * Why mkdirp: creating a root directory is platform specific. On Windows file systems an
 * attempt to create a root directory (a drive letter or root UNC path) fails - with EPERM
 * when it exists and with ENOENT when it does not. On POSIX file systems the same attempt
 * succeeds silently in recursive mode (and fails with EEXIST in non-recursive mode).
 * mkdirp preserves this system-specific behaviour by passing such requests directly to the
 * fs implementation without handling the resulting errors.
 *
 * @param {string} path - Directory to create.
 * @returns {Promise<*>} Whatever `mkdirp` resolves with; its rejections propagate unchanged.
 */
export async function createDirectory(path) {
  return mkdirp(path)
}

View File

@@ -0,0 +1,13 @@
import { GITHUB_TOKEN } from './config.mjs'
import axios from "axios";
/**
 * Performs an authenticated GET request and returns the response body.
 *
 * @param {string} url - URL to request.
 * @param {Function} [callback] - Not used by this function; kept so existing callers
 *   that pass it keep working.
 * @returns {Promise<*>} The `data` property of the axios response.
 */
export async function fetchFromGitHub(url, callback) {
  const headers = {
    'User-Agent': 'Node.js',
    'Authorization': `token ${GITHUB_TOKEN}`
  }
  const { data } = await axios.get(url, { headers });
  return data;
}

View File

@@ -0,0 +1,19 @@
import { fetchDirectoryContents } from './fetch-content.mjs'
// Top-level directories of the vacp2p/rfc-index repository that are mirrored locally.
const directoriesToSync = ['codex', 'nomos', 'status', 'vac', 'waku']

/**
 * Syncs every directory listed in `directoriesToSync`, one after another.
 * Each directory is written to `./<dirName>/` and the `<dirName>/` prefix is removed from
 * the remote paths so that the local layout maps 1:1 to the origin repository.
 */
async function main() {
  for (const dirName of directoriesToSync) {
    await fetchDirectoryContents(
      `https://api.github.com/repos/vacp2p/rfc-index/contents/${dirName}`,
      `./${dirName}/`,
      `${dirName}/`
    )
    console.log(`Synced ${dirName}`)
  }
}

main();

View File

@@ -0,0 +1,140 @@
/**
 * Repeats selected frontmatter fields as a bullet list right below the frontmatter.
 *
 * The fields `status`, `category` and `editor` and the `contributors` list are collected in
 * the order they appear, their label is capitalized, and the resulting list is inserted on
 * the line after the closing `---`. The frontmatter itself is left untouched.
 *
 * Only a frontmatter block that opens on the very first line and is closed later on is
 * considered; `---` lines further down (horizontal rules) are plain content. When there is
 * no such block, or none of the fields is present, the input is returned unchanged.
 *
 * @param {string} input - Full Markdown document.
 * @returns {string} The document with the bullet list inserted.
 */
function enhanceMarkdownWithBulletPointsCorrected(input) {
  const lines = input.split('\n')
  const isDelimiter = line => line.trim() === '---'

  if (!isDelimiter(lines[0])) {
    return input
  }
  const closingIndex = lines.findIndex((line, index) => index > 0 && isDelimiter(line))
  if (closingIndex === -1) {
    return input
  }

  // Each entry is [label, ...nestedValues]; single-line fields have no nested values.
  const extractedFields = []
  let contributorsLines = []
  let inContributors = false

  const flushContributors = () => {
    if (contributorsLines.length) {
      extractedFields.push(['contributors:', ...contributorsLines])
    }
    contributorsLines = []
    inContributors = false
  }

  for (const line of lines.slice(1, closingIndex)) {
    // Indented list items directly below `contributors:` belong to the contributors list.
    if (inContributors && /^\s+-/.test(line)) {
      contributorsLines.push(line.trim())
      continue
    }
    // Any other line ends the list. It is still inspected below, so a field that follows
    // the contributors is not lost.
    if (inContributors) {
      flushContributors()
    }

    if (line.startsWith('contributors:')) {
      inContributors = true
    } else if (/(status|category|editor):(.*)/.test(line)) {
      extractedFields.push([line.trim()])
    }
  }
  // Contributors are usually the last frontmatter entry.
  flushContributors()

  if (extractedFields.length === 0) {
    return input
  }

  const capitalize = text => text.charAt(0).toUpperCase() + text.slice(1)
  const bulletPoints = extractedFields
    .map(([label, ...values]) => {
      const head = `- ${capitalize(label)}`
      // Nested items are indented by two spaces so that they render as children of `head`.
      return values.length ? `${head}\n  ${values.join('\n  ')}` : head
    })
    .join('\n')

  const outputLines = [...lines]
  outputLines.splice(closingIndex + 1, 0, bulletPoints)
  return outputLines.join('\n')
}
/**
 * Derives a number from the frontmatter of a Markdown document.
 *
 * The text between the first pair of `---` lines is cut at the first `/`; the second
 * space-separated token of what precedes it is parsed as a base-10 integer
 * (e.g. `title: 12/FOO` yields 12).
 *
 * @param {string} content - Full Markdown document.
 * @returns {number} The parsed number, `NaN` when that token is missing or not numeric,
 *   or 1 when the document has no frontmatter block.
 */
function parseSlugFromFrontmatter(content) {
  const frontmatter = content.match(/---\s*\n([\s\S]*?)\n---/)
  if (!frontmatter) {
    return 1 // Fallback when no frontmatter is found
  }

  const [beforeFirstSlash] = frontmatter[1].split('/')
  const [, secondToken] = beforeFirstSlash.split(' ')
  return parseInt(secondToken, 10)
}
/**
 * Restores HTML comments whose delimiters were backslash-escaped.
 *
 * `\<!--` becomes `<!--` preceded by a newline and `--\>` becomes `-->` followed by a
 * newline, so every restored comment starts and ends on its own line.
 *
 * @param {string} htmlString - Text containing escaped comment delimiters.
 * @returns {string} Text with the comment delimiters restored.
 */
function unescapeHtmlComments(htmlString) {
  const withOpeners = htmlString.replace(/\\<\!--/g, '\n<!--')
  return withOpeners.replace(/--\\>/g, '-->\n')
}
/**
 * Rewrites Markdown links so that their target no longer carries the `.md` extension.
 *
 * `[text](path/file.md)` becomes `[text](path/file)`. Whatever follows the extension is
 * kept as the anchor: `file.md#sec` and `file.md/#sec` both become `file#sec`.
 * Targets where `.md` is only the beginning of a longer extension (e.g. `.mdx`) are left
 * untouched.
 *
 * @param {string} content - Markdown text.
 * @returns {string} The text with the link targets rewritten.
 */
function updateMarkdownLinksToExcludeMD(content) {
  // 1: link text, 2: target up to and including ".md", 3: remainder up to the closing ")".
  // The look-ahead rejects ".md" that is directly followed by a word character or "-".
  const markdownLink = /\[((?:(?!\]).)+)\]\(([^)]*?\.md)(?![\w-])(?:\/#)?([^)]*)\)/g

  return content.replace(markdownLink, (match, text, target, remainder) => {
    const url = target.replace(/\.md$/, '') // Remove .md extension from URL
    // Drop a leading "/" and a leading "#": the "#" is added back below, and keeping it
    // would turn `file.md#sec` into `file##sec`.
    const anchor = remainder.replace(/^\//, '').replace(/^#/, '')
    return `[${text}](${url}${anchor ? '#' + anchor : ''})`
  })
}
/**
 * Converts a Markdown document from the source repository into the form used by the site.
 *
 * Steps, in order:
 *  1. drop the first line that starts with `tags:` (its format is wrong for the site),
 *  2. turn `<br>` into `<br/>`,
 *  3. backslash-escape every `<` and `>`,
 *  4. drop the first line that starts with `slug:`,
 *  5. point empty links `[text]()` to `#`,
 *  6. restore escaped HTML comments,
 *  7. add the frontmatter bullet list,
 *  8. strip the `.md` extension from link targets.
 *
 * @param {string} fileContent - Original Markdown.
 * @returns {string} Converted Markdown.
 */
export function vacMarkdownToDocusaurusMarkdown(fileContent) {
  let convertedContent = fileContent;

  // Remove 'tags' line from frontmatter because the format is wrong.
  // Anchored to the start of a line (like the 'slug' rule below) so that text such as
  // "hashtags: ..." in the middle of another line is not cut out by mistake.
  convertedContent = convertedContent.replace(/^tags:.*\n?/m, '')

  // Replace <br> with <br/>
  convertedContent = convertedContent.replace(/<br>/g, '<br/>')

  // Escape < and > with \< and \>, respectively
  // Be cautious with this replacement; adjust as needed based on your context
  // NOTE(review): this also escapes the <br/> produced above and any ">" that starts a
  // blockquote - confirm that this is intended.
  convertedContent = convertedContent.replace(/</g, '\\<').replace(/>/g, '\\>')

  // Remove 'slug' line from frontmatter
  convertedContent = convertedContent.replace(/^slug:.*\n?/m, '')

  // Replace empty Markdown links with placeholder URL
  convertedContent = convertedContent.replace(/\[([^\]]+)\]\(\)/g, '[$1](#)')

  convertedContent = unescapeHtmlComments(convertedContent)
  convertedContent = enhanceMarkdownWithBulletPointsCorrected(convertedContent)
  convertedContent = updateMarkdownLinksToExcludeMD(convertedContent)

  return convertedContent;
}
/**
 * Moves a Markdown file one directory level up by dropping its parent directory from the
 * path (`a/b/c.md` becomes `a/c.md`).
 *
 * Paths are returned unchanged when they have no directory part, when they contain
 * `README.md`, or when the last segment does not end with `.md`.
 *
 * @param {string} filePath - Path using `/` as separator.
 * @returns {string} The adjusted path.
 */
export function adjustPathForMarkdown(filePath) {
  const segments = filePath.split('/')
  const fileName = segments[segments.length - 1]

  const hasDirectory = segments.length !== 1
  const isReadme = filePath.includes('README.md')
  if (!hasDirectory || isReadme || !fileName.endsWith('.md')) {
    return filePath
  }

  // Keep everything except the direct parent directory of the file.
  return [...segments.slice(0, -2), fileName].join('/')
}

View File

@@ -0,0 +1,28 @@
const { compose } = require("./utils");
const {
positionDefaultReadmeToTop,
removeRFCNumberedDirectories,
separateFoldersAndFilesOrder,
orderAlphabeticallyAndByNumber
} = require("./modifiers")
/**
 * Builds the sidebar: the default generator produces the items, then the custom modifiers
 * are applied to its result.
 *
 * `compose` runs its functions right to left, so the effective order is
 * orderAlphabeticallyAndByNumber -> removeRFCNumberedDirectories ->
 * separateFoldersAndFilesOrder -> positionDefaultReadmeToTop.
 *
 * Each modifier walks the items on its own; the few extra O(N) passes are accepted in
 * exchange for modifiers that are easy to maintain independently.
 *
 * @param {object} options - Generator arguments; `defaultSidebarItemsGenerator` is called
 *   with all remaining properties.
 * @returns {Promise<Array>} The modified sidebar items.
 */
async function sidebarItemsGenerator({defaultSidebarItemsGenerator, ...args}) {
  const generatedItems = await defaultSidebarItemsGenerator(args);

  const applyModifiers = compose(
    positionDefaultReadmeToTop,
    separateFoldersAndFilesOrder,
    removeRFCNumberedDirectories,
    orderAlphabeticallyAndByNumber
  )

  return applyModifiers(generatedItems)
}
module.exports = {
sidebarItemsGenerator
}

View File

@@ -0,0 +1,15 @@
/**
 * Tells whether a document acts as the index page of its directory.
 *
 * A document qualifies when its id, compared case-insensitively, is `README`, `INDEX`,
 * or the name of its parent directory (when one is given).
 *
 * @param {string} documentId - Id of the document.
 * @param {string} [parentDirectory] - Name of the directory containing the document.
 * @returns {boolean}
 */
function isIndexDocument(documentId, parentDirectory) {
  if (!documentId) {
    return false
  }

  const normalizedId = documentId.toUpperCase()
  if (normalizedId === "README" || normalizedId === "INDEX") {
    return true
  }

  return !!parentDirectory && normalizedId === parentDirectory.toUpperCase()
}
module.exports = {
isIndexDocument
}

View File

@@ -0,0 +1,89 @@
const { isNumber } = require('./utils')
const {
isIndexDocument
} = require("./helpers")
/**
 * Returns the sidebar items sorted by `label`, recursively.
 *
 * Labels are compared case-insensitively with numeric awareness ("2" sorts before "10").
 * Categories with more than one child get a sorted copy of their children; all other
 * items are reused as they are. The input array is not modified.
 *
 * @param {Array<object>} sidebarItems - Items of one sidebar level.
 * @returns {Array<object>} New, sorted array.
 */
function orderAlphabeticallyAndByNumber(sidebarItems) {
  const labelCollator = new Intl.Collator(undefined, {numeric: true, sensitivity: 'base'});

  return sidebarItems
    .map(item => {
      const hasSeveralChildren = item.type === 'category' && item.items && item.items.length > 1
      return hasSeveralChildren
        ? {...item, items: orderAlphabeticallyAndByNumber(item.items)}
        : item
    })
    .sort((left, right) => labelCollator.compare(left.label, right.label))
}
/**
 * Flattens categories whose label is a number (as decided by `isNumber`).
 *
 * Such a category is replaced by its own items. Every other category is kept and its
 * items are processed recursively; non-category items are passed through. The children
 * lifted out of a numbered category are not processed any further.
 *
 * @param {Array<object>} sidebarItems - Items of one sidebar level.
 * @returns {Array<object>} New array without the numbered categories.
 */
function removeRFCNumberedDirectories(sidebarItems) {
  return sidebarItems.flatMap(item => {
    if (item.type !== 'category') {
      return [item]
    }
    if (isNumber(item.label)) {
      return [...item.items]
    }
    return [{...item, items: removeRFCNumberedDirectories(item.items)}]
  })
}
/**
 * Puts all categories (folders) before all other items (pages), keeping the relative
 * order within each of the two groups.
 *
 * @param {Array<object>} sidebarItems - Items of one sidebar level.
 * @returns {Array<object>} New array: categories first, then everything else.
 */
function separateFoldersAndFilesOrder(sidebarItems) {
  const isCategory = item => item.type === 'category'

  const folders = sidebarItems.filter(isCategory)
  const files = sidebarItems.filter(item => !isCategory(item))

  return folders.concat(files)
}
/**
 * Moves index documents to the top of the list.
 *
 * Every item of type "doc" whose id satisfies `isIndexDocument` is put in front of
 * everything collected so far; all other items keep their relative order. When several
 * index documents exist, the one found last ends up first.
 *
 * @param {Array<object>} sidebarItems - Items of one sidebar level.
 * @returns {Array<object>} New, reordered array.
 */
function positionDefaultReadmeToTop(sidebarItems) {
  const reordered = []

  sidebarItems.forEach(item => {
    const isIndexDoc = item.type === "doc" && isIndexDocument(item.id)
    if (isIndexDoc) {
      reordered.unshift(item)
    } else {
      reordered.push(item)
    }
  })

  return reordered
}
module.exports = {
orderAlphabeticallyAndByNumber,
removeRFCNumberedDirectories,
separateFoldersAndFilesOrder,
positionDefaultReadmeToTop,
}

View File

@@ -0,0 +1,27 @@
/**
 * Tells whether `value` starts with something `parseInt` can read as a base-10 integer.
 *
 * Arrays are always rejected. Note that strings with a numeric prefix, such as "10/relay",
 * are accepted as well.
 *
 * @param {*} value - Value to inspect.
 * @returns {boolean}
 */
function isNumber(value) {
  if (Array.isArray(value)) {
    return false;
  }

  const parsed = Number.parseInt(value, 10);
  return !Number.isNaN(parsed);
}
/*
Composes multiple single-purpose functions into one: compose(f, g, h)(...args) is
f(g(h(...args))).
NOTE: Functions are executed from end of array to start (right to left). Only the last
function receives the original arguments; every other one receives the result of the
function to its right.
Without any function an identity function is returned; a single function is returned as is.
*/
function compose(...funcs) {
  if (funcs.length === 0) {
    // Nothing to apply: hand the first argument back instead of failing in reduce().
    return (arg) => arg
  }

  if (funcs.length === 1) {
    return funcs[0]
  }

  return funcs.reduce(
    (outer, inner) =>
      (...args) =>
        outer(inner(...args))
  )
}
}
module.exports = {
isNumber,
compose
}