Changed to unifiedjs-based markdown parsing (#78)

This commit is contained in:
Janne Ojanaho
2020-07-11 09:59:05 +03:00
committed by GitHub
parent b86edc4660
commit f2efd14186
7 changed files with 107 additions and 85 deletions

9
.vscode/launch.json vendored
View File

@@ -35,6 +35,15 @@
"${workspaceFolder}/packages/foam-vscode/out/test/**/*.js"
],
"preLaunchTask": "Build foam-vscode"
},
{
"type": "node",
"request": "launch",
"name": "Workspace Manager tests",
"program": "${workspaceFolder}/node_modules/tsdx/dist/index.js",
"args": ["test"],
"cwd": "${workspaceFolder}/packages/foam-workspace-manager",
"internalConsoleOptions": "openOnSessionStart"
}
]
}

View File

@@ -44,6 +44,7 @@
"lodash": "^4.17.15",
"remark-parse": "^8.0.2",
"remark-wiki-link": "^0.0.4",
"unified": "^9.0.0"
"unified": "^9.0.0",
"unist-util-visit": "^2.0.2"
}
}

View File

@@ -1,63 +0,0 @@
/**
* Adapted from vscode-markdown/src/util.ts
* https://github.com/yzhang-gh/vscode-markdown/blob/master/src/util.ts
*/
/**
 * Matches one fenced code block, from the opening fence to the closing one.
 *
 * - Opening fence: three backticks at the start of a line, preceded by up to
 *   three spaces or a single tab, followed by an info string that contains no
 *   backticks.
 * - Body: at least one character, matched lazily (line breaks included).
 * - Closing fence: three backticks on their own line (same leading-whitespace
 *   rule, trailing spaces allowed).
 *
 * Compiled with the `g` and `m` flags.
 */
export const REGEX_FENCED_CODE_BLOCK = /^( {0,3}|\t)```[^`\r\n]*$[\w\W]+?^( {0,3}|\t)``` *$/gm;
/**
 * Reduces a Markdown heading to plain text by unwrapping reference-style
 * links: `[label][ref]` becomes `label`.
 *
 * Only reference-style links are handled; any other inline syntax (emphasis,
 * inline links, code spans, ...) is returned untouched.
 *
 * @param text Heading text to clean up.
 * @returns The text with every reference-style link replaced by its label.
 */
export function markdownHeadingToPlainText(text: string): string {
  // The `g` flag is required: without it only the first link in the heading
  // would be unwrapped and later ones would keep their `[label][ref]` markup.
  return text.replace(
    /\[([^\]]*)\]\[[^\]]*\]/g,
    (_match: string, label: string) => label
  );
}
/**
 * Builds a regular expression that matches `[[wiki-link]]` references and
 * captures the text between the double brackets in group 1.
 *
 * A new `RegExp` object is produced on every call, so the `lastIndex` state of
 * the global (`g`) flag is never shared between callers.
 *
 * @returns A global, case-insensitive wiki-link matcher.
 */
export function rxWikiLink(): RegExp {
  return /\[\[([^\]]+)\]\]/gi; // [[wiki-link-regex]]
}
/**
 * Builds a regular expression that matches an ATX heading with exactly
 * `level` leading `#` characters followed by whitespace, capturing the
 * heading text in group 1.
 *
 * @param level Number of `#` characters the heading must start with.
 * @returns A multi-line, case-insensitive matcher (not global, so it always
 *   reports the first match).
 */
export function rxMarkdownHeading(level: number): RegExp {
  const hashes = '#{' + level + '}';
  return new RegExp('^' + hashes + '\\s+(.+)$', 'im');
}
/**
 * Selector entries for markdown documents, one for the `file` scheme and one
 * for the `untitled` scheme.
 *
 * NOTE(review): the shape looks like a VS Code document selector; confirm
 * against the call sites.
 */
export const mdDocSelector = [
{ language: 'markdown', scheme: 'file' },
{ language: 'markdown', scheme: 'untitled' },
];
/**
 * Looks up the first level-1 heading (`# ...`) in a markdown document.
 *
 * @param md Markdown source to search.
 * @returns The heading text after `markdownHeadingToPlainText` clean-up, or
 *   `null` when the document has no level-1 heading.
 */
export function findTopLevelHeading(md: string): string | null {
  const headingMatch = rxMarkdownHeading(1).exec(md);
  return headingMatch ? markdownHeadingToPlainText(headingMatch[1]) : null;
}
/**
 * Blanks out the parts of a markdown document that must not be scanned for
 * links or headings: fenced code blocks, HTML comments and YAML front matter.
 *
 * Removed regions keep their line breaks (every other character is dropped),
 * so the line count of the result equals that of the input.
 *
 * @param markdown Raw markdown source.
 * @returns The markdown with the regions above emptied.
 */
export function cleanupMarkdown(markdown: string) {
  // Drops every character of a match except CR / LF.
  const blankOut = (matched: string) => matched.replace(/[^\r\n]/g, '');

  // 1. Fenced code blocks.
  const withoutCodeBlocks = markdown.replace(REGEX_FENCED_CODE_BLOCK, blankOut);

  // 2. Escape the "omit in toc" magic comment so step 3 does not erase it.
  const withEscapedMarker = withoutCodeBlocks.replace(
    /<!-- omit in (toc|TOC) -->/g,
    '&lt; omit in toc &gt;'
  );

  // 3. Remaining HTML comments.
  const withoutComments = withEscapedMarker.replace(/<!--[\W\w]+?-->/g, blankOut);

  // 4. YAML front matter (only at the very start of the document).
  return withoutComments.replace(/^---[\W\w]+?(\r?\n)---/, blankOut);
}
/**
 * Collects the distinct `[[wiki-link]]` targets found in a markdown document.
 *
 * The document is first passed through `cleanupMarkdown`, so links inside the
 * regions it blanks out are not reported.
 *
 * @param markdown Raw markdown source.
 * @returns Unique link targets in order of first appearance.
 */
export function findWikilinksInMarkdown(markdown: string): string[] {
  const cleaned = cleanupMarkdown(markdown);
  const linkPattern = rxWikiLink();
  const targets = new Set<string>();

  for (
    let hit = linkPattern.exec(cleaned);
    hit;
    hit = linkPattern.exec(cleaned)
  ) {
    // Group 1 can be file-name or file.name.ext
    const target = hit[1];
    if (target) {
      targets.add(target);
    }
  }

  return Array.from(targets);
}

View File

@@ -1,23 +1,45 @@
// @todo convert this to use ast parsing
// import unified from 'unified';
// import markdown from 'remark-parse';
// import wikiLinkPlugin from 'remark-wiki-link';
// let processor = unified()
// .use(markdown, { gfm: true })
// .use(wikiLinkPlugin);
import { findTopLevelHeading, findWikilinksInMarkdown } from './markdown-utils';
import unified from 'unified';
import markdownParse from 'remark-parse';
import wikiLinkPlugin from 'remark-wiki-link';
import visit from 'unist-util-visit';
import { Node, Parent } from 'unist';
/**
 * Placeholder for reading a workspace file; not implemented yet.
 *
 * @param filename Currently unused.
 * @throws Error Always, with the message 'Not implemented'.
 */
// NOTE(review): the directive below presumably silences the unused-parameter
// error for `filename`; confirm and remove it once the function is implemented.
// @ts-expect-error
export function readWorkspaceFile(filename: string): string {
throw new Error('Not implemented');
}
// Parsing pipeline, built on first use and then reused by every parse() call.
let processor: unified.Processor | null = null;

/**
 * Parses markdown into a syntax tree using remark-parse (with `gfm` enabled)
 * and the wiki-link plugin.
 *
 * @param markdown Markdown source to parse.
 * @returns Root node of the parsed tree.
 */
function parse(markdown: string): Node {
  if (!processor) {
    processor = unified()
      .use(markdownParse, { gfm: true })
      .use(wikiLinkPlugin);
  }
  return processor.parse(markdown);
}
/**
 * Extracts a note's title: the text of the first level-1 heading whose first
 * child node carries a non-empty string `value`.
 *
 * Only the first child of the heading is read, so a heading that starts with
 * plain text followed by inline markup yields just that leading text.
 *
 * @param markdown Markdown source of the note.
 * @returns The title, or `null` when no such heading exists.
 */
export function parseNoteTitleFromMarkdown(markdown: string): string | null {
  let title: string | null = null;
  const tree = parse(markdown);

  visit(tree, node => {
    if (node.type === 'heading' && node.depth === 1) {
      // An empty heading ("#") has no children; guard before reading `.value`
      // instead of throwing a TypeError.
      const firstChild = (node as Parent).children[0];
      const text = firstChild ? firstChild.value : undefined;
      if (typeof text === 'string' && text !== '') {
        title = text;
      }
    }
    // `true` keeps traversing, `false` stops the traversal: halt as soon as a
    // title has been found so the first top-level heading wins.
    return title === null;
  });

  return title;
}
/**
 * Collects the targets of all `[[wiki-link]]` nodes in a note.
 *
 * Each target is reported once, in order of first appearance, however many
 * times the note links to it.
 *
 * @param markdown Markdown source of the note.
 * @returns Distinct wiki-link targets found in the parsed tree.
 */
export function parseNoteLinksFromMarkdown(markdown: string): string[] {
  // A Set keeps insertion order and drops repeated targets.
  const links = new Set<string>();
  const tree = parse(markdown);

  visit(tree, node => {
    if (node.type === 'wikiLink') {
      const target = node.value;
      if (typeof target === 'string') {
        links.add(target);
      }
    }
  });

  return Array.from(links);
}

View File

@@ -17,6 +17,12 @@ const pageC = `
# Page C
`;
const updatedPageC = `
# Page C
[[page-a]]
[[page-b]]
`;
describe('WorkspaceManager', () => {
it('links things correctly when added in order', () => {
const ws = new WorkspaceManager('dir/');
@@ -55,14 +61,7 @@ describe('WorkspaceManager', () => {
const before = ws.getNoteWithLinks('page-a');
// change document
ws.addNoteFromMarkdown(
'page-c.md',
`
# Page C
[[page-a]]
[[page-b]]
`
);
ws.addNoteFromMarkdown('page-c.md', updatedPageC);
const after = ws.getNoteWithLinks('page-a');

View File

@@ -0,0 +1,54 @@
import {
parseNoteTitleFromMarkdown,
parseNoteLinksFromMarkdown,
} from '../../src/utils/utils';
// Fixture: a level-1 heading, a level-2 heading and two wiki-links in a list.
const pageA = `
# Page A
## Section
- [[page-b]]
- [[page-c]]
`;
// Fixture: a level-1 heading only, no links.
const pageB = `
# Page B
`;
// Fixture: plain text with neither headings nor links.
const pageC = `
foo
bar
`;
// Fixture: two level-1 headings, used to check which one becomes the title.
const pageD = `
# Page D
hello world
# Another header
hello world
`;
// Tests for the markdown parsing helpers exported from src/utils/utils.
// The suite label names the helpers under test (not WorkspaceManager, which
// has its own suite) so failures are attributed to the right module.
describe('markdown utils', () => {
  it('finds top level headings', () => {
    const titleA = parseNoteTitleFromMarkdown(pageA);
    const titleB = parseNoteTitleFromMarkdown(pageB);
    const titleC = parseNoteTitleFromMarkdown(pageC);
    const titleD = parseNoteTitleFromMarkdown(pageD);

    expect(titleA).toEqual('Page A');
    expect(titleB).toEqual('Page B');
    // no heading at all -> no title
    expect(titleC).toBeNull();
    // in case of multiple top level headings, the first one rules
    expect(titleD).toEqual('Page D');
  });

  it('finds wikilinks', () => {
    const linksA = parseNoteLinksFromMarkdown(pageA);
    const linksB = parseNoteLinksFromMarkdown(pageB);
    const linksC = parseNoteLinksFromMarkdown(pageC);

    expect(linksA).toEqual(['page-b', 'page-c']);
    expect(linksB).toEqual([]);
    expect(linksC).toEqual([]);
  });
});

View File

@@ -6974,7 +6974,7 @@ unist-util-visit-parents@^3.0.0:
"@types/unist" "^2.0.0"
unist-util-is "^4.0.0"
unist-util-visit@^2.0.0:
unist-util-visit@^2.0.0, unist-util-visit@^2.0.2:
version "2.0.2"
resolved "https://registry.yarnpkg.com/unist-util-visit/-/unist-util-visit-2.0.2.tgz#3843782a517de3d2357b4c193b24af2d9366afb7"
integrity sha512-HoHNhGnKj6y+Sq+7ASo2zpVdfdRifhTgX2KTU3B/sO/TTlZchp7E3S4vjRzDJ7L60KmrCPsQkVK3lEF3cz36XQ==