Files
penx/apps/web/lib/remark-slate/serialize.ts
0xzio 3edfe1aebf refactor: use monorepo
fix: fix build

chore: update README

feat: init extension

fix: fix web build
2025-04-19 01:48:59 +08:00

244 lines
7.6 KiB
TypeScript

import escapeHtml from 'escape-html'
import { BlockType, defaultNodeTypes, LeafType, NodeTypes } from './ast-types'
/**
 * Settings accepted by `serialize`.
 */
interface Options {
  /** Node-type names to serialize with; `serialize` layers them over `defaultNodeTypes`. */
  nodeTypes: NodeTypes
  /**
   * When true, empty (or newline-only) text under a paragraph parent is NOT
   * turned into a break tag. Treated as `false` when omitted.
   */
  ignoreParagraphNewline?: boolean
  /**
   * Current list nesting level, used to indent list items.
   * Treated as `0` when omitted.
   */
  listDepth?: number
}
/**
 * Type guard separating leaves from blocks: a node is considered a leaf
 * exactly when it carries a `text` property whose value is a string.
 */
const isLeafNode = (node: BlockType | LeafType): node is LeafType => {
  const { text } = node as LeafType
  return typeof text === 'string'
}
// Markup substituted for empty text found directly under a paragraph.
const BREAK_TAG = '<br>'
// Node-type keys that are still serialized when they produce no child text.
const VOID_ELEMENTS: (keyof NodeTypes)[] = ['thematic_break', 'image']
/**
 * Serializes a node (and, recursively, its children) into markdown text.
 *
 * Leaf nodes contribute their `text`, wrapped in emphasis/strike/code markers
 * according to their flags. Block nodes first serialize and concatenate their
 * children, then wrap the result according to the block's `type`.
 *
 * @param chunk The leaf or block node to serialize.
 * @param opts Serialization options; `nodeTypes` is merged over
 *   `defaultNodeTypes`, `ignoreParagraphNewline` defaults to `false` and
 *   `listDepth` to `0`.
 * @returns The markdown for `chunk`, or `undefined` when the node produced no
 *   content and is not one of the `VOID_ELEMENTS`.
 */
export default function serialize(
  chunk: BlockType | LeafType,
  opts: Options = { nodeTypes: defaultNodeTypes },
) {
  const {
    nodeTypes: userNodeTypes = defaultNodeTypes,
    ignoreParagraphNewline = false,
    listDepth = 0,
  } = opts

  const text = (chunk as LeafType).text || ''
  let type = (chunk as BlockType).type || ''

  // User-supplied names win over the defaults; headings are merged one level
  // deeper so a partial heading map does not drop the remaining levels.
  const nodeTypes: NodeTypes = {
    ...defaultNodeTypes,
    ...userNodeTypes,
    heading: {
      ...defaultNodeTypes.heading,
      ...userNodeTypes.heading,
    },
  }

  const LIST_TYPES = [nodeTypes.ul_list, nodeTypes.ol_list]

  let children = text

  if (!isLeafNode(chunk)) {
    // Both facts below describe `chunk` itself, not the individual child, so
    // they are computed once instead of once per child.
    const selfIsList = (LIST_TYPES as string[]).includes(chunk.type || '')

    // Links can have the following shape
    // In which case we don't want to surround
    // with break tags
    // {
    //  type: 'paragraph',
    //  children: [
    //    { text: '' },
    //    { type: 'link', children: [{ text: foo.com }]}
    //    { text: '' }
    //  ]
    // }
    const childrenHasLink =
      Array.isArray(chunk.children) &&
      chunk.children.some(
        (f: BlockType | LeafType) =>
          !isLeafNode(f) && f.type === nodeTypes.link,
      )

    children = chunk.children
      .map((c: BlockType | LeafType) => {
        const childType = (c as BlockType).type || ''
        const isList =
          !isLeafNode(c) && (LIST_TYPES as string[]).includes(childType)

        return serialize(
          { ...c, parentType: type },
          {
            nodeTypes,
            // This relates to the block further down where we check
            // ignoreParagraphNewline and force the type to paragraph.
            // We want to strip out empty paragraphs sometimes, but other
            // times we don't. If we're the descendant of a list, we know we
            // don't want a bunch of whitespace. If we're parallel to a link
            // we also don't want to respect neighboring paragraphs.
            ignoreParagraphNewline:
              (ignoreParagraphNewline ||
                isList ||
                selfIsList ||
                childrenHasLink) &&
              // if we have c.break, never ignore empty paragraph new line
              !(c as BlockType).break,
            // track depth of nested lists so we can add proper spacing
            listDepth: (LIST_TYPES as string[]).includes(childType)
              ? listDepth + 1
              : listDepth,
          },
        )
      })
      .join('')
  }

  // This is pretty fragile code, check the long comment where we iterate over
  // children. Empty text directly under a paragraph becomes a break tag unless
  // the caller asked us to ignore paragraph newlines.
  if (
    !ignoreParagraphNewline &&
    (text === '' || text === '\n') &&
    chunk.parentType === nodeTypes.paragraph
  ) {
    type = nodeTypes.paragraph
    children = BREAK_TAG
  }

  // Nothing to emit, and the node is not a void element that renders without
  // children (thematic break, image).
  if (children === '' && !VOID_ELEMENTS.some((k) => nodeTypes[k] === type)) {
    return
  }

  // Never allow decorating break tags with rich text formatting,
  // this can malform generated markdown
  // Also ensure we're only ever applying text formatting to leaf node
  // level chunks, otherwise we can end up in a situation where
  // we try applying formatting like to a node like this:
  // "Text foo bar **baz**" resulting in "**Text foo bar **baz****"
  // which is invalid markup and can mess everything up
  if (children !== BREAK_TAG && isLeafNode(chunk)) {
    if (chunk.strikeThrough && chunk.bold && chunk.italic) {
      children = retainWhitespaceAndFormat(children, '~~***')
    } else if (chunk.bold && chunk.italic) {
      children = retainWhitespaceAndFormat(children, '***')
    } else {
      if (chunk.bold) {
        children = retainWhitespaceAndFormat(children, '**')
      }
      if (chunk.italic) {
        children = retainWhitespaceAndFormat(children, '_')
      }
      if (chunk.strikeThrough) {
        children = retainWhitespaceAndFormat(children, '~~')
      }
      if (chunk.code) {
        children = retainWhitespaceAndFormat(children, '`')
      }
    }
  }

  switch (type) {
    case nodeTypes.heading[1]:
      return `# ${children}\n`
    case nodeTypes.heading[2]:
      return `## ${children}\n`
    case nodeTypes.heading[3]:
      return `### ${children}\n`
    case nodeTypes.heading[4]:
      return `#### ${children}\n`
    case nodeTypes.heading[5]:
      return `##### ${children}\n`
    case nodeTypes.heading[6]:
      return `###### ${children}\n`

    case nodeTypes.block_quote:
      // For some reason, marked is parsing blockquotes w/ one new line
      // as continued blockquotes, so adding two new lines ensures that doesn't
      // happen
      return `> ${children}\n\n`

    case nodeTypes.code_block:
      return `\`\`\`${
        (chunk as BlockType).language || ''
      }\n${children}\n\`\`\`\n`

    case nodeTypes.link:
      return `[${children}](${(chunk as BlockType).link || ''})`

    case nodeTypes.image:
      // Fall back to an empty caption, mirroring the link fallback, so a
      // missing caption does not serialize as the text "undefined".
      return `![${(chunk as BlockType).caption || ''}](${
        (chunk as BlockType).link || ''
      })`

    case nodeTypes.ul_list:
    case nodeTypes.ol_list:
      return `\n${children}\n`

    case nodeTypes.listItem: {
      const isOL = chunk.parentType === nodeTypes.ol_list
      const itemChildren = (chunk as BlockType).children
      const treatAsLeaf =
        itemChildren.length === 1 && isLeafNode(itemChildren[0])

      // A nested item must be indented at least as far as the width of the
      // list marker to be parsed as nested: three columns for "1. " and two
      // for "- ".
      // https://github.com/remarkjs/remark-react/issues/65
      const indentUnit = isOL ? '   ' : '  '
      let spacer = ''
      for (let depth = 0; depth < listDepth; depth++) {
        spacer += indentUnit
      }

      return `${spacer}${isOL ? '1.' : '-'} ${children}${
        treatAsLeaf ? '\n' : ''
      }`
    }

    case nodeTypes.paragraph:
      return `${children}\n`

    case nodeTypes.thematic_break:
      return `---\n`

    default:
      // Untyped content (plain leaves) is HTML-escaped before being emitted.
      return escapeHtml(children)
  }
}
/**
 * Wraps the non-whitespace core of `string` in a markdown marker while leaving
 * any leading/trailing whitespace outside of the markers.
 *
 * This handles the case of a string like " foo ", where generating
 * "** foo **" would be invalid markdown. The whitespace is set aside, the
 * trimmed text is formatted, and the whitespace is put back: " **foo** ".
 *
 * The closing marker is the opening marker reversed, so a combined marker
 * such as "~~***" closes as "***~~".
 *
 * @param string Text to format; may carry surrounding whitespace.
 * @param format Opening marker, e.g. "**", "_", "~~" or "`".
 * @returns The formatted text. Whitespace-only (or empty) input is returned
 *   unchanged, because markers around no content are not valid emphasis.
 */
function retainWhitespaceAndFormat(string: string, format: string): string {
  const trimmed = string.trim()

  // Nothing to decorate: emitting e.g. "****" would be garbage markdown.
  if (trimmed === '') {
    return string
  }

  const formatted = `${format}${trimmed}${reverseStr(format)}`

  // No surrounding whitespace, so the formatted core is the whole result.
  if (trimmed.length === string.length) {
    return formatted
  }

  // Splice the formatted core back in by position. `String.prototype.replace`
  // is deliberately avoided: with a string replacement it expands `$$`, `$&`,
  // "$`" and `$'`, which would corrupt text containing a dollar sign.
  // `trimmed` starts with a non-whitespace character, so its first occurrence
  // is exactly where the leading whitespace ends.
  const start = string.indexOf(trimmed)
  return (
    string.slice(0, start) + formatted + string.slice(start + trimmed.length)
  )
}

/** Returns `string` with its UTF-16 code units in reverse order. */
const reverseStr = (string: string) => string.split('').reverse().join('')