Files
penx/apps/extension/lib/parser.ts
2025-05-16 18:30:02 +08:00

40 lines
981 B
TypeScript

import { Readability } from '@mozilla/readability'
import { parseHTML } from 'linkedom'
import { preParseContent } from './content-handler'
// Forwarded to Readability as its `debug` option in getReadabilityResult.
const DEBUG_MODE = false
/**
 * Builds a DOM from raw HTML, runs it through `preParseContent`, and then
 * extracts the article with Readability.
 *
 * @param url - Address of the page; passed to `preParseContent` and used in
 *   the error log of the extraction step.
 * @param html - Raw HTML markup of the page.
 * @returns Whatever `getReadabilityResult` resolves to: the parsed article,
 *   or `undefined` when extraction yields nothing or fails.
 */
export const parsePreparedContent = async (url: string, html: string) => {
  const initialDocument: Document = parseHTML(html).document

  // Keep the freshly parsed document whenever the pre-parse step resolves to
  // a falsy value instead of a replacement document.
  const preparedDocument =
    (await preParseContent(url, initialDocument)) || initialDocument

  return getReadabilityResult(url, html, preparedDocument, false)
}
/**
 * Runs Mozilla Readability over a document and returns the extracted article.
 *
 * Failures are not propagated: any error thrown while parsing is logged
 * together with the URL and the function resolves to `undefined`.
 *
 * @param url - Address of the page; only used in the error log.
 * @param html - Raw HTML of the page. Not read by this function.
 * @param dom - Document handed to Readability.
 * @param isNewsletter - Not read by this function.
 * @returns The article produced by `Readability.parse()`, or `undefined` when
 *   parsing yields a falsy result or throws.
 */
async function getReadabilityResult(
  url: string,
  html: string,
  dom: Document,
  isNewsletter?: boolean,
) {
  try {
    const reader = new Readability(dom, {
      debug: DEBUG_MODE,
      keepClasses: false,
    })
    const parsed = reader.parse()
    if (!parsed) {
      // Readability could not extract an article from this document.
      return undefined
    }
    return parsed
  } catch (error) {
    console.log('parsing error for url', { url, error })
    return undefined
  }
}