mirror of
https://github.com/penxio/penx.git
synced 2026-01-13 15:38:12 -05:00
40 lines
981 B
TypeScript
40 lines
981 B
TypeScript
import { Readability } from '@mozilla/readability'
|
|
import { parseHTML } from 'linkedom'
|
|
import { preParseContent } from './content-handler'
|
|
|
|
const DEBUG_MODE = false
|
|
|
|
export const parsePreparedContent = async (url: string, html: string) => {
|
|
let dom: Document | null = parseHTML(html).document
|
|
// Attempt to parse the article
|
|
// console.log('%c=parsePreparedContent', 'color:green', { url, html, dom })
|
|
|
|
dom = (await preParseContent(url, dom)) || dom
|
|
|
|
const content = await getReadabilityResult(url, html, dom, false)
|
|
|
|
// console.log('%c=getReadabilityResult:', 'color:green', content)
|
|
|
|
return content
|
|
}
|
|
|
|
async function getReadabilityResult(
|
|
url: string,
|
|
html: string,
|
|
dom: Document,
|
|
isNewsletter?: boolean,
|
|
) {
|
|
try {
|
|
const article = new Readability(dom, {
|
|
debug: DEBUG_MODE,
|
|
keepClasses: false,
|
|
}).parse()
|
|
|
|
if (article) {
|
|
return article
|
|
}
|
|
} catch (error) {
|
|
console.log('parsing error for url', { url, error })
|
|
}
|
|
}
|