Files
Fabric/core/html_parser.go
2024-09-30 11:25:10 +08:00

28 lines
559 B
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package core
import (
"bytes"
"fmt"
"github.com/go-shiori/go-readability"
)
// HtmlReadability extracts the main readable content from a web page.
//
// html is the full HTML source of the page. It is parsed with go-readability
// without a page URL (nil is passed), because only the raw markup is
// available to this function.
//
// On success it returns the TextContent field of the parsed article and a nil
// error. On failure it returns an empty string and an error that wraps the
// one reported by go-readability, so errors.Is and errors.As still reach the
// underlying cause.
func HtmlReadability(html string) (viewContent string, err error) {
	article, err := readability.FromReader(bytes.NewBufferString(html), nil)
	if err != nil {
		return "", fmt.Errorf("extracting readable content: %w", err)
	}
	// Deliberately no printing here: this is library code, and callers decide
	// whether (and where) the extracted content should be logged.
	return article.TextContent, nil
}