status-go/protocol/linkpreview_unfurler_opengraph.go
2024-03-12 22:47:51 +02:00

154 lines
4.2 KiB
Go

package protocol
import (
"bytes"
"fmt"
"io/ioutil"
"net/http"
neturl "net/url"
"strings"
"github.com/keighl/metabolize"
"go.uber.org/zap"
"golang.org/x/net/html"
"github.com/status-im/status-go/images"
"github.com/status-im/status-go/protocol/common"
"github.com/status-im/status-go/protocol/protobuf"
)
type OpenGraphMetadata struct {
Title string `json:"title" meta:"og:title"`
Description string `json:"description" meta:"og:description"`
ThumbnailURL string `json:"thumbnailUrl" meta:"og:image"`
}
// OpenGraphUnfurler should be preferred over OEmbedUnfurler because oEmbed
// gives back a JSON response with a "html" field that's supposed to be embedded
// in an iframe (hardly useful for existing Status' clients).
type OpenGraphUnfurler struct {
url *neturl.URL
logger *zap.Logger
httpClient *http.Client
}
func NewOpenGraphUnfurler(URL *neturl.URL, logger *zap.Logger, httpClient *http.Client) *OpenGraphUnfurler {
return &OpenGraphUnfurler{
url: URL,
logger: logger,
httpClient: httpClient,
}
}
func GetFavicon(bodyBytes []byte) string {
htmlTokens := html.NewTokenizer(bytes.NewBuffer(bodyBytes))
loop:
for {
tt := htmlTokens.Next()
switch tt {
case html.ErrorToken:
break loop
case html.StartTagToken:
t := htmlTokens.Token()
if t.Data != "link" {
continue
}
isIcon := false
href := ""
for _, attr := range t.Attr {
k := attr.Key
v := attr.Val
if k == "rel" && (v == "icon" || v == "shortcut icon") {
isIcon = true
} else if k == "href" &&
(strings.Contains(v, ".ico") ||
strings.Contains(v, ".png") ||
strings.Contains(v, ".svg")) {
href = v
}
}
if isIcon && href != "" {
return href
}
}
}
return ""
}
func (u *OpenGraphUnfurler) Unfurl() (*common.LinkPreview, error) {
preview := newDefaultLinkPreview(u.url)
preview.Type = protobuf.UnfurledLink_LINK
headers := map[string]string{
"accept": headerAcceptText,
"accept-language": headerAcceptLanguage,
"user-agent": headerUserAgent,
}
bodyBytes, err := fetchBody(u.logger, u.httpClient, u.url.String(), headers)
if err != nil {
return preview, err
}
var ogMetadata OpenGraphMetadata
err = metabolize.Metabolize(ioutil.NopCloser(bytes.NewBuffer(bodyBytes)), &ogMetadata)
if err != nil {
return preview, fmt.Errorf("failed to parse OpenGraph data")
}
faviconPath := GetFavicon(bodyBytes)
t, err := fetchImage(u.logger, u.httpClient, faviconPath, false)
if err != nil {
u.logger.Info("failed to fetch favicon", zap.String("url", u.url.String()), zap.Error(err))
} else {
preview.Favicon.DataURI = t.DataURI
}
// There are URLs like https://wikipedia.org/ that don't have an OpenGraph
// title tag, but article pages do. In the future, we can fallback to the
// website's title by using the <title> tag.
if ogMetadata.Title == "" {
return preview, fmt.Errorf("missing required title in OpenGraph response")
}
if ogMetadata.ThumbnailURL != "" {
t, err := fetchImage(u.logger, u.httpClient, ogMetadata.ThumbnailURL, true)
if err != nil {
// Given we want to fetch thumbnails on a best-effort basis, if an error
// happens we simply log it.
u.logger.Info("failed to fetch thumbnail", zap.String("url", u.url.String()), zap.Error(err))
} else {
preview.Thumbnail = t
}
}
preview.Title = ogMetadata.Title
preview.Description = ogMetadata.Description
return preview, nil
}
func fetchImage(logger *zap.Logger, httpClient *http.Client, url string, getDimensions bool) (common.LinkPreviewThumbnail, error) {
var thumbnail common.LinkPreviewThumbnail
imgBytes, err := fetchBody(logger, httpClient, url, nil)
if err != nil {
return thumbnail, fmt.Errorf("could not fetch thumbnail url='%s': %w", url, err)
}
if getDimensions {
width, height, err := images.GetImageDimensions(imgBytes)
if err != nil {
return thumbnail, fmt.Errorf("could not get image dimensions url='%s': %w", url, err)
}
thumbnail.Width = width
thumbnail.Height = height
}
dataURI, err := images.GetPayloadDataURI(imgBytes)
if err != nil {
return thumbnail, fmt.Errorf("could not build data URI url='%s': %w", url, err)
}
thumbnail.DataURI = dataURI
return thumbnail, nil
}