2023-10-13 13:25:34 +01:00
|
|
|
package protocol
|
2023-08-21 22:31:32 +03:00
|
|
|
|
|
|
|
import (
|
|
|
|
"bytes"
|
|
|
|
"fmt"
|
|
|
|
"io/ioutil"
|
|
|
|
"net/http"
|
|
|
|
neturl "net/url"
|
2024-03-12 22:47:51 +02:00
|
|
|
"strings"
|
2023-08-21 22:31:32 +03:00
|
|
|
|
|
|
|
"github.com/keighl/metabolize"
|
|
|
|
"go.uber.org/zap"
|
2024-03-12 22:47:51 +02:00
|
|
|
"golang.org/x/net/html"
|
2023-08-21 22:31:32 +03:00
|
|
|
|
|
|
|
"github.com/status-im/status-go/images"
|
|
|
|
"github.com/status-im/status-go/protocol/common"
|
|
|
|
"github.com/status-im/status-go/protocol/protobuf"
|
|
|
|
)
|
|
|
|
|
|
|
|
// OpenGraphMetadata holds the OpenGraph properties extracted from a page.
//
// The `meta` tag names the <meta> property each field is populated from, and
// the `json` tag names the key used when the value is serialized.
type OpenGraphMetadata struct {
	// Title is read from the og:title property.
	Title string `json:"title" meta:"og:title"`
	// Description is read from the og:description property.
	Description string `json:"description" meta:"og:description"`
	// ThumbnailURL is read from the og:image property.
	ThumbnailURL string `json:"thumbnailUrl" meta:"og:image"`
}
|
|
|
|
|
|
|
|
// OpenGraphUnfurler should be preferred over OEmbedUnfurler because oEmbed
|
|
|
|
// gives back a JSON response with a "html" field that's supposed to be embedded
|
|
|
|
// in an iframe (hardly useful for existing Status' clients).
|
|
|
|
type OpenGraphUnfurler struct {
|
|
|
|
url *neturl.URL
|
|
|
|
logger *zap.Logger
|
|
|
|
httpClient *http.Client
|
|
|
|
}
|
|
|
|
|
|
|
|
func NewOpenGraphUnfurler(URL *neturl.URL, logger *zap.Logger, httpClient *http.Client) *OpenGraphUnfurler {
|
|
|
|
return &OpenGraphUnfurler{
|
|
|
|
url: URL,
|
|
|
|
logger: logger,
|
|
|
|
httpClient: httpClient,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-03-12 22:47:51 +02:00
|
|
|
func GetFavicon(bodyBytes []byte) string {
|
|
|
|
htmlTokens := html.NewTokenizer(bytes.NewBuffer(bodyBytes))
|
|
|
|
loop:
|
|
|
|
for {
|
|
|
|
tt := htmlTokens.Next()
|
|
|
|
switch tt {
|
|
|
|
case html.ErrorToken:
|
|
|
|
break loop
|
|
|
|
case html.StartTagToken:
|
|
|
|
t := htmlTokens.Token()
|
|
|
|
if t.Data != "link" {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
isIcon := false
|
|
|
|
href := ""
|
|
|
|
for _, attr := range t.Attr {
|
|
|
|
k := attr.Key
|
|
|
|
v := attr.Val
|
|
|
|
if k == "rel" && (v == "icon" || v == "shortcut icon") {
|
|
|
|
isIcon = true
|
|
|
|
} else if k == "href" &&
|
|
|
|
(strings.Contains(v, ".ico") ||
|
|
|
|
strings.Contains(v, ".png") ||
|
|
|
|
strings.Contains(v, ".svg")) {
|
|
|
|
href = v
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if isIcon && href != "" {
|
|
|
|
return href
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return ""
|
|
|
|
}
|
|
|
|
|
2023-10-13 13:25:34 +01:00
|
|
|
func (u *OpenGraphUnfurler) Unfurl() (*common.LinkPreview, error) {
|
2023-08-21 22:31:32 +03:00
|
|
|
preview := newDefaultLinkPreview(u.url)
|
|
|
|
preview.Type = protobuf.UnfurledLink_LINK
|
|
|
|
|
|
|
|
headers := map[string]string{
|
|
|
|
"accept": headerAcceptText,
|
|
|
|
"accept-language": headerAcceptLanguage,
|
|
|
|
"user-agent": headerUserAgent,
|
|
|
|
}
|
|
|
|
bodyBytes, err := fetchBody(u.logger, u.httpClient, u.url.String(), headers)
|
|
|
|
if err != nil {
|
|
|
|
return preview, err
|
|
|
|
}
|
|
|
|
|
|
|
|
var ogMetadata OpenGraphMetadata
|
|
|
|
err = metabolize.Metabolize(ioutil.NopCloser(bytes.NewBuffer(bodyBytes)), &ogMetadata)
|
|
|
|
if err != nil {
|
|
|
|
return preview, fmt.Errorf("failed to parse OpenGraph data")
|
|
|
|
}
|
|
|
|
|
2024-03-12 22:47:51 +02:00
|
|
|
faviconPath := GetFavicon(bodyBytes)
|
|
|
|
t, err := fetchImage(u.logger, u.httpClient, faviconPath, false)
|
|
|
|
if err != nil {
|
|
|
|
u.logger.Info("failed to fetch favicon", zap.String("url", u.url.String()), zap.Error(err))
|
|
|
|
} else {
|
|
|
|
preview.Favicon.DataURI = t.DataURI
|
|
|
|
}
|
2023-08-21 22:31:32 +03:00
|
|
|
// There are URLs like https://wikipedia.org/ that don't have an OpenGraph
|
|
|
|
// title tag, but article pages do. In the future, we can fallback to the
|
|
|
|
// website's title by using the <title> tag.
|
|
|
|
if ogMetadata.Title == "" {
|
|
|
|
return preview, fmt.Errorf("missing required title in OpenGraph response")
|
|
|
|
}
|
|
|
|
|
|
|
|
if ogMetadata.ThumbnailURL != "" {
|
2024-03-12 22:47:51 +02:00
|
|
|
t, err := fetchImage(u.logger, u.httpClient, ogMetadata.ThumbnailURL, true)
|
2023-08-21 22:31:32 +03:00
|
|
|
if err != nil {
|
|
|
|
// Given we want to fetch thumbnails on a best-effort basis, if an error
|
|
|
|
// happens we simply log it.
|
|
|
|
u.logger.Info("failed to fetch thumbnail", zap.String("url", u.url.String()), zap.Error(err))
|
|
|
|
} else {
|
|
|
|
preview.Thumbnail = t
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
preview.Title = ogMetadata.Title
|
|
|
|
preview.Description = ogMetadata.Description
|
2024-03-12 22:47:51 +02:00
|
|
|
|
2023-08-21 22:31:32 +03:00
|
|
|
return preview, nil
|
|
|
|
}
|
|
|
|
|
2024-03-12 22:47:51 +02:00
|
|
|
func fetchImage(logger *zap.Logger, httpClient *http.Client, url string, getDimensions bool) (common.LinkPreviewThumbnail, error) {
|
2023-08-21 22:31:32 +03:00
|
|
|
var thumbnail common.LinkPreviewThumbnail
|
|
|
|
|
|
|
|
imgBytes, err := fetchBody(logger, httpClient, url, nil)
|
|
|
|
if err != nil {
|
|
|
|
return thumbnail, fmt.Errorf("could not fetch thumbnail url='%s': %w", url, err)
|
|
|
|
}
|
2024-03-12 22:47:51 +02:00
|
|
|
if getDimensions {
|
|
|
|
width, height, err := images.GetImageDimensions(imgBytes)
|
|
|
|
if err != nil {
|
|
|
|
return thumbnail, fmt.Errorf("could not get image dimensions url='%s': %w", url, err)
|
|
|
|
}
|
|
|
|
thumbnail.Width = width
|
|
|
|
thumbnail.Height = height
|
2023-08-21 22:31:32 +03:00
|
|
|
}
|
|
|
|
dataURI, err := images.GetPayloadDataURI(imgBytes)
|
|
|
|
if err != nil {
|
|
|
|
return thumbnail, fmt.Errorf("could not build data URI url='%s': %w", url, err)
|
|
|
|
}
|
|
|
|
thumbnail.DataURI = dataURI
|
|
|
|
|
|
|
|
return thumbnail, nil
|
|
|
|
}
|