status-go/protocol/messenger_linkpreview.go

192 lines
5.0 KiB
Go
Raw Normal View History

package protocol
import (
"errors"
"fmt"
"net/http"
neturl "net/url"
"regexp"
"strings"
"go.uber.org/zap"
"golang.org/x/net/publicsuffix"
"github.com/status-im/markdown"
2023-10-13 14:31:56 +00:00
"github.com/status-im/status-go/multiaccounts/settings"
"github.com/status-im/status-go/protocol/common"
)
2023-11-10 10:32:58 +00:00
// UnfurledLinksPerMessageLimit is the maximum number of URLs that GetURLs
// collects from a single text; extraction stops once this many are found.
const UnfurledLinksPerMessageLimit = 5
2023-10-13 12:25:34 +00:00
// UnfurlURLsResponse is the result returned by Messenger.UnfurlURLs.
type UnfurlURLsResponse struct {
// LinkPreviews holds the previews produced for URLs that are not Status
// shared URLs.
LinkPreviews []*common.LinkPreview `json:"linkPreviews,omitempty"`
// StatusLinkPreviews holds the previews produced for Status shared URLs.
StatusLinkPreviews []*common.StatusLinkPreview `json:"statusLinkPreviews,omitempty"`
}
// wwwPrefixPattern matches a hostname that starts with "www." and captures
// everything after it. It is compiled once at package initialisation rather
// than on every normalizeHostname call.
var wwwPrefixPattern = regexp.MustCompile(`^www\.(.*)$`)

// normalizeHostname lowercases hostname and strips one leading "www." label,
// so that, for example, "WWW.Reddit.com" and "reddit.com" normalize to the
// same value. Hostnames without that prefix are only lowercased.
func normalizeHostname(hostname string) string {
	return wwwPrefixPattern.ReplaceAllString(strings.ToLower(hostname), "$1")
}
2023-10-13 12:25:34 +00:00
func (m *Messenger) newURLUnfurler(httpClient *http.Client, url *neturl.URL) Unfurler {
if IsSupportedImageURL(url) {
return NewImageUnfurler(
url,
m.logger,
httpClient)
}
switch normalizeHostname(url.Hostname()) {
case "reddit.com":
2023-10-13 12:25:34 +00:00
return NewOEmbedUnfurler(
"https://www.reddit.com/oembed",
url,
m.logger,
httpClient)
default:
2023-10-13 12:25:34 +00:00
return NewOpenGraphUnfurler(
url,
m.logger,
httpClient)
}
}
2023-10-13 12:25:34 +00:00
// unfurlURL parses url, unfurls it with the unfurler chosen by newURLUnfurler
// and, on success, sets the preview's Hostname to the lowercased hostname of
// the parsed URL.
//
// If parsing fails, an empty (non-nil) preview is returned together with the
// parse error. If unfurling fails, whatever the unfurler returned is passed
// through along with its error.
func (m *Messenger) unfurlURL(httpClient *http.Client, url string) (*common.LinkPreview, error) {
	target, parseErr := neturl.Parse(url)
	if parseErr != nil {
		return new(common.LinkPreview), parseErr
	}

	preview, unfurlErr := m.newURLUnfurler(httpClient, target).Unfurl()
	if unfurlErr != nil {
		return preview, unfurlErr
	}

	preview.Hostname = strings.ToLower(target.Hostname())
	return preview, nil
}
// parseValidURL is a stricter variant of url.Parse used to decide whether a
// link preview may be requested for rawURL. On top of successful parsing it
// requires that:
//   - the URL has a non-empty scheme, and
//   - publicsuffix.EffectiveTLDPlusOne accepts the URL's hostname.
//
// It returns the parsed URL, or a nil URL and an error describing which check
// failed.
func parseValidURL(rawURL string) (*neturl.URL, error) {
	parsed, parseErr := neturl.Parse(rawURL)
	switch {
	case parseErr != nil:
		return nil, fmt.Errorf("parsing URL failed: %w", parseErr)
	case parsed.Scheme == "":
		return nil, errors.New("missing URL scheme")
	}

	if _, domainErr := publicsuffix.EffectiveTLDPlusOne(parsed.Hostname()); domainErr != nil {
		return nil, fmt.Errorf("missing known URL domain: %w", domainErr)
	}

	return parsed, nil
}
// GetURLs returns only what we consider unfurleable URLs.
//
// If we wanted to be extra precise and help improve UX, we could ignore URLs
// that we know can't be unfurled. This is at least possible with the oEmbed
// protocol because providers must specify an endpoint scheme.
func GetURLs(text string) []string {
parsedText := markdown.Parse([]byte(text), nil)
visitor := common.RunLinksVisitor(parsedText)
urls := make([]string, 0, len(visitor.Links))
indexed := make(map[string]any, len(visitor.Links))
for _, rawURL := range visitor.Links {
parsedURL, err := parseValidURL(rawURL)
if err != nil {
continue
}
// Lowercase the host so the URL can be used as a cache key. Particularly on
// mobile clients it is common that the first character in a text input is
// automatically uppercased. In WhatsApp they incorrectly lowercase the
// URL's path, but this is incorrect. For instance, some URL shorteners are
// case-sensitive, some websites encode base64 in the path, etc.
parsedURL.Host = strings.ToLower(parsedURL.Host)
idx := parsedURL.String()
// Removes the spurious trailing forward slash.
idx = strings.TrimRight(idx, "/")
if _, exists := indexed[idx]; exists {
continue
} else {
indexed[idx] = nil
urls = append(urls, idx)
}
2023-11-10 10:32:58 +00:00
// This is a temporary limitation solution,
// should be changed with https://github.com/status-im/status-go/issues/4235
if len(urls) == UnfurledLinksPerMessageLimit {
break
}
}
return urls
}
func NewDefaultHTTPClient() *http.Client {
2023-10-13 12:25:34 +00:00
return &http.Client{Timeout: DefaultRequestTimeout}
}
// UnfurlURLs assumes clients pass URLs verbatim that were validated and
// processed by GetURLs.
2023-10-13 12:25:34 +00:00
func (m *Messenger) UnfurlURLs(httpClient *http.Client, urls []string) (UnfurlURLsResponse, error) {
2023-10-13 14:31:56 +00:00
response := UnfurlURLsResponse{}
s, err := m.getSettings()
if err != nil {
return response, fmt.Errorf("failed to get settigs: %w", err)
}
2023-10-13 14:31:56 +00:00
// Unfurl in a loop
response.LinkPreviews = make([]*common.LinkPreview, 0, len(urls))
response.StatusLinkPreviews = make([]*common.StatusLinkPreview, 0, len(urls))
if httpClient == nil {
httpClient = NewDefaultHTTPClient()
2023-10-13 12:25:34 +00:00
}
for _, url := range urls {
m.logger.Debug("unfurling", zap.String("url", url))
2023-10-13 12:25:34 +00:00
if IsStatusSharedURL(url) {
2023-10-13 12:25:34 +00:00
unfurler := NewStatusUnfurler(url, m, m.logger)
preview, err := unfurler.Unfurl()
if err != nil {
m.logger.Warn("failed to unfurl status link", zap.String("url", url), zap.Error(err))
continue
}
2023-10-13 14:31:56 +00:00
response.StatusLinkPreviews = append(response.StatusLinkPreviews, preview)
2023-10-13 12:25:34 +00:00
continue
}
// `AlwaysAsk` mode should be handled on the app side
// and is considered as equal to `EnableAll` in status-go.
if s.URLUnfurlingMode == settings.URLUnfurlingDisableAll {
continue
}
p, err := m.unfurlURL(httpClient, url)
if err != nil {
2023-10-13 12:25:34 +00:00
m.logger.Warn("failed to unfurl", zap.String("url", url), zap.Error(err))
continue
}
2023-10-13 14:31:56 +00:00
response.LinkPreviews = append(response.LinkPreviews, p)
}
2023-10-13 14:31:56 +00:00
return response, nil
}