2023-08-21 19:31:32 +00:00
|
|
|
package protocol
|
|
|
|
|
|
|
|
import (
|
|
|
|
"errors"
|
|
|
|
"fmt"
|
|
|
|
"net/http"
|
|
|
|
neturl "net/url"
|
|
|
|
"regexp"
|
|
|
|
"strings"
|
|
|
|
|
|
|
|
"go.uber.org/zap"
|
|
|
|
"golang.org/x/net/publicsuffix"
|
|
|
|
|
2023-10-24 03:42:56 +00:00
|
|
|
"github.com/status-im/markdown"
|
|
|
|
|
2023-10-13 14:31:56 +00:00
|
|
|
"github.com/status-im/status-go/multiaccounts/settings"
|
2023-08-21 19:31:32 +00:00
|
|
|
"github.com/status-im/status-go/protocol/common"
|
|
|
|
)
|
|
|
|
|
2023-11-10 10:32:58 +00:00
|
|
|
// UnfurledLinksPerMessageLimit is the maximum number of URLs that GetURLs
// collects from a single text; extraction stops once this many unique URLs
// have been found.
const UnfurledLinksPerMessageLimit = 5
|
|
|
|
|
2023-10-13 12:25:34 +00:00
|
|
|
type UnfurlURLsResponse struct {
|
|
|
|
LinkPreviews []*common.LinkPreview `json:"linkPreviews,omitempty"`
|
|
|
|
StatusLinkPreviews []*common.StatusLinkPreview `json:"statusLinkPreviews,omitempty"`
|
2023-08-21 19:31:32 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// wwwPrefixRegexp matches a hostname that starts with "www." and captures the
// remainder. It is compiled once at package initialization instead of on
// every normalizeHostname call.
var wwwPrefixRegexp = regexp.MustCompile(`^www\.(.*)$`)

// normalizeHostname lowercases hostname and strips a single leading "www."
// label, so that e.g. "WWW.Reddit.com" and "reddit.com" compare equal.
// Hostnames without that prefix are returned lowercased and otherwise
// unchanged.
func normalizeHostname(hostname string) string {
	hostname = strings.ToLower(hostname)
	return wwwPrefixRegexp.ReplaceAllString(hostname, "$1")
}
|
|
|
|
|
2023-10-13 12:25:34 +00:00
|
|
|
func (m *Messenger) newURLUnfurler(httpClient *http.Client, url *neturl.URL) Unfurler {
|
|
|
|
|
|
|
|
if IsSupportedImageURL(url) {
|
|
|
|
return NewImageUnfurler(
|
2023-08-21 19:31:32 +00:00
|
|
|
url,
|
|
|
|
m.logger,
|
|
|
|
httpClient)
|
|
|
|
}
|
|
|
|
|
|
|
|
switch normalizeHostname(url.Hostname()) {
|
|
|
|
case "reddit.com":
|
2023-10-13 12:25:34 +00:00
|
|
|
return NewOEmbedUnfurler(
|
2023-08-21 19:31:32 +00:00
|
|
|
"https://www.reddit.com/oembed",
|
|
|
|
url,
|
|
|
|
m.logger,
|
|
|
|
httpClient)
|
|
|
|
default:
|
2023-10-13 12:25:34 +00:00
|
|
|
return NewOpenGraphUnfurler(
|
2023-08-21 19:31:32 +00:00
|
|
|
url,
|
|
|
|
m.logger,
|
|
|
|
httpClient)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-10-13 12:25:34 +00:00
|
|
|
func (m *Messenger) unfurlURL(httpClient *http.Client, url string) (*common.LinkPreview, error) {
|
|
|
|
preview := new(common.LinkPreview)
|
2023-08-21 19:31:32 +00:00
|
|
|
|
|
|
|
parsedURL, err := neturl.Parse(url)
|
|
|
|
if err != nil {
|
|
|
|
return preview, err
|
|
|
|
}
|
|
|
|
|
|
|
|
unfurler := m.newURLUnfurler(httpClient, parsedURL)
|
|
|
|
preview, err = unfurler.Unfurl()
|
|
|
|
if err != nil {
|
|
|
|
return preview, err
|
|
|
|
}
|
|
|
|
preview.Hostname = strings.ToLower(parsedURL.Hostname())
|
|
|
|
|
|
|
|
return preview, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// parseValidURL is a stricter version of url.Parse that performs additional
|
|
|
|
// checks to ensure the URL is valid for clients to request a link preview.
|
|
|
|
func parseValidURL(rawURL string) (*neturl.URL, error) {
|
|
|
|
u, err := neturl.Parse(rawURL)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("parsing URL failed: %w", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
if u.Scheme == "" {
|
|
|
|
return nil, errors.New("missing URL scheme")
|
|
|
|
}
|
|
|
|
|
|
|
|
_, err = publicsuffix.EffectiveTLDPlusOne(u.Hostname())
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("missing known URL domain: %w", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
return u, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// GetURLs returns only what we consider unfurleable URLs.
|
|
|
|
//
|
|
|
|
// If we wanted to be extra precise and help improve UX, we could ignore URLs
|
|
|
|
// that we know can't be unfurled. This is at least possible with the oEmbed
|
|
|
|
// protocol because providers must specify an endpoint scheme.
|
|
|
|
func GetURLs(text string) []string {
|
|
|
|
parsedText := markdown.Parse([]byte(text), nil)
|
|
|
|
visitor := common.RunLinksVisitor(parsedText)
|
|
|
|
|
|
|
|
urls := make([]string, 0, len(visitor.Links))
|
|
|
|
indexed := make(map[string]any, len(visitor.Links))
|
|
|
|
|
|
|
|
for _, rawURL := range visitor.Links {
|
|
|
|
parsedURL, err := parseValidURL(rawURL)
|
|
|
|
if err != nil {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
// Lowercase the host so the URL can be used as a cache key. Particularly on
|
|
|
|
// mobile clients it is common that the first character in a text input is
|
|
|
|
// automatically uppercased. In WhatsApp they incorrectly lowercase the
|
|
|
|
// URL's path, but this is incorrect. For instance, some URL shorteners are
|
|
|
|
// case-sensitive, some websites encode base64 in the path, etc.
|
|
|
|
parsedURL.Host = strings.ToLower(parsedURL.Host)
|
|
|
|
|
|
|
|
idx := parsedURL.String()
|
|
|
|
// Removes the spurious trailing forward slash.
|
|
|
|
idx = strings.TrimRight(idx, "/")
|
|
|
|
if _, exists := indexed[idx]; exists {
|
|
|
|
continue
|
|
|
|
} else {
|
|
|
|
indexed[idx] = nil
|
|
|
|
urls = append(urls, idx)
|
|
|
|
}
|
2023-11-10 10:32:58 +00:00
|
|
|
|
|
|
|
// This is a temporary limitation solution,
|
|
|
|
// should be changed with https://github.com/status-im/status-go/issues/4235
|
|
|
|
if len(urls) == UnfurledLinksPerMessageLimit {
|
|
|
|
break
|
|
|
|
}
|
2023-08-21 19:31:32 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return urls
|
|
|
|
}
|
|
|
|
|
|
|
|
func NewDefaultHTTPClient() *http.Client {
|
2023-10-13 12:25:34 +00:00
|
|
|
return &http.Client{Timeout: DefaultRequestTimeout}
|
2023-08-21 19:31:32 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// UnfurlURLs assumes clients pass URLs verbatim that were validated and
|
|
|
|
// processed by GetURLs.
|
2023-10-13 12:25:34 +00:00
|
|
|
func (m *Messenger) UnfurlURLs(httpClient *http.Client, urls []string) (UnfurlURLsResponse, error) {
|
2023-10-13 14:31:56 +00:00
|
|
|
response := UnfurlURLsResponse{}
|
|
|
|
|
|
|
|
s, err := m.getSettings()
|
|
|
|
if err != nil {
|
|
|
|
return response, fmt.Errorf("failed to get settigs: %w", err)
|
2023-08-21 19:31:32 +00:00
|
|
|
}
|
|
|
|
|
2023-10-13 14:31:56 +00:00
|
|
|
// Unfurl in a loop
|
|
|
|
|
|
|
|
response.LinkPreviews = make([]*common.LinkPreview, 0, len(urls))
|
|
|
|
response.StatusLinkPreviews = make([]*common.StatusLinkPreview, 0, len(urls))
|
|
|
|
|
|
|
|
if httpClient == nil {
|
|
|
|
httpClient = NewDefaultHTTPClient()
|
2023-10-13 12:25:34 +00:00
|
|
|
}
|
2023-08-21 19:31:32 +00:00
|
|
|
|
|
|
|
for _, url := range urls {
|
|
|
|
m.logger.Debug("unfurling", zap.String("url", url))
|
2023-10-13 12:25:34 +00:00
|
|
|
|
2023-11-10 15:00:03 +00:00
|
|
|
if IsStatusSharedURL(url) {
|
2023-10-13 12:25:34 +00:00
|
|
|
unfurler := NewStatusUnfurler(url, m, m.logger)
|
|
|
|
preview, err := unfurler.Unfurl()
|
|
|
|
if err != nil {
|
|
|
|
m.logger.Warn("failed to unfurl status link", zap.String("url", url), zap.Error(err))
|
|
|
|
continue
|
|
|
|
}
|
2023-10-13 14:31:56 +00:00
|
|
|
response.StatusLinkPreviews = append(response.StatusLinkPreviews, preview)
|
2023-10-13 12:25:34 +00:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2023-10-17 07:25:45 +00:00
|
|
|
// `AlwaysAsk` mode should be handled on the app side
|
|
|
|
// and is considered as equal to `EnableAll` in status-go.
|
|
|
|
if s.URLUnfurlingMode == settings.URLUnfurlingDisableAll {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2023-08-21 19:31:32 +00:00
|
|
|
p, err := m.unfurlURL(httpClient, url)
|
|
|
|
if err != nil {
|
2023-10-13 12:25:34 +00:00
|
|
|
m.logger.Warn("failed to unfurl", zap.String("url", url), zap.Error(err))
|
2023-08-21 19:31:32 +00:00
|
|
|
continue
|
|
|
|
}
|
2023-10-13 14:31:56 +00:00
|
|
|
response.LinkPreviews = append(response.LinkPreviews, p)
|
2023-08-21 19:31:32 +00:00
|
|
|
}
|
|
|
|
|
2023-10-13 14:31:56 +00:00
|
|
|
return response, nil
|
2023-08-21 19:31:32 +00:00
|
|
|
}
|