2023-08-21 22:31:32 +03:00
|
|
|
package protocol
|
|
|
|
|
|
|
|
import (
|
|
|
|
"errors"
|
|
|
|
"fmt"
|
2023-11-17 13:32:37 +00:00
|
|
|
"math"
|
2023-08-21 22:31:32 +03:00
|
|
|
"net/http"
|
|
|
|
neturl "net/url"
|
|
|
|
"regexp"
|
|
|
|
"strings"
|
|
|
|
|
|
|
|
"go.uber.org/zap"
|
|
|
|
"golang.org/x/net/publicsuffix"
|
|
|
|
|
2023-10-24 11:42:56 +08:00
|
|
|
"github.com/status-im/markdown"
|
|
|
|
|
2023-10-13 15:31:56 +01:00
|
|
|
"github.com/status-im/status-go/multiaccounts/settings"
|
2023-08-21 22:31:32 +03:00
|
|
|
"github.com/status-im/status-go/protocol/common"
|
|
|
|
)
|
|
|
|
|
2023-11-10 10:32:58 +00:00
|
|
|
const UnfurledLinksPerMessageLimit = 5
|
|
|
|
|
2023-11-17 13:32:37 +00:00
|
|
|
type URLUnfurlPermission int
|
|
|
|
|
|
|
|
const (
|
|
|
|
URLUnfurlingAllowed URLUnfurlPermission = iota
|
|
|
|
URLUnfurlingAskUser
|
|
|
|
URLUnfurlingForbiddenBySettings
|
|
|
|
URLUnfurlingNotSupported
|
|
|
|
)
|
|
|
|
|
|
|
|
type URLUnfurlingMetadata struct {
|
|
|
|
URL string `json:"url"`
|
|
|
|
Permission URLUnfurlPermission `json:"permission"`
|
|
|
|
IsStatusSharedURL bool `json:"isStatusSharedURL"`
|
|
|
|
}
|
|
|
|
|
|
|
|
type URLsUnfurlPlan struct {
|
|
|
|
URLs []URLUnfurlingMetadata `json:"urls"`
|
|
|
|
}
|
|
|
|
|
|
|
|
func URLUnfurlingSupported(url string) bool {
|
|
|
|
return !strings.HasSuffix(url, ".gif")
|
|
|
|
}
|
|
|
|
|
2023-10-13 13:25:34 +01:00
|
|
|
type UnfurlURLsResponse struct {
|
|
|
|
LinkPreviews []*common.LinkPreview `json:"linkPreviews,omitempty"`
|
|
|
|
StatusLinkPreviews []*common.StatusLinkPreview `json:"statusLinkPreviews,omitempty"`
|
2023-08-21 22:31:32 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
func normalizeHostname(hostname string) string {
|
|
|
|
hostname = strings.ToLower(hostname)
|
|
|
|
re := regexp.MustCompile(`^www\.(.*)$`)
|
|
|
|
return re.ReplaceAllString(hostname, "$1")
|
|
|
|
}
|
|
|
|
|
2023-10-13 13:25:34 +01:00
|
|
|
func (m *Messenger) newURLUnfurler(httpClient *http.Client, url *neturl.URL) Unfurler {
|
|
|
|
|
|
|
|
if IsSupportedImageURL(url) {
|
|
|
|
return NewImageUnfurler(
|
2023-08-21 22:31:32 +03:00
|
|
|
url,
|
|
|
|
m.logger,
|
|
|
|
httpClient)
|
|
|
|
}
|
|
|
|
|
|
|
|
switch normalizeHostname(url.Hostname()) {
|
|
|
|
case "reddit.com":
|
2023-10-13 13:25:34 +01:00
|
|
|
return NewOEmbedUnfurler(
|
2023-08-21 22:31:32 +03:00
|
|
|
"https://www.reddit.com/oembed",
|
|
|
|
url,
|
|
|
|
m.logger,
|
|
|
|
httpClient)
|
|
|
|
default:
|
2023-10-13 13:25:34 +01:00
|
|
|
return NewOpenGraphUnfurler(
|
2023-08-21 22:31:32 +03:00
|
|
|
url,
|
|
|
|
m.logger,
|
|
|
|
httpClient)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-10-13 13:25:34 +01:00
|
|
|
func (m *Messenger) unfurlURL(httpClient *http.Client, url string) (*common.LinkPreview, error) {
|
|
|
|
preview := new(common.LinkPreview)
|
2023-08-21 22:31:32 +03:00
|
|
|
|
|
|
|
parsedURL, err := neturl.Parse(url)
|
|
|
|
if err != nil {
|
|
|
|
return preview, err
|
|
|
|
}
|
|
|
|
|
|
|
|
unfurler := m.newURLUnfurler(httpClient, parsedURL)
|
|
|
|
preview, err = unfurler.Unfurl()
|
|
|
|
if err != nil {
|
|
|
|
return preview, err
|
|
|
|
}
|
|
|
|
preview.Hostname = strings.ToLower(parsedURL.Hostname())
|
|
|
|
|
|
|
|
return preview, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// parseValidURL is a stricter version of url.Parse that performs additional
|
|
|
|
// checks to ensure the URL is valid for clients to request a link preview.
|
|
|
|
func parseValidURL(rawURL string) (*neturl.URL, error) {
|
|
|
|
u, err := neturl.Parse(rawURL)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("parsing URL failed: %w", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
if u.Scheme == "" {
|
|
|
|
return nil, errors.New("missing URL scheme")
|
|
|
|
}
|
|
|
|
|
|
|
|
_, err = publicsuffix.EffectiveTLDPlusOne(u.Hostname())
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("missing known URL domain: %w", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
return u, nil
|
|
|
|
}
|
|
|
|
|
2023-11-17 13:32:37 +00:00
|
|
|
func (m *Messenger) GetTextURLsToUnfurl(text string) *URLsUnfurlPlan {
|
|
|
|
s, err := m.getSettings()
|
|
|
|
if err != nil {
|
|
|
|
// log the error and keep parsing the text
|
|
|
|
m.logger.Error("GetTextURLsToUnfurl: failed to get settings", zap.Error(err))
|
|
|
|
s.URLUnfurlingMode = settings.URLUnfurlingDisableAll
|
|
|
|
}
|
|
|
|
|
|
|
|
indexedUrls := map[string]struct{}{}
|
|
|
|
result := &URLsUnfurlPlan{
|
|
|
|
// The usage of `UnfurledLinksPerMessageLimit` is quite random here. I wanted to allocate
|
|
|
|
// some not-zero place here, using the limit number is at least some binding.
|
|
|
|
URLs: make([]URLUnfurlingMetadata, 0, UnfurledLinksPerMessageLimit),
|
|
|
|
}
|
2023-08-21 22:31:32 +03:00
|
|
|
parsedText := markdown.Parse([]byte(text), nil)
|
|
|
|
visitor := common.RunLinksVisitor(parsedText)
|
|
|
|
|
|
|
|
for _, rawURL := range visitor.Links {
|
|
|
|
parsedURL, err := parseValidURL(rawURL)
|
|
|
|
if err != nil {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
// Lowercase the host so the URL can be used as a cache key. Particularly on
|
|
|
|
// mobile clients it is common that the first character in a text input is
|
|
|
|
// automatically uppercased. In WhatsApp they incorrectly lowercase the
|
|
|
|
// URL's path, but this is incorrect. For instance, some URL shorteners are
|
|
|
|
// case-sensitive, some websites encode base64 in the path, etc.
|
|
|
|
parsedURL.Host = strings.ToLower(parsedURL.Host)
|
|
|
|
|
2023-11-17 13:32:37 +00:00
|
|
|
url := parsedURL.String()
|
|
|
|
url = strings.TrimRight(url, "/") // Removes the spurious trailing forward slash.
|
|
|
|
if _, exists := indexedUrls[url]; exists {
|
2023-08-21 22:31:32 +03:00
|
|
|
continue
|
2023-11-17 13:32:37 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
metadata := URLUnfurlingMetadata{
|
|
|
|
URL: url,
|
|
|
|
IsStatusSharedURL: IsStatusSharedURL(url),
|
|
|
|
}
|
|
|
|
|
|
|
|
if !URLUnfurlingSupported(rawURL) {
|
|
|
|
metadata.Permission = URLUnfurlingNotSupported
|
|
|
|
} else if metadata.IsStatusSharedURL {
|
|
|
|
metadata.Permission = URLUnfurlingAllowed
|
2023-08-21 22:31:32 +03:00
|
|
|
} else {
|
2023-11-17 13:32:37 +00:00
|
|
|
switch s.URLUnfurlingMode {
|
|
|
|
case settings.URLUnfurlingAlwaysAsk:
|
|
|
|
metadata.Permission = URLUnfurlingAskUser
|
|
|
|
case settings.URLUnfurlingEnableAll:
|
|
|
|
metadata.Permission = URLUnfurlingAllowed
|
|
|
|
case settings.URLUnfurlingDisableAll:
|
|
|
|
metadata.Permission = URLUnfurlingForbiddenBySettings
|
|
|
|
default:
|
|
|
|
metadata.Permission = URLUnfurlingForbiddenBySettings
|
|
|
|
}
|
2023-08-21 22:31:32 +03:00
|
|
|
}
|
2023-11-10 10:32:58 +00:00
|
|
|
|
2023-11-17 13:32:37 +00:00
|
|
|
result.URLs = append(result.URLs, metadata)
|
|
|
|
}
|
|
|
|
|
|
|
|
return result
|
|
|
|
}
|
|
|
|
|
|
|
|
// Deprecated: GetURLs is deprecated in favor of more generic GetTextURLsToUnfurl.
|
|
|
|
//
|
|
|
|
// This is a wrapper around GetTextURLsToUnfurl that returns the list of URLs found in the text
|
|
|
|
// without any additional information.
|
|
|
|
func (m *Messenger) GetURLs(text string) []string {
|
|
|
|
plan := m.GetTextURLsToUnfurl(text)
|
|
|
|
limit := int(math.Min(UnfurledLinksPerMessageLimit, float64(len(plan.URLs))))
|
|
|
|
urls := make([]string, 0, limit)
|
|
|
|
for _, metadata := range plan.URLs {
|
|
|
|
urls = append(urls, metadata.URL)
|
|
|
|
if len(urls) == limit {
|
2023-11-10 10:32:58 +00:00
|
|
|
break
|
|
|
|
}
|
2023-08-21 22:31:32 +03:00
|
|
|
}
|
|
|
|
return urls
|
|
|
|
}
|
|
|
|
|
|
|
|
func NewDefaultHTTPClient() *http.Client {
|
2023-10-13 13:25:34 +01:00
|
|
|
return &http.Client{Timeout: DefaultRequestTimeout}
|
2023-08-21 22:31:32 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
// UnfurlURLs assumes clients pass URLs verbatim that were validated and
|
|
|
|
// processed by GetURLs.
|
2023-10-13 13:25:34 +01:00
|
|
|
func (m *Messenger) UnfurlURLs(httpClient *http.Client, urls []string) (UnfurlURLsResponse, error) {
|
2023-10-13 15:31:56 +01:00
|
|
|
response := UnfurlURLsResponse{}
|
|
|
|
|
|
|
|
// Unfurl in a loop
|
|
|
|
|
|
|
|
response.LinkPreviews = make([]*common.LinkPreview, 0, len(urls))
|
|
|
|
response.StatusLinkPreviews = make([]*common.StatusLinkPreview, 0, len(urls))
|
|
|
|
|
|
|
|
if httpClient == nil {
|
|
|
|
httpClient = NewDefaultHTTPClient()
|
2023-10-13 13:25:34 +01:00
|
|
|
}
|
2023-08-21 22:31:32 +03:00
|
|
|
|
|
|
|
for _, url := range urls {
|
|
|
|
m.logger.Debug("unfurling", zap.String("url", url))
|
2023-10-13 13:25:34 +01:00
|
|
|
|
2023-11-10 15:00:03 +00:00
|
|
|
if IsStatusSharedURL(url) {
|
2023-10-13 13:25:34 +01:00
|
|
|
unfurler := NewStatusUnfurler(url, m, m.logger)
|
|
|
|
preview, err := unfurler.Unfurl()
|
|
|
|
if err != nil {
|
|
|
|
m.logger.Warn("failed to unfurl status link", zap.String("url", url), zap.Error(err))
|
|
|
|
continue
|
|
|
|
}
|
2023-10-13 15:31:56 +01:00
|
|
|
response.StatusLinkPreviews = append(response.StatusLinkPreviews, preview)
|
2023-10-13 13:25:34 +01:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2023-08-21 22:31:32 +03:00
|
|
|
p, err := m.unfurlURL(httpClient, url)
|
|
|
|
if err != nil {
|
2023-10-13 13:25:34 +01:00
|
|
|
m.logger.Warn("failed to unfurl", zap.String("url", url), zap.Error(err))
|
2023-08-21 22:31:32 +03:00
|
|
|
continue
|
|
|
|
}
|
2023-10-13 15:31:56 +01:00
|
|
|
response.LinkPreviews = append(response.LinkPreviews, p)
|
2023-08-21 22:31:32 +03:00
|
|
|
}
|
|
|
|
|
2023-10-13 15:31:56 +01:00
|
|
|
return response, nil
|
2023-08-21 22:31:32 +03:00
|
|
|
}
|