diff --git a/VERSION b/VERSION index c128fa2c8..a771be0c9 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.171.12 +0.171.13 diff --git a/protocol/messenger_linkpreview.go b/protocol/messenger_linkpreview.go index 40f661f52..1072af8e9 100644 --- a/protocol/messenger_linkpreview.go +++ b/protocol/messenger_linkpreview.go @@ -3,6 +3,7 @@ package protocol import ( "errors" "fmt" + "math" "net/http" neturl "net/url" "regexp" @@ -19,6 +20,29 @@ import ( const UnfurledLinksPerMessageLimit = 5 +type URLUnfurlPermission int + +const ( + URLUnfurlingAllowed URLUnfurlPermission = iota + URLUnfurlingAskUser + URLUnfurlingForbiddenBySettings + URLUnfurlingNotSupported +) + +type URLUnfurlingMetadata struct { + URL string `json:"url"` + Permission URLUnfurlPermission `json:"permission"` + IsStatusSharedURL bool `json:"isStatusSharedURL"` +} + +type URLsUnfurlPlan struct { + URLs []URLUnfurlingMetadata `json:"urls"` +} + +func URLUnfurlingSupported(url string) bool { + return !strings.HasSuffix(url, ".gif") +} + type UnfurlURLsResponse struct { LinkPreviews []*common.LinkPreview `json:"linkPreviews,omitempty"` StatusLinkPreviews []*common.StatusLinkPreview `json:"statusLinkPreviews,omitempty"` @@ -92,18 +116,23 @@ func parseValidURL(rawURL string) (*neturl.URL, error) { return u, nil } -// GetURLs returns only what we consider unfurleable URLs. -// -// If we wanted to be extra precise and help improve UX, we could ignore URLs -// that we know can't be unfurled. This is at least possible with the oEmbed -// protocol because providers must specify an endpoint scheme. -func GetURLs(text string) []string { +func (m *Messenger) GetTextURLsToUnfurl(text string) *URLsUnfurlPlan { + s, err := m.getSettings() + if err != nil { + // log the error and keep parsing the text + m.logger.Error("GetTextURLsToUnfurl: failed to get settings", zap.Error(err)) + s.URLUnfurlingMode = settings.URLUnfurlingDisableAll + } + + indexedUrls := map[string]struct{}{} + result := &URLsUnfurlPlan{ + // The usage of `UnfurledLinksPerMessageLimit` is quite random here. I wanted to allocate + // some not-zero place here, using the limit number is at least some binding. + URLs: make([]URLUnfurlingMetadata, 0, UnfurledLinksPerMessageLimit), + } parsedText := markdown.Parse([]byte(text), nil) visitor := common.RunLinksVisitor(parsedText) - urls := make([]string, 0, len(visitor.Links)) - indexed := make(map[string]any, len(visitor.Links)) - for _, rawURL := range visitor.Links { parsedURL, err := parseValidURL(rawURL) if err != nil { @@ -116,23 +145,54 @@ func GetURLs(text string) []string { // case-sensitive, some websites encode base64 in the path, etc. parsedURL.Host = strings.ToLower(parsedURL.Host) - idx := parsedURL.String() - // Removes the spurious trailing forward slash. - idx = strings.TrimRight(idx, "/") - if _, exists := indexed[idx]; exists { + url := parsedURL.String() + url = strings.TrimRight(url, "/") // Removes the spurious trailing forward slash. + if _, exists := indexedUrls[url]; exists { continue - } else { - indexed[idx] = nil - urls = append(urls, idx) } - // This is a temporary limitation solution, - // should be changed with https://github.com/status-im/status-go/issues/4235 - if len(urls) == UnfurledLinksPerMessageLimit { + metadata := URLUnfurlingMetadata{ + URL: url, + IsStatusSharedURL: IsStatusSharedURL(url), + } + + if !URLUnfurlingSupported(rawURL) { + metadata.Permission = URLUnfurlingNotSupported + } else if metadata.IsStatusSharedURL { + metadata.Permission = URLUnfurlingAllowed + } else { + switch s.URLUnfurlingMode { + case settings.URLUnfurlingAlwaysAsk: + metadata.Permission = URLUnfurlingAskUser + case settings.URLUnfurlingEnableAll: + metadata.Permission = URLUnfurlingAllowed + case settings.URLUnfurlingDisableAll: + metadata.Permission = URLUnfurlingForbiddenBySettings + default: + metadata.Permission = URLUnfurlingForbiddenBySettings + } + } + + result.URLs = append(result.URLs, metadata) + } + + return result +} + +// Deprecated: GetURLs is deprecated in favor of more generic GetTextURLsToUnfurl. +// +// This is a wrapper around GetTextURLsToUnfurl that returns the list of URLs found in the text +// without any additional information. +func (m *Messenger) GetURLs(text string) []string { + plan := m.GetTextURLsToUnfurl(text) + limit := int(math.Min(UnfurledLinksPerMessageLimit, float64(len(plan.URLs)))) + urls := make([]string, 0, limit) + for _, metadata := range plan.URLs { + urls = append(urls, metadata.URL) + if len(urls) == limit { break } } - return urls } @@ -145,11 +205,6 @@ func NewDefaultHTTPClient() *http.Client { func (m *Messenger) UnfurlURLs(httpClient *http.Client, urls []string) (UnfurlURLsResponse, error) { response := UnfurlURLsResponse{} - s, err := m.getSettings() - if err != nil { - return response, fmt.Errorf("failed to get settigs: %w", err) - } - // Unfurl in a loop response.LinkPreviews = make([]*common.LinkPreview, 0, len(urls)) @@ -173,12 +228,6 @@ func (m *Messenger) UnfurlURLs(httpClient *http.Client, urls []string) (UnfurlUR continue } - // `AlwaysAsk` mode should be handled on the app side - // and is considered as equal to `EnableAll` in status-go. - if s.URLUnfurlingMode == settings.URLUnfurlingDisableAll { - continue - } - p, err := m.unfurlURL(httpClient, url) if err != nil { m.logger.Warn("failed to unfurl", zap.String("url", url), zap.Error(err)) diff --git a/protocol/messenger_linkpreview_test.go b/protocol/messenger_linkpreview_test.go index 0df0e2b76..27e60ac6e 100644 --- a/protocol/messenger_linkpreview_test.go +++ b/protocol/messenger_linkpreview_test.go @@ -8,6 +8,7 @@ import ( "net/http" "net/url" "regexp" + "strings" "testing" "time" @@ -186,7 +187,7 @@ func (s *MessengerLinkPreviewsTestSuite) Test_GetLinks() { } for _, ex := range examples { - links := GetURLs(ex.args) + links := s.m.GetURLs(ex.args) s.Require().Equal(ex.expected, links, "Failed for args: '%s'", ex.args) } } @@ -606,86 +607,82 @@ func (s *MessengerLinkPreviewsTestSuite) Test_UnfurlURLs_StatusCommunityJoined() } func (s *MessengerLinkPreviewsTestSuite) Test_UnfurlURLs_Settings() { - // Create website stub - ogLink := "https://github.com" - requestsCount := 0 + const ogLink = "https://github.com" + const statusUserLink = "https://status.app/c#zQ3shYSHp7GoiXaauJMnDcjwU2yNjdzpXLosAWapPS4CFxc11" + const gifLink = "https://media1.giphy.com/media/lcG3qwtTKSNI2i5vst/giphy.gif" - transport := StubTransport{} - transport.AddURLMatcherRoundTrip( - ogLink, - func(req *http.Request) *http.Response { - requestsCount++ - responseBody := []byte(``) - return &http.Response{ - StatusCode: http.StatusOK, - Body: ioutil.NopCloser(bytes.NewBuffer(responseBody)), - } - }, - ) - stubbedClient := http.Client{Transport: &transport} - - // Add contact - identity, err := crypto.GenerateKey() - s.Require().NoError(err) - - c, err := BuildContactFromPublicKey(&identity.PublicKey) - s.Require().NoError(err) - s.Require().NotNil(c) - - c.Bio = "TestBio_1" - c.DisplayName = "TestDisplayName_2" - s.m.allContacts.Store(c.ID, c) - statusUserLink, err := s.m.ShareUserURLWithData(c.ID) - s.Require().NoError(err) - - linksToUnfurl := []string{ogLink, statusUserLink} + linksToUnfurl := []string{ogLink, statusUserLink, gifLink} + text := strings.Join(linksToUnfurl, " ") // Test `AlwaysAsk` - // NOTE: on status-go side `AlwaysAsk` == `EnableAll`, "asking" should be processed by the app - requestsCount = 0 - err = s.m.settings.SaveSettingField(settings.URLUnfurlingMode, settings.URLUnfurlingAlwaysAsk) + err := s.m.settings.SaveSettingField(settings.URLUnfurlingMode, settings.URLUnfurlingAlwaysAsk) s.Require().NoError(err) - linkPreviews, err := s.m.UnfurlURLs(&stubbedClient, linksToUnfurl) - s.Require().NoError(err) - s.Require().Len(linkPreviews.LinkPreviews, 1) - s.Require().Len(linkPreviews.StatusLinkPreviews, 1) - s.Require().Equal(requestsCount, 1) + plan := s.m.GetTextURLsToUnfurl(text) + s.Require().Len(plan.URLs, len(linksToUnfurl)) + + s.Require().Equal(plan.URLs[0].URL, ogLink) + s.Require().Equal(plan.URLs[0].IsStatusSharedURL, false) + s.Require().Equal(plan.URLs[0].Permission, URLUnfurlingAskUser) + + s.Require().Equal(plan.URLs[1].URL, statusUserLink) + s.Require().Equal(plan.URLs[1].IsStatusSharedURL, true) + s.Require().Equal(plan.URLs[1].Permission, URLUnfurlingAllowed) + + s.Require().Equal(plan.URLs[2].URL, gifLink) + s.Require().Equal(plan.URLs[2].IsStatusSharedURL, false) + s.Require().Equal(plan.URLs[2].Permission, URLUnfurlingNotSupported) // Test `EnableAll` - requestsCount = 0 err = s.m.settings.SaveSettingField(settings.URLUnfurlingMode, settings.URLUnfurlingEnableAll) s.Require().NoError(err) - linkPreviews, err = s.m.UnfurlURLs(&stubbedClient, linksToUnfurl) - s.Require().NoError(err) - s.Require().Len(linkPreviews.LinkPreviews, 1) - s.Require().Len(linkPreviews.StatusLinkPreviews, 1) - s.Require().Equal(requestsCount, 1) + plan = s.m.GetTextURLsToUnfurl(text) + s.Require().Len(plan.URLs, len(linksToUnfurl)) + + s.Require().Equal(plan.URLs[0].URL, ogLink) + s.Require().Equal(plan.URLs[0].IsStatusSharedURL, false) + s.Require().Equal(plan.URLs[0].Permission, URLUnfurlingAllowed) + + s.Require().Equal(plan.URLs[1].URL, statusUserLink) + s.Require().Equal(plan.URLs[1].IsStatusSharedURL, true) + s.Require().Equal(plan.URLs[1].Permission, URLUnfurlingAllowed) + + s.Require().Equal(plan.URLs[2].URL, gifLink) + s.Require().Equal(plan.URLs[2].IsStatusSharedURL, false) + s.Require().Equal(plan.URLs[2].Permission, URLUnfurlingNotSupported) // Test `DisableAll` - requestsCount = 0 err = s.m.settings.SaveSettingField(settings.URLUnfurlingMode, settings.URLUnfurlingDisableAll) s.Require().NoError(err) - linkPreviews, err = s.m.UnfurlURLs(&stubbedClient, linksToUnfurl) - s.Require().NoError(err) - s.Require().Len(linkPreviews.LinkPreviews, 0) - s.Require().Len(linkPreviews.StatusLinkPreviews, 1) // Status links are always unfurled - s.Require().Equal(requestsCount, 0) + plan = s.m.GetTextURLsToUnfurl(text) + s.Require().Len(plan.URLs, len(linksToUnfurl)) + + s.Require().Equal(plan.URLs[0].URL, ogLink) + s.Require().Equal(plan.URLs[0].IsStatusSharedURL, false) + s.Require().Equal(plan.URLs[0].Permission, URLUnfurlingForbiddenBySettings) + + s.Require().Equal(plan.URLs[1].URL, statusUserLink) + s.Require().Equal(plan.URLs[1].IsStatusSharedURL, true) + s.Require().Equal(plan.URLs[1].Permission, URLUnfurlingAllowed) + + s.Require().Equal(plan.URLs[2].URL, gifLink) + s.Require().Equal(plan.URLs[2].IsStatusSharedURL, false) + s.Require().Equal(plan.URLs[2].Permission, URLUnfurlingNotSupported) } func (s *MessengerLinkPreviewsTestSuite) Test_UnfurlURLs_Limit() { - linksToUnfurl := "https://www.youtube.com/watch?v=6dkDepLX0rk " + + text := "https://www.youtube.com/watch?v=6dkDepLX0rk " + "https://www.youtube.com/watch?v=ferZnZ0_rSM " + "https://www.youtube.com/watch?v=bdneye4pzMw " + "https://www.youtube.com/watch?v=pRERgcQe-fQ " + "https://www.youtube.com/watch?v=j82L3pLjb_0 " + "https://www.youtube.com/watch?v=hxsJvKYyVyg " + - "https://www.youtube.com/watch?v=jIIuzB11dsA" + "https://www.youtube.com/watch?v=jIIuzB11dsA " - urls := GetURLs(linksToUnfurl) + urls := s.m.GetURLs(text) s.Require().Equal(UnfurledLinksPerMessageLimit, len(urls)) } diff --git a/services/ext/api.go b/services/ext/api.go index aea09c530..1750c3903 100644 --- a/services/ext/api.go +++ b/services/ext/api.go @@ -1187,10 +1187,19 @@ func (api *PublicAPI) GetLinkPreviewData(link string) (previewData urls.LinkPrev return urls.GetLinkPreviewData(link) } +// GetTextURLsToUnfurl parses text and returns a deduplicated and (somewhat) normalized +// slice of URLs. The returned URLs can be used as cache keys by clients. +// For each URL there's a corresponding metadata which should be used as to plan the unfurling. +func (api *PublicAPI) GetTextURLsToUnfurl(text string) *protocol.URLsUnfurlPlan { + return api.service.messenger.GetTextURLsToUnfurl(text) +} + +// Deprecated: GetTextURLs is deprecated in favor of more generic GetTextURLsToUnfurl. +// // GetTextURLs parses text and returns a deduplicated and (somewhat) normalized // slice of URLs. The returned URLs can be used as cache keys by clients. func (api *PublicAPI) GetTextURLs(text string) []string { - return protocol.GetURLs(text) + return api.service.messenger.GetURLs(text) } // UnfurlURLs uses a best-effort approach to unfurl each URL. Failed URLs will