feat: new `GetTextURLsToUnfurl` endpoint which extends GetURLs (#4294)

This commit is contained in:
Igor Sirotin 2023-11-17 13:32:37 +00:00 committed by GitHub
parent 4ae9c02e57
commit 2fef9a8f8f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 144 additions and 89 deletions

View File

@ -1 +1 @@
0.171.12 0.171.13

View File

@ -3,6 +3,7 @@ package protocol
import ( import (
"errors" "errors"
"fmt" "fmt"
"math"
"net/http" "net/http"
neturl "net/url" neturl "net/url"
"regexp" "regexp"
@ -19,6 +20,29 @@ import (
const UnfurledLinksPerMessageLimit = 5 const UnfurledLinksPerMessageLimit = 5
// URLUnfurlPermission tells the client what it is allowed to do with a URL
// found in a message text, as decided by GetTextURLsToUnfurl.
type URLUnfurlPermission int
const (
// URLUnfurlingAllowed means the URL may be unfurled right away. It is assigned
// to Status shared URLs regardless of the settings, and to other supported URLs
// when the unfurling mode is settings.URLUnfurlingEnableAll.
URLUnfurlingAllowed URLUnfurlPermission = iota
// URLUnfurlingAskUser is assigned when the unfurling mode is
// settings.URLUnfurlingAlwaysAsk.
URLUnfurlingAskUser
// URLUnfurlingForbiddenBySettings is assigned when the unfurling mode is
// settings.URLUnfurlingDisableAll, or any mode that is not recognized.
URLUnfurlingForbiddenBySettings
// URLUnfurlingNotSupported is assigned when URLUnfurlingSupported rejects the
// URL; this takes precedence over every other rule.
URLUnfurlingNotSupported
)
// URLUnfurlingMetadata describes one URL found in a message text together with
// the information a client needs to decide whether to unfurl it.
type URLUnfurlingMetadata struct {
// URL is the parsed URL rendered back to a string, with a lower-cased host and
// trailing forward slashes removed.
URL string `json:"url"`
// Permission tells whether the URL may be unfurled, see URLUnfurlPermission.
Permission URLUnfurlPermission `json:"permission"`
// IsStatusSharedURL holds the result of IsStatusSharedURL for this URL.
IsStatusSharedURL bool `json:"isStatusSharedURL"`
}
// URLsUnfurlPlan is the result of GetTextURLsToUnfurl.
type URLsUnfurlPlan struct {
// URLs holds one metadata entry per URL, in the order the links were visited
// in the parsed text.
URLs []URLUnfurlingMetadata `json:"urls"`
}
// URLUnfurlingSupported reports whether url is a candidate for unfurling.
// Links that point to GIF images are not supported.
//
// The ".gif" extension is matched case-insensitively, first against the raw
// string and then against the URL path, so links such as "https://host/a.GIF"
// or "https://host/a.gif?cid=1#frag" are rejected as well.
func URLUnfurlingSupported(url string) bool {
	const gifSuffix = ".gif"

	if strings.HasSuffix(strings.ToLower(url), gifSuffix) {
		return false
	}

	// A query string or a fragment after the file name must not hide the
	// extension, so look at the path component on its own.
	parsed, err := neturl.Parse(url)
	if err != nil {
		// Unparseable input: keep the verdict of the raw-string check above.
		return true
	}
	return !strings.HasSuffix(strings.ToLower(parsed.Path), gifSuffix)
}
type UnfurlURLsResponse struct { type UnfurlURLsResponse struct {
LinkPreviews []*common.LinkPreview `json:"linkPreviews,omitempty"` LinkPreviews []*common.LinkPreview `json:"linkPreviews,omitempty"`
StatusLinkPreviews []*common.StatusLinkPreview `json:"statusLinkPreviews,omitempty"` StatusLinkPreviews []*common.StatusLinkPreview `json:"statusLinkPreviews,omitempty"`
@ -92,18 +116,23 @@ func parseValidURL(rawURL string) (*neturl.URL, error) {
return u, nil return u, nil
} }
// GetURLs returns only what we consider unfurleable URLs. func (m *Messenger) GetTextURLsToUnfurl(text string) *URLsUnfurlPlan {
// s, err := m.getSettings()
// If we wanted to be extra precise and help improve UX, we could ignore URLs if err != nil {
// that we know can't be unfurled. This is at least possible with the oEmbed // log the error and keep parsing the text
// protocol because providers must specify an endpoint scheme. m.logger.Error("GetTextURLsToUnfurl: failed to get settings", zap.Error(err))
func GetURLs(text string) []string { s.URLUnfurlingMode = settings.URLUnfurlingDisableAll
}
indexedUrls := map[string]struct{}{}
result := &URLsUnfurlPlan{
// The usage of `UnfurledLinksPerMessageLimit` is quite random here. I wanted to allocate
// some not-zero place here, using the limit number is at least some binding.
URLs: make([]URLUnfurlingMetadata, 0, UnfurledLinksPerMessageLimit),
}
parsedText := markdown.Parse([]byte(text), nil) parsedText := markdown.Parse([]byte(text), nil)
visitor := common.RunLinksVisitor(parsedText) visitor := common.RunLinksVisitor(parsedText)
urls := make([]string, 0, len(visitor.Links))
indexed := make(map[string]any, len(visitor.Links))
for _, rawURL := range visitor.Links { for _, rawURL := range visitor.Links {
parsedURL, err := parseValidURL(rawURL) parsedURL, err := parseValidURL(rawURL)
if err != nil { if err != nil {
@ -116,23 +145,54 @@ func GetURLs(text string) []string {
// case-sensitive, some websites encode base64 in the path, etc. // case-sensitive, some websites encode base64 in the path, etc.
parsedURL.Host = strings.ToLower(parsedURL.Host) parsedURL.Host = strings.ToLower(parsedURL.Host)
idx := parsedURL.String() url := parsedURL.String()
// Removes the spurious trailing forward slash. url = strings.TrimRight(url, "/") // Removes the spurious trailing forward slash.
idx = strings.TrimRight(idx, "/") if _, exists := indexedUrls[url]; exists {
if _, exists := indexed[idx]; exists {
continue continue
} else {
indexed[idx] = nil
urls = append(urls, idx)
} }
// This is a temporary limitation solution, metadata := URLUnfurlingMetadata{
// should be changed with https://github.com/status-im/status-go/issues/4235 URL: url,
if len(urls) == UnfurledLinksPerMessageLimit { IsStatusSharedURL: IsStatusSharedURL(url),
}
if !URLUnfurlingSupported(rawURL) {
metadata.Permission = URLUnfurlingNotSupported
} else if metadata.IsStatusSharedURL {
metadata.Permission = URLUnfurlingAllowed
} else {
switch s.URLUnfurlingMode {
case settings.URLUnfurlingAlwaysAsk:
metadata.Permission = URLUnfurlingAskUser
case settings.URLUnfurlingEnableAll:
metadata.Permission = URLUnfurlingAllowed
case settings.URLUnfurlingDisableAll:
metadata.Permission = URLUnfurlingForbiddenBySettings
default:
metadata.Permission = URLUnfurlingForbiddenBySettings
}
}
result.URLs = append(result.URLs, metadata)
}
return result
}
// Deprecated: GetURLs is deprecated in favor of more generic GetTextURLsToUnfurl.
//
// This is a wrapper around GetTextURLsToUnfurl that returns the list of URLs found in the text
// without any additional information.
func (m *Messenger) GetURLs(text string) []string {
plan := m.GetTextURLsToUnfurl(text)
limit := int(math.Min(UnfurledLinksPerMessageLimit, float64(len(plan.URLs))))
urls := make([]string, 0, limit)
for _, metadata := range plan.URLs {
urls = append(urls, metadata.URL)
if len(urls) == limit {
break break
} }
} }
return urls return urls
} }
@ -145,11 +205,6 @@ func NewDefaultHTTPClient() *http.Client {
func (m *Messenger) UnfurlURLs(httpClient *http.Client, urls []string) (UnfurlURLsResponse, error) { func (m *Messenger) UnfurlURLs(httpClient *http.Client, urls []string) (UnfurlURLsResponse, error) {
response := UnfurlURLsResponse{} response := UnfurlURLsResponse{}
s, err := m.getSettings()
if err != nil {
return response, fmt.Errorf("failed to get settigs: %w", err)
}
// Unfurl in a loop // Unfurl in a loop
response.LinkPreviews = make([]*common.LinkPreview, 0, len(urls)) response.LinkPreviews = make([]*common.LinkPreview, 0, len(urls))
@ -173,12 +228,6 @@ func (m *Messenger) UnfurlURLs(httpClient *http.Client, urls []string) (UnfurlUR
continue continue
} }
// `AlwaysAsk` mode should be handled on the app side
// and is considered as equal to `EnableAll` in status-go.
if s.URLUnfurlingMode == settings.URLUnfurlingDisableAll {
continue
}
p, err := m.unfurlURL(httpClient, url) p, err := m.unfurlURL(httpClient, url)
if err != nil { if err != nil {
m.logger.Warn("failed to unfurl", zap.String("url", url), zap.Error(err)) m.logger.Warn("failed to unfurl", zap.String("url", url), zap.Error(err))

View File

@ -8,6 +8,7 @@ import (
"net/http" "net/http"
"net/url" "net/url"
"regexp" "regexp"
"strings"
"testing" "testing"
"time" "time"
@ -186,7 +187,7 @@ func (s *MessengerLinkPreviewsTestSuite) Test_GetLinks() {
} }
for _, ex := range examples { for _, ex := range examples {
links := GetURLs(ex.args) links := s.m.GetURLs(ex.args)
s.Require().Equal(ex.expected, links, "Failed for args: '%s'", ex.args) s.Require().Equal(ex.expected, links, "Failed for args: '%s'", ex.args)
} }
} }
@ -606,86 +607,82 @@ func (s *MessengerLinkPreviewsTestSuite) Test_UnfurlURLs_StatusCommunityJoined()
} }
func (s *MessengerLinkPreviewsTestSuite) Test_UnfurlURLs_Settings() { func (s *MessengerLinkPreviewsTestSuite) Test_UnfurlURLs_Settings() {
// Create website stub // Create website stub
ogLink := "https://github.com" const ogLink = "https://github.com"
requestsCount := 0 const statusUserLink = "https://status.app/c#zQ3shYSHp7GoiXaauJMnDcjwU2yNjdzpXLosAWapPS4CFxc11"
const gifLink = "https://media1.giphy.com/media/lcG3qwtTKSNI2i5vst/giphy.gif"
transport := StubTransport{} linksToUnfurl := []string{ogLink, statusUserLink, gifLink}
transport.AddURLMatcherRoundTrip( text := strings.Join(linksToUnfurl, " ")
ogLink,
func(req *http.Request) *http.Response {
requestsCount++
responseBody := []byte(`<html><head><meta property="og:title" content="TestTitle"></head></html>`)
return &http.Response{
StatusCode: http.StatusOK,
Body: ioutil.NopCloser(bytes.NewBuffer(responseBody)),
}
},
)
stubbedClient := http.Client{Transport: &transport}
// Add contact
identity, err := crypto.GenerateKey()
s.Require().NoError(err)
c, err := BuildContactFromPublicKey(&identity.PublicKey)
s.Require().NoError(err)
s.Require().NotNil(c)
c.Bio = "TestBio_1"
c.DisplayName = "TestDisplayName_2"
s.m.allContacts.Store(c.ID, c)
statusUserLink, err := s.m.ShareUserURLWithData(c.ID)
s.Require().NoError(err)
linksToUnfurl := []string{ogLink, statusUserLink}
// Test `AlwaysAsk` // Test `AlwaysAsk`
// NOTE: on status-go side `AlwaysAsk` == `EnableAll`, "asking" should be processed by the app
requestsCount = 0 err := s.m.settings.SaveSettingField(settings.URLUnfurlingMode, settings.URLUnfurlingAlwaysAsk)
err = s.m.settings.SaveSettingField(settings.URLUnfurlingMode, settings.URLUnfurlingAlwaysAsk)
s.Require().NoError(err) s.Require().NoError(err)
linkPreviews, err := s.m.UnfurlURLs(&stubbedClient, linksToUnfurl) plan := s.m.GetTextURLsToUnfurl(text)
s.Require().NoError(err) s.Require().Len(plan.URLs, len(linksToUnfurl))
s.Require().Len(linkPreviews.LinkPreviews, 1)
s.Require().Len(linkPreviews.StatusLinkPreviews, 1) s.Require().Equal(plan.URLs[0].URL, ogLink)
s.Require().Equal(requestsCount, 1) s.Require().Equal(plan.URLs[0].IsStatusSharedURL, false)
s.Require().Equal(plan.URLs[0].Permission, URLUnfurlingAskUser)
s.Require().Equal(plan.URLs[1].URL, statusUserLink)
s.Require().Equal(plan.URLs[1].IsStatusSharedURL, true)
s.Require().Equal(plan.URLs[1].Permission, URLUnfurlingAllowed)
s.Require().Equal(plan.URLs[2].URL, gifLink)
s.Require().Equal(plan.URLs[2].IsStatusSharedURL, false)
s.Require().Equal(plan.URLs[2].Permission, URLUnfurlingNotSupported)
// Test `EnableAll` // Test `EnableAll`
requestsCount = 0
err = s.m.settings.SaveSettingField(settings.URLUnfurlingMode, settings.URLUnfurlingEnableAll) err = s.m.settings.SaveSettingField(settings.URLUnfurlingMode, settings.URLUnfurlingEnableAll)
s.Require().NoError(err) s.Require().NoError(err)
linkPreviews, err = s.m.UnfurlURLs(&stubbedClient, linksToUnfurl) plan = s.m.GetTextURLsToUnfurl(text)
s.Require().NoError(err) s.Require().Len(plan.URLs, len(linksToUnfurl))
s.Require().Len(linkPreviews.LinkPreviews, 1)
s.Require().Len(linkPreviews.StatusLinkPreviews, 1) s.Require().Equal(plan.URLs[0].URL, ogLink)
s.Require().Equal(requestsCount, 1) s.Require().Equal(plan.URLs[0].IsStatusSharedURL, false)
s.Require().Equal(plan.URLs[0].Permission, URLUnfurlingAllowed)
s.Require().Equal(plan.URLs[1].URL, statusUserLink)
s.Require().Equal(plan.URLs[1].IsStatusSharedURL, true)
s.Require().Equal(plan.URLs[1].Permission, URLUnfurlingAllowed)
s.Require().Equal(plan.URLs[2].URL, gifLink)
s.Require().Equal(plan.URLs[2].IsStatusSharedURL, false)
s.Require().Equal(plan.URLs[2].Permission, URLUnfurlingNotSupported)
// Test `DisableAll` // Test `DisableAll`
requestsCount = 0
err = s.m.settings.SaveSettingField(settings.URLUnfurlingMode, settings.URLUnfurlingDisableAll) err = s.m.settings.SaveSettingField(settings.URLUnfurlingMode, settings.URLUnfurlingDisableAll)
s.Require().NoError(err) s.Require().NoError(err)
linkPreviews, err = s.m.UnfurlURLs(&stubbedClient, linksToUnfurl) plan = s.m.GetTextURLsToUnfurl(text)
s.Require().NoError(err) s.Require().Len(plan.URLs, len(linksToUnfurl))
s.Require().Len(linkPreviews.LinkPreviews, 0)
s.Require().Len(linkPreviews.StatusLinkPreviews, 1) // Status links are always unfurled s.Require().Equal(plan.URLs[0].URL, ogLink)
s.Require().Equal(requestsCount, 0) s.Require().Equal(plan.URLs[0].IsStatusSharedURL, false)
s.Require().Equal(plan.URLs[0].Permission, URLUnfurlingForbiddenBySettings)
s.Require().Equal(plan.URLs[1].URL, statusUserLink)
s.Require().Equal(plan.URLs[1].IsStatusSharedURL, true)
s.Require().Equal(plan.URLs[1].Permission, URLUnfurlingAllowed)
s.Require().Equal(plan.URLs[2].URL, gifLink)
s.Require().Equal(plan.URLs[2].IsStatusSharedURL, false)
s.Require().Equal(plan.URLs[2].Permission, URLUnfurlingNotSupported)
} }
func (s *MessengerLinkPreviewsTestSuite) Test_UnfurlURLs_Limit() { func (s *MessengerLinkPreviewsTestSuite) Test_UnfurlURLs_Limit() {
linksToUnfurl := "https://www.youtube.com/watch?v=6dkDepLX0rk " + text := "https://www.youtube.com/watch?v=6dkDepLX0rk " +
"https://www.youtube.com/watch?v=ferZnZ0_rSM " + "https://www.youtube.com/watch?v=ferZnZ0_rSM " +
"https://www.youtube.com/watch?v=bdneye4pzMw " + "https://www.youtube.com/watch?v=bdneye4pzMw " +
"https://www.youtube.com/watch?v=pRERgcQe-fQ " + "https://www.youtube.com/watch?v=pRERgcQe-fQ " +
"https://www.youtube.com/watch?v=j82L3pLjb_0 " + "https://www.youtube.com/watch?v=j82L3pLjb_0 " +
"https://www.youtube.com/watch?v=hxsJvKYyVyg " + "https://www.youtube.com/watch?v=hxsJvKYyVyg " +
"https://www.youtube.com/watch?v=jIIuzB11dsA" "https://www.youtube.com/watch?v=jIIuzB11dsA "
urls := GetURLs(linksToUnfurl) urls := s.m.GetURLs(text)
s.Require().Equal(UnfurledLinksPerMessageLimit, len(urls)) s.Require().Equal(UnfurledLinksPerMessageLimit, len(urls))
} }

View File

@ -1187,10 +1187,19 @@ func (api *PublicAPI) GetLinkPreviewData(link string) (previewData urls.LinkPrev
return urls.GetLinkPreviewData(link) return urls.GetLinkPreviewData(link)
} }
// GetTextURLsToUnfurl parses text and returns an unfurling plan for the URLs
// found in it. The URLs are (somewhat) normalized and intended to be
// deduplicated, so clients can use them as cache keys. Each URL comes with
// metadata that should be used to plan the unfurling.
func (api *PublicAPI) GetTextURLsToUnfurl(text string) *protocol.URLsUnfurlPlan {
	messenger := api.service.messenger
	return messenger.GetTextURLsToUnfurl(text)
}
// Deprecated: GetTextURLs is deprecated in favor of more generic GetTextURLsToUnfurl.
//
// GetTextURLs parses text and returns a deduplicated and (somewhat) normalized // GetTextURLs parses text and returns a deduplicated and (somewhat) normalized
// slice of URLs. The returned URLs can be used as cache keys by clients. // slice of URLs. The returned URLs can be used as cache keys by clients.
func (api *PublicAPI) GetTextURLs(text string) []string { func (api *PublicAPI) GetTextURLs(text string) []string {
return protocol.GetURLs(text) return api.service.messenger.GetURLs(text)
} }
// UnfurlURLs uses a best-effort approach to unfurl each URL. Failed URLs will // UnfurlURLs uses a best-effort approach to unfurl each URL. Failed URLs will