feat: new `GetTextURLsToUnfurl` endpoint which extends GetURLs (#4294)
This commit is contained in:
parent
4ae9c02e57
commit
2fef9a8f8f
|
@ -3,6 +3,7 @@ package protocol
|
|||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"net/http"
|
||||
neturl "net/url"
|
||||
"regexp"
|
||||
|
@ -19,6 +20,29 @@ import (
|
|||
|
||||
const UnfurledLinksPerMessageLimit = 5
|
||||
|
||||
type URLUnfurlPermission int
|
||||
|
||||
const (
|
||||
URLUnfurlingAllowed URLUnfurlPermission = iota
|
||||
URLUnfurlingAskUser
|
||||
URLUnfurlingForbiddenBySettings
|
||||
URLUnfurlingNotSupported
|
||||
)
|
||||
|
||||
type URLUnfurlingMetadata struct {
|
||||
URL string `json:"url"`
|
||||
Permission URLUnfurlPermission `json:"permission"`
|
||||
IsStatusSharedURL bool `json:"isStatusSharedURL"`
|
||||
}
|
||||
|
||||
type URLsUnfurlPlan struct {
|
||||
URLs []URLUnfurlingMetadata `json:"urls"`
|
||||
}
|
||||
|
||||
func URLUnfurlingSupported(url string) bool {
|
||||
return !strings.HasSuffix(url, ".gif")
|
||||
}
|
||||
|
||||
type UnfurlURLsResponse struct {
|
||||
LinkPreviews []*common.LinkPreview `json:"linkPreviews,omitempty"`
|
||||
StatusLinkPreviews []*common.StatusLinkPreview `json:"statusLinkPreviews,omitempty"`
|
||||
|
@ -92,18 +116,23 @@ func parseValidURL(rawURL string) (*neturl.URL, error) {
|
|||
return u, nil
|
||||
}
|
||||
|
||||
// GetURLs returns only what we consider unfurleable URLs.
|
||||
//
|
||||
// If we wanted to be extra precise and help improve UX, we could ignore URLs
|
||||
// that we know can't be unfurled. This is at least possible with the oEmbed
|
||||
// protocol because providers must specify an endpoint scheme.
|
||||
func GetURLs(text string) []string {
|
||||
func (m *Messenger) GetTextURLsToUnfurl(text string) *URLsUnfurlPlan {
|
||||
s, err := m.getSettings()
|
||||
if err != nil {
|
||||
// log the error and keep parsing the text
|
||||
m.logger.Error("GetTextURLsToUnfurl: failed to get settings", zap.Error(err))
|
||||
s.URLUnfurlingMode = settings.URLUnfurlingDisableAll
|
||||
}
|
||||
|
||||
indexedUrls := map[string]struct{}{}
|
||||
result := &URLsUnfurlPlan{
|
||||
// The usage of `UnfurledLinksPerMessageLimit` is quite random here. I wanted to allocate
|
||||
// some not-zero place here, using the limit number is at least some binding.
|
||||
URLs: make([]URLUnfurlingMetadata, 0, UnfurledLinksPerMessageLimit),
|
||||
}
|
||||
parsedText := markdown.Parse([]byte(text), nil)
|
||||
visitor := common.RunLinksVisitor(parsedText)
|
||||
|
||||
urls := make([]string, 0, len(visitor.Links))
|
||||
indexed := make(map[string]any, len(visitor.Links))
|
||||
|
||||
for _, rawURL := range visitor.Links {
|
||||
parsedURL, err := parseValidURL(rawURL)
|
||||
if err != nil {
|
||||
|
@ -116,23 +145,54 @@ func GetURLs(text string) []string {
|
|||
// case-sensitive, some websites encode base64 in the path, etc.
|
||||
parsedURL.Host = strings.ToLower(parsedURL.Host)
|
||||
|
||||
idx := parsedURL.String()
|
||||
// Removes the spurious trailing forward slash.
|
||||
idx = strings.TrimRight(idx, "/")
|
||||
if _, exists := indexed[idx]; exists {
|
||||
url := parsedURL.String()
|
||||
url = strings.TrimRight(url, "/") // Removes the spurious trailing forward slash.
|
||||
if _, exists := indexedUrls[url]; exists {
|
||||
continue
|
||||
} else {
|
||||
indexed[idx] = nil
|
||||
urls = append(urls, idx)
|
||||
}
|
||||
|
||||
// This is a temporary limitation solution,
|
||||
// should be changed with https://github.com/status-im/status-go/issues/4235
|
||||
if len(urls) == UnfurledLinksPerMessageLimit {
|
||||
metadata := URLUnfurlingMetadata{
|
||||
URL: url,
|
||||
IsStatusSharedURL: IsStatusSharedURL(url),
|
||||
}
|
||||
|
||||
if !URLUnfurlingSupported(rawURL) {
|
||||
metadata.Permission = URLUnfurlingNotSupported
|
||||
} else if metadata.IsStatusSharedURL {
|
||||
metadata.Permission = URLUnfurlingAllowed
|
||||
} else {
|
||||
switch s.URLUnfurlingMode {
|
||||
case settings.URLUnfurlingAlwaysAsk:
|
||||
metadata.Permission = URLUnfurlingAskUser
|
||||
case settings.URLUnfurlingEnableAll:
|
||||
metadata.Permission = URLUnfurlingAllowed
|
||||
case settings.URLUnfurlingDisableAll:
|
||||
metadata.Permission = URLUnfurlingForbiddenBySettings
|
||||
default:
|
||||
metadata.Permission = URLUnfurlingForbiddenBySettings
|
||||
}
|
||||
}
|
||||
|
||||
result.URLs = append(result.URLs, metadata)
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// Deprecated: GetURLs is deprecated in favor of more generic GetTextURLsToUnfurl.
|
||||
//
|
||||
// This is a wrapper around GetTextURLsToUnfurl that returns the list of URLs found in the text
|
||||
// without any additional information.
|
||||
func (m *Messenger) GetURLs(text string) []string {
|
||||
plan := m.GetTextURLsToUnfurl(text)
|
||||
limit := int(math.Min(UnfurledLinksPerMessageLimit, float64(len(plan.URLs))))
|
||||
urls := make([]string, 0, limit)
|
||||
for _, metadata := range plan.URLs {
|
||||
urls = append(urls, metadata.URL)
|
||||
if len(urls) == limit {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return urls
|
||||
}
|
||||
|
||||
|
@ -145,11 +205,6 @@ func NewDefaultHTTPClient() *http.Client {
|
|||
func (m *Messenger) UnfurlURLs(httpClient *http.Client, urls []string) (UnfurlURLsResponse, error) {
|
||||
response := UnfurlURLsResponse{}
|
||||
|
||||
s, err := m.getSettings()
|
||||
if err != nil {
|
||||
return response, fmt.Errorf("failed to get settigs: %w", err)
|
||||
}
|
||||
|
||||
// Unfurl in a loop
|
||||
|
||||
response.LinkPreviews = make([]*common.LinkPreview, 0, len(urls))
|
||||
|
@ -173,12 +228,6 @@ func (m *Messenger) UnfurlURLs(httpClient *http.Client, urls []string) (UnfurlUR
|
|||
continue
|
||||
}
|
||||
|
||||
// `AlwaysAsk` mode should be handled on the app side
|
||||
// and is considered as equal to `EnableAll` in status-go.
|
||||
if s.URLUnfurlingMode == settings.URLUnfurlingDisableAll {
|
||||
continue
|
||||
}
|
||||
|
||||
p, err := m.unfurlURL(httpClient, url)
|
||||
if err != nil {
|
||||
m.logger.Warn("failed to unfurl", zap.String("url", url), zap.Error(err))
|
||||
|
|
|
@ -8,6 +8,7 @@ import (
|
|||
"net/http"
|
||||
"net/url"
|
||||
"regexp"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
|
@ -186,7 +187,7 @@ func (s *MessengerLinkPreviewsTestSuite) Test_GetLinks() {
|
|||
}
|
||||
|
||||
for _, ex := range examples {
|
||||
links := GetURLs(ex.args)
|
||||
links := s.m.GetURLs(ex.args)
|
||||
s.Require().Equal(ex.expected, links, "Failed for args: '%s'", ex.args)
|
||||
}
|
||||
}
|
||||
|
@ -606,86 +607,82 @@ func (s *MessengerLinkPreviewsTestSuite) Test_UnfurlURLs_StatusCommunityJoined()
|
|||
}
|
||||
|
||||
func (s *MessengerLinkPreviewsTestSuite) Test_UnfurlURLs_Settings() {
|
||||
|
||||
// Create website stub
|
||||
ogLink := "https://github.com"
|
||||
requestsCount := 0
|
||||
const ogLink = "https://github.com"
|
||||
const statusUserLink = "https://status.app/c#zQ3shYSHp7GoiXaauJMnDcjwU2yNjdzpXLosAWapPS4CFxc11"
|
||||
const gifLink = "https://media1.giphy.com/media/lcG3qwtTKSNI2i5vst/giphy.gif"
|
||||
|
||||
transport := StubTransport{}
|
||||
transport.AddURLMatcherRoundTrip(
|
||||
ogLink,
|
||||
func(req *http.Request) *http.Response {
|
||||
requestsCount++
|
||||
responseBody := []byte(`<html><head><meta property="og:title" content="TestTitle"></head></html>`)
|
||||
return &http.Response{
|
||||
StatusCode: http.StatusOK,
|
||||
Body: ioutil.NopCloser(bytes.NewBuffer(responseBody)),
|
||||
}
|
||||
},
|
||||
)
|
||||
stubbedClient := http.Client{Transport: &transport}
|
||||
|
||||
// Add contact
|
||||
identity, err := crypto.GenerateKey()
|
||||
s.Require().NoError(err)
|
||||
|
||||
c, err := BuildContactFromPublicKey(&identity.PublicKey)
|
||||
s.Require().NoError(err)
|
||||
s.Require().NotNil(c)
|
||||
|
||||
c.Bio = "TestBio_1"
|
||||
c.DisplayName = "TestDisplayName_2"
|
||||
s.m.allContacts.Store(c.ID, c)
|
||||
statusUserLink, err := s.m.ShareUserURLWithData(c.ID)
|
||||
s.Require().NoError(err)
|
||||
|
||||
linksToUnfurl := []string{ogLink, statusUserLink}
|
||||
linksToUnfurl := []string{ogLink, statusUserLink, gifLink}
|
||||
text := strings.Join(linksToUnfurl, " ")
|
||||
|
||||
// Test `AlwaysAsk`
|
||||
// NOTE: on status-go side `AlwaysAsk` == `EnableAll`, "asking" should be processed by the app
|
||||
|
||||
requestsCount = 0
|
||||
err = s.m.settings.SaveSettingField(settings.URLUnfurlingMode, settings.URLUnfurlingAlwaysAsk)
|
||||
err := s.m.settings.SaveSettingField(settings.URLUnfurlingMode, settings.URLUnfurlingAlwaysAsk)
|
||||
s.Require().NoError(err)
|
||||
|
||||
linkPreviews, err := s.m.UnfurlURLs(&stubbedClient, linksToUnfurl)
|
||||
s.Require().NoError(err)
|
||||
s.Require().Len(linkPreviews.LinkPreviews, 1)
|
||||
s.Require().Len(linkPreviews.StatusLinkPreviews, 1)
|
||||
s.Require().Equal(requestsCount, 1)
|
||||
plan := s.m.GetTextURLsToUnfurl(text)
|
||||
s.Require().Len(plan.URLs, len(linksToUnfurl))
|
||||
|
||||
s.Require().Equal(plan.URLs[0].URL, ogLink)
|
||||
s.Require().Equal(plan.URLs[0].IsStatusSharedURL, false)
|
||||
s.Require().Equal(plan.URLs[0].Permission, URLUnfurlingAskUser)
|
||||
|
||||
s.Require().Equal(plan.URLs[1].URL, statusUserLink)
|
||||
s.Require().Equal(plan.URLs[1].IsStatusSharedURL, true)
|
||||
s.Require().Equal(plan.URLs[1].Permission, URLUnfurlingAllowed)
|
||||
|
||||
s.Require().Equal(plan.URLs[2].URL, gifLink)
|
||||
s.Require().Equal(plan.URLs[2].IsStatusSharedURL, false)
|
||||
s.Require().Equal(plan.URLs[2].Permission, URLUnfurlingNotSupported)
|
||||
|
||||
// Test `EnableAll`
|
||||
requestsCount = 0
|
||||
err = s.m.settings.SaveSettingField(settings.URLUnfurlingMode, settings.URLUnfurlingEnableAll)
|
||||
s.Require().NoError(err)
|
||||
|
||||
linkPreviews, err = s.m.UnfurlURLs(&stubbedClient, linksToUnfurl)
|
||||
s.Require().NoError(err)
|
||||
s.Require().Len(linkPreviews.LinkPreviews, 1)
|
||||
s.Require().Len(linkPreviews.StatusLinkPreviews, 1)
|
||||
s.Require().Equal(requestsCount, 1)
|
||||
plan = s.m.GetTextURLsToUnfurl(text)
|
||||
s.Require().Len(plan.URLs, len(linksToUnfurl))
|
||||
|
||||
s.Require().Equal(plan.URLs[0].URL, ogLink)
|
||||
s.Require().Equal(plan.URLs[0].IsStatusSharedURL, false)
|
||||
s.Require().Equal(plan.URLs[0].Permission, URLUnfurlingAllowed)
|
||||
|
||||
s.Require().Equal(plan.URLs[1].URL, statusUserLink)
|
||||
s.Require().Equal(plan.URLs[1].IsStatusSharedURL, true)
|
||||
s.Require().Equal(plan.URLs[1].Permission, URLUnfurlingAllowed)
|
||||
|
||||
s.Require().Equal(plan.URLs[2].URL, gifLink)
|
||||
s.Require().Equal(plan.URLs[2].IsStatusSharedURL, false)
|
||||
s.Require().Equal(plan.URLs[2].Permission, URLUnfurlingNotSupported)
|
||||
|
||||
// Test `DisableAll`
|
||||
requestsCount = 0
|
||||
err = s.m.settings.SaveSettingField(settings.URLUnfurlingMode, settings.URLUnfurlingDisableAll)
|
||||
s.Require().NoError(err)
|
||||
|
||||
linkPreviews, err = s.m.UnfurlURLs(&stubbedClient, linksToUnfurl)
|
||||
s.Require().NoError(err)
|
||||
s.Require().Len(linkPreviews.LinkPreviews, 0)
|
||||
s.Require().Len(linkPreviews.StatusLinkPreviews, 1) // Status links are always unfurled
|
||||
s.Require().Equal(requestsCount, 0)
|
||||
plan = s.m.GetTextURLsToUnfurl(text)
|
||||
s.Require().Len(plan.URLs, len(linksToUnfurl))
|
||||
|
||||
s.Require().Equal(plan.URLs[0].URL, ogLink)
|
||||
s.Require().Equal(plan.URLs[0].IsStatusSharedURL, false)
|
||||
s.Require().Equal(plan.URLs[0].Permission, URLUnfurlingForbiddenBySettings)
|
||||
|
||||
s.Require().Equal(plan.URLs[1].URL, statusUserLink)
|
||||
s.Require().Equal(plan.URLs[1].IsStatusSharedURL, true)
|
||||
s.Require().Equal(plan.URLs[1].Permission, URLUnfurlingAllowed)
|
||||
|
||||
s.Require().Equal(plan.URLs[2].URL, gifLink)
|
||||
s.Require().Equal(plan.URLs[2].IsStatusSharedURL, false)
|
||||
s.Require().Equal(plan.URLs[2].Permission, URLUnfurlingNotSupported)
|
||||
}
|
||||
|
||||
func (s *MessengerLinkPreviewsTestSuite) Test_UnfurlURLs_Limit() {
|
||||
linksToUnfurl := "https://www.youtube.com/watch?v=6dkDepLX0rk " +
|
||||
text := "https://www.youtube.com/watch?v=6dkDepLX0rk " +
|
||||
"https://www.youtube.com/watch?v=ferZnZ0_rSM " +
|
||||
"https://www.youtube.com/watch?v=bdneye4pzMw " +
|
||||
"https://www.youtube.com/watch?v=pRERgcQe-fQ " +
|
||||
"https://www.youtube.com/watch?v=j82L3pLjb_0 " +
|
||||
"https://www.youtube.com/watch?v=hxsJvKYyVyg " +
|
||||
"https://www.youtube.com/watch?v=jIIuzB11dsA"
|
||||
"https://www.youtube.com/watch?v=jIIuzB11dsA "
|
||||
|
||||
urls := GetURLs(linksToUnfurl)
|
||||
urls := s.m.GetURLs(text)
|
||||
s.Require().Equal(UnfurledLinksPerMessageLimit, len(urls))
|
||||
}
|
||||
|
|
|
@ -1187,10 +1187,19 @@ func (api *PublicAPI) GetLinkPreviewData(link string) (previewData urls.LinkPrev
|
|||
return urls.GetLinkPreviewData(link)
|
||||
}
|
||||
|
||||
// GetTextURLsToUnfurl parses text and returns a deduplicated and (somewhat) normalized
|
||||
// slice of URLs. The returned URLs can be used as cache keys by clients.
|
||||
// For each URL there's a corresponding metadata which should be used as to plan the unfurling.
|
||||
func (api *PublicAPI) GetTextURLsToUnfurl(text string) *protocol.URLsUnfurlPlan {
|
||||
return api.service.messenger.GetTextURLsToUnfurl(text)
|
||||
}
|
||||
|
||||
// Deprecated: GetTextURLs is deprecated in favor of more generic GetTextURLsToUnfurl.
|
||||
//
|
||||
// GetTextURLs parses text and returns a deduplicated and (somewhat) normalized
|
||||
// slice of URLs. The returned URLs can be used as cache keys by clients.
|
||||
func (api *PublicAPI) GetTextURLs(text string) []string {
|
||||
return protocol.GetURLs(text)
|
||||
return api.service.messenger.GetURLs(text)
|
||||
}
|
||||
|
||||
// UnfurlURLs uses a best-effort approach to unfurl each URL. Failed URLs will
|
||||
|
|
Loading…
Reference in New Issue