feat: new `GetTextURLsToUnfurl` endpoint which extends GetURLs (#4294)
This commit is contained in:
parent
4ae9c02e57
commit
2fef9a8f8f
|
@ -3,6 +3,7 @@ package protocol
|
||||||
import (
|
import (
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"math"
|
||||||
"net/http"
|
"net/http"
|
||||||
neturl "net/url"
|
neturl "net/url"
|
||||||
"regexp"
|
"regexp"
|
||||||
|
@ -19,6 +20,29 @@ import (
|
||||||
|
|
||||||
const UnfurledLinksPerMessageLimit = 5
|
const UnfurledLinksPerMessageLimit = 5
|
||||||
|
|
||||||
|
// URLUnfurlPermission is the per-URL unfurling decision stored in
// URLUnfurlingMetadata.Permission.
type URLUnfurlPermission int
|
||||||
|
|
||||||
|
// Possible URLUnfurlPermission values. They are numbered from 0 in declaration
// order via iota.
// NOTE(review): Permission carries a JSON tag, so these numeric values presumably
// reach clients — confirm before reordering or inserting entries.
const (
|
||||||
|
// URLUnfurlingAllowed is assigned to Status shared URLs and to other supported
// URLs when the URL unfurling mode setting is EnableAll.
URLUnfurlingAllowed URLUnfurlPermission = iota
|
||||||
|
// URLUnfurlingAskUser is assigned when the URL unfurling mode setting is AlwaysAsk.
URLUnfurlingAskUser
|
||||||
|
// URLUnfurlingForbiddenBySettings is assigned when the URL unfurling mode setting
// is DisableAll or has a value that is not recognized.
URLUnfurlingForbiddenBySettings
|
||||||
|
// URLUnfurlingNotSupported is assigned when URLUnfurlingSupported reports false
// for the URL; this check takes precedence over the other cases.
URLUnfurlingNotSupported
|
||||||
|
)
|
||||||
|
|
||||||
|
// URLUnfurlingMetadata describes a single URL found in a message text together
// with the information needed to decide whether to unfurl it.
type URLUnfurlingMetadata struct {
|
||||||
|
// URL is the link after its host has been lowercased and trailing forward
// slashes have been trimmed.
URL string `json:"url"`
|
||||||
|
// Permission is the unfurling decision computed for this URL.
Permission URLUnfurlPermission `json:"permission"`
|
||||||
|
// IsStatusSharedURL holds the result of IsStatusSharedURL for URL.
IsStatusSharedURL bool `json:"isStatusSharedURL"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// URLsUnfurlPlan is the result of Messenger.GetTextURLsToUnfurl.
type URLsUnfurlPlan struct {
|
||||||
|
// URLs holds one entry per URL kept from the parsed text, appended in the order
// the links were visited.
URLs []URLUnfurlingMetadata `json:"urls"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// URLUnfurlingSupported reports whether a link preview may be attempted for url.
//
// GIF links are the only ones rejected. A URL is treated as a GIF link when
// either the raw string or its parsed path component ends in ".gif", compared
// case-insensitively. That way "https://host/a.GIF" and
// "https://host/a.gif?cid=1" are rejected just like "https://host/a.gif".
// Every URL rejected by a plain suffix check on the raw string is still rejected.
// If url cannot be parsed, only the raw-string check applies.
func URLUnfurlingSupported(url string) bool {
	const gifExt = ".gif"

	// Raw-string check: keeps the historical behaviour and works for unparsable input.
	if strings.HasSuffix(strings.ToLower(url), gifExt) {
		return false
	}

	// Path check: ignores any query string or fragment that follows the file name.
	parsed, err := neturl.Parse(url)
	if err != nil {
		return true
	}
	return !strings.HasSuffix(strings.ToLower(parsed.Path), gifExt)
}
|
||||||
|
|
||||||
type UnfurlURLsResponse struct {
|
type UnfurlURLsResponse struct {
|
||||||
LinkPreviews []*common.LinkPreview `json:"linkPreviews,omitempty"`
|
LinkPreviews []*common.LinkPreview `json:"linkPreviews,omitempty"`
|
||||||
StatusLinkPreviews []*common.StatusLinkPreview `json:"statusLinkPreviews,omitempty"`
|
StatusLinkPreviews []*common.StatusLinkPreview `json:"statusLinkPreviews,omitempty"`
|
||||||
|
@ -92,18 +116,23 @@ func parseValidURL(rawURL string) (*neturl.URL, error) {
|
||||||
return u, nil
|
return u, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetURLs returns only what we consider unfurleable URLs.
|
func (m *Messenger) GetTextURLsToUnfurl(text string) *URLsUnfurlPlan {
|
||||||
//
|
s, err := m.getSettings()
|
||||||
// If we wanted to be extra precise and help improve UX, we could ignore URLs
|
if err != nil {
|
||||||
// that we know can't be unfurled. This is at least possible with the oEmbed
|
// log the error and keep parsing the text
|
||||||
// protocol because providers must specify an endpoint scheme.
|
m.logger.Error("GetTextURLsToUnfurl: failed to get settings", zap.Error(err))
|
||||||
func GetURLs(text string) []string {
|
s.URLUnfurlingMode = settings.URLUnfurlingDisableAll
|
||||||
|
}
|
||||||
|
|
||||||
|
indexedUrls := map[string]struct{}{}
|
||||||
|
result := &URLsUnfurlPlan{
|
||||||
|
// The choice of `UnfurledLinksPerMessageLimit` as the initial capacity is somewhat
|
||||||
|
// arbitrary: we only want a non-zero preallocation, and the limit is at least a related number.
|
||||||
|
URLs: make([]URLUnfurlingMetadata, 0, UnfurledLinksPerMessageLimit),
|
||||||
|
}
|
||||||
parsedText := markdown.Parse([]byte(text), nil)
|
parsedText := markdown.Parse([]byte(text), nil)
|
||||||
visitor := common.RunLinksVisitor(parsedText)
|
visitor := common.RunLinksVisitor(parsedText)
|
||||||
|
|
||||||
urls := make([]string, 0, len(visitor.Links))
|
|
||||||
indexed := make(map[string]any, len(visitor.Links))
|
|
||||||
|
|
||||||
for _, rawURL := range visitor.Links {
|
for _, rawURL := range visitor.Links {
|
||||||
parsedURL, err := parseValidURL(rawURL)
|
parsedURL, err := parseValidURL(rawURL)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -116,23 +145,54 @@ func GetURLs(text string) []string {
|
||||||
// case-sensitive, some websites encode base64 in the path, etc.
|
// case-sensitive, some websites encode base64 in the path, etc.
|
||||||
parsedURL.Host = strings.ToLower(parsedURL.Host)
|
parsedURL.Host = strings.ToLower(parsedURL.Host)
|
||||||
|
|
||||||
idx := parsedURL.String()
|
url := parsedURL.String()
|
||||||
// Removes the spurious trailing forward slash.
|
url = strings.TrimRight(url, "/") // Removes the spurious trailing forward slash.
|
||||||
idx = strings.TrimRight(idx, "/")
|
if _, exists := indexedUrls[url]; exists {
|
||||||
if _, exists := indexed[idx]; exists {
|
|
||||||
continue
|
continue
|
||||||
} else {
|
|
||||||
indexed[idx] = nil
|
|
||||||
urls = append(urls, idx)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// This is a temporary limitation solution,
|
metadata := URLUnfurlingMetadata{
|
||||||
// should be changed with https://github.com/status-im/status-go/issues/4235
|
URL: url,
|
||||||
if len(urls) == UnfurledLinksPerMessageLimit {
|
IsStatusSharedURL: IsStatusSharedURL(url),
|
||||||
|
}
|
||||||
|
|
||||||
|
if !URLUnfurlingSupported(rawURL) {
|
||||||
|
metadata.Permission = URLUnfurlingNotSupported
|
||||||
|
} else if metadata.IsStatusSharedURL {
|
||||||
|
metadata.Permission = URLUnfurlingAllowed
|
||||||
|
} else {
|
||||||
|
switch s.URLUnfurlingMode {
|
||||||
|
case settings.URLUnfurlingAlwaysAsk:
|
||||||
|
metadata.Permission = URLUnfurlingAskUser
|
||||||
|
case settings.URLUnfurlingEnableAll:
|
||||||
|
metadata.Permission = URLUnfurlingAllowed
|
||||||
|
case settings.URLUnfurlingDisableAll:
|
||||||
|
metadata.Permission = URLUnfurlingForbiddenBySettings
|
||||||
|
default:
|
||||||
|
metadata.Permission = URLUnfurlingForbiddenBySettings
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
result.URLs = append(result.URLs, metadata)
|
||||||
|
}
|
||||||
|
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
// Deprecated: GetURLs is deprecated in favor of more generic GetTextURLsToUnfurl.
|
||||||
|
//
|
||||||
|
// This is a wrapper around GetTextURLsToUnfurl that returns the list of URLs found in the text
|
||||||
|
// without any additional information.
|
||||||
|
func (m *Messenger) GetURLs(text string) []string {
|
||||||
|
plan := m.GetTextURLsToUnfurl(text)
|
||||||
|
limit := int(math.Min(UnfurledLinksPerMessageLimit, float64(len(plan.URLs))))
|
||||||
|
urls := make([]string, 0, limit)
|
||||||
|
for _, metadata := range plan.URLs {
|
||||||
|
urls = append(urls, metadata.URL)
|
||||||
|
if len(urls) == limit {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return urls
|
return urls
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -145,11 +205,6 @@ func NewDefaultHTTPClient() *http.Client {
|
||||||
func (m *Messenger) UnfurlURLs(httpClient *http.Client, urls []string) (UnfurlURLsResponse, error) {
|
func (m *Messenger) UnfurlURLs(httpClient *http.Client, urls []string) (UnfurlURLsResponse, error) {
|
||||||
response := UnfurlURLsResponse{}
|
response := UnfurlURLsResponse{}
|
||||||
|
|
||||||
s, err := m.getSettings()
|
|
||||||
if err != nil {
|
|
||||||
return response, fmt.Errorf("failed to get settigs: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Unfurl in a loop
|
// Unfurl in a loop
|
||||||
|
|
||||||
response.LinkPreviews = make([]*common.LinkPreview, 0, len(urls))
|
response.LinkPreviews = make([]*common.LinkPreview, 0, len(urls))
|
||||||
|
@ -173,12 +228,6 @@ func (m *Messenger) UnfurlURLs(httpClient *http.Client, urls []string) (UnfurlUR
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
// `AlwaysAsk` mode should be handled on the app side
|
|
||||||
// and is considered as equal to `EnableAll` in status-go.
|
|
||||||
if s.URLUnfurlingMode == settings.URLUnfurlingDisableAll {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
p, err := m.unfurlURL(httpClient, url)
|
p, err := m.unfurlURL(httpClient, url)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
m.logger.Warn("failed to unfurl", zap.String("url", url), zap.Error(err))
|
m.logger.Warn("failed to unfurl", zap.String("url", url), zap.Error(err))
|
||||||
|
|
|
@ -8,6 +8,7 @@ import (
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/url"
|
"net/url"
|
||||||
"regexp"
|
"regexp"
|
||||||
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
@ -186,7 +187,7 @@ func (s *MessengerLinkPreviewsTestSuite) Test_GetLinks() {
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, ex := range examples {
|
for _, ex := range examples {
|
||||||
links := GetURLs(ex.args)
|
links := s.m.GetURLs(ex.args)
|
||||||
s.Require().Equal(ex.expected, links, "Failed for args: '%s'", ex.args)
|
s.Require().Equal(ex.expected, links, "Failed for args: '%s'", ex.args)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -606,86 +607,82 @@ func (s *MessengerLinkPreviewsTestSuite) Test_UnfurlURLs_StatusCommunityJoined()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *MessengerLinkPreviewsTestSuite) Test_UnfurlURLs_Settings() {
|
func (s *MessengerLinkPreviewsTestSuite) Test_UnfurlURLs_Settings() {
|
||||||
|
|
||||||
// Create website stub
|
// Create website stub
|
||||||
ogLink := "https://github.com"
|
const ogLink = "https://github.com"
|
||||||
requestsCount := 0
|
const statusUserLink = "https://status.app/c#zQ3shYSHp7GoiXaauJMnDcjwU2yNjdzpXLosAWapPS4CFxc11"
|
||||||
|
const gifLink = "https://media1.giphy.com/media/lcG3qwtTKSNI2i5vst/giphy.gif"
|
||||||
|
|
||||||
transport := StubTransport{}
|
linksToUnfurl := []string{ogLink, statusUserLink, gifLink}
|
||||||
transport.AddURLMatcherRoundTrip(
|
text := strings.Join(linksToUnfurl, " ")
|
||||||
ogLink,
|
|
||||||
func(req *http.Request) *http.Response {
|
|
||||||
requestsCount++
|
|
||||||
responseBody := []byte(`<html><head><meta property="og:title" content="TestTitle"></head></html>`)
|
|
||||||
return &http.Response{
|
|
||||||
StatusCode: http.StatusOK,
|
|
||||||
Body: ioutil.NopCloser(bytes.NewBuffer(responseBody)),
|
|
||||||
}
|
|
||||||
},
|
|
||||||
)
|
|
||||||
stubbedClient := http.Client{Transport: &transport}
|
|
||||||
|
|
||||||
// Add contact
|
|
||||||
identity, err := crypto.GenerateKey()
|
|
||||||
s.Require().NoError(err)
|
|
||||||
|
|
||||||
c, err := BuildContactFromPublicKey(&identity.PublicKey)
|
|
||||||
s.Require().NoError(err)
|
|
||||||
s.Require().NotNil(c)
|
|
||||||
|
|
||||||
c.Bio = "TestBio_1"
|
|
||||||
c.DisplayName = "TestDisplayName_2"
|
|
||||||
s.m.allContacts.Store(c.ID, c)
|
|
||||||
statusUserLink, err := s.m.ShareUserURLWithData(c.ID)
|
|
||||||
s.Require().NoError(err)
|
|
||||||
|
|
||||||
linksToUnfurl := []string{ogLink, statusUserLink}
|
|
||||||
|
|
||||||
// Test `AlwaysAsk`
|
// Test `AlwaysAsk`
|
||||||
// NOTE: on status-go side `AlwaysAsk` == `EnableAll`, "asking" should be processed by the app
|
|
||||||
|
|
||||||
requestsCount = 0
|
err := s.m.settings.SaveSettingField(settings.URLUnfurlingMode, settings.URLUnfurlingAlwaysAsk)
|
||||||
err = s.m.settings.SaveSettingField(settings.URLUnfurlingMode, settings.URLUnfurlingAlwaysAsk)
|
|
||||||
s.Require().NoError(err)
|
s.Require().NoError(err)
|
||||||
|
|
||||||
linkPreviews, err := s.m.UnfurlURLs(&stubbedClient, linksToUnfurl)
|
plan := s.m.GetTextURLsToUnfurl(text)
|
||||||
s.Require().NoError(err)
|
s.Require().Len(plan.URLs, len(linksToUnfurl))
|
||||||
s.Require().Len(linkPreviews.LinkPreviews, 1)
|
|
||||||
s.Require().Len(linkPreviews.StatusLinkPreviews, 1)
|
s.Require().Equal(plan.URLs[0].URL, ogLink)
|
||||||
s.Require().Equal(requestsCount, 1)
|
s.Require().Equal(plan.URLs[0].IsStatusSharedURL, false)
|
||||||
|
s.Require().Equal(plan.URLs[0].Permission, URLUnfurlingAskUser)
|
||||||
|
|
||||||
|
s.Require().Equal(plan.URLs[1].URL, statusUserLink)
|
||||||
|
s.Require().Equal(plan.URLs[1].IsStatusSharedURL, true)
|
||||||
|
s.Require().Equal(plan.URLs[1].Permission, URLUnfurlingAllowed)
|
||||||
|
|
||||||
|
s.Require().Equal(plan.URLs[2].URL, gifLink)
|
||||||
|
s.Require().Equal(plan.URLs[2].IsStatusSharedURL, false)
|
||||||
|
s.Require().Equal(plan.URLs[2].Permission, URLUnfurlingNotSupported)
|
||||||
|
|
||||||
// Test `EnableAll`
|
// Test `EnableAll`
|
||||||
requestsCount = 0
|
|
||||||
err = s.m.settings.SaveSettingField(settings.URLUnfurlingMode, settings.URLUnfurlingEnableAll)
|
err = s.m.settings.SaveSettingField(settings.URLUnfurlingMode, settings.URLUnfurlingEnableAll)
|
||||||
s.Require().NoError(err)
|
s.Require().NoError(err)
|
||||||
|
|
||||||
linkPreviews, err = s.m.UnfurlURLs(&stubbedClient, linksToUnfurl)
|
plan = s.m.GetTextURLsToUnfurl(text)
|
||||||
s.Require().NoError(err)
|
s.Require().Len(plan.URLs, len(linksToUnfurl))
|
||||||
s.Require().Len(linkPreviews.LinkPreviews, 1)
|
|
||||||
s.Require().Len(linkPreviews.StatusLinkPreviews, 1)
|
s.Require().Equal(plan.URLs[0].URL, ogLink)
|
||||||
s.Require().Equal(requestsCount, 1)
|
s.Require().Equal(plan.URLs[0].IsStatusSharedURL, false)
|
||||||
|
s.Require().Equal(plan.URLs[0].Permission, URLUnfurlingAllowed)
|
||||||
|
|
||||||
|
s.Require().Equal(plan.URLs[1].URL, statusUserLink)
|
||||||
|
s.Require().Equal(plan.URLs[1].IsStatusSharedURL, true)
|
||||||
|
s.Require().Equal(plan.URLs[1].Permission, URLUnfurlingAllowed)
|
||||||
|
|
||||||
|
s.Require().Equal(plan.URLs[2].URL, gifLink)
|
||||||
|
s.Require().Equal(plan.URLs[2].IsStatusSharedURL, false)
|
||||||
|
s.Require().Equal(plan.URLs[2].Permission, URLUnfurlingNotSupported)
|
||||||
|
|
||||||
// Test `DisableAll`
|
// Test `DisableAll`
|
||||||
requestsCount = 0
|
|
||||||
err = s.m.settings.SaveSettingField(settings.URLUnfurlingMode, settings.URLUnfurlingDisableAll)
|
err = s.m.settings.SaveSettingField(settings.URLUnfurlingMode, settings.URLUnfurlingDisableAll)
|
||||||
s.Require().NoError(err)
|
s.Require().NoError(err)
|
||||||
|
|
||||||
linkPreviews, err = s.m.UnfurlURLs(&stubbedClient, linksToUnfurl)
|
plan = s.m.GetTextURLsToUnfurl(text)
|
||||||
s.Require().NoError(err)
|
s.Require().Len(plan.URLs, len(linksToUnfurl))
|
||||||
s.Require().Len(linkPreviews.LinkPreviews, 0)
|
|
||||||
s.Require().Len(linkPreviews.StatusLinkPreviews, 1) // Status links are always unfurled
|
s.Require().Equal(plan.URLs[0].URL, ogLink)
|
||||||
s.Require().Equal(requestsCount, 0)
|
s.Require().Equal(plan.URLs[0].IsStatusSharedURL, false)
|
||||||
|
s.Require().Equal(plan.URLs[0].Permission, URLUnfurlingForbiddenBySettings)
|
||||||
|
|
||||||
|
s.Require().Equal(plan.URLs[1].URL, statusUserLink)
|
||||||
|
s.Require().Equal(plan.URLs[1].IsStatusSharedURL, true)
|
||||||
|
s.Require().Equal(plan.URLs[1].Permission, URLUnfurlingAllowed)
|
||||||
|
|
||||||
|
s.Require().Equal(plan.URLs[2].URL, gifLink)
|
||||||
|
s.Require().Equal(plan.URLs[2].IsStatusSharedURL, false)
|
||||||
|
s.Require().Equal(plan.URLs[2].Permission, URLUnfurlingNotSupported)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *MessengerLinkPreviewsTestSuite) Test_UnfurlURLs_Limit() {
|
func (s *MessengerLinkPreviewsTestSuite) Test_UnfurlURLs_Limit() {
|
||||||
linksToUnfurl := "https://www.youtube.com/watch?v=6dkDepLX0rk " +
|
text := "https://www.youtube.com/watch?v=6dkDepLX0rk " +
|
||||||
"https://www.youtube.com/watch?v=ferZnZ0_rSM " +
|
"https://www.youtube.com/watch?v=ferZnZ0_rSM " +
|
||||||
"https://www.youtube.com/watch?v=bdneye4pzMw " +
|
"https://www.youtube.com/watch?v=bdneye4pzMw " +
|
||||||
"https://www.youtube.com/watch?v=pRERgcQe-fQ " +
|
"https://www.youtube.com/watch?v=pRERgcQe-fQ " +
|
||||||
"https://www.youtube.com/watch?v=j82L3pLjb_0 " +
|
"https://www.youtube.com/watch?v=j82L3pLjb_0 " +
|
||||||
"https://www.youtube.com/watch?v=hxsJvKYyVyg " +
|
"https://www.youtube.com/watch?v=hxsJvKYyVyg " +
|
||||||
"https://www.youtube.com/watch?v=jIIuzB11dsA"
|
"https://www.youtube.com/watch?v=jIIuzB11dsA "
|
||||||
|
|
||||||
urls := GetURLs(linksToUnfurl)
|
urls := s.m.GetURLs(text)
|
||||||
s.Require().Equal(UnfurledLinksPerMessageLimit, len(urls))
|
s.Require().Equal(UnfurledLinksPerMessageLimit, len(urls))
|
||||||
}
|
}
|
||||||
|
|
|
@ -1187,10 +1187,19 @@ func (api *PublicAPI) GetLinkPreviewData(link string) (previewData urls.LinkPrev
|
||||||
return urls.GetLinkPreviewData(link)
|
return urls.GetLinkPreviewData(link)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetTextURLsToUnfurl parses text and returns an unfurling plan listing the
|
||||||
|
// (somewhat) normalized URLs found in it. The returned URLs can be used as cache keys by clients.
|
||||||
|
// Each URL comes with metadata that should be used to plan the unfurling.
// The call is delegated to the messenger's GetTextURLsToUnfurl.
// NOTE(review): the URLs are meant to be deduplicated, but the visible part of the
// messenger implementation never records the URLs it has seen — confirm.
|
||||||
|
func (api *PublicAPI) GetTextURLsToUnfurl(text string) *protocol.URLsUnfurlPlan {
|
||||||
|
return api.service.messenger.GetTextURLsToUnfurl(text)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Deprecated: GetTextURLs is deprecated in favor of more generic GetTextURLsToUnfurl.
|
||||||
|
//
|
||||||
// GetTextURLs parses text and returns a deduplicated and (somewhat) normalized
|
// GetTextURLs parses text and returns a deduplicated and (somewhat) normalized
|
||||||
// slice of URLs. The returned URLs can be used as cache keys by clients.
|
// slice of URLs. The returned URLs can be used as cache keys by clients.
|
||||||
func (api *PublicAPI) GetTextURLs(text string) []string {
|
func (api *PublicAPI) GetTextURLs(text string) []string {
|
||||||
return protocol.GetURLs(text)
|
return api.service.messenger.GetURLs(text)
|
||||||
}
|
}
|
||||||
|
|
||||||
// UnfurlURLs uses a best-effort approach to unfurl each URL. Failed URLs will
|
// UnfurlURLs uses a best-effort approach to unfurl each URL. Failed URLs will
|
||||||
|
|
Loading…
Reference in New Issue