Support unfurling more websites (#3530)

Add support for unfurling a wider range of websites. Most code changes are
related to the implementation of a new Unfurler, an OEmbedUnfurler, which is
necessary to get metadata for Reddit URLs using oEmbed, since Reddit does not
support OpenGraph meta tags. The new unfurler will also be useful for other
websites, like Twitter. Also the user agent was changed, and now more websites
consider status-go reasonably human.

Related to issue https://github.com/status-im/status-mobile/issues/15918

Example hostnames that are now unfurlable: reddit.com, open.spotify.com,
music.youtube.com

Other improvements:

- Better error handling, especially because I wasn't wrapping errors correctly.
  I also removed the unnecessary custom error UnfurlErr.
- I made tests truly deterministic by parameterizing the http.Client instance
  and by customizing its Transport field (except for some failing conditions
  where it's even good to hit the real servers).
This commit is contained in:
Icaro Motta 2023-06-05 07:46:17 -03:00 committed by GitHub
parent c6192bd26c
commit 92b5d831fe
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 331 additions and 201 deletions

View File

@ -1 +1 @@
0.154.4 0.154.5

View File

@ -1,12 +1,15 @@
package linkpreview package linkpreview
import ( import (
"bytes"
"context" "context"
"encoding/json"
"errors" "errors"
"fmt" "fmt"
"io/ioutil" "io/ioutil"
"net/http" "net/http"
neturl "net/url" neturl "net/url"
"regexp"
"strings" "strings"
"time" "time"
@ -20,53 +23,45 @@ import (
"github.com/status-im/status-go/protocol/common" "github.com/status-im/status-go/protocol/common"
) )
// UnfurlError means a non-critical error, and that processing of the preview
// should be interrupted and the preview probably ignored.
type UnfurlError struct {
	// msg is the leading part of the text returned by Error.
	msg string
	// url is reported by Error as url='...'.
	url string
	// err is the underlying cause exposed through Unwrap; it is not part of
	// the text returned by Error.
	err error
}

// Error formats the message together with the URL. The wrapped error is
// intentionally not included; callers reach it through Unwrap.
func (e UnfurlError) Error() string {
	message, target := e.msg, e.url
	return fmt.Sprintf("%s, url='%s'", message, target)
}

// Unwrap returns the underlying error (possibly nil) so that errors.Is and
// errors.As can inspect the cause.
func (e UnfurlError) Unwrap() error {
	cause := e.err
	return cause
}
type LinkPreview struct { type LinkPreview struct {
common.LinkPreview common.LinkPreview
} }
type Unfurler interface { type Unfurler interface {
unfurl(*neturl.URL) (common.LinkPreview, error) unfurl() (common.LinkPreview, error)
} }
const ( type Headers map[string]string
requestTimeout = 15000 * time.Millisecond
// Certain websites return an HTML error page if the user agent is unknown to const (
// them, e.g. IMDb. defaultRequestTimeout = 15000 * time.Millisecond
defaultUserAgent = "Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/109.0"
headerAcceptJSON = "application/json; charset=utf-8"
headerAcceptText = "text/html; charset=utf-8"
// Without a particular user agent, many providers treat status-go as a
// gluttony bot, and either respond more frequently with a 429 (Too Many
// Requests), or simply refuse to return valid data. Note that using a known
// browser UA doesn't work well with some providers, such as Spotify,
// apparently they still flag status-go as a bad actor.
headerUserAgent = "status-go/v0.151.15"
// Currently set to English, but we could make this setting dynamic according // Currently set to English, but we could make this setting dynamic according
// to the user's language of choice. // to the user's language of choice.
defaultAcceptLanguage = "en-US,en;q=0.5" headerAcceptLanguage = "en-US,en;q=0.5"
) )
var ( func fetchBody(logger *zap.Logger, httpClient http.Client, url string, headers Headers) ([]byte, error) {
httpClient = http.Client{Timeout: requestTimeout} ctx, cancel := context.WithTimeout(context.Background(), defaultRequestTimeout)
)
func fetchResponseBody(logger *zap.Logger, url string) ([]byte, error) {
ctx, cancel := context.WithTimeout(context.Background(), requestTimeout)
defer cancel() defer cancel()
req, err := http.NewRequestWithContext(ctx, "GET", url, nil) req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
if err != nil { if err != nil {
return nil, err return nil, fmt.Errorf("failed to perform HTTP request: %w", err)
}
for k, v := range headers {
req.Header.Set(k, v)
} }
res, err := httpClient.Do(req) res, err := httpClient.Do(req)
@ -74,18 +69,18 @@ func fetchResponseBody(logger *zap.Logger, url string) ([]byte, error) {
return nil, err return nil, err
} }
defer func() { defer func() {
if err = res.Body.Close(); err != nil { if err := res.Body.Close(); err != nil {
logger.Error("Failed to close response body", zap.Error(err)) logger.Error("failed to close response body", zap.Error(err))
} }
}() }()
if res.StatusCode >= http.StatusBadRequest { if res.StatusCode >= http.StatusBadRequest {
return nil, errors.New(http.StatusText(res.StatusCode)) return nil, fmt.Errorf("http request failed, statusCode='%d'", res.StatusCode)
} }
bodyBytes, err := ioutil.ReadAll(res.Body) bodyBytes, err := ioutil.ReadAll(res.Body)
if err != nil { if err != nil {
return nil, err return nil, fmt.Errorf("failed to read body bytes: %w", err)
} }
return bodyBytes, nil return bodyBytes, nil
@ -98,24 +93,10 @@ func newDefaultLinkPreview(url *neturl.URL) common.LinkPreview {
} }
} }
func httpGETForOpenGraph(url string) (*http.Response, context.CancelFunc, error) { func fetchThumbnail(logger *zap.Logger, httpClient http.Client, url string) (common.LinkPreviewThumbnail, error) {
ctx, cancel := context.WithTimeout(context.Background(), requestTimeout)
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
if err != nil {
return nil, cancel, err
}
req.Header.Set("User-Agent", defaultUserAgent)
req.Header.Set("Accept-Language", defaultAcceptLanguage)
res, err := httpClient.Do(req)
return res, cancel, err
}
func fetchThumbnail(logger *zap.Logger, url string) (common.LinkPreviewThumbnail, error) {
var thumbnail common.LinkPreviewThumbnail var thumbnail common.LinkPreviewThumbnail
imgBytes, err := fetchResponseBody(logger, url) imgBytes, err := fetchBody(logger, httpClient, url, nil)
if err != nil { if err != nil {
return thumbnail, fmt.Errorf("could not fetch thumbnail: %w", err) return thumbnail, fmt.Errorf("could not fetch thumbnail: %w", err)
} }
@ -136,79 +117,120 @@ func fetchThumbnail(logger *zap.Logger, url string) (common.LinkPreviewThumbnail
return thumbnail, nil return thumbnail, nil
} }
// OEmbedUnfurler builds a link preview for url by requesting a JSON
// representation of it from an oEmbed endpoint.
type OEmbedUnfurler struct {
	// logger is handed to the HTTP fetch helper.
	logger *zap.Logger
	// httpClient performs the request against the oEmbed endpoint.
	httpClient http.Client
	// oembedEndpoint describes where the consumer may request representations for
	// the supported URL scheme. For example, for YouTube, it is
	// https://www.youtube.com/oembed.
	oembedEndpoint string
	// url is the actual URL to be unfurled.
	url *neturl.URL
}
// OEmbedResponse holds the fields decoded from an oEmbed JSON response. Any
// other fields present in the response are ignored during decoding.
type OEmbedResponse struct {
	// Title is read from the "title" field.
	Title string `json:"title"`
	// ThumbnailURL is read from the "thumbnail_url" field.
	ThumbnailURL string `json:"thumbnail_url"`
}
// newOEmbedURL returns the oEmbed endpoint URL with its query string set to
// request a JSON representation of u.url. Any query already present on the
// endpoint is replaced. An error is returned when the endpoint cannot be
// parsed.
func (u OEmbedUnfurler) newOEmbedURL() (*neturl.URL, error) {
	endpoint, err := neturl.Parse(u.oembedEndpoint)
	if err != nil {
		return nil, err
	}

	query := neturl.Values{}
	query.Set("url", u.url.String())
	// When format is specified, the provider MUST return data in the requested
	// format, else return an error.
	query.Set("format", "json")

	endpoint.RawQuery = query.Encode()
	return endpoint, nil
}
// unfurl requests the oEmbed JSON representation of u.url and returns a link
// preview whose title comes from the response.
//
// The preview returned alongside a non-nil error is the default preview for
// u.url. An error is returned when the oEmbed URL cannot be built, the HTTP
// fetch fails, the body is not valid JSON, or the response has no title.
func (u OEmbedUnfurler) unfurl() (common.LinkPreview, error) {
	preview := newDefaultLinkPreview(u.url)

	oembedURL, err := u.newOEmbedURL()
	if err != nil {
		return preview, fmt.Errorf("failed to build oEmbed URL: %w", err)
	}

	headers := map[string]string{
		"accept":          headerAcceptJSON,
		"accept-language": headerAcceptLanguage,
		"user-agent":      headerUserAgent,
	}
	oembedBytes, err := fetchBody(u.logger, u.httpClient, oembedURL.String(), headers)
	if err != nil {
		return preview, fmt.Errorf("failed to fetch oEmbed response: %w", err)
	}

	var oembedResponse OEmbedResponse
	if err := json.Unmarshal(oembedBytes, &oembedResponse); err != nil {
		return preview, fmt.Errorf("failed to parse oEmbed response: %w", err)
	}

	if oembedResponse.Title == "" {
		return preview, fmt.Errorf("missing required title in oEmbed response")
	}

	// Only the title is copied into the preview; the decoded thumbnail URL is
	// not fetched here.
	preview.Title = oembedResponse.Title
	return preview, nil
}
type OpenGraphMetadata struct { type OpenGraphMetadata struct {
Title string `json:"title" meta:"og:title"` Title string `json:"title" meta:"og:title"`
Description string `json:"description" meta:"og:description"` Description string `json:"description" meta:"og:description"`
ThumbnailURL string `json:"thumbnailUrl" meta:"og:image"` ThumbnailURL string `json:"thumbnailUrl" meta:"og:image"`
} }
// OpenGraphUnfurler can be used either as the default unfurler for some websites // OpenGraphUnfurler should be preferred over OEmbedUnfurler because oEmbed
// (e.g. GitHub), or as a fallback strategy. It parses HTML and extract // gives back a JSON response with a "html" field that's supposed to be embedded
// OpenGraph meta tags. If an oEmbed endpoint is available, it should be // in an iframe (hardly useful for existing Status' clients).
// preferred.
type OpenGraphUnfurler struct { type OpenGraphUnfurler struct {
logger *zap.Logger url *neturl.URL
logger *zap.Logger
httpClient http.Client
} }
func (u OpenGraphUnfurler) unfurl(url *neturl.URL) (common.LinkPreview, error) { func (u OpenGraphUnfurler) unfurl() (common.LinkPreview, error) {
preview := newDefaultLinkPreview(url) preview := newDefaultLinkPreview(u.url)
res, cancel, err := httpGETForOpenGraph(url.String()) headers := map[string]string{
defer cancel() "accept": headerAcceptText,
defer func() { "accept-language": headerAcceptLanguage,
if res != nil { "user-agent": headerUserAgent,
if err = res.Body.Close(); err != nil {
u.logger.Error("failed to close response body", zap.Error(err))
}
}
}()
if err != nil {
return preview, UnfurlError{
msg: "failed to get HTML page",
url: url.String(),
err: err,
}
} }
bodyBytes, err := fetchBody(u.logger, u.httpClient, u.url.String(), headers)
// Behave like WhatsApp, i.e. if the response is a 404, consider the URL if err != nil {
// unfurleable. We can try to unfurl from the 404 HTML, which works well for return preview, err
// certain websites, like GitHub, but it also potentially confuses users
// because they'll be sharing previews that don't match the actual URLs.
if res.StatusCode == http.StatusNotFound {
return preview, UnfurlError{
msg: "could not find page",
url: url.String(),
err: errors.New(""),
}
} }
var ogMetadata OpenGraphMetadata var ogMetadata OpenGraphMetadata
err = metabolize.Metabolize(res.Body, &ogMetadata) err = metabolize.Metabolize(ioutil.NopCloser(bytes.NewBuffer(bodyBytes)), &ogMetadata)
if err != nil { if err != nil {
return preview, UnfurlError{ return preview, fmt.Errorf("failed to parse OpenGraph data")
msg: "failed to parse OpenGraph data",
url: url.String(),
err: err,
}
} }
// There are URLs like https://wikipedia.org/ that don't have an OpenGraph // There are URLs like https://wikipedia.org/ that don't have an OpenGraph
// title tag, but article pages do. In the future, we can fallback to the // title tag, but article pages do. In the future, we can fallback to the
// website's title by using the <title> tag. // website's title by using the <title> tag.
if ogMetadata.Title == "" { if ogMetadata.Title == "" {
return preview, UnfurlError{ return preview, fmt.Errorf("missing required title in OpenGraph response")
msg: "missing title",
url: url.String(),
err: errors.New(""),
}
} }
if ogMetadata.ThumbnailURL != "" { if ogMetadata.ThumbnailURL != "" {
t, err := fetchThumbnail(u.logger, ogMetadata.ThumbnailURL) t, err := fetchThumbnail(u.logger, u.httpClient, ogMetadata.ThumbnailURL)
if err != nil { if err != nil {
// Given we want to fetch thumbnails on a best-effort basis, if an error // Given we want to fetch thumbnails on a best-effort basis, if an error
// happens we simply log it. // happens we simply log it.
u.logger.Info("failed to fetch thumbnail", zap.String("url", url.String()), zap.Error(err)) u.logger.Info("failed to fetch thumbnail", zap.String("url", u.url.String()), zap.Error(err))
} else { } else {
preview.Thumbnail = t preview.Thumbnail = t
} }
@ -219,13 +241,31 @@ func (u OpenGraphUnfurler) unfurl(url *neturl.URL) (common.LinkPreview, error) {
return preview, nil return preview, nil
} }
func newUnfurler(logger *zap.Logger, url *neturl.URL) Unfurler { func normalizeHostname(hostname string) string {
u := new(OpenGraphUnfurler) hostname = strings.ToLower(hostname)
u.logger = logger re := regexp.MustCompile(`^www\.(.*)$`)
return u return re.ReplaceAllString(hostname, "$1")
} }
func unfurl(logger *zap.Logger, url string) (common.LinkPreview, error) { func newUnfurler(logger *zap.Logger, httpClient http.Client, url *neturl.URL) Unfurler {
switch normalizeHostname(url.Hostname()) {
case "reddit.com":
return OEmbedUnfurler{
oembedEndpoint: "https://www.reddit.com/oembed",
url: url,
logger: logger,
httpClient: httpClient,
}
default:
return OpenGraphUnfurler{
url: url,
logger: logger,
httpClient: httpClient,
}
}
}
func unfurl(logger *zap.Logger, httpClient http.Client, url string) (common.LinkPreview, error) {
var preview common.LinkPreview var preview common.LinkPreview
parsedURL, err := neturl.Parse(url) parsedURL, err := neturl.Parse(url)
@ -233,8 +273,8 @@ func unfurl(logger *zap.Logger, url string) (common.LinkPreview, error) {
return preview, err return preview, err
} }
unfurler := newUnfurler(logger, parsedURL) unfurler := newUnfurler(logger, httpClient, parsedURL)
preview, err = unfurler.unfurl(parsedURL) preview, err = unfurler.unfurl()
if err != nil { if err != nil {
return preview, err return preview, err
} }
@ -264,6 +304,10 @@ func parseValidURL(rawURL string) (*neturl.URL, error) {
} }
// GetURLs returns only what we consider unfurleable URLs. // GetURLs returns only what we consider unfurleable URLs.
//
// If we wanted to be extra precise and help improve UX, we could ignore URLs
// that we know can't be unfurled. This is at least possible with the oEmbed
// protocol because providers must specify an endpoint scheme.
func GetURLs(text string) []string { func GetURLs(text string) []string {
parsedText := markdown.Parse([]byte(text), nil) parsedText := markdown.Parse([]byte(text), nil)
visitor := common.RunLinksVisitor(parsedText) visitor := common.RunLinksVisitor(parsedText)
@ -297,9 +341,13 @@ func GetURLs(text string) []string {
return urls return urls
} }
// NewDefaultHTTPClient returns an http.Client whose Timeout is set to
// defaultRequestTimeout; every other field keeps its zero value.
func NewDefaultHTTPClient() http.Client {
	client := http.Client{}
	client.Timeout = defaultRequestTimeout
	return client
}
// UnfurlURLs assumes clients pass URLs verbatim that were validated and // UnfurlURLs assumes clients pass URLs verbatim that were validated and
// processed by GetURLs. // processed by GetURLs.
func UnfurlURLs(logger *zap.Logger, urls []string) ([]common.LinkPreview, error) { func UnfurlURLs(logger *zap.Logger, httpClient http.Client, urls []string) ([]common.LinkPreview, error) {
var err error var err error
if logger == nil { if logger == nil {
logger, err = zap.NewDevelopment() logger, err = zap.NewDevelopment()
@ -311,14 +359,11 @@ func UnfurlURLs(logger *zap.Logger, urls []string) ([]common.LinkPreview, error)
previews := make([]common.LinkPreview, 0, len(urls)) previews := make([]common.LinkPreview, 0, len(urls))
for _, url := range urls { for _, url := range urls {
p, err := unfurl(logger, url) logger.Debug("unfurling", zap.String("url", url))
p, err := unfurl(logger, httpClient, url)
if err != nil { if err != nil {
if unfurlErr, ok := err.(UnfurlError); ok { logger.Info("failed to unfurl", zap.String("url", url), zap.Error(err))
logger.Info("failed to unfurl", zap.Error(unfurlErr)) continue
continue
}
return nil, err
} }
previews = append(previews, p) previews = append(previews, p)
} }

View File

@ -1,14 +1,81 @@
package linkpreview package linkpreview
import ( import (
"bytes"
"fmt"
"io/ioutil"
"math" "math"
"net/http"
"regexp"
"testing" "testing"
"time"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
"github.com/status-im/status-go/protocol/common" "github.com/status-im/status-go/protocol/common"
) )
// StubMatcher should either return an http.Response or nil in case the request
// doesn't match.
type StubMatcher func(req *http.Request) *http.Response

// StubTransport is an http.RoundTripper for tests. It answers requests with
// canned responses produced by its matchers instead of hitting the network.
type StubTransport struct {
	// fallbackToDefaultTransport when true will make the transport use
	// http.DefaultTransport in case no matcher is found.
	fallbackToDefaultTransport bool
	// disabledStubs when true, will skip all matchers and use
	// http.DefaultTransport.
	//
	// Useful while testing to toggle between the original and stubbed responses.
	disabledStubs bool
	// matchers are tried in registration order; the first non-nil response wins.
	matchers []StubMatcher
}

// RoundTrip returns a stubbed response if any matcher returns a non-nil
// http.Response. If no matcher is found and fallbackToDefaultTransport is true,
// then it executes the HTTP request using the default http transport.
// Otherwise an error is returned.
//
// If StubTransport#disabledStubs is true, the default http transport is used.
func (t *StubTransport) RoundTrip(req *http.Request) (*http.Response, error) {
	if t.disabledStubs {
		return http.DefaultTransport.RoundTrip(req)
	}

	for _, matcher := range t.matchers {
		if res := matcher(req); res != nil {
			return res, nil
		}
	}

	if t.fallbackToDefaultTransport {
		return http.DefaultTransport.RoundTrip(req)
	}

	return nil, fmt.Errorf("no HTTP matcher found")
}

// AddURLMatcher registers a matcher that fires when the request URL contains
// urlRegexp. Despite the parameter name, the value is escaped with
// regexp.QuoteMeta and therefore matched literally, so URLs containing
// characters such as '?' or '.' can be passed verbatim. On a match,
// responseBody is returned with a hardcoded 200 status code; every match gets
// a fresh reader over responseBody.
//
// The pattern is compiled once, at registration time, instead of on every
// request. A pattern that cannot be compiled (e.g. invalid UTF-8) panics here
// rather than silently never matching.
func (t *StubTransport) AddURLMatcher(urlRegexp string, responseBody []byte) {
	rx := regexp.MustCompile(regexp.QuoteMeta(urlRegexp))

	t.matchers = append(t.matchers, func(req *http.Request) *http.Response {
		if !rx.MatchString(req.URL.String()) {
			return nil
		}
		return &http.Response{
			StatusCode: http.StatusOK,
			Body:       ioutil.NopCloser(bytes.NewBuffer(responseBody)),
		}
	})
}
// assertContainsLongString verifies if actual contains a slice of expected and // assertContainsLongString verifies if actual contains a slice of expected and
// correctly prints the cause of the failure. The default behavior of // correctly prints the cause of the failure. The default behavior of
// require.Contains with long strings is to not print the formatted message // require.Contains with long strings is to not print the formatted message
@ -37,7 +104,7 @@ func assertContainsLongString(t *testing.T, expected string, actual string, maxL
) )
} }
func TestGetLinks(t *testing.T) { func Test_GetLinks(t *testing.T) {
examples := []struct { examples := []struct {
args string args string
expected []string expected []string
@ -88,106 +155,124 @@ func TestGetLinks(t *testing.T) {
} }
} }
func TestUnfurlURLs(t *testing.T) { func readAsset(t *testing.T, filename string) []byte {
examples := []struct { b, err := ioutil.ReadFile("../../_assets/tests/" + filename)
url string
expected common.LinkPreview
}{
{
url: "https://github.com/",
expected: common.LinkPreview{
Description: "GitHub is where over 100 million developers shape the future of software, together. Contribute to the open source community, manage your Git repositories, review code like a pro, track bugs and fea...",
Hostname: "github.com",
Title: "GitHub: Lets build from here",
URL: "https://github.com/",
Thumbnail: common.LinkPreviewThumbnail{
Width: 1200,
Height: 630,
URL: "",
DataURI: "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAABLAAAAJ2CAMAAAB4",
},
},
},
{
url: "https://github.com/status-im/status-mobile/issues/15469",
expected: common.LinkPreview{
Description: "Designs https://www.figma.com/file/wA8Epdki2OWa8Vr067PCNQ/Composer-for-Mobile?node-id=2102-232933&t=tTYKjMpICnzwF5Zv-0 Out of scope Enable link previews (we can assume for now that is always on) Mu...",
Hostname: "github.com",
Title: "Allow users to customize links · Issue #15469 · status-im/status-mobile",
URL: "https://github.com/status-im/status-mobile/issues/15469",
Thumbnail: common.LinkPreviewThumbnail{
Width: 1200,
Height: 600,
URL: "",
DataURI: "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAABLAAAAJYCAYAAABy",
},
},
},
{
url: "https://www.imdb.com/title/tt0117500/",
expected: common.LinkPreview{
Description: "The Rock: Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer. A mild-mannered chemist and an ex-con must lead the counterstrike when a rogue group of military men, led by a renegade general, threaten a nerve gas attack from Alcatraz against San Francisco.",
Hostname: "www.imdb.com",
Title: "The Rock (1996) - IMDb",
URL: "https://www.imdb.com/title/tt0117500/",
Thumbnail: common.LinkPreviewThumbnail{
Width: 1000,
Height: 1481,
URL: "",
DataURI: "data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/2wCEAAkJCgg",
},
},
},
{
url: "https://www.youtube.com/watch?v=lE4UXdJSJM4",
expected: common.LinkPreview{
URL: "https://www.youtube.com/watch?v=lE4UXdJSJM4",
Hostname: "www.youtube.com",
Title: "Interview with a GNU/Linux user - Partition 1",
Description: "GNU/Linux Operating SystemInterview with a GNU/Linux user with Richie Guix - aired on © The GNU Linux.Programmer humorLinux humorProgramming jokesProgramming...",
Thumbnail: common.LinkPreviewThumbnail{
Width: 1280,
Height: 720,
DataURI: "data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/2wCEAAUDBA8",
},
},
},
}
var urls []string
for _, e := range examples {
urls = append(urls, e.url)
}
links, err := UnfurlURLs(nil, urls)
require.NoError(t, err) require.NoError(t, err)
require.Len(t, links, len(examples), "all URLs should have been unfurled successfully") return b
}
for i, link := range links { func Test_UnfurlURLs_YouTube(t *testing.T) {
e := examples[i] url := "https://www.youtube.com/watch?v=lE4UXdJSJM4"
require.Equal(t, e.expected.URL, link.URL, e.url) thumbnailURL := "https://i.ytimg.com/vi/lE4UXdJSJM4/maxresdefault.jpg"
require.Equal(t, e.expected.Hostname, link.Hostname, e.url) expected := common.LinkPreview{
require.Equal(t, e.expected.Title, link.Title, e.url) URL: url,
require.Equal(t, e.expected.Description, link.Description, e.url) Hostname: "www.youtube.com",
Title: "Interview with a GNU/Linux user - Partition 1",
require.Equal(t, e.expected.Thumbnail.Width, link.Thumbnail.Width, e.url) Description: "GNU/Linux Operating SystemInterview with a GNU/Linux user with Richie Guix - aired on © The GNU Linux.Programmer humorLinux humorProgramming jokesProgramming...",
require.Equal(t, e.expected.Thumbnail.Height, link.Thumbnail.Height, e.url) Thumbnail: common.LinkPreviewThumbnail{
require.Equal(t, e.expected.Thumbnail.URL, link.Thumbnail.URL, e.url) Width: 1,
assertContainsLongString(t, e.expected.Thumbnail.DataURI, link.Thumbnail.DataURI, 100) Height: 1,
DataURI: "data:image/webp;base64,UklGRiQAAABXRUJQVlA4IBgAAAAwAQCdASoBAAEAAQAaJaQAA3AA/vpMgAA",
},
} }
transport := StubTransport{}
transport.AddURLMatcher(
url,
[]byte(fmt.Sprintf(`
<html>
<head>
<meta property="og:title" content="%s">
<meta property="og:description" content="%s">
<meta property="og:image" content="%s">
</head>
</html>
`, expected.Title, expected.Description, thumbnailURL)),
)
transport.AddURLMatcher(thumbnailURL, readAsset(t, "1.jpg"))
stubbedClient := http.Client{Transport: &transport}
previews, err := UnfurlURLs(nil, stubbedClient, []string{url})
require.NoError(t, err)
require.Len(t, previews, 1)
preview := previews[0]
require.Equal(t, expected.URL, preview.URL)
require.Equal(t, expected.Hostname, preview.Hostname)
require.Equal(t, expected.Title, preview.Title)
require.Equal(t, expected.Description, preview.Description)
require.Equal(t, expected.Thumbnail.Width, preview.Thumbnail.Width)
require.Equal(t, expected.Thumbnail.Height, preview.Thumbnail.Height)
require.Equal(t, expected.Thumbnail.URL, preview.Thumbnail.URL)
assertContainsLongString(t, expected.Thumbnail.DataURI, preview.Thumbnail.DataURI, 100)
}
// Test_UnfurlURLs_Reddit unfurls a reddit.com URL against a stubbed oEmbed
// endpoint and checks that the preview carries the title from the JSON
// response, with an empty description and thumbnail.
func Test_UnfurlURLs_Reddit(t *testing.T) {
	redditURL := "https://www.reddit.com/r/Bitcoin/comments/13j0tzr/the_best_bitcoin_explanation_of_all_times/?utm_source=share"
	want := common.LinkPreview{
		URL:         redditURL,
		Hostname:    "www.reddit.com",
		Title:       "The best bitcoin explanation of all times.",
		Description: "",
		Thumbnail:   common.LinkPreviewThumbnail{},
	}

	// Canned oEmbed payload served for any request to the Reddit endpoint.
	oembedJSON := []byte(`
{
"provider_url": "https://www.reddit.com/",
"version": "1.0",
"title": "The best bitcoin explanation of all times.",
"provider_name": "reddit",
"type": "rich",
"author_name": "DTheDev"
}
`)
	stub := &StubTransport{}
	stub.AddURLMatcher("https://www.reddit.com/oembed", oembedJSON)
	client := http.Client{Transport: stub}

	got, err := UnfurlURLs(nil, client, []string{redditURL})
	require.NoError(t, err)
	require.Len(t, got, 1)

	actual := got[0]
	require.Equal(t, want.URL, actual.URL)
	require.Equal(t, want.Hostname, actual.Hostname)
	require.Equal(t, want.Title, actual.Title)
	require.Equal(t, want.Description, actual.Description)
	require.Equal(t, want.Thumbnail, actual.Thumbnail)
}
// Test_UnfurlURLs_Timeout uses a client with a one-nanosecond timeout and
// expects UnfurlURLs to return no previews and no error.
func Test_UnfurlURLs_Timeout(t *testing.T) {
	client := http.Client{Timeout: time.Nanosecond}
	urls := []string{"https://status.im"}

	got, err := UnfurlURLs(nil, client, urls)

	require.NoError(t, err)
	require.Empty(t, got)
}
func Test_UnfurlURLs_CommonFailures(t *testing.T) {
httpClient := http.Client{}
// Test URL that doesn't return any OpenGraph title. // Test URL that doesn't return any OpenGraph title.
previews, err := UnfurlURLs(nil, []string{"https://wikipedia.org"}) transport := StubTransport{}
transport.AddURLMatcher(
"https://wikipedia.org",
[]byte("<html><head></head></html>"),
)
stubbedClient := http.Client{Transport: &transport}
previews, err := UnfurlURLs(nil, stubbedClient, []string{"https://wikipedia.org"})
require.NoError(t, err) require.NoError(t, err)
require.Empty(t, previews) require.Empty(t, previews)
// Test 404. // Test 404.
previews, err = UnfurlURLs(nil, []string{"https://github.com/status-im/i_do_not_exist"}) previews, err = UnfurlURLs(nil, httpClient, []string{"https://github.com/status-im/i_do_not_exist"})
require.NoError(t, err) require.NoError(t, err)
require.Empty(t, previews) require.Empty(t, previews)
// Test no response when trying to get OpenGraph metadata. // Test no response when trying to get OpenGraph metadata.
previews, err = UnfurlURLs(nil, []string{"https://wikipedia.o"}) previews, err = UnfurlURLs(nil, httpClient, []string{"https://wikipedia.o"})
require.NoError(t, err) require.NoError(t, err)
require.Empty(t, previews) require.Empty(t, previews)
} }

View File

@ -5937,7 +5937,7 @@ func generateAliasAndIdenticon(pk string) (string, string, error) {
} }
func (m *Messenger) UnfurlURLs(urls []string) ([]common.LinkPreview, error) { func (m *Messenger) UnfurlURLs(urls []string) ([]common.LinkPreview, error) {
return linkpreview.UnfurlURLs(m.logger, urls) return linkpreview.UnfurlURLs(m.logger, linkpreview.NewDefaultHTTPClient(), urls)
} }
func (m *Messenger) SendEmojiReaction(ctx context.Context, chatID, messageID string, emojiID protobuf.EmojiReaction_Type) (*MessengerResponse, error) { func (m *Messenger) SendEmojiReaction(ctx context.Context, chatID, messageID string, emojiID protobuf.EmojiReaction_Type) (*MessengerResponse, error) {