Support unfurling more websites (#3530)
Add support for unfurling a wider range of websites. Most code changes are related to the implementation of a new Unfurler, an OEmbedUnfurler, which is necessary to get metadata for Reddit URLs using oEmbed, since Reddit does not support OpenGraph meta tags. The new unfurler will also be useful for other websites, like Twitter. The user agent was also changed, and now more websites consider status-go reasonably human. Related to issue https://github.com/status-im/status-mobile/issues/15918. Example hostnames that are now unfurlable: reddit.com, open.spotify.com, music.youtube.com. Other improvements: - Better error handling, especially because I wasn't wrapping errors correctly. I also removed the unnecessary custom error UnfurlError. - I made tests truly deterministic by parameterizing the http.Client instance and by customizing its Transport field (except for some failing conditions where it's even good to hit the real servers).
This commit is contained in:
parent
c6192bd26c
commit
92b5d831fe
|
@ -1,12 +1,15 @@
|
||||||
package linkpreview
|
package linkpreview
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bytes"
|
||||||
"context"
|
"context"
|
||||||
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"net/http"
|
"net/http"
|
||||||
neturl "net/url"
|
neturl "net/url"
|
||||||
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
@ -20,53 +23,45 @@ import (
|
||||||
"github.com/status-im/status-go/protocol/common"
|
"github.com/status-im/status-go/protocol/common"
|
||||||
)
|
)
|
||||||
|
|
||||||
// UnfurlError means a non-critical error, and that processing of the preview
|
|
||||||
// should be interrupted and the preview probably ignored.
|
|
||||||
type UnfurlError struct {
|
|
||||||
msg string
|
|
||||||
url string
|
|
||||||
err error
|
|
||||||
}
|
|
||||||
|
|
||||||
func (ue UnfurlError) Error() string {
|
|
||||||
return fmt.Sprintf("%s, url='%s'", ue.msg, ue.url)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (ue UnfurlError) Unwrap() error {
|
|
||||||
return ue.err
|
|
||||||
}
|
|
||||||
|
|
||||||
type LinkPreview struct {
|
type LinkPreview struct {
|
||||||
common.LinkPreview
|
common.LinkPreview
|
||||||
}
|
}
|
||||||
|
|
||||||
type Unfurler interface {
|
type Unfurler interface {
|
||||||
unfurl(*neturl.URL) (common.LinkPreview, error)
|
unfurl() (common.LinkPreview, error)
|
||||||
}
|
}
|
||||||
|
|
||||||
const (
|
type Headers map[string]string
|
||||||
requestTimeout = 15000 * time.Millisecond
|
|
||||||
|
|
||||||
// Certain websites return an HTML error page if the user agent is unknown to
|
const (
|
||||||
// them, e.g. IMDb.
|
defaultRequestTimeout = 15000 * time.Millisecond
|
||||||
defaultUserAgent = "Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/109.0"
|
|
||||||
|
headerAcceptJSON = "application/json; charset=utf-8"
|
||||||
|
headerAcceptText = "text/html; charset=utf-8"
|
||||||
|
|
||||||
|
// Without a particular user agent, many providers treat status-go as a
|
||||||
|
// gluttonous bot, and either respond more frequently with a 429 (Too Many
|
||||||
|
// Requests), or simply refuse to return valid data. Note that using a known
|
||||||
|
// browser UA doesn't work well with some providers, such as Spotify,
|
||||||
|
// apparently they still flag status-go as a bad actor.
|
||||||
|
headerUserAgent = "status-go/v0.151.15"
|
||||||
|
|
||||||
// Currently set to English, but we could make this setting dynamic according
|
// Currently set to English, but we could make this setting dynamic according
|
||||||
// to the user's language of choice.
|
// to the user's language of choice.
|
||||||
defaultAcceptLanguage = "en-US,en;q=0.5"
|
headerAcceptLanguage = "en-US,en;q=0.5"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
func fetchBody(logger *zap.Logger, httpClient http.Client, url string, headers Headers) ([]byte, error) {
|
||||||
httpClient = http.Client{Timeout: requestTimeout}
|
ctx, cancel := context.WithTimeout(context.Background(), defaultRequestTimeout)
|
||||||
)
|
|
||||||
|
|
||||||
func fetchResponseBody(logger *zap.Logger, url string) ([]byte, error) {
|
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), requestTimeout)
|
|
||||||
defer cancel()
|
defer cancel()
|
||||||
|
|
||||||
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
|
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, fmt.Errorf("failed to perform HTTP request: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
for k, v := range headers {
|
||||||
|
req.Header.Set(k, v)
|
||||||
}
|
}
|
||||||
|
|
||||||
res, err := httpClient.Do(req)
|
res, err := httpClient.Do(req)
|
||||||
|
@ -74,18 +69,18 @@ func fetchResponseBody(logger *zap.Logger, url string) ([]byte, error) {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
defer func() {
|
defer func() {
|
||||||
if err = res.Body.Close(); err != nil {
|
if err := res.Body.Close(); err != nil {
|
||||||
logger.Error("Failed to close response body", zap.Error(err))
|
logger.Error("failed to close response body", zap.Error(err))
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
if res.StatusCode >= http.StatusBadRequest {
|
if res.StatusCode >= http.StatusBadRequest {
|
||||||
return nil, errors.New(http.StatusText(res.StatusCode))
|
return nil, fmt.Errorf("http request failed, statusCode='%d'", res.StatusCode)
|
||||||
}
|
}
|
||||||
|
|
||||||
bodyBytes, err := ioutil.ReadAll(res.Body)
|
bodyBytes, err := ioutil.ReadAll(res.Body)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, fmt.Errorf("failed to read body bytes: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
return bodyBytes, nil
|
return bodyBytes, nil
|
||||||
|
@ -98,24 +93,10 @@ func newDefaultLinkPreview(url *neturl.URL) common.LinkPreview {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func httpGETForOpenGraph(url string) (*http.Response, context.CancelFunc, error) {
|
func fetchThumbnail(logger *zap.Logger, httpClient http.Client, url string) (common.LinkPreviewThumbnail, error) {
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), requestTimeout)
|
|
||||||
|
|
||||||
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
|
|
||||||
if err != nil {
|
|
||||||
return nil, cancel, err
|
|
||||||
}
|
|
||||||
req.Header.Set("User-Agent", defaultUserAgent)
|
|
||||||
req.Header.Set("Accept-Language", defaultAcceptLanguage)
|
|
||||||
|
|
||||||
res, err := httpClient.Do(req)
|
|
||||||
return res, cancel, err
|
|
||||||
}
|
|
||||||
|
|
||||||
func fetchThumbnail(logger *zap.Logger, url string) (common.LinkPreviewThumbnail, error) {
|
|
||||||
var thumbnail common.LinkPreviewThumbnail
|
var thumbnail common.LinkPreviewThumbnail
|
||||||
|
|
||||||
imgBytes, err := fetchResponseBody(logger, url)
|
imgBytes, err := fetchBody(logger, httpClient, url, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return thumbnail, fmt.Errorf("could not fetch thumbnail: %w", err)
|
return thumbnail, fmt.Errorf("could not fetch thumbnail: %w", err)
|
||||||
}
|
}
|
||||||
|
@ -136,79 +117,120 @@ func fetchThumbnail(logger *zap.Logger, url string) (common.LinkPreviewThumbnail
|
||||||
return thumbnail, nil
|
return thumbnail, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type OEmbedUnfurler struct {
|
||||||
|
logger *zap.Logger
|
||||||
|
httpClient http.Client
|
||||||
|
// oembedEndpoint describes where the consumer may request representations for
|
||||||
|
// the supported URL scheme. For example, for YouTube, it is
|
||||||
|
// https://www.youtube.com/oembed.
|
||||||
|
oembedEndpoint string
|
||||||
|
// url is the actual URL to be unfurled.
|
||||||
|
url *neturl.URL
|
||||||
|
}
|
||||||
|
|
||||||
|
type OEmbedResponse struct {
|
||||||
|
Title string `json:"title"`
|
||||||
|
ThumbnailURL string `json:"thumbnail_url"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func (u OEmbedUnfurler) newOEmbedURL() (*neturl.URL, error) {
|
||||||
|
oembedURL, err := neturl.Parse(u.oembedEndpoint)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// When format is specified, the provider MUST return data in the requested
|
||||||
|
// format, else return an error.
|
||||||
|
oembedURL.RawQuery = neturl.Values{
|
||||||
|
"url": {u.url.String()},
|
||||||
|
"format": {"json"},
|
||||||
|
}.Encode()
|
||||||
|
|
||||||
|
return oembedURL, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (u OEmbedUnfurler) unfurl() (common.LinkPreview, error) {
|
||||||
|
preview := newDefaultLinkPreview(u.url)
|
||||||
|
|
||||||
|
oembedURL, err := u.newOEmbedURL()
|
||||||
|
if err != nil {
|
||||||
|
return preview, err
|
||||||
|
}
|
||||||
|
|
||||||
|
headers := map[string]string{
|
||||||
|
"accept": headerAcceptJSON,
|
||||||
|
"accept-language": headerAcceptLanguage,
|
||||||
|
"user-agent": headerUserAgent,
|
||||||
|
}
|
||||||
|
oembedBytes, err := fetchBody(u.logger, u.httpClient, oembedURL.String(), headers)
|
||||||
|
if err != nil {
|
||||||
|
return preview, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var oembedResponse OEmbedResponse
|
||||||
|
if err != nil {
|
||||||
|
return preview, err
|
||||||
|
}
|
||||||
|
err = json.Unmarshal(oembedBytes, &oembedResponse)
|
||||||
|
if err != nil {
|
||||||
|
return preview, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if oembedResponse.Title == "" {
|
||||||
|
return preview, fmt.Errorf("missing required title in oEmbed response")
|
||||||
|
}
|
||||||
|
|
||||||
|
preview.Title = oembedResponse.Title
|
||||||
|
return preview, nil
|
||||||
|
}
|
||||||
|
|
||||||
type OpenGraphMetadata struct {
|
type OpenGraphMetadata struct {
|
||||||
Title string `json:"title" meta:"og:title"`
|
Title string `json:"title" meta:"og:title"`
|
||||||
Description string `json:"description" meta:"og:description"`
|
Description string `json:"description" meta:"og:description"`
|
||||||
ThumbnailURL string `json:"thumbnailUrl" meta:"og:image"`
|
ThumbnailURL string `json:"thumbnailUrl" meta:"og:image"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// OpenGraphUnfurler can be used either as the default unfurler for some websites
|
// OpenGraphUnfurler should be preferred over OEmbedUnfurler because oEmbed
|
||||||
// (e.g. GitHub), or as a fallback strategy. It parses HTML and extract
|
// gives back a JSON response with a "html" field that's supposed to be embedded
|
||||||
// OpenGraph meta tags. If an oEmbed endpoint is available, it should be
|
// in an iframe (hardly useful for existing Status' clients).
|
||||||
// preferred.
|
|
||||||
type OpenGraphUnfurler struct {
|
type OpenGraphUnfurler struct {
|
||||||
logger *zap.Logger
|
url *neturl.URL
|
||||||
|
logger *zap.Logger
|
||||||
|
httpClient http.Client
|
||||||
}
|
}
|
||||||
|
|
||||||
func (u OpenGraphUnfurler) unfurl(url *neturl.URL) (common.LinkPreview, error) {
|
func (u OpenGraphUnfurler) unfurl() (common.LinkPreview, error) {
|
||||||
preview := newDefaultLinkPreview(url)
|
preview := newDefaultLinkPreview(u.url)
|
||||||
|
|
||||||
res, cancel, err := httpGETForOpenGraph(url.String())
|
headers := map[string]string{
|
||||||
defer cancel()
|
"accept": headerAcceptText,
|
||||||
defer func() {
|
"accept-language": headerAcceptLanguage,
|
||||||
if res != nil {
|
"user-agent": headerUserAgent,
|
||||||
if err = res.Body.Close(); err != nil {
|
|
||||||
u.logger.Error("failed to close response body", zap.Error(err))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
if err != nil {
|
|
||||||
return preview, UnfurlError{
|
|
||||||
msg: "failed to get HTML page",
|
|
||||||
url: url.String(),
|
|
||||||
err: err,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
bodyBytes, err := fetchBody(u.logger, u.httpClient, u.url.String(), headers)
|
||||||
// Behave like WhatsApp, i.e. if the response is a 404, consider the URL
|
if err != nil {
|
||||||
// unfurleable. We can try to unfurl from the 404 HTML, which works well for
|
return preview, err
|
||||||
// certain websites, like GitHub, but it also potentially confuses users
|
|
||||||
// because they'll be sharing previews that don't match the actual URLs.
|
|
||||||
if res.StatusCode == http.StatusNotFound {
|
|
||||||
return preview, UnfurlError{
|
|
||||||
msg: "could not find page",
|
|
||||||
url: url.String(),
|
|
||||||
err: errors.New(""),
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
var ogMetadata OpenGraphMetadata
|
var ogMetadata OpenGraphMetadata
|
||||||
err = metabolize.Metabolize(res.Body, &ogMetadata)
|
err = metabolize.Metabolize(ioutil.NopCloser(bytes.NewBuffer(bodyBytes)), &ogMetadata)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return preview, UnfurlError{
|
return preview, fmt.Errorf("failed to parse OpenGraph data")
|
||||||
msg: "failed to parse OpenGraph data",
|
|
||||||
url: url.String(),
|
|
||||||
err: err,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// There are URLs like https://wikipedia.org/ that don't have an OpenGraph
|
// There are URLs like https://wikipedia.org/ that don't have an OpenGraph
|
||||||
// title tag, but article pages do. In the future, we can fallback to the
|
// title tag, but article pages do. In the future, we can fallback to the
|
||||||
// website's title by using the <title> tag.
|
// website's title by using the <title> tag.
|
||||||
if ogMetadata.Title == "" {
|
if ogMetadata.Title == "" {
|
||||||
return preview, UnfurlError{
|
return preview, fmt.Errorf("missing required title in OpenGraph response")
|
||||||
msg: "missing title",
|
|
||||||
url: url.String(),
|
|
||||||
err: errors.New(""),
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if ogMetadata.ThumbnailURL != "" {
|
if ogMetadata.ThumbnailURL != "" {
|
||||||
t, err := fetchThumbnail(u.logger, ogMetadata.ThumbnailURL)
|
t, err := fetchThumbnail(u.logger, u.httpClient, ogMetadata.ThumbnailURL)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// Given we want to fetch thumbnails on a best-effort basis, if an error
|
// Given we want to fetch thumbnails on a best-effort basis, if an error
|
||||||
// happens we simply log it.
|
// happens we simply log it.
|
||||||
u.logger.Info("failed to fetch thumbnail", zap.String("url", url.String()), zap.Error(err))
|
u.logger.Info("failed to fetch thumbnail", zap.String("url", u.url.String()), zap.Error(err))
|
||||||
} else {
|
} else {
|
||||||
preview.Thumbnail = t
|
preview.Thumbnail = t
|
||||||
}
|
}
|
||||||
|
@ -219,13 +241,31 @@ func (u OpenGraphUnfurler) unfurl(url *neturl.URL) (common.LinkPreview, error) {
|
||||||
return preview, nil
|
return preview, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func newUnfurler(logger *zap.Logger, url *neturl.URL) Unfurler {
|
// wwwPrefixRegexp matches a hostname that starts with "www." and captures the
// remainder. It is compiled once instead of on every normalizeHostname call.
var wwwPrefixRegexp = regexp.MustCompile(`^www\.(.*)$`)

// normalizeHostname lowercases hostname and strips one leading "www." label,
// so that "WWW.Reddit.com" and "reddit.com" normalize to the same value.
// Hostnames without that prefix are only lowercased.
func normalizeHostname(hostname string) string {
	return wwwPrefixRegexp.ReplaceAllString(strings.ToLower(hostname), "$1")
}
|
||||||
|
|
||||||
func unfurl(logger *zap.Logger, url string) (common.LinkPreview, error) {
|
func newUnfurler(logger *zap.Logger, httpClient http.Client, url *neturl.URL) Unfurler {
|
||||||
|
switch normalizeHostname(url.Hostname()) {
|
||||||
|
case "reddit.com":
|
||||||
|
return OEmbedUnfurler{
|
||||||
|
oembedEndpoint: "https://www.reddit.com/oembed",
|
||||||
|
url: url,
|
||||||
|
logger: logger,
|
||||||
|
httpClient: httpClient,
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
return OpenGraphUnfurler{
|
||||||
|
url: url,
|
||||||
|
logger: logger,
|
||||||
|
httpClient: httpClient,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func unfurl(logger *zap.Logger, httpClient http.Client, url string) (common.LinkPreview, error) {
|
||||||
var preview common.LinkPreview
|
var preview common.LinkPreview
|
||||||
|
|
||||||
parsedURL, err := neturl.Parse(url)
|
parsedURL, err := neturl.Parse(url)
|
||||||
|
@ -233,8 +273,8 @@ func unfurl(logger *zap.Logger, url string) (common.LinkPreview, error) {
|
||||||
return preview, err
|
return preview, err
|
||||||
}
|
}
|
||||||
|
|
||||||
unfurler := newUnfurler(logger, parsedURL)
|
unfurler := newUnfurler(logger, httpClient, parsedURL)
|
||||||
preview, err = unfurler.unfurl(parsedURL)
|
preview, err = unfurler.unfurl()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return preview, err
|
return preview, err
|
||||||
}
|
}
|
||||||
|
@ -264,6 +304,10 @@ func parseValidURL(rawURL string) (*neturl.URL, error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetURLs returns only what we consider unfurleable URLs.
|
// GetURLs returns only what we consider unfurleable URLs.
|
||||||
|
//
|
||||||
|
// If we wanted to be extra precise and help improve UX, we could ignore URLs
|
||||||
|
// that we know can't be unfurled. This is at least possible with the oEmbed
|
||||||
|
// protocol because providers must specify an endpoint scheme.
|
||||||
func GetURLs(text string) []string {
|
func GetURLs(text string) []string {
|
||||||
parsedText := markdown.Parse([]byte(text), nil)
|
parsedText := markdown.Parse([]byte(text), nil)
|
||||||
visitor := common.RunLinksVisitor(parsedText)
|
visitor := common.RunLinksVisitor(parsedText)
|
||||||
|
@ -297,9 +341,13 @@ func GetURLs(text string) []string {
|
||||||
return urls
|
return urls
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func NewDefaultHTTPClient() http.Client {
|
||||||
|
return http.Client{Timeout: defaultRequestTimeout}
|
||||||
|
}
|
||||||
|
|
||||||
// UnfurlURLs assumes clients pass URLs verbatim that were validated and
|
// UnfurlURLs assumes clients pass URLs verbatim that were validated and
|
||||||
// processed by GetURLs.
|
// processed by GetURLs.
|
||||||
func UnfurlURLs(logger *zap.Logger, urls []string) ([]common.LinkPreview, error) {
|
func UnfurlURLs(logger *zap.Logger, httpClient http.Client, urls []string) ([]common.LinkPreview, error) {
|
||||||
var err error
|
var err error
|
||||||
if logger == nil {
|
if logger == nil {
|
||||||
logger, err = zap.NewDevelopment()
|
logger, err = zap.NewDevelopment()
|
||||||
|
@ -311,14 +359,11 @@ func UnfurlURLs(logger *zap.Logger, urls []string) ([]common.LinkPreview, error)
|
||||||
previews := make([]common.LinkPreview, 0, len(urls))
|
previews := make([]common.LinkPreview, 0, len(urls))
|
||||||
|
|
||||||
for _, url := range urls {
|
for _, url := range urls {
|
||||||
p, err := unfurl(logger, url)
|
logger.Debug("unfurling", zap.String("url", url))
|
||||||
|
p, err := unfurl(logger, httpClient, url)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if unfurlErr, ok := err.(UnfurlError); ok {
|
logger.Info("failed to unfurl", zap.String("url", url), zap.Error(err))
|
||||||
logger.Info("failed to unfurl", zap.Error(unfurlErr))
|
continue
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil, err
|
|
||||||
}
|
}
|
||||||
previews = append(previews, p)
|
previews = append(previews, p)
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,14 +1,81 @@
|
||||||
package linkpreview
|
package linkpreview
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bytes"
|
||||||
|
"fmt"
|
||||||
|
"io/ioutil"
|
||||||
"math"
|
"math"
|
||||||
|
"net/http"
|
||||||
|
"regexp"
|
||||||
"testing"
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
"github.com/status-im/status-go/protocol/common"
|
"github.com/status-im/status-go/protocol/common"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// StubMatcher should either return an http.Response or nil in case the request
// doesn't match.
type StubMatcher func(req *http.Request) *http.Response

// StubTransport is an http.RoundTripper for tests that answers requests with
// canned responses produced by its matchers.
type StubTransport struct {
	// fallbackToDefaultTransport when true will make the transport use
	// http.DefaultTransport in case no matcher is found.
	fallbackToDefaultTransport bool
	// disabledStubs when true, will skip all matchers and use
	// http.DefaultTransport.
	//
	// Useful while testing to toggle between the original and stubbed responses.
	disabledStubs bool
	// matchers are tried in registration order; the first one returning a
	// non-nil response wins.
	matchers []StubMatcher
}

// RoundTrip returns a stubbed response if any matcher returns a non-nil
// http.Response. If no matcher is found and fallbackToDefaultTransport is true,
// then it executes the HTTP request using the default http transport.
// Otherwise it returns an error.
//
// If StubTransport#disabledStubs is true, the default http transport is used.
func (t *StubTransport) RoundTrip(req *http.Request) (*http.Response, error) {
	if t.disabledStubs {
		return http.DefaultTransport.RoundTrip(req)
	}

	for _, matcher := range t.matchers {
		if res := matcher(req); res != nil {
			return res, nil
		}
	}

	if t.fallbackToDefaultTransport {
		return http.DefaultTransport.RoundTrip(req)
	}

	return nil, fmt.Errorf("no HTTP matcher found")
}

// AddURLMatcher adds a matcher based on a URL fragment. urlRegexp is escaped
// with regexp.QuoteMeta before being compiled, so it is matched literally: a
// request matches when its URL contains urlRegexp as a substring. Matching
// requests get responseBody back with a hardcoded 200 status code.
func (t *StubTransport) AddURLMatcher(urlRegexp string, responseBody []byte) {
	// Compile once at registration time rather than on every request. The
	// pattern is the output of QuoteMeta, which is always a valid expression.
	rx := regexp.MustCompile(regexp.QuoteMeta(urlRegexp))

	matcher := func(req *http.Request) *http.Response {
		if !rx.MatchString(req.URL.String()) {
			return nil
		}
		return &http.Response{
			StatusCode: http.StatusOK,
			// A fresh buffer per request lets the same body be served repeatedly.
			Body: ioutil.NopCloser(bytes.NewBuffer(responseBody)),
		}
	}

	t.matchers = append(t.matchers, matcher)
}
|
||||||
|
|
||||||
// assertContainsLongString verifies if actual contains a slice of expected and
|
// assertContainsLongString verifies if actual contains a slice of expected and
|
||||||
// correctly prints the cause of the failure. The default behavior of
|
// correctly prints the cause of the failure. The default behavior of
|
||||||
// require.Contains with long strings is to not print the formatted message
|
// require.Contains with long strings is to not print the formatted message
|
||||||
|
@ -37,7 +104,7 @@ func assertContainsLongString(t *testing.T, expected string, actual string, maxL
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestGetLinks(t *testing.T) {
|
func Test_GetLinks(t *testing.T) {
|
||||||
examples := []struct {
|
examples := []struct {
|
||||||
args string
|
args string
|
||||||
expected []string
|
expected []string
|
||||||
|
@ -88,106 +155,124 @@ func TestGetLinks(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestUnfurlURLs(t *testing.T) {
|
func readAsset(t *testing.T, filename string) []byte {
|
||||||
examples := []struct {
|
b, err := ioutil.ReadFile("../../_assets/tests/" + filename)
|
||||||
url string
|
|
||||||
expected common.LinkPreview
|
|
||||||
}{
|
|
||||||
{
|
|
||||||
url: "https://github.com/",
|
|
||||||
expected: common.LinkPreview{
|
|
||||||
Description: "GitHub is where over 100 million developers shape the future of software, together. Contribute to the open source community, manage your Git repositories, review code like a pro, track bugs and fea...",
|
|
||||||
Hostname: "github.com",
|
|
||||||
Title: "GitHub: Let’s build from here",
|
|
||||||
URL: "https://github.com/",
|
|
||||||
Thumbnail: common.LinkPreviewThumbnail{
|
|
||||||
Width: 1200,
|
|
||||||
Height: 630,
|
|
||||||
URL: "",
|
|
||||||
DataURI: "",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
url: "https://github.com/status-im/status-mobile/issues/15469",
|
|
||||||
expected: common.LinkPreview{
|
|
||||||
Description: "Designs https://www.figma.com/file/wA8Epdki2OWa8Vr067PCNQ/Composer-for-Mobile?node-id=2102-232933&t=tTYKjMpICnzwF5Zv-0 Out of scope Enable link previews (we can assume for now that is always on) Mu...",
|
|
||||||
Hostname: "github.com",
|
|
||||||
Title: "Allow users to customize links · Issue #15469 · status-im/status-mobile",
|
|
||||||
URL: "https://github.com/status-im/status-mobile/issues/15469",
|
|
||||||
Thumbnail: common.LinkPreviewThumbnail{
|
|
||||||
Width: 1200,
|
|
||||||
Height: 600,
|
|
||||||
URL: "",
|
|
||||||
DataURI: "",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
url: "https://www.imdb.com/title/tt0117500/",
|
|
||||||
expected: common.LinkPreview{
|
|
||||||
Description: "The Rock: Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer. A mild-mannered chemist and an ex-con must lead the counterstrike when a rogue group of military men, led by a renegade general, threaten a nerve gas attack from Alcatraz against San Francisco.",
|
|
||||||
Hostname: "www.imdb.com",
|
|
||||||
Title: "The Rock (1996) - IMDb",
|
|
||||||
URL: "https://www.imdb.com/title/tt0117500/",
|
|
||||||
Thumbnail: common.LinkPreviewThumbnail{
|
|
||||||
Width: 1000,
|
|
||||||
Height: 1481,
|
|
||||||
URL: "",
|
|
||||||
DataURI: "",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
url: "https://www.youtube.com/watch?v=lE4UXdJSJM4",
|
|
||||||
expected: common.LinkPreview{
|
|
||||||
URL: "https://www.youtube.com/watch?v=lE4UXdJSJM4",
|
|
||||||
Hostname: "www.youtube.com",
|
|
||||||
Title: "Interview with a GNU/Linux user - Partition 1",
|
|
||||||
Description: "GNU/Linux Operating SystemInterview with a GNU/Linux user with Richie Guix - aired on © The GNU Linux.Programmer humorLinux humorProgramming jokesProgramming...",
|
|
||||||
Thumbnail: common.LinkPreviewThumbnail{
|
|
||||||
Width: 1280,
|
|
||||||
Height: 720,
|
|
||||||
DataURI: "",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
var urls []string
|
|
||||||
for _, e := range examples {
|
|
||||||
urls = append(urls, e.url)
|
|
||||||
}
|
|
||||||
|
|
||||||
links, err := UnfurlURLs(nil, urls)
|
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
require.Len(t, links, len(examples), "all URLs should have been unfurled successfully")
|
return b
|
||||||
|
}
|
||||||
|
|
||||||
for i, link := range links {
|
func Test_UnfurlURLs_YouTube(t *testing.T) {
|
||||||
e := examples[i]
|
url := "https://www.youtube.com/watch?v=lE4UXdJSJM4"
|
||||||
require.Equal(t, e.expected.URL, link.URL, e.url)
|
thumbnailURL := "https://i.ytimg.com/vi/lE4UXdJSJM4/maxresdefault.jpg"
|
||||||
require.Equal(t, e.expected.Hostname, link.Hostname, e.url)
|
expected := common.LinkPreview{
|
||||||
require.Equal(t, e.expected.Title, link.Title, e.url)
|
URL: url,
|
||||||
require.Equal(t, e.expected.Description, link.Description, e.url)
|
Hostname: "www.youtube.com",
|
||||||
|
Title: "Interview with a GNU/Linux user - Partition 1",
|
||||||
require.Equal(t, e.expected.Thumbnail.Width, link.Thumbnail.Width, e.url)
|
Description: "GNU/Linux Operating SystemInterview with a GNU/Linux user with Richie Guix - aired on © The GNU Linux.Programmer humorLinux humorProgramming jokesProgramming...",
|
||||||
require.Equal(t, e.expected.Thumbnail.Height, link.Thumbnail.Height, e.url)
|
Thumbnail: common.LinkPreviewThumbnail{
|
||||||
require.Equal(t, e.expected.Thumbnail.URL, link.Thumbnail.URL, e.url)
|
Width: 1,
|
||||||
assertContainsLongString(t, e.expected.Thumbnail.DataURI, link.Thumbnail.DataURI, 100)
|
Height: 1,
|
||||||
|
DataURI: "",
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
transport := StubTransport{}
|
||||||
|
transport.AddURLMatcher(
|
||||||
|
url,
|
||||||
|
[]byte(fmt.Sprintf(`
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta property="og:title" content="%s">
|
||||||
|
<meta property="og:description" content="%s">
|
||||||
|
<meta property="og:image" content="%s">
|
||||||
|
</head>
|
||||||
|
</html>
|
||||||
|
`, expected.Title, expected.Description, thumbnailURL)),
|
||||||
|
)
|
||||||
|
transport.AddURLMatcher(thumbnailURL, readAsset(t, "1.jpg"))
|
||||||
|
stubbedClient := http.Client{Transport: &transport}
|
||||||
|
|
||||||
|
previews, err := UnfurlURLs(nil, stubbedClient, []string{url})
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.Len(t, previews, 1)
|
||||||
|
preview := previews[0]
|
||||||
|
|
||||||
|
require.Equal(t, expected.URL, preview.URL)
|
||||||
|
require.Equal(t, expected.Hostname, preview.Hostname)
|
||||||
|
require.Equal(t, expected.Title, preview.Title)
|
||||||
|
require.Equal(t, expected.Description, preview.Description)
|
||||||
|
require.Equal(t, expected.Thumbnail.Width, preview.Thumbnail.Width)
|
||||||
|
require.Equal(t, expected.Thumbnail.Height, preview.Thumbnail.Height)
|
||||||
|
require.Equal(t, expected.Thumbnail.URL, preview.Thumbnail.URL)
|
||||||
|
assertContainsLongString(t, expected.Thumbnail.DataURI, preview.Thumbnail.DataURI, 100)
|
||||||
|
}
|
||||||
|
|
||||||
|
func Test_UnfurlURLs_Reddit(t *testing.T) {
|
||||||
|
url := "https://www.reddit.com/r/Bitcoin/comments/13j0tzr/the_best_bitcoin_explanation_of_all_times/?utm_source=share"
|
||||||
|
expected := common.LinkPreview{
|
||||||
|
URL: url,
|
||||||
|
Hostname: "www.reddit.com",
|
||||||
|
Title: "The best bitcoin explanation of all times.",
|
||||||
|
Description: "",
|
||||||
|
Thumbnail: common.LinkPreviewThumbnail{},
|
||||||
|
}
|
||||||
|
|
||||||
|
transport := StubTransport{}
|
||||||
|
transport.AddURLMatcher(
|
||||||
|
"https://www.reddit.com/oembed",
|
||||||
|
[]byte(`
|
||||||
|
{
|
||||||
|
"provider_url": "https://www.reddit.com/",
|
||||||
|
"version": "1.0",
|
||||||
|
"title": "The best bitcoin explanation of all times.",
|
||||||
|
"provider_name": "reddit",
|
||||||
|
"type": "rich",
|
||||||
|
"author_name": "DTheDev"
|
||||||
|
}
|
||||||
|
`),
|
||||||
|
)
|
||||||
|
stubbedClient := http.Client{Transport: &transport}
|
||||||
|
|
||||||
|
previews, err := UnfurlURLs(nil, stubbedClient, []string{url})
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.Len(t, previews, 1)
|
||||||
|
preview := previews[0]
|
||||||
|
|
||||||
|
require.Equal(t, expected.URL, preview.URL)
|
||||||
|
require.Equal(t, expected.Hostname, preview.Hostname)
|
||||||
|
require.Equal(t, expected.Title, preview.Title)
|
||||||
|
require.Equal(t, expected.Description, preview.Description)
|
||||||
|
require.Equal(t, expected.Thumbnail, preview.Thumbnail)
|
||||||
|
}
|
||||||
|
|
||||||
|
func Test_UnfurlURLs_Timeout(t *testing.T) {
|
||||||
|
httpClient := http.Client{Timeout: time.Nanosecond}
|
||||||
|
previews, err := UnfurlURLs(nil, httpClient, []string{"https://status.im"})
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.Empty(t, previews)
|
||||||
|
}
|
||||||
|
|
||||||
|
func Test_UnfurlURLs_CommonFailures(t *testing.T) {
|
||||||
|
httpClient := http.Client{}
|
||||||
|
|
||||||
// Test URL that doesn't return any OpenGraph title.
|
// Test URL that doesn't return any OpenGraph title.
|
||||||
previews, err := UnfurlURLs(nil, []string{"https://wikipedia.org"})
|
transport := StubTransport{}
|
||||||
|
transport.AddURLMatcher(
|
||||||
|
"https://wikipedia.org",
|
||||||
|
[]byte("<html><head></head></html>"),
|
||||||
|
)
|
||||||
|
stubbedClient := http.Client{Transport: &transport}
|
||||||
|
previews, err := UnfurlURLs(nil, stubbedClient, []string{"https://wikipedia.org"})
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
require.Empty(t, previews)
|
require.Empty(t, previews)
|
||||||
|
|
||||||
// Test 404.
|
// Test 404.
|
||||||
previews, err = UnfurlURLs(nil, []string{"https://github.com/status-im/i_do_not_exist"})
|
previews, err = UnfurlURLs(nil, httpClient, []string{"https://github.com/status-im/i_do_not_exist"})
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
require.Empty(t, previews)
|
require.Empty(t, previews)
|
||||||
|
|
||||||
// Test no response when trying to get OpenGraph metadata.
|
// Test no response when trying to get OpenGraph metadata.
|
||||||
previews, err = UnfurlURLs(nil, []string{"https://wikipedia.o"})
|
previews, err = UnfurlURLs(nil, httpClient, []string{"https://wikipedia.o"})
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
require.Empty(t, previews)
|
require.Empty(t, previews)
|
||||||
}
|
}
|
||||||
|
|
|
@ -5937,7 +5937,7 @@ func generateAliasAndIdenticon(pk string) (string, string, error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *Messenger) UnfurlURLs(urls []string) ([]common.LinkPreview, error) {
|
func (m *Messenger) UnfurlURLs(urls []string) ([]common.LinkPreview, error) {
|
||||||
return linkpreview.UnfurlURLs(m.logger, urls)
|
return linkpreview.UnfurlURLs(m.logger, linkpreview.NewDefaultHTTPClient(), urls)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *Messenger) SendEmojiReaction(ctx context.Context, chatID, messageID string, emojiID protobuf.EmojiReaction_Type) (*MessengerResponse, error) {
|
func (m *Messenger) SendEmojiReaction(ctx context.Context, chatID, messageID string, emojiID protobuf.EmojiReaction_Type) (*MessengerResponse, error) {
|
||||||
|
|
Loading…
Reference in New Issue