chore: remove urls package (#5810)

* chore!: remove urls package

* fix_: move encodeDataURL and decodeDataURL to messenger_share_urls
This commit is contained in:
Igor Sirotin 2024-09-09 14:45:21 +01:00 committed by GitHub
parent c24eba8af2
commit bbdf5d5ae0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 45 additions and 592 deletions

View File

@ -1,12 +1,16 @@
package protocol
import (
	"bytes"
	"encoding/base64"
	"fmt"
	"io"
	"regexp"
	"strings"
"github.com/golang/protobuf/proto"
"github.com/andybalholm/brotli"
"github.com/status-im/status-go/api/multiformat"
"github.com/status-im/status-go/eth-node/crypto"
"github.com/status-im/status-go/eth-node/types"
@ -15,7 +19,6 @@ import (
"github.com/status-im/status-go/protocol/communities"
"github.com/status-im/status-go/protocol/protobuf"
"github.com/status-im/status-go/protocol/requests"
"github.com/status-im/status-go/protocol/urls"
"github.com/status-im/status-go/services/utils"
)
@ -140,7 +143,7 @@ func (m *Messenger) prepareEncodedCommunityData(community *communities.Community
return "", "", err
}
encodedData, err := urls.EncodeDataURL(urlData)
encodedData, err := encodeDataURL(urlData)
if err != nil {
return "", "", err
}
@ -168,7 +171,7 @@ func parseCommunityURLWithData(data string, chatKey string) (*URLDataResponse, e
return nil, err
}
urlData, err := urls.DecodeDataURL(data)
urlData, err := decodeDataURL(data)
if err != nil {
return nil, err
}
@ -291,7 +294,7 @@ func (m *Messenger) prepareEncodedCommunityChannelData(community *communities.Co
if err != nil {
return "", "", err
}
encodedData, err := urls.EncodeDataURL(urlData)
encodedData, err := encodeDataURL(urlData)
if err != nil {
return "", "", err
}
@ -337,7 +340,7 @@ func parseCommunityChannelURLWithData(data string, chatKey string) (*URLDataResp
return nil, err
}
urlData, err := urls.DecodeDataURL(data)
urlData, err := decodeDataURL(data)
if err != nil {
return nil, err
}
@ -461,7 +464,7 @@ func (m *Messenger) prepareEncodedUserData(contact *Contact) (string, string, er
return "", "", err
}
encodedData, err := urls.EncodeDataURL(urlData)
encodedData, err := encodeDataURL(urlData)
if err != nil {
return "", "", err
}
@ -484,7 +487,7 @@ func (m *Messenger) ShareUserURLWithData(contactID string) (string, error) {
}
func parseUserURLWithData(data string, chatKey string) (*URLDataResponse, error) {
urlData, err := urls.DecodeDataURL(data)
urlData, err := decodeDataURL(data)
if err != nil {
return nil, err
}
@ -575,3 +578,36 @@ func ParseSharedURL(url string) (*URLDataResponse, error) {
return nil, fmt.Errorf("not a status shared url")
}
// encodeDataURL compresses data with brotli and returns the compressed bytes
// encoded with base64.URLEncoding, so the result can be embedded in a URL.
// It is the inverse of decodeDataURL.
func encodeDataURL(data []byte) (string, error) {
	var compressed bytes.Buffer

	w := brotli.NewWriter(&compressed)
	if _, err := w.Write(data); err != nil {
		return "", err
	}
	// The writer is closed before the buffer is read so that the complete
	// compressed stream has been handed to the buffer.
	if err := w.Close(); err != nil {
		return "", err
	}

	return base64.URLEncoding.EncodeToString(compressed.Bytes()), nil
}
// decodeDataURL reverses encodeDataURL: it base64-decodes data (URL-safe
// alphabet) and brotli-decompresses the result.
//
// The whole decompressed stream is returned. Reading stops with an error once
// the output grows beyond maxDecodedSize, because the input comes from shared
// links and a small compressed payload can expand to a very large output.
func decodeDataURL(data string) ([]byte, error) {
	// Upper bound for the decompressed payload.
	const maxDecodedSize = 1 << 20

	compressed, err := base64.URLEncoding.DecodeString(data)
	if err != nil {
		return nil, err
	}

	// Allow one byte more than the limit so an oversized payload can be told
	// apart from one that is exactly maxDecodedSize long.
	limited := io.LimitReader(brotli.NewReader(bytes.NewReader(compressed)), maxDecodedSize+1)

	// A single Read call is not guaranteed to return the complete stream, so
	// read until EOF instead of relying on one fixed-size buffer.
	output, err := io.ReadAll(limited)
	if err != nil {
		return nil, err
	}
	if len(output) > maxDecodedSize {
		return nil, fmt.Errorf("decoded url data exceeds %d bytes", maxDecodedSize)
	}

	return output, nil
}

View File

@ -13,7 +13,6 @@ import (
"github.com/status-im/status-go/protocol/communities"
"github.com/status-im/status-go/protocol/protobuf"
"github.com/status-im/status-go/protocol/requests"
"github.com/status-im/status-go/protocol/urls"
)
const (
@ -112,10 +111,10 @@ func (s *MessengerShareUrlsSuite) TestDecodeEncodeDataURL() {
}
for i := range ts {
encodedData, err := urls.EncodeDataURL(ts[i])
encodedData, err := encodeDataURL(ts[i])
s.Require().NoError(err)
decodedData, err := urls.DecodeDataURL(encodedData)
decodedData, err := decodeDataURL(encodedData)
s.Require().NoError(err)
s.Require().Equal(ts[i], decodedData)
}

View File

@ -1,303 +0,0 @@
package urls
import (
"encoding/json"
"fmt"
"html"
"io/ioutil"
"net/http"
"net/url"
"strings"
"time"
"github.com/keighl/metabolize"
)
// YoutubeOembedData holds the fields that GetYoutubePreviewData decodes from
// the JSON response of the endpoint in YoutubeOembedLink.
type YoutubeOembedData struct {
ProviderName string `json:"provider_name"`
Title string `json:"title"`
ThumbnailURL string `json:"thumbnail_url"`
}
// TwitterOembedData holds the fields that GetTwitterPreviewData decodes from
// the JSON response of the endpoint in TwitterOembedLink.
type TwitterOembedData struct {
ProviderName string `json:"provider_name"`
AuthorName string `json:"author_name"`
// HTML is converted to plain text by getReadableTextFromTweetHTML.
HTML string `json:"html"`
}
// GiphyOembedData holds the fields that GetGiphyPreviewData decodes from the
// JSON response of the endpoint in GiphyOembedLink.
type GiphyOembedData struct {
ProviderName string `json:"provider_name"`
Title string `json:"title"`
// URL is copied into LinkPreviewData.ThumbnailURL by GetGiphyPreviewData.
URL string `json:"url"`
Height int `json:"height"`
Width int `json:"width"`
}
// LinkPreviewData is the preview returned by the Get*PreviewData functions.
//
// Site, Title and ThumbnailURL also carry `meta` tags. GetGenericLinkPreviewData
// passes the struct to metabolize.Metabolize, which presumably fills those
// fields from the page's matching Open Graph meta tags — confirm against the
// metabolize documentation.
type LinkPreviewData struct {
Site string `json:"site" meta:"og:site_name"`
Title string `json:"title" meta:"og:title"`
ThumbnailURL string `json:"thumbnailUrl" meta:"og:image"`
// ContentType is only set by FakeGenericImageLinkPreviewData, from the
// Content-type response header.
ContentType string `json:"contentType"`
Height int `json:"height"`
Width int `json:"width"`
}
// Site describes one entry of the list returned by LinkPreviewWhitelist.
type Site struct {
Title string `json:"title"`
// Address is the host name of the site, e.g. "youtube.com".
Address string `json:"address"`
// ImageSite is true only for the GIPHY entries of LinkPreviewWhitelist.
// NOTE(review): presumably marks hosts whose links point at images —
// confirm with the API consumers.
ImageSite bool `json:"imageSite"`
}
// oEmbed endpoint templates. Each one contains a single %s verb that GetOembed
// replaces with the link to look up (via fmt.Sprintf).
const (
YoutubeOembedLink = "https://www.youtube.com/oembed?format=json&url=%s"
TwitterOembedLink = "https://publish.twitter.com/oembed?url=%s"
GiphyOembedLink = "https://giphy.com/services/oembed?url=%s"
)
var (
// httpClient is the client used for every HTTP request made in this file.
// Each request is limited to 30 seconds by the client's Timeout.
httpClient = http.Client{Timeout: 30 * time.Second}
)
// LinkPreviewWhitelist returns the fixed list of sites known to this package.
// The addresses are the same host names that GetLinkPreviewData dispatches on.
// A new slice is built on every call.
func LinkPreviewWhitelist() []Site {
	// ImageSite keeps its zero value (false) wherever it is not spelled out.
	whitelist := []Site{
		{Title: "Status", Address: "our.status.im"},
		{Title: "YouTube", Address: "youtube.com"},
		{Title: "YouTube with subdomain", Address: "www.youtube.com"},
		{Title: "YouTube shortener", Address: "youtu.be"},
		{Title: "YouTube Mobile", Address: "m.youtube.com"},
		{Title: "Twitter", Address: "twitter.com"},
		{Title: "Twitter Mobile", Address: "mobile.twitter.com"},
		{Title: "GIPHY GIFs shortener", Address: "gph.is", ImageSite: true},
		{Title: "GIPHY GIFs", Address: "giphy.com", ImageSite: true},
		{Title: "GIPHY GIFs subdomain", Address: "media.giphy.com", ImageSite: true},
		{Title: "GitHub", Address: "github.com"},
		{Title: "Tenor GIFs subdomain", Address: "media.tenor.com"},
		// Medium unfurling is failing - https://github.com/status-im/status-go/issues/2192
		//
		// {Title: "Medium", Address: "medium.com"},
	}
	return whitelist
}
// getURLContent issues a GET request for url through the shared httpClient and
// returns the complete response body.
//
// The response status code is not inspected: the body of a non-2xx response is
// returned like any other. A transport error is wrapped so the caller can
// still see the underlying cause.
func getURLContent(url string) (data []byte, err error) {
	response, err := httpClient.Get(url)
	if err != nil {
		return nil, fmt.Errorf("can't get content from link %s: %w", url, err)
	}
	defer response.Body.Close()

	return ioutil.ReadAll(response.Body)
}
// GetOembed fetches an oEmbed document and decodes its JSON body into data.
//
// name is only used in error messages. endpoint is a format string with one
// %s verb that is replaced by url. data must be a non-nil pointer to the value
// to fill; anything else is reported as an error by json.Unmarshal.
//
// NOTE(review): url is inserted into the query string without escaping, so a
// link that itself contains '&' or '#' is presumably cut short by the remote
// endpoint — confirm and escape at the call sites if needed.
func GetOembed(name, endpoint, url string, data interface{}) error {
	oembedLink := fmt.Sprintf(endpoint, url)

	jsonBytes, err := getURLContent(oembedLink)
	if err != nil {
		return fmt.Errorf("can't get bytes from %s oembed response on %s link: %w", name, oembedLink, err)
	}

	// data already is the pointer to decode into; taking its address again
	// would hide a non-pointer argument behind a silent no-op.
	return json.Unmarshal(jsonBytes, data)
}
// GetYoutubePreviewData builds the preview for a YouTube link from the oEmbed
// response of YoutubeOembedLink: provider name, title and thumbnail URL.
// On failure the zero LinkPreviewData is returned together with the error.
func GetYoutubePreviewData(link string) (previewData LinkPreviewData, err error) {
	// Decode into a plain struct value. Handing GetOembed a pointer to a
	// pointer would let a JSON `null` body reset that pointer to nil and make
	// the field accesses below panic.
	var oembedData YoutubeOembedData
	if err = GetOembed("Youtube", YoutubeOembedLink, link, &oembedData); err != nil {
		return previewData, err
	}

	previewData.Title = oembedData.Title
	previewData.Site = oembedData.ProviderName
	previewData.ThumbnailURL = oembedData.ThumbnailURL
	return previewData, nil
}
// GetTwitterPreviewData builds the preview for a Twitter link from the oEmbed
// response of TwitterOembedLink. The title is the tweet HTML reduced to
// readable text; no thumbnail is set.
func GetTwitterPreviewData(link string) (previewData LinkPreviewData, err error) {
	var tweet TwitterOembedData
	if err = GetOembed("Twitter", TwitterOembedLink, link, &tweet); err != nil {
		return previewData, err
	}

	previewData.Site = tweet.ProviderName
	previewData.Title = getReadableTextFromTweetHTML(tweet.HTML)
	return previewData, nil
}
// getReadableTextFromTweetHTML turns the HTML snippet of a tweet into plain
// text: <br> becomes a line break, every "https://" starts on a new line, HTML
// entities are unescaped, tags are stripped and the result is trimmed.
func getReadableTextFromTweetHTML(s string) string {
	// Line breaks are inserted while the markup is still present.
	text := strings.ReplaceAll(s, "\u003Cbr\u003E", "\n")
	text = strings.ReplaceAll(text, "https://", "\nhttps://")

	// Resolve entities such as &#225; first, then drop the tags.
	text = stripHTMLTags(html.UnescapeString(text))

	return strings.Trim(strings.TrimSpace(text), "\n")
}
// GetGenericLinkPreviewData downloads link and lets metabolize.Metabolize fill
// previewData from the response body (see the `meta` tags on LinkPreviewData).
//
// The response body is always closed before returning. Errors wrap their
// underlying cause.
func GetGenericLinkPreviewData(link string) (previewData LinkPreviewData, err error) {
	res, err := httpClient.Get(link)
	if err != nil {
		return previewData, fmt.Errorf("can't get content from link %s: %w", link, err)
	}
	// Without this the connection behind the response is never released.
	defer res.Body.Close()

	if err = metabolize.Metabolize(res.Body, &previewData); err != nil {
		return previewData, fmt.Errorf("can't get meta info from link %s: %w", link, err)
	}

	return previewData, nil
}
// FakeGenericImageLinkPreviewData builds a preview for a link that points
// directly at an image, without downloading the image itself.
//
// A HEAD request checks that link answers with status 200 and provides the
// content type. title is used as is, Site is the lower-cased host name of
// link, ThumbnailURL is link itself, and Height and Width stay 0.
func FakeGenericImageLinkPreviewData(title string, link string) (previewData LinkPreviewData, err error) {
	// Named parsedLink so the net/url package is not shadowed.
	parsedLink, err := url.Parse(link)
	if err != nil {
		return previewData, fmt.Errorf("Failed to parse link %s", link)
	}

	res, err := httpClient.Head(link)
	if err != nil {
		return previewData, fmt.Errorf("Failed to get HEAD from link %s", link)
	}
	// Close the response body so the underlying connection is released.
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		return previewData, fmt.Errorf("Image link %s is not available", link)
	}

	previewData.Title = title
	previewData.Site = strings.ToLower(parsedLink.Hostname())
	previewData.ContentType = res.Header.Get("Content-type")
	previewData.ThumbnailURL = link

	return previewData, nil
}
// GetGiphyPreviewData builds the preview for a Giphy link from the oEmbed
// response of GiphyOembedLink: provider name, title, image URL and dimensions.
func GetGiphyPreviewData(link string) (previewData LinkPreviewData, err error) {
	var gif GiphyOembedData
	if err = GetOembed("Giphy", GiphyOembedLink, link, &gif); err != nil {
		return previewData, err
	}

	previewData.Site = gif.ProviderName
	previewData.Title = gif.Title
	previewData.ThumbnailURL = gif.URL
	previewData.Width = gif.Width
	previewData.Height = gif.Height
	return previewData, nil
}
// GetGiphyLongURL Giphy has a shortener service called gph.is, the oembed service doesn't work with shortened urls,
// so we need to fetch the long url first.
//
// The long URL is taken from the request that produced the final response.
// If it equals shortURL no redirect took place and an error is returned.
func GetGiphyLongURL(shortURL string) (longURL string, err error) {
	res, err := httpClient.Get(shortURL)
	if err != nil {
		return "", fmt.Errorf("can't get bytes from Giphy's short url at %s: %w", shortURL, err)
	}
	// Only the URL is needed; close the body so the connection is released.
	defer res.Body.Close()

	canonicalURL := res.Request.URL.String()
	if canonicalURL == shortURL {
		// no redirect, ie. not a valid url
		return "", fmt.Errorf("unable to process Giphy's short url at %s", shortURL)
	}

	return canonicalURL, nil
}
// GetGiphyShortURLPreviewData resolves a shortened Giphy link with
// GetGiphyLongURL and returns the preview of the resulting long URL.
func GetGiphyShortURLPreviewData(shortURL string) (data LinkPreviewData, err error) {
	resolved, err := GetGiphyLongURL(shortURL)
	if err == nil {
		return GetGiphyPreviewData(resolved)
	}
	return data, err
}
// GetLinkPreviewData returns the preview for link by dispatching on its
// lower-cased host name to the matching provider-specific function.
// Hosts that are not handled here yield a "isn't whitelisted" error.
func GetLinkPreviewData(link string) (previewData LinkPreviewData, err error) {
	u, err := url.Parse(link)
	if err != nil {
		return previewData, fmt.Errorf("can't parse link %s: %w", link, err)
	}

	hostname := strings.ToLower(u.Hostname())

	switch hostname {
	case "youtube.com", "youtu.be", "www.youtube.com", "m.youtube.com":
		return GetYoutubePreviewData(link)
	case "github.com", "our.status.im":
		return GetGenericLinkPreviewData(link)
	case "giphy.com", "media.giphy.com":
		return GetGiphyPreviewData(link)
	case "gph.is":
		// Short links have to be resolved before oEmbed can be queried.
		return GetGiphyShortURLPreviewData(link)
	case "twitter.com", "mobile.twitter.com":
		return GetTwitterPreviewData(link)
	case "media.tenor.com":
		return FakeGenericImageLinkPreviewData("Tenor", link)
	default:
		return previewData, fmt.Errorf("link %s isn't whitelisted. Hostname - %s", link, u.Hostname())
	}
}
// EncodeDataURL is the exported entry point for encodeDataURL; it forwards
// data unchanged and returns that function's result.
func EncodeDataURL(data []byte) (string, error) {
return encodeDataURL(data)
}
// DecodeDataURL is the exported entry point for decodeDataURL; it forwards
// data unchanged and returns that function's result.
func DecodeDataURL(data string) ([]byte, error) {
return decodeDataURL(data)
}

View File

@ -1,179 +0,0 @@
package urls
import (
"strings"
"testing"
"github.com/stretchr/testify/require"
)
// TestGetLinkPreviewData checks that both YouTube URL forms of the same video
// yield the expected preview and that an unknown host is rejected.
// The test performs real network requests.
func TestGetLinkPreviewData(t *testing.T) {
	expected := LinkPreviewData{
		Site:         "YouTube",
		Title:        "Status Town Hall #67 - 12 October 2020",
		ThumbnailURL: "https://i.ytimg.com/vi/mzOyYtfXkb0/hqdefault.jpg",
	}

	cases := []struct {
		URL        string
		ShouldFail bool
	}{
		{URL: "https://www.youtube.com/watch?v=mzOyYtfXkb0", ShouldFail: false},
		{URL: "https://youtu.be/mzOyYtfXkb0", ShouldFail: false},
		{URL: "https://www.test.com/unknown", ShouldFail: true},
	}

	for _, tc := range cases {
		got, err := GetLinkPreviewData(tc.URL)
		if !tc.ShouldFail {
			require.NoError(t, err)
			require.Equal(t, expected.Site, got.Site)
			require.Equal(t, expected.Title, got.Title)
			require.Equal(t, expected.ThumbnailURL, got.ThumbnailURL)
			continue
		}
		require.Error(t, err)
	}
}
// thumbnailURLWithoutSubdomain splits url at every "." and returns all pieces
// except the first one. A url without any "." yields an empty slice.
func thumbnailURLWithoutSubdomain(url string) []string {
	pieces := strings.Split(url, ".")
	return pieces[1:]
}
func TestGetGiphyPreviewData(t *testing.T) {
validGiphyLink := "https://giphy.com/gifs/FullMag-robot-boston-dynamics-dance-lcG3qwtTKSNI2i5vst"
previewData, err := GetGiphyPreviewData(validGiphyLink)
bostonDynamicsEthGifData := LinkPreviewData{
Site: "GIPHY",
Title: "Boston Dynamics Yes GIF by FullMag - Find & Share on GIPHY",
ThumbnailURL: "https://media1.giphy.com/media/lcG3qwtTKSNI2i5vst/giphy.gif",
Height: 480,
Width: 480,
}
require.NoError(t, err)
require.Equal(t, bostonDynamicsEthGifData.Site, previewData.Site)
require.Equal(t, bostonDynamicsEthGifData.Title, previewData.Title)
require.Equal(t, bostonDynamicsEthGifData.Height, previewData.Height)
require.Equal(t, bostonDynamicsEthGifData.Width, previewData.Width)
// Giphy oembed returns links to different servers: https://media1.giphy.com, https://media2.giphy.com and so on
// We don't care about the server as long as other parts are equal, so we split at "." and ignore the first item
require.Equal(t, thumbnailURLWithoutSubdomain(bostonDynamicsEthGifData.ThumbnailURL), thumbnailURLWithoutSubdomain(previewData.ThumbnailURL))
invalidGiphyLink := "https://giphy.com/gifs/this-gif-does-not-exist-44444"
_, err = GetGiphyPreviewData(invalidGiphyLink)
require.Error(t, err)
mediaLink := "https://media.giphy.com/media/lcG3qwtTKSNI2i5vst/giphy.gif"
mediaLinkData, _ := GetGiphyPreviewData(mediaLink)
require.Equal(t, thumbnailURLWithoutSubdomain(mediaLinkData.ThumbnailURL), thumbnailURLWithoutSubdomain(previewData.ThumbnailURL))
}
// TestGetGiphyLongURL checks that a gph.is short link resolves to the expected
// long URL and that unreachable or non-redirecting links fail.
// The test performs real network requests.
func TestGetGiphyLongURL(t *testing.T) {
	shortURL := "https://gph.is/g/aXLyK7P"
	actualLongURL := "https://giphy.com/gifs/FullMag-robot-boston-dynamics-dance-lcG3qwtTKSNI2i5vst"

	computedLongURL, err := GetGiphyLongURL(shortURL)
	// Report a failed lookup as an error rather than as a string mismatch.
	require.NoError(t, err)
	// require.Equal takes the expected value first.
	require.Equal(t, actualLongURL, computedLongURL)

	_, err = GetGiphyLongURL("http://this-giphy-site-doesn-not-exist.se/bogus-url")
	require.Error(t, err)

	_, err = GetGiphyLongURL("http://gph.is/bogus-url-but-correct-domain")
	require.Error(t, err)
}
func TestGetGiphyShortURLPreviewData(t *testing.T) {
shortURL := "https://gph.is/g/aXLyK7P"
previewData, err := GetGiphyShortURLPreviewData(shortURL)
bostonDynamicsEthGifData := LinkPreviewData{
Site: "GIPHY",
Title: "Boston Dynamics Yes GIF by FullMag - Find & Share on GIPHY",
ThumbnailURL: "https://media1.giphy.com/media/lcG3qwtTKSNI2i5vst/giphy.gif",
}
require.NoError(t, err)
require.Equal(t, bostonDynamicsEthGifData.Site, previewData.Site)
require.Equal(t, bostonDynamicsEthGifData.Title, previewData.Title)
}
// TestStatusLinkPreviewData checks the preview of an our.status.im article.
// The test performs real network requests.
func TestStatusLinkPreviewData(t *testing.T) {
	const articleURL = "https://our.status.im/what-is-a-security-audit-when-you-should-get-one-and-how-to-prepare/"

	want := LinkPreviewData{
		Site:         "Our Status",
		Title:        "What is a Security Audit, When You Should Get One, and How to Prepare.",
		ThumbnailURL: "https://our.status.im/content/images/2021/02/Security-Audit-Header.png",
	}

	got, err := GetLinkPreviewData(articleURL)
	require.NoError(t, err)

	require.Equal(t, want.Site, got.Site)
	require.Equal(t, want.Title, got.Title)
	require.Equal(t, want.ThumbnailURL, got.ThumbnailURL)
}
// Medium unfurling is failing - https://github.com/status-im/status-go/issues/2192
//
// TestMediumLinkPreviewData is skipped until that issue is resolved; the
// assertions below describe the preview expected for a Medium article.
func TestMediumLinkPreviewData(t *testing.T) {
	t.Skip("broken test") // https://github.com/status-im/status-go/issues/2192

	const articleURL = "https://medium.com/the-bitcoin-podcast-blog/a-look-at-the-status-im-ico-token-distribution-f5bcf7f00907"

	want := LinkPreviewData{
		Site:         "Medium",
		Title:        "A Look at the Status.im ICO Token Distribution",
		ThumbnailURL: "https://miro.medium.com/max/700/1*Smc0y_TOL1XsofS1wxa3rg.jpeg",
	}

	got, err := GetLinkPreviewData(articleURL)
	require.NoError(t, err)

	require.Equal(t, want.Site, got.Site)
	require.Equal(t, want.Title, got.Title)
	require.Equal(t, want.ThumbnailURL, got.ThumbnailURL)
}
// TestTwitterLinkPreviewData is skipped as flaky. When enabled it compares the
// previews of two tweets and a profile page with fixed expectations and checks
// that an unknown host is rejected.
func TestTwitterLinkPreviewData(t *testing.T) {
	t.Skip("flaky test") // https://github.com/status-im/status-go/issues/3529

	type testCase struct {
		URL        string
		Expected   LinkPreviewData
		ShouldFail bool
	}

	cases := []testCase{
		{
			URL: "https://twitter.com/ethstatus/status/1419674733885407236",
			Expected: LinkPreviewData{
				Site:  "Twitter",
				Title: "Crypto isn't going anywhere.— Status (@ethstatus) July 26, 2021",
			},
		},
		{
			URL: "https://twitter.com/ethstatus/status/1420035091997278214",
			Expected: LinkPreviewData{
				Site: "Twitter",
				Title: "🎉 Status v1.15 is a go! 🎉\n\n📌 Pin important messages in chats and groups" +
					"\n✏ Edit messages after sending\n🔬 Scan QR codes with the browser\n⚡ FASTER app navigation!" +
					"\nhttps://t.co/qKrhDArVKb— Status (@ethstatus) July 27, 2021",
			},
		},
		{
			URL: "https://twitter.com/ethstatus",
			Expected: LinkPreviewData{
				Site:  "Twitter",
				Title: "Tweets by ethstatus",
			},
		},
		{
			URL:        "https://www.test.com/unknown",
			Expected:   LinkPreviewData{},
			ShouldFail: true,
		},
	}

	for _, tc := range cases {
		got, err := GetLinkPreviewData(tc.URL)
		if !tc.ShouldFail {
			require.NoError(t, err)
			require.Equal(t, tc.Expected.Site, got.Site)
			require.Equal(t, tc.Expected.Title, got.Title)
			require.Equal(t, tc.Expected.ThumbnailURL, got.ThumbnailURL)
			continue
		}
		require.Error(t, err)
	}
}

View File

@ -1,91 +0,0 @@
package urls
import (
"bytes"
"encoding/base64"
"strings"
"unicode/utf8"
"github.com/andybalholm/brotli"
)
const (
	htmlTagStart = 60 // Unicode `<`
	htmlTagEnd   = 62 // Unicode `>`
)

// Taken from https://stackoverflow.com/a/64701836
// Aggressively strips HTML tags from a string.
// It will only keep anything between `>` and `<`.
//
// Text is copied in segments: a segment starts right after a `>` (or at the
// beginning of s) and ends at the next `<` that opens a tag, or at the end of
// s when no tag is open. Consequences:
//   - text after an unterminated `<` is dropped;
//   - text in front of a stray `>` is dropped;
//   - repeated `<` inside a tag (`<<br>`) belong to the same tag.
func stripHTMLTags(s string) string {
	// Setup a string builder and allocate enough memory for the new string.
	var builder strings.Builder
	builder.Grow(len(s) + utf8.UTFMax)

	inTag := false // True if we are inside an HTML tag.
	textStart := 0 // Byte index where the pending text segment begins.

	for i, c := range s {
		switch c {
		case htmlTagStart:
			// Flush the pending text only when a tag really opens here; a
			// further `<` inside an open tag must not flush it again.
			if !inTag {
				builder.WriteString(s[textStart:i])
				inTag = true
			}
		case htmlTagEnd:
			inTag = false
			textStart = i + 1 // `>` is a single byte.
		}
	}

	// Flush the trailing text after the loop. Doing it here instead of on
	// "the last byte index" keeps it correct for strings that end in a
	// multi-byte rune or in a tag character.
	if !inTag {
		builder.WriteString(s[textStart:])
	}

	return builder.String()
}
// encodeDataURL brotli-compresses data and returns the compressed bytes
// encoded with base64.URLEncoding. decodeDataURL performs the reverse steps.
func encodeDataURL(data []byte) (string, error) {
	var packed bytes.Buffer

	compressor := brotli.NewWriter(&packed)
	if _, err := compressor.Write(data); err != nil {
		return "", err
	}
	// Close the compressor before the buffer contents are encoded.
	if err := compressor.Close(); err != nil {
		return "", err
	}

	encoded := base64.URLEncoding.EncodeToString(packed.Bytes())
	return encoded, nil
}
// decodeDataURL reverses encodeDataURL: data is decoded with
// base64.URLEncoding and the result is decompressed with brotli.
//
// The decompressed bytes are obtained with a single Read into a 4096-byte
// buffer, so at most 4096 bytes are ever returned.
//
// NOTE(review): a payload that decompresses to more than 4096 bytes appears to
// be silently truncated, and a single Read is not guaranteed to deliver the
// whole stream — consider reading until EOF with an explicit size cap.
func decodeDataURL(data string) ([]byte, error) {
decoded, err := base64.URLEncoding.DecodeString(data)
if err != nil {
return nil, err
}
// Fixed-size destination for the one Read call below.
output := make([]byte, 4096)
bb := bytes.NewBuffer(decoded)
reader := brotli.NewReader(bb)
n, err := reader.Read(output)
if err != nil {
return nil, err
}
// Only the first n bytes of output were written by Read.
return output[:n], nil
}

View File

@ -42,7 +42,6 @@ import (
"github.com/status-im/status-go/protocol/pushnotificationclient"
"github.com/status-im/status-go/protocol/requests"
"github.com/status-im/status-go/protocol/transport"
"github.com/status-im/status-go/protocol/urls"
"github.com/status-im/status-go/protocol/verification"
"github.com/status-im/status-go/services/ext/mailservers"
)
@ -1262,14 +1261,6 @@ func (api *PublicAPI) EmojiReactionsByChatIDMessageID(chatID string, messageID s
return api.service.messenger.EmojiReactionsByChatIDMessageID(chatID, messageID)
}
// GetLinkPreviewWhitelist returns the result of urls.LinkPreviewWhitelist
// unchanged.
func (api *PublicAPI) GetLinkPreviewWhitelist() []urls.Site {
return urls.LinkPreviewWhitelist()
}
// GetLinkPreviewData returns the result of urls.GetLinkPreviewData for link
// unchanged; it does not use any state of api.
func (api *PublicAPI) GetLinkPreviewData(link string) (previewData urls.LinkPreviewData, err error) {
return urls.GetLinkPreviewData(link)
}
// GetTextURLsToUnfurl parses text and returns a deduplicated and (somewhat) normalized
// slice of URLs. The returned URLs can be used as cache keys by clients.
// For each URL there's a corresponding metadata which should be used as to plan the unfurling.