Add unfurling for Twitter links

Fix lint issues

Wrap json.Unmarshal() errors in urls.go

Bump version 0.83.9 -> 0.83.10
This commit is contained in:
Ismael 2021-08-03 23:30:23 +02:00 committed by flexsurfer
parent 7dfeda1511
commit 1efe023dd7
4 changed files with 155 additions and 4 deletions

View File

@ -1 +1 @@
0.83.9
0.83.10

View File

@ -3,6 +3,7 @@ package urls
import (
"encoding/json"
"fmt"
"html"
"io/ioutil"
"net/http"
"net/url"
@ -18,6 +19,12 @@ type YoutubeOembedData struct {
ThumbnailURL string `json:"thumbnail_url"`
}
// TwitterOembedData holds the fields decoded from the JSON body of a
// Twitter oEmbed response.
type TwitterOembedData struct {
// ProviderName is read from the "provider_name" JSON key.
ProviderName string `json:"provider_name"`
// AuthorName is read from the "author_name" JSON key.
AuthorName string `json:"author_name"`
// HTML is read from the "html" JSON key; it carries the embeddable markup.
HTML string `json:"html"`
}
type GiphyOembedData struct {
ProviderName string `json:"provider_name"`
Title string `json:"title"`
@ -50,6 +57,7 @@ type Site struct {
}
// oEmbed endpoint templates. Each one is a fmt format string with a single
// %s verb that stands for the link being looked up.
const (
	// YoutubeOembedLink is the YouTube oEmbed endpoint (JSON format requested).
	YoutubeOembedLink = "https://www.youtube.com/oembed?format=json&url=%s"
	// TwitterOembedLink is the Twitter oEmbed endpoint.
	TwitterOembedLink = "https://publish.twitter.com/oembed?url=%s"
	// GiphyOembedLink is the Giphy oEmbed endpoint.
	GiphyOembedLink = "https://giphy.com/services/oembed?url=%s"
	// TenorOembedLink is the Tenor oEmbed endpoint.
	TenorOembedLink = "https://tenor.com/oembed?url=%s"
)
@ -74,6 +82,11 @@ func LinkPreviewWhitelist() []Site {
Address: "youtu.be",
ImageSite: false,
},
Site{
Title: "Twitter",
Address: "twitter.com",
ImageSite: false,
},
// Site{
// Title: "Tenor GIFs",
// Address: "tenor.com",
@ -129,7 +142,7 @@ func GetYoutubeOembed(url string) (data YoutubeOembedData, err error) {
err = json.Unmarshal(jsonBytes, &data)
if err != nil {
return data, fmt.Errorf("can't unmarshall json")
return data, fmt.Errorf("can't unmarshall json %w", err)
}
return data, nil
@ -148,6 +161,46 @@ func GetYoutubePreviewData(link string) (previewData LinkPreviewData, err error)
return previewData, nil
}
// GetTwitterOembed fetches and decodes the Twitter oEmbed data for a link.
//
// The oEmbed request URL is built by formatting TwitterOembedLink with url,
// the response body is downloaded with GetURLContent and then unmarshalled
// into a TwitterOembedData.
//
// Both failure paths wrap the underlying error with %w, so callers can still
// inspect the original cause with errors.Is / errors.As.
func GetTwitterOembed(url string) (data TwitterOembedData, err error) {
	// NOTE(review): url is inserted into the query string verbatim; links
	// containing '&' or '#' may need query-escaping — confirm against callers.
	oembedLink := fmt.Sprintf(TwitterOembedLink, url)

	jsonBytes, err := GetURLContent(oembedLink)
	if err != nil {
		return data, fmt.Errorf(
			"can't get bytes from twitter oembed response on %s link: %w",
			oembedLink, err,
		)
	}

	if err = json.Unmarshal(jsonBytes, &data); err != nil {
		return data, fmt.Errorf("can't unmarshall json %w", err)
	}

	return data, nil
}
// GetTwitterPreviewData builds the link preview for a Twitter link.
//
// It requests the oEmbed data through GetTwitterOembed, uses the provider
// name as the preview's Site and the plain text extracted from the oEmbed
// HTML (via GetReadableTextFromTweetHTML) as the preview's Title.
// Any error from GetTwitterOembed is returned unchanged.
func GetTwitterPreviewData(link string) (previewData LinkPreviewData, err error) {
	var tweet TwitterOembedData
	if tweet, err = GetTwitterOembed(link); err != nil {
		return previewData, err
	}

	previewData.Site = tweet.ProviderName
	previewData.Title = GetReadableTextFromTweetHTML(tweet.HTML)

	return previewData, nil
}
// GetReadableTextFromTweetHTML turns the HTML fragment of a Twitter oEmbed
// response into plain text.
//
// The transformation, in order:
//  1. every "<br>" becomes a line break;
//  2. every "https://" is pushed onto its own line;
//  3. HTML tags are removed with stripHTMLTags;
//  4. HTML entities (e.g. &#225;, &mdash;) are decoded;
//  5. leading and trailing whitespace, including newlines, is trimmed.
func GetReadableTextFromTweetHTML(s string) string {
	s = strings.ReplaceAll(s, "<br>", "\n")             // Adds line break for all <br>
	s = strings.ReplaceAll(s, "https://", "\nhttps://") // Displays links in next line

	// Tags must be stripped BEFORE entities are decoded: decoding first would
	// turn an escaped "&lt;" / "&gt;" inside the tweet text into a literal
	// '<' / '>' that the stripper then treats as markup, dropping real text.
	s = stripHTMLTags(s)
	s = html.UnescapeString(s) // Parses html special characters like &#225;

	// TrimSpace already removes surrounding newlines, so no extra
	// TrimLeft/TrimRight pass is needed.
	return strings.TrimSpace(s)
}
func GetGenericLinkPreviewData(link string) (previewData LinkPreviewData, err error) {
// nolint: gosec
res, err := httpClient.Get(link)
@ -175,7 +228,7 @@ func GetGiphyOembed(url string) (data GiphyOembedData, err error) {
err = json.Unmarshal(jsonBytes, &data)
if err != nil {
return data, fmt.Errorf("can't unmarshall json")
return data, fmt.Errorf("can't unmarshall json %w", err)
}
return data, nil
@ -236,7 +289,7 @@ func GetTenorOembed(url string) (data TenorOembedData, err error) {
err = json.Unmarshal(jsonBytes, &data)
if err != nil {
return data, fmt.Errorf("can't unmarshall json")
return data, fmt.Errorf("can't unmarshall json %w", err)
}
return data, nil
@ -276,6 +329,8 @@ func GetLinkPreviewData(link string) (previewData LinkPreviewData, err error) {
return GetGiphyShortURLPreviewData(link)
case "tenor.com":
return GetTenorPreviewData(link)
case "twitter.com":
return GetTwitterPreviewData(link)
default:
return previewData, fmt.Errorf("link %s isn't whitelisted. Hostname - %s", link, url.Hostname())
}

View File

@ -151,3 +151,45 @@ func TestStatusLinkPreviewData(t *testing.T) {
// require.Equal(t, statusSecurityAudit.Title, previewData.Title)
// require.Equal(t, statusSecurityAudit.ThumbnailURL, previewData.ThumbnailURL)
// }
// TestTwitterLinkPreviewData checks the preview data produced by
// GetLinkPreviewData for two tweets and one profile page, and that a
// non-whitelisted host yields an error.
//
// The ThumbnailURL assertion compares the expected fixture against the value
// actually returned, so a regression that starts populating the thumbnail is
// detected (comparing the fixture with "" would always pass).
func TestTwitterLinkPreviewData(t *testing.T) {
	cases := []struct {
		link     string
		expected LinkPreviewData
	}{
		{
			link: "https://twitter.com/ethstatus/status/1419674733885407236",
			expected: LinkPreviewData{
				Site:  "Twitter",
				Title: "Crypto isn't going anywhere.— Status (@ethstatus) July 26, 2021",
			},
		},
		{
			link: "https://twitter.com/ethstatus/status/1420035091997278214",
			expected: LinkPreviewData{
				Site: "Twitter",
				Title: "🎉 Status v1.15 is a go! 🎉\n\n📌 Pin important messages in chats and groups" +
					"\n✏ Edit messages after sending\n🔬 Scan QR codes with the browser\n⚡ FASTER app navigation!" +
					"\nhttps://t.co/qKrhDArVKb— Status (@ethstatus) July 27, 2021",
			},
		},
		{
			link: "https://twitter.com/ethstatus",
			expected: LinkPreviewData{
				Site:  "Twitter",
				Title: "Tweets by ethstatus",
			},
		},
	}

	for _, tc := range cases {
		previewData, err := GetLinkPreviewData(tc.link)
		require.NoError(t, err)
		require.Equal(t, tc.expected.Site, previewData.Site)
		require.Equal(t, tc.expected.Title, previewData.Title)
		require.Equal(t, tc.expected.ThumbnailURL, previewData.ThumbnailURL)
	}

	// A host that is not handled by GetLinkPreviewData must be rejected.
	_, err := GetLinkPreviewData("https://www.test.com/unknown")
	require.Error(t, err)
}

54
protocol/urls/utils.go Normal file
View File

@ -0,0 +1,54 @@
package urls
import (
"strings"
"unicode/utf8"
)
const (
	htmlTagStart = 60 // Unicode `<`
	htmlTagEnd   = 62 // Unicode `>`
)

// stripHTMLTags aggressively strips HTML tags from a string.
// Adapted from https://stackoverflow.com/a/64701836
//
// Everything from a `<` up to and including the next `>` is removed, and
// only the text found between a `>` and the following `<` (plus any text
// before the first tag and after the last one) is kept. Consequences:
//   - repeated openers such as `<<br>` are removed as a single tag;
//   - an unterminated tag at the end of the input is dropped;
//   - a stray `>` outside of a tag discards the text that precedes it.
//
// The input is returned unchanged when it contains neither `<` nor `>`.
func stripHTMLTags(s string) string {
	// The output can never be longer than the input; the utf8.UTFMax headroom
	// is kept from the original implementation.
	var builder strings.Builder
	builder.Grow(len(s) + utf8.UTFMax)

	inTag := false // True while we are between a `<` and its closing `>`.
	textStart := 0 // Byte index where the current run of plain text begins.

	for i, c := range s {
		switch c {
		case htmlTagStart:
			// Flush the pending text only when a tag actually opens; a `<`
			// seen inside a tag must not flush the same text a second time.
			if !inTag {
				builder.WriteString(s[textStart:i])
				inTag = true
			}
		case htmlTagEnd:
			inTag = false
			textStart = i + 1
		}
	}

	// Flush the trailing text after the loop rather than by comparing the
	// loop index with len(s): range yields the byte index of each rune's
	// first byte, which never equals len(s)-1 for a multi-byte final rune.
	if !inTag {
		builder.WriteString(s[textStart:])
	}

	return builder.String()
}