2020-10-27 17:35:28 +00:00
|
|
|
package urls
|
|
|
|
|
|
|
|
import (
|
|
|
|
"encoding/json"
|
|
|
|
"fmt"
|
2021-08-03 21:30:23 +00:00
|
|
|
"html"
|
2020-10-27 17:35:28 +00:00
|
|
|
"io/ioutil"
|
|
|
|
"net/http"
|
|
|
|
"net/url"
|
2020-12-18 08:33:24 +00:00
|
|
|
"strings"
|
2020-12-21 13:00:40 +00:00
|
|
|
"time"
|
|
|
|
|
|
|
|
"github.com/keighl/metabolize"
|
2020-10-27 17:35:28 +00:00
|
|
|
)
|
|
|
|
|
2021-01-19 15:53:27 +00:00
|
|
|
// YoutubeOembedData is the subset of a YouTube oEmbed JSON response that
// GetYoutubePreviewData reads: the provider name, the video title and the
// thumbnail address.
type YoutubeOembedData struct {
	ProviderName string `json:"provider_name"`
	Title        string `json:"title"`
	ThumbnailURL string `json:"thumbnail_url"`
}
|
|
|
|
|
2021-08-03 21:30:23 +00:00
|
|
|
// TwitterOembedData is the subset of a Twitter oEmbed JSON response decoded by
// GetTwitterPreviewData. HTML carries the embeddable markup from which the
// preview title is derived.
type TwitterOembedData struct {
	ProviderName string `json:"provider_name"`
	AuthorName   string `json:"author_name"`
	HTML         string `json:"html"`
}
|
|
|
|
|
2021-01-19 15:53:27 +00:00
|
|
|
// GiphyOembedData is the subset of a Giphy oEmbed JSON response decoded by
// GetGiphyPreviewData: provider name, title, media URL and its dimensions.
type GiphyOembedData struct {
	ProviderName string `json:"provider_name"`
	Title        string `json:"title"`
	URL          string `json:"url"`
	Height       int    `json:"height"`
	Width        int    `json:"width"`
}
|
|
|
|
|
2020-10-27 17:35:28 +00:00
|
|
|
// LinkPreviewData is the preview record returned by every Get*PreviewData
// function in this file. The `meta` tags name the Open Graph properties that
// fill Site, Title and ThumbnailURL when the struct is handed to
// metabolize.Metabolize; the remaining fields are set explicitly by callers.
type LinkPreviewData struct {
	Site         string `json:"site" meta:"og:site_name"`
	Title        string `json:"title" meta:"og:title"`
	ThumbnailURL string `json:"thumbnailUrl" meta:"og:image"`
	ContentType  string `json:"contentType"`
	Height       int    `json:"height"`
	Width        int    `json:"width"`
}
|
|
|
|
|
|
|
|
// Site is one entry of the list returned by LinkPreviewWhitelist: a display
// title, the host address, and a flag marking hosts listed as image sites.
type Site struct {
	Title     string `json:"title"`
	Address   string `json:"address"`
	ImageSite bool   `json:"imageSite"`
}
|
|
|
|
|
2022-08-16 10:44:46 +00:00
|
|
|
// oEmbed endpoint templates. Each one contains a single %s verb that GetOembed
// fills with the link being looked up.
const (
	// YoutubeOembedLink is the YouTube oEmbed endpoint (JSON format requested explicitly).
	YoutubeOembedLink = "https://www.youtube.com/oembed?format=json&url=%s"
	// TwitterOembedLink is the Twitter oEmbed endpoint.
	TwitterOembedLink = "https://publish.twitter.com/oembed?url=%s"
	// GiphyOembedLink is the Giphy oEmbed endpoint.
	GiphyOembedLink = "https://giphy.com/services/oembed?url=%s"
)
|
2021-01-20 11:14:24 +00:00
|
|
|
|
2022-08-16 10:44:46 +00:00
|
|
|
// httpClient is the client used for every outgoing request made by the helpers
// in this file. Its overall per-request timeout is 30 seconds.
var httpClient = http.Client{Timeout: time.Second * 30}
|
2020-12-21 13:00:40 +00:00
|
|
|
|
2020-10-27 17:35:28 +00:00
|
|
|
func LinkPreviewWhitelist() []Site {
|
|
|
|
return []Site{
|
2022-08-16 10:44:46 +00:00
|
|
|
{
|
2021-02-02 07:38:14 +00:00
|
|
|
Title: "Status",
|
|
|
|
Address: "our.status.im",
|
|
|
|
ImageSite: false,
|
|
|
|
},
|
2022-08-16 10:44:46 +00:00
|
|
|
{
|
2020-12-18 08:33:24 +00:00
|
|
|
Title: "YouTube",
|
|
|
|
Address: "youtube.com",
|
|
|
|
ImageSite: false,
|
2020-10-27 17:35:28 +00:00
|
|
|
},
|
2023-01-30 11:39:02 +00:00
|
|
|
{
|
|
|
|
Title: "YouTube with subdomain",
|
|
|
|
Address: "www.youtube.com",
|
|
|
|
ImageSite: false,
|
|
|
|
},
|
2022-08-16 10:44:46 +00:00
|
|
|
{
|
2020-12-18 08:33:24 +00:00
|
|
|
Title: "YouTube shortener",
|
|
|
|
Address: "youtu.be",
|
|
|
|
ImageSite: false,
|
|
|
|
},
|
2022-08-16 10:44:46 +00:00
|
|
|
{
|
2021-08-03 21:30:23 +00:00
|
|
|
Title: "Twitter",
|
|
|
|
Address: "twitter.com",
|
|
|
|
ImageSite: false,
|
|
|
|
},
|
2022-08-16 10:44:46 +00:00
|
|
|
{
|
2021-02-03 13:58:12 +00:00
|
|
|
Title: "GIPHY GIFs shortener",
|
|
|
|
Address: "gph.is",
|
2021-02-03 13:11:53 +00:00
|
|
|
ImageSite: true,
|
|
|
|
},
|
2022-08-16 10:44:46 +00:00
|
|
|
{
|
2020-12-18 08:33:24 +00:00
|
|
|
Title: "GIPHY GIFs",
|
|
|
|
Address: "giphy.com",
|
|
|
|
ImageSite: true,
|
2020-10-27 17:35:28 +00:00
|
|
|
},
|
2022-08-16 10:44:46 +00:00
|
|
|
{
|
2021-02-03 11:27:04 +00:00
|
|
|
Title: "GIPHY GIFs subdomain",
|
|
|
|
Address: "media.giphy.com",
|
|
|
|
ImageSite: true,
|
|
|
|
},
|
2022-08-16 10:44:46 +00:00
|
|
|
{
|
2020-12-21 13:00:40 +00:00
|
|
|
Title: "GitHub",
|
|
|
|
Address: "github.com",
|
|
|
|
ImageSite: false,
|
|
|
|
},
|
2022-09-09 13:40:18 +00:00
|
|
|
{
|
2022-09-08 14:50:17 +00:00
|
|
|
Title: "Tenor GIFs subdomain",
|
2022-08-31 17:16:06 +00:00
|
|
|
Address: "media.tenor.com",
|
|
|
|
ImageSite: false,
|
|
|
|
},
|
2021-04-12 09:19:34 +00:00
|
|
|
// Medium unfurling is failing - https://github.com/status-im/status-go/issues/2192
|
|
|
|
//
|
2022-08-16 10:44:46 +00:00
|
|
|
// {
|
2021-04-12 09:19:34 +00:00
|
|
|
// Title: "Medium",
|
|
|
|
// Address: "medium.com",
|
|
|
|
// ImageSite: false,
|
|
|
|
// },
|
2020-10-27 17:35:28 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-08-16 10:44:46 +00:00
|
|
|
func getURLContent(url string) (data []byte, err error) {
|
2020-12-21 13:00:40 +00:00
|
|
|
response, err := httpClient.Get(url)
|
2020-10-27 17:35:28 +00:00
|
|
|
if err != nil {
|
2021-01-28 09:27:35 +00:00
|
|
|
return data, fmt.Errorf("can't get content from link %s", url)
|
2020-10-27 17:35:28 +00:00
|
|
|
}
|
|
|
|
defer response.Body.Close()
|
|
|
|
return ioutil.ReadAll(response.Body)
|
|
|
|
}
|
|
|
|
|
2022-08-16 10:44:46 +00:00
|
|
|
func GetOembed(name, endpoint, url string, data interface{}) error {
|
|
|
|
oembedLink := fmt.Sprintf(endpoint, url)
|
2020-10-27 17:35:28 +00:00
|
|
|
|
2022-08-16 10:44:46 +00:00
|
|
|
jsonBytes, err := getURLContent(oembedLink)
|
2020-10-27 17:35:28 +00:00
|
|
|
if err != nil {
|
2022-08-16 10:44:46 +00:00
|
|
|
return fmt.Errorf("can't get bytes from %s oembed response on %s link", name, oembedLink)
|
2020-10-27 17:35:28 +00:00
|
|
|
}
|
|
|
|
|
2022-08-16 10:44:46 +00:00
|
|
|
return json.Unmarshal(jsonBytes, &data)
|
2020-10-27 17:35:28 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func GetYoutubePreviewData(link string) (previewData LinkPreviewData, err error) {
|
2022-08-16 10:44:46 +00:00
|
|
|
oembedData := new(YoutubeOembedData)
|
|
|
|
err = GetOembed("Youtube", YoutubeOembedLink, link, &oembedData)
|
2020-10-27 17:35:28 +00:00
|
|
|
if err != nil {
|
2022-08-16 10:44:46 +00:00
|
|
|
return
|
2020-10-27 17:35:28 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
previewData.Title = oembedData.Title
|
|
|
|
previewData.Site = oembedData.ProviderName
|
|
|
|
previewData.ThumbnailURL = oembedData.ThumbnailURL
|
2022-08-16 10:44:46 +00:00
|
|
|
return
|
2021-08-03 21:30:23 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func GetTwitterPreviewData(link string) (previewData LinkPreviewData, err error) {
|
2022-08-16 10:44:46 +00:00
|
|
|
oembedData := new(TwitterOembedData)
|
|
|
|
err = GetOembed("Twitter", TwitterOembedLink, link, oembedData)
|
2021-08-03 21:30:23 +00:00
|
|
|
if err != nil {
|
|
|
|
return previewData, err
|
|
|
|
}
|
|
|
|
|
2022-08-16 10:44:46 +00:00
|
|
|
previewData.Title = getReadableTextFromTweetHTML(oembedData.HTML)
|
2021-08-03 21:30:23 +00:00
|
|
|
previewData.Site = oembedData.ProviderName
|
|
|
|
|
|
|
|
return previewData, nil
|
|
|
|
}
|
|
|
|
|
2022-08-16 10:44:46 +00:00
|
|
|
func getReadableTextFromTweetHTML(s string) string {
|
2021-08-03 21:30:23 +00:00
|
|
|
s = strings.ReplaceAll(s, "\u003Cbr\u003E", "\n") // Adds line break for all <br>
|
|
|
|
s = strings.ReplaceAll(s, "https://", "\nhttps://") // Displays links in next line
|
|
|
|
s = html.UnescapeString(s) // Parses html special characters like á
|
|
|
|
s = stripHTMLTags(s)
|
|
|
|
s = strings.TrimSpace(s)
|
|
|
|
s = strings.TrimRight(s, "\n")
|
|
|
|
s = strings.TrimLeft(s, "\n")
|
|
|
|
|
|
|
|
return s
|
|
|
|
}
|
|
|
|
|
2021-02-02 07:38:14 +00:00
|
|
|
func GetGenericLinkPreviewData(link string) (previewData LinkPreviewData, err error) {
|
2020-12-21 13:00:40 +00:00
|
|
|
res, err := httpClient.Get(link)
|
|
|
|
if err != nil {
|
2021-01-28 09:27:35 +00:00
|
|
|
return previewData, fmt.Errorf("can't get content from link %s", link)
|
2020-12-21 13:00:40 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
err = metabolize.Metabolize(res.Body, &previewData)
|
|
|
|
if err != nil {
|
2021-01-28 09:27:35 +00:00
|
|
|
return previewData, fmt.Errorf("can't get meta info from link %s", link)
|
2020-12-21 13:00:40 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return previewData, nil
|
|
|
|
}
|
|
|
|
|
2022-09-08 14:50:17 +00:00
|
|
|
func FakeGenericImageLinkPreviewData(title string, link string) (previewData LinkPreviewData, err error) {
|
2023-02-08 13:23:55 +00:00
|
|
|
url, err := url.Parse(link)
|
|
|
|
if err != nil {
|
|
|
|
return previewData, fmt.Errorf("Failed to parse link %s", link)
|
|
|
|
}
|
|
|
|
|
|
|
|
res, err := httpClient.Head(link)
|
|
|
|
if err != nil {
|
|
|
|
return previewData, fmt.Errorf("Failed to get HEAD from link %s", link)
|
|
|
|
}
|
|
|
|
|
|
|
|
if res.StatusCode != 200 {
|
|
|
|
return previewData, fmt.Errorf("Image link %s is not available", link)
|
|
|
|
}
|
|
|
|
|
2022-08-31 17:16:06 +00:00
|
|
|
previewData.Title = title
|
2023-02-08 13:23:55 +00:00
|
|
|
previewData.Site = strings.ToLower(url.Hostname())
|
|
|
|
previewData.ContentType = res.Header.Get("Content-type")
|
2022-08-31 17:16:06 +00:00
|
|
|
previewData.ThumbnailURL = link
|
|
|
|
previewData.Height = 0
|
|
|
|
previewData.Width = 0
|
|
|
|
return previewData, nil
|
|
|
|
}
|
|
|
|
|
2021-01-19 15:53:27 +00:00
|
|
|
func GetGiphyPreviewData(link string) (previewData LinkPreviewData, err error) {
|
2022-08-16 10:44:46 +00:00
|
|
|
oembedData := new(GiphyOembedData)
|
|
|
|
err = GetOembed("Giphy", GiphyOembedLink, link, oembedData)
|
2021-01-19 15:53:27 +00:00
|
|
|
if err != nil {
|
|
|
|
return previewData, err
|
|
|
|
}
|
|
|
|
|
|
|
|
previewData.Title = oembedData.Title
|
|
|
|
previewData.Site = oembedData.ProviderName
|
|
|
|
previewData.ThumbnailURL = oembedData.URL
|
2021-02-04 11:20:10 +00:00
|
|
|
previewData.Height = oembedData.Height
|
|
|
|
previewData.Width = oembedData.Width
|
2021-01-19 15:53:27 +00:00
|
|
|
|
|
|
|
return previewData, nil
|
|
|
|
}
|
|
|
|
|
2022-09-09 13:40:18 +00:00
|
|
|
// GetGiphyLongURL Giphy has a shortener service called gph.is, the oembed service doesn't work with shortened urls,
|
2021-02-03 13:11:53 +00:00
|
|
|
// so we need to fetch the long url first
|
|
|
|
func GetGiphyLongURL(shortURL string) (longURL string, err error) {
|
2022-09-09 13:40:18 +00:00
|
|
|
res, err := httpClient.Get(shortURL)
|
2021-02-03 13:11:53 +00:00
|
|
|
if err != nil {
|
|
|
|
return longURL, fmt.Errorf("can't get bytes from Giphy's short url at %s", shortURL)
|
|
|
|
}
|
|
|
|
|
|
|
|
canonicalURL := res.Request.URL.String()
|
2021-02-03 13:25:42 +00:00
|
|
|
if canonicalURL == shortURL {
|
2021-02-03 13:11:53 +00:00
|
|
|
// no redirect, ie. not a valid url
|
|
|
|
return longURL, fmt.Errorf("unable to process Giphy's short url at %s", shortURL)
|
|
|
|
}
|
2021-02-03 13:25:42 +00:00
|
|
|
|
|
|
|
return canonicalURL, err
|
2021-02-03 13:11:53 +00:00
|
|
|
}
|
|
|
|
|
2021-02-03 13:25:42 +00:00
|
|
|
func GetGiphyShortURLPreviewData(shortURL string) (data LinkPreviewData, err error) {
|
2021-02-03 13:11:53 +00:00
|
|
|
longURL, err := GetGiphyLongURL(shortURL)
|
|
|
|
if err != nil {
|
|
|
|
return data, err
|
|
|
|
}
|
2021-02-03 13:25:42 +00:00
|
|
|
|
|
|
|
return GetGiphyPreviewData(longURL)
|
2021-02-03 13:11:53 +00:00
|
|
|
}
|
|
|
|
|
2021-01-20 11:14:24 +00:00
|
|
|
func GetLinkPreviewData(link string) (previewData LinkPreviewData, err error) {
|
2022-08-16 10:44:46 +00:00
|
|
|
u, err := url.Parse(link)
|
2020-10-27 17:35:28 +00:00
|
|
|
if err != nil {
|
2021-01-28 09:27:35 +00:00
|
|
|
return previewData, fmt.Errorf("cant't parse link %s", link)
|
2020-10-27 17:35:28 +00:00
|
|
|
}
|
|
|
|
|
2022-08-16 10:44:46 +00:00
|
|
|
hostname := strings.ToLower(u.Hostname())
|
2021-01-20 12:02:26 +00:00
|
|
|
|
|
|
|
switch hostname {
|
|
|
|
case "youtube.com", "youtu.be", "www.youtube.com":
|
|
|
|
return GetYoutubePreviewData(link)
|
2021-04-12 09:19:34 +00:00
|
|
|
case "github.com", "our.status.im":
|
2021-02-02 07:38:14 +00:00
|
|
|
return GetGenericLinkPreviewData(link)
|
2021-02-04 11:20:10 +00:00
|
|
|
case "giphy.com", "media.giphy.com":
|
2021-01-20 11:14:24 +00:00
|
|
|
return GetGiphyPreviewData(link)
|
2021-02-03 13:58:12 +00:00
|
|
|
case "gph.is":
|
2021-02-03 13:11:53 +00:00
|
|
|
return GetGiphyShortURLPreviewData(link)
|
2021-08-03 21:30:23 +00:00
|
|
|
case "twitter.com":
|
|
|
|
return GetTwitterPreviewData(link)
|
2022-09-08 14:50:17 +00:00
|
|
|
case "media.tenor.com":
|
|
|
|
return FakeGenericImageLinkPreviewData("Tenor", link)
|
2021-01-20 12:02:26 +00:00
|
|
|
default:
|
2022-08-16 10:44:46 +00:00
|
|
|
return previewData, fmt.Errorf("link %s isn't whitelisted. Hostname - %s", link, u.Hostname())
|
2021-01-20 11:14:24 +00:00
|
|
|
}
|
2020-10-27 17:35:28 +00:00
|
|
|
}
|