Add GitHub to whitelist for URL unfurling #11426

This commit is contained in:
Frank Tang 2020-12-21 21:00:40 +08:00 committed by Andrea Maria Piana
parent e1465ca890
commit 922e785512
12 changed files with 333 additions and 5 deletions

View File

@ -1 +1 @@
0.67.2
0.68.0

1
go.mod
View File

@ -28,6 +28,7 @@ require (
github.com/gorilla/mux v1.7.3 // indirect
github.com/jinzhu/copier v0.0.0-20190924061706-b57f9002281a
github.com/karalabe/usb v0.0.0-20191104083709-911d15fe12a9 // indirect
github.com/keighl/metabolize v0.0.0-20150915210303-97ab655d4034
github.com/kilic/bls12-381 v0.0.0-20200607163746-32e1441c8a9f
github.com/leodido/go-urn v1.2.0 // indirect
github.com/lib/pq v1.3.0

2
go.sum
View File

@ -315,6 +315,8 @@ github.com/karalabe/usb v0.0.0-20190819132248-550797b1cad8/go.mod h1:Od972xHfMJo
github.com/karalabe/usb v0.0.0-20191104083709-911d15fe12a9 h1:ZHuwnjpP8LsVsUYqTqeVAI+GfDfJ6UNPrExZF+vX/DQ=
github.com/karalabe/usb v0.0.0-20191104083709-911d15fe12a9/go.mod h1:Od972xHfMJowv7NGVDiWVxk2zxnWgjLlJzE+F4F7AGU=
github.com/kardianos/osext v0.0.0-20190222173326-2bc1f35cddc0/go.mod h1:1NbS8ALrpOvjt0rHPNLyCIeMtbizbir8U//inJ+zuB8=
github.com/keighl/metabolize v0.0.0-20150915210303-97ab655d4034 h1:1ijjWJbbN7za3tZ7eXUO5fVcC9ogGYShQh+zM6YiCYE=
github.com/keighl/metabolize v0.0.0-20150915210303-97ab655d4034/go.mod h1:xxAJtNhpzBtSWAYybYGKfMFYx71aqCyNe/8FraO/1ac=
github.com/kilic/bls12-381 v0.0.0-20200607163746-32e1441c8a9f h1:qET3Wx0v8tMtoTOQnsJXVvqvCopSf48qobR6tcJuDHo=
github.com/kilic/bls12-381 v0.0.0-20200607163746-32e1441c8a9f/go.mod h1:XXfR6YFCRSrkEXbNlIyDsgXVNJWVUV30m/ebkVy9n6s=
github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q=

View File

@ -7,6 +7,9 @@ import (
"net/http"
"net/url"
"strings"
"time"
"github.com/keighl/metabolize"
)
type OembedData struct {
@ -16,9 +19,9 @@ type OembedData struct {
}
type LinkPreviewData struct {
Site string `json:"site"`
Title string `json:"title"`
ThumbnailURL string `json:"thumbnailUrl"`
Site string `json:"site" meta:"og:site_name"`
Title string `json:"title" meta:"og:title"`
ThumbnailURL string `json:"thumbnailUrl" meta:"og:image"`
ContentType string `json:"contentType"`
}
@ -30,6 +33,10 @@ type Site struct {
const YouTubeOembedLink = "https://www.youtube.com/oembed?format=json&url=%s"
// httpClient is the HTTP client used for link preview requests in this file.
// Its 30 second timeout bounds each request made through it, so an
// unresponsive remote host cannot block a preview lookup indefinitely.
var httpClient = http.Client{
Timeout: 30 * time.Second,
}
func LinkPreviewWhitelist() []Site {
return []Site{
Site{
@ -52,13 +59,18 @@ func LinkPreviewWhitelist() []Site {
Address: "giphy.com",
ImageSite: true,
},
Site{
Title: "GitHub",
Address: "github.com",
ImageSite: false,
},
}
}
func GetURLContent(url string) (data []byte, err error) {
// nolint: gosec
response, err := http.Get(url)
response, err := httpClient.Get(url)
if err != nil {
return data, fmt.Errorf("Can't get content from link %s", url)
}
@ -95,6 +107,22 @@ func GetYoutubePreviewData(link string) (previewData LinkPreviewData, err error)
return previewData, nil
}
// GetGithubPreviewData fetches link through the package HTTP client and fills
// a LinkPreviewData from the <meta> tags of the returned document, using the
// `meta` struct tags declared on LinkPreviewData (og:site_name, og:title,
// og:image).
//
// It returns an error when the request fails or when the document's meta
// information cannot be decoded. The returned previewData may be partially
// populated (or empty) when err is non-nil and should not be used then.
func GetGithubPreviewData(link string) (previewData LinkPreviewData, err error) {
	// nolint: gosec
	res, err := httpClient.Get(link)
	if err != nil {
		return previewData, fmt.Errorf("Can't get content from link %s", link)
	}
	// The body must be closed on every path, otherwise the underlying
	// connection is leaked and cannot be reused by the transport.
	defer res.Body.Close()

	err = metabolize.Metabolize(res.Body, &previewData)
	if err != nil {
		return previewData, fmt.Errorf("Can't get meta info from link %s", link)
	}

	return previewData, nil
}
func GetLinkPreviewData(link string) (previewData LinkPreviewData, err error) {
url, err := url.Parse(link)
@ -109,6 +137,10 @@ func GetLinkPreviewData(link string) (previewData LinkPreviewData, err error) {
return GetYoutubePreviewData(link)
}
}
if "github.com" == hostname {
return GetGithubPreviewData(link)
}
for _, site := range LinkPreviewWhitelist() {
if strings.HasSuffix(hostname, site.Address) && site.ImageSite {
content, contentErr := GetURLContent(link)

20
vendor/github.com/keighl/metabolize/.editorconfig generated vendored Normal file
View File

@ -0,0 +1,20 @@
root = true
; Unix-style newlines with a newline ending every file
[*]
end_of_line = lf
indent_size = 4
indent_style = space
insert_final_newline = true
trim_trailing_whitespace = true
insert_final_newline = true
charset = utf-8
; Golang
[*.go]
indent_style = tab
indent_size = 4
; YAML
[*.{yaml,yml}]
indent_size = 2

2
vendor/github.com/keighl/metabolize/.gitignore generated vendored Normal file
View File

@ -0,0 +1,2 @@
.DS_Store
*.out

12
vendor/github.com/keighl/metabolize/.travis.yml generated vendored Normal file
View File

@ -0,0 +1,12 @@
sudo: false
language: go
go:
- 1.4
- tip
before_install:
- go get github.com/axw/gocov/gocov
- go get github.com/mattn/goveralls
- if ! go get code.google.com/p/go.tools/cmd/cover; then go get golang.org/x/tools/cmd/cover; fi
script:
- $HOME/gopath/bin/goveralls -service=travis-ci

8
vendor/github.com/keighl/metabolize/CHANGELOG.md generated vendored Normal file
View File

@ -0,0 +1,8 @@
# Changelog
All notable changes to this project will be documented in this file.
## 0.1 - 2015-09-08
* Initial release

14
vendor/github.com/keighl/metabolize/LICENSE generated vendored Normal file
View File

@ -0,0 +1,14 @@
Copyright (c) 2015 keighl.
All rights reserved.
Redistribution and use in source and binary forms are permitted
provided that the above copyright notice and this paragraph are
duplicated in all such forms and that any documentation,
advertising materials, and other materials related to such
distribution and use acknowledge that the software was developed
by keighl. The name of the
keighl may not be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.

82
vendor/github.com/keighl/metabolize/README.md generated vendored Normal file
View File

@ -0,0 +1,82 @@
# Metabolize
[![Build Status](https://travis-ci.org/keighl/metabolize.png?branch=master)](https://travis-ci.org/keighl/metabolize) [![Coverage Status](https://coveralls.io/repos/keighl/metabolize/badge.svg)](https://coveralls.io/r/keighl/metabolize)
Decodes HTML <meta> values into a Golang struct. Great for quickly grabbing [open graph](http://ogp.me/) data.
### Installation
go get -u github.com/keighl/metabolize
### Usage
Use `meta:"xxx"` tags on your struct to tell metabolize how to decode metadata from an HTML document.
```go
type MetaData struct {
Title string `meta:"og:title"`
// If no `og:description`, will fall back to `description`
Description string `meta:"og:description,description"`
}
```
Example
```go
package main
import (
"fmt"
m "github.com/keighl/metabolize"
"net/http"
"net/url"
)
type MetaData struct {
Title string `meta:"og:title"`
Description string `meta:"og:description,description"`
Type string `meta:"og:type"`
URL url.URL `meta:"og:url"`
VideoWidth int64 `meta:"og:video:width"`
VideoHeight int64 `meta:"og:video:height"`
}
func main() {
res, _ := http.Get("https://www.youtube.com/watch?v=FzRH3iTQPrk")
data := new(MetaData)
err := m.Metabolize(res.Body, data)
if err != nil {
panic(err)
}
fmt.Printf("Title: %s\n", data.Title)
fmt.Printf("Description: %s\n", data.Description)
fmt.Printf("Type: %s\n", data.Type)
fmt.Printf("URL: %s\n", data.URL.String())
fmt.Printf("VideoWidth: %d\n", data.VideoWidth)
fmt.Printf("VideoHeight: %d\n", data.VideoHeight)
}
```
Outputs:
```
Title: The Sneezing Baby Panda
Description: A Baby Panda Sneezing Original footage taken and being used with kind permission of LJM Productions Pty. Ltd.,/Wild Candy Pty. Ltd. Authentic t-shirts http:/...
Type: video
URL: http://www.youtube.com/watch?v=FzRH3iTQPrk
VideoWidth: 480
VideoHeight: 360
```
### Supported types
* `string`
* `bool`
* `float64`
* `int64`
* `time.Time`
* `url.URL`

153
vendor/github.com/keighl/metabolize/metabolize.go generated vendored Normal file
View File

@ -0,0 +1,153 @@
package metabolize
import (
"fmt"
"golang.org/x/net/html"
"io"
"net/url"
"reflect"
"strconv"
"strings"
"time"
)
// TagName is the struct tag key that Decode reads to learn which <meta>
// names feed a given struct field.
const TagName = `meta`

// htmlRegion is the element whose closing tag makes ParseDocument stop
// scanning the document.
const htmlRegion = `head`

// htmlTag is the element name whose attributes ParseDocument collects.
const htmlTag = `meta`
// NotStructError is the error Decode returns when the destination it was
// given is not a struct.
var NotStructError = fmt.Errorf("Destination is not a struct")
// MetaData maps a <meta> element's lower-cased, whitespace-trimmed
// `property` or `name` attribute to the value of its `content` attribute,
// as collected by ParseDocument.
type MetaData map[string]string
// Meta is a small example destination struct for Decode. Nothing else in
// this file references it.
//
// The tags use the conventional `key:"value"` form; without the quotes
// reflect.StructTag.Get cannot find the value and Decode would skip both
// fields.
type Meta struct {
	Title string `meta:"og:title"`
	// NOTE(review): Desc is fed from og:image, which looks unintentional for
	// a field named Desc — confirm the intended source before relying on it.
	Desc string `meta:"og:image"`
}
// Metabolize reads an HTML document from doc, collects its <meta> values
// with ParseDocument and hands them to Decode to populate obj.
//
// A parsing error is returned as-is and Decode is not attempted; otherwise
// the result of Decode is returned.
func Metabolize(doc io.Reader, obj interface{}) error {
	meta, parseErr := ParseDocument(doc)
	if parseErr == nil {
		return Decode(meta, obj)
	}
	return parseErr
}
// Decode copies values from data into the struct that obj points to.
//
// For every settable field carrying a `meta:"a,b,..."` tag, the listed names
// are tried in order and the first one that has a non-empty value in data
// AND converts to the field's type is used; later names are only fallbacks.
// Fields without a tag, unexported fields, and fields of unsupported types
// are left untouched. Values that fail to convert are skipped silently.
//
// Supported field types: string, bool, float64, int64, url.URL and time.Time
// (RFC 3339).
//
// Decode returns NotStructError when obj is nil, is not a pointer, or does
// not point to a struct.
func Decode(data MetaData, obj interface{}) error {
	ptr := reflect.ValueOf(obj)
	// Value.Elem panics on anything that is not a pointer or interface, so
	// reject nil and non-pointer destinations up front.
	if ptr.Kind() != reflect.Ptr || ptr.IsNil() {
		return NotStructError
	}

	elem := ptr.Elem()
	if elem.Kind() != reflect.Struct {
		return NotStructError
	}

	structType := elem.Type()
	for i := 0; i < elem.NumField(); i++ {
		fieldValue := elem.Field(i)
		if !fieldValue.CanSet() {
			continue
		}

		tag := structType.Field(i).Tag.Get(TagName)
		if tag == "" {
			continue
		}

		for _, tagItem := range strings.Split(tag, ",") {
			raw := data[tagItem]
			if raw == "" {
				continue
			}
			// Stop at the first name that yields a usable value so that
			// earlier names take priority over their fallbacks.
			if decodeField(fieldValue, raw) {
				break
			}
		}
	}

	return nil
}

// decodeField converts raw to the type of field and stores it, reporting
// whether a value was stored. field must be settable.
func decodeField(field reflect.Value, raw string) bool {
	// Match url.URL and time.Time by their actual type rather than by type
	// name, so an unrelated type that happens to be called URL or Time can
	// never reach a mismatched Set.
	switch field.Interface().(type) {
	case url.URL:
		parsed, err := url.Parse(raw)
		if err != nil {
			return false
		}
		field.Set(reflect.ValueOf(*parsed))
		return true
	case time.Time:
		parsed, err := time.Parse(time.RFC3339, raw)
		if err != nil {
			return false
		}
		field.Set(reflect.ValueOf(parsed))
		return true
	}

	switch field.Kind() {
	case reflect.String:
		field.SetString(raw)
		return true
	case reflect.Bool:
		val, err := strconv.ParseBool(raw)
		if err != nil {
			return false
		}
		field.SetBool(val)
		return true
	case reflect.Float64:
		val, err := strconv.ParseFloat(raw, 64)
		if err != nil {
			return false
		}
		field.SetFloat(val)
		return true
	case reflect.Int64:
		// Base 0 lets the value carry its own prefix (0x, 0o, 0b).
		val, err := strconv.ParseInt(raw, 0, 64)
		if err != nil {
			return false
		}
		field.SetInt(val)
		return true
	}

	return false
}
// ParseDocument tokenizes the HTML read from doc and collects its <meta>
// elements into a MetaData map.
//
// The key is the element's `property` or `name` attribute, lower-cased and
// trimmed of surrounding whitespace; the value is its `content` attribute.
// Elements with no (or a blank) property/name are ignored. When a key occurs
// more than once, the last occurrence wins.
//
// Scanning stops at the closing </head> tag or at end of input, whichever
// comes first. Any tokenizer error other than io.EOF is returned with a nil
// map.
func ParseDocument(doc io.Reader) (MetaData, error) {
	data := MetaData{}

	tokenizer := html.NewTokenizer(doc)
	for {
		if tokenizer.Next() == html.ErrorToken {
			if err := tokenizer.Err(); err != io.EOF {
				return nil, err
			}
			return data, nil
		}

		token := tokenizer.Token()
		switch token.Type {
		case html.EndTagToken:
			if token.Data == htmlRegion {
				return data, nil
			}
		case html.StartTagToken, html.SelfClosingTagToken:
			// Only opening/self-closing tags carry attributes, so other
			// token kinds never contribute an entry.
			if token.Data != htmlTag {
				continue
			}

			var property, content string
			for _, attr := range token.Attr {
				switch attr.Key {
				case "property", "name":
					property = strings.TrimSpace(strings.ToLower(attr.Val))
				case "content":
					content = attr.Val
				}
			}

			// Checked after trimming so a whitespace-only name is not stored
			// under the empty key.
			if property != "" {
				data[property] = content
			}
		}
	}
}

2
vendor/modules.txt vendored
View File

@ -191,6 +191,8 @@ github.com/jbenet/goprocess/periodic
github.com/jinzhu/copier
# github.com/karalabe/usb v0.0.0-20191104083709-911d15fe12a9
github.com/karalabe/usb
# github.com/keighl/metabolize v0.0.0-20150915210303-97ab655d4034
github.com/keighl/metabolize
# github.com/kilic/bls12-381 v0.0.0-20200607163746-32e1441c8a9f
github.com/kilic/bls12-381
# github.com/koron/go-ssdp v0.0.0-20191105050749-2e1c40ed0b5d