From 922e78551284a4762336962d7d34ce39ca5546b6 Mon Sep 17 00:00:00 2001 From: Frank Tang Date: Mon, 21 Dec 2020 21:00:40 +0800 Subject: [PATCH] Add GitHub to whitelist for URL unfurling #11426 --- VERSION | 2 +- go.mod | 1 + go.sum | 2 + protocol/urls/urls.go | 40 ++++- .../keighl/metabolize/.editorconfig | 20 +++ .../github.com/keighl/metabolize/.gitignore | 2 + .../github.com/keighl/metabolize/.travis.yml | 12 ++ .../github.com/keighl/metabolize/CHANGELOG.md | 8 + vendor/github.com/keighl/metabolize/LICENSE | 14 ++ vendor/github.com/keighl/metabolize/README.md | 82 ++++++++++ .../keighl/metabolize/metabolize.go | 153 ++++++++++++++++++ vendor/modules.txt | 2 + 12 files changed, 333 insertions(+), 5 deletions(-) create mode 100644 vendor/github.com/keighl/metabolize/.editorconfig create mode 100644 vendor/github.com/keighl/metabolize/.gitignore create mode 100644 vendor/github.com/keighl/metabolize/.travis.yml create mode 100644 vendor/github.com/keighl/metabolize/CHANGELOG.md create mode 100644 vendor/github.com/keighl/metabolize/LICENSE create mode 100644 vendor/github.com/keighl/metabolize/README.md create mode 100644 vendor/github.com/keighl/metabolize/metabolize.go diff --git a/VERSION b/VERSION index 04530c833..4a46fb5b7 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.67.2 +0.68.0 diff --git a/go.mod b/go.mod index c51c8ea85..71f4b097a 100644 --- a/go.mod +++ b/go.mod @@ -28,6 +28,7 @@ require ( github.com/gorilla/mux v1.7.3 // indirect github.com/jinzhu/copier v0.0.0-20190924061706-b57f9002281a github.com/karalabe/usb v0.0.0-20191104083709-911d15fe12a9 // indirect + github.com/keighl/metabolize v0.0.0-20150915210303-97ab655d4034 github.com/kilic/bls12-381 v0.0.0-20200607163746-32e1441c8a9f github.com/leodido/go-urn v1.2.0 // indirect github.com/lib/pq v1.3.0 diff --git a/go.sum b/go.sum index 2ffef3d5a..977924d36 100644 --- a/go.sum +++ b/go.sum @@ -315,6 +315,8 @@ github.com/karalabe/usb v0.0.0-20190819132248-550797b1cad8/go.mod h1:Od972xHfMJo 
github.com/karalabe/usb v0.0.0-20191104083709-911d15fe12a9 h1:ZHuwnjpP8LsVsUYqTqeVAI+GfDfJ6UNPrExZF+vX/DQ= github.com/karalabe/usb v0.0.0-20191104083709-911d15fe12a9/go.mod h1:Od972xHfMJowv7NGVDiWVxk2zxnWgjLlJzE+F4F7AGU= github.com/kardianos/osext v0.0.0-20190222173326-2bc1f35cddc0/go.mod h1:1NbS8ALrpOvjt0rHPNLyCIeMtbizbir8U//inJ+zuB8= +github.com/keighl/metabolize v0.0.0-20150915210303-97ab655d4034 h1:1ijjWJbbN7za3tZ7eXUO5fVcC9ogGYShQh+zM6YiCYE= +github.com/keighl/metabolize v0.0.0-20150915210303-97ab655d4034/go.mod h1:xxAJtNhpzBtSWAYybYGKfMFYx71aqCyNe/8FraO/1ac= github.com/kilic/bls12-381 v0.0.0-20200607163746-32e1441c8a9f h1:qET3Wx0v8tMtoTOQnsJXVvqvCopSf48qobR6tcJuDHo= github.com/kilic/bls12-381 v0.0.0-20200607163746-32e1441c8a9f/go.mod h1:XXfR6YFCRSrkEXbNlIyDsgXVNJWVUV30m/ebkVy9n6s= github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= diff --git a/protocol/urls/urls.go b/protocol/urls/urls.go index 1c713c4ac..41a577bbc 100644 --- a/protocol/urls/urls.go +++ b/protocol/urls/urls.go @@ -7,6 +7,9 @@ import ( "net/http" "net/url" "strings" + "time" + + "github.com/keighl/metabolize" ) type OembedData struct { @@ -16,9 +19,9 @@ type OembedData struct { } type LinkPreviewData struct { - Site string `json:"site"` - Title string `json:"title"` - ThumbnailURL string `json:"thumbnailUrl"` + Site string `json:"site" meta:"og:site_name"` + Title string `json:"title" meta:"og:title"` + ThumbnailURL string `json:"thumbnailUrl" meta:"og:image"` ContentType string `json:"contentType"` } @@ -30,6 +33,10 @@ type Site struct { const YouTubeOembedLink = "https://www.youtube.com/oembed?format=json&url=%s" +var httpClient = http.Client{ + Timeout: 30 * time.Second, +} + func LinkPreviewWhitelist() []Site { return []Site{ Site{ @@ -52,13 +59,18 @@ func LinkPreviewWhitelist() []Site { Address: "giphy.com", ImageSite: true, }, + Site{ + Title: "GitHub", + Address: "github.com", + ImageSite: false, + }, } } func GetURLContent(url string) (data 
[]byte, err error) { // nolint: gosec - response, err := http.Get(url) + response, err := httpClient.Get(url) if err != nil { return data, fmt.Errorf("Can't get content from link %s", url) } @@ -95,6 +107,27 @@ func GetYoutubePreviewData(link string) (previewData LinkPreviewData, err error) return previewData, nil } +// GetGithubPreviewData fetches link with the shared httpClient and fills +// previewData from the page's Open Graph meta tags (og:site_name, og:title, +// og:image). It returns an error when the request fails or the response body +// cannot be parsed. +func GetGithubPreviewData(link string) (previewData LinkPreviewData, err error) { + // nolint: gosec + res, err := httpClient.Get(link) + if err != nil { + return previewData, fmt.Errorf("Can't get content from link %s", link) + } + // Close the body on every return path so the connection is not leaked. + defer res.Body.Close() + + err = metabolize.Metabolize(res.Body, &previewData) + if err != nil { + return previewData, fmt.Errorf("Can't get meta info from link %s", link) + } + + return previewData, nil +} + func GetLinkPreviewData(link string) (previewData LinkPreviewData, err error) { url, err := url.Parse(link) @@ -109,6 +142,10 @@ func GetLinkPreviewData(link string) (previewData LinkPreviewData, err error) { return GetYoutubePreviewData(link) } } + if "github.com" == hostname { + return GetGithubPreviewData(link) + } + for _, site := range LinkPreviewWhitelist() { if strings.HasSuffix(hostname, site.Address) && site.ImageSite { content, contentErr := GetURLContent(link) diff --git a/vendor/github.com/keighl/metabolize/.editorconfig b/vendor/github.com/keighl/metabolize/.editorconfig new file mode 100644 index 000000000..4cb22db67 --- /dev/null +++ b/vendor/github.com/keighl/metabolize/.editorconfig @@ -0,0 +1,20 @@ +root = true + +; Unix-style newlines with a newline ending every file +[*] +end_of_line = lf +indent_size = 4 +indent_style = space +insert_final_newline = true +trim_trailing_whitespace = true +insert_final_newline = true +charset = utf-8 + +; Golang +[*.go] +indent_style = tab +indent_size = 4 + +; YAML +[*.{yaml,yml}] +indent_size = 2 \ No newline at end of file diff --git a/vendor/github.com/keighl/metabolize/.gitignore b/vendor/github.com/keighl/metabolize/.gitignore new file mode 100644 index 000000000..37a7e92f0 ---
/dev/null +++ b/vendor/github.com/keighl/metabolize/.gitignore @@ -0,0 +1,2 @@ +.DS_Store +*.out diff --git a/vendor/github.com/keighl/metabolize/.travis.yml b/vendor/github.com/keighl/metabolize/.travis.yml new file mode 100644 index 000000000..1bb1da897 --- /dev/null +++ b/vendor/github.com/keighl/metabolize/.travis.yml @@ -0,0 +1,12 @@ +sudo: false +language: go +go: +- 1.4 +- tip + +before_install: + - go get github.com/axw/gocov/gocov + - go get github.com/mattn/goveralls + - if ! go get code.google.com/p/go.tools/cmd/cover; then go get golang.org/x/tools/cmd/cover; fi +script: + - $HOME/gopath/bin/goveralls -service=travis-ci \ No newline at end of file diff --git a/vendor/github.com/keighl/metabolize/CHANGELOG.md b/vendor/github.com/keighl/metabolize/CHANGELOG.md new file mode 100644 index 000000000..dffcc143c --- /dev/null +++ b/vendor/github.com/keighl/metabolize/CHANGELOG.md @@ -0,0 +1,8 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +## 0.1 - 2015-09-08 + +* Initial release + diff --git a/vendor/github.com/keighl/metabolize/LICENSE b/vendor/github.com/keighl/metabolize/LICENSE new file mode 100644 index 000000000..8c9bbec56 --- /dev/null +++ b/vendor/github.com/keighl/metabolize/LICENSE @@ -0,0 +1,14 @@ +Copyright (c) 2015 keighl. +All rights reserved. + +Redistribution and use in source and binary forms are permitted +provided that the above copyright notice and this paragraph are +duplicated in all such forms and that any documentation, +advertising materials, and other materials related to such +distribution and use acknowledge that the software was developed +by keighl. The name of the +keighl may not be used to endorse or promote products derived +from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
\ No newline at end of file diff --git a/vendor/github.com/keighl/metabolize/README.md b/vendor/github.com/keighl/metabolize/README.md new file mode 100644 index 000000000..ab58aed29 --- /dev/null +++ b/vendor/github.com/keighl/metabolize/README.md @@ -0,0 +1,82 @@ +# Metabolize + +[![Build Status](https://travis-ci.org/keighl/metabolize.png?branch=master)](https://travis-ci.org/keighl/metabolize) [![Coverage Status](https://coveralls.io/repos/keighl/metabolize/badge.svg)](https://coveralls.io/r/keighl/metabolize) + +Decodes HTML values into a Golang struct. Great for quickly grabbing [open graph](http://ogp.me/) data. + +### Installation + + go get -u github.com/keighl/metabolize + +### Usage + +Use `meta:"xxx"` tags on your struct to tell metabolize how to decode metadata from an HTML document. + +```go +type MetaData struct { + Title string `meta:"og:title"` + // If no `og:description`, will fall back to `description` + Description string `meta:"og:description,description"` +} +``` + +Example + +```go +package main + +import ( + "fmt" + m "github.com/keighl/metabolize" + "net/http" + "net/url" +) + +type MetaData struct { + Title string `meta:"og:title"` + Description string `meta:"og:description,description"` + Type string `meta:"og:type"` + URL url.URL `meta:"og:url"` + VideoWidth int64 `meta:"og:video:width"` + VideoHeight int64 `meta:"og:video:height"` +} + +func main() { + res, _ := http.Get("https://www.youtube.com/watch?v=FzRH3iTQPrk") + + data := new(MetaData) + + err := m.Metabolize(res.Body, data) + if err != nil { + panic(err) + } + + fmt.Printf("Title: %s\n", data.Title) + fmt.Printf("Description: %s\n", data.Description) + fmt.Printf("Type: %s\n", data.Type) + fmt.Printf("URL: %s\n", data.URL.String()) + fmt.Printf("VideoWidth: %d\n", data.VideoWidth) + fmt.Printf("VideoHeight: %d\n", data.VideoHeight) +} +``` + +Outputs: + +``` +Title: The Sneezing Baby Panda +Description: A Baby Panda Sneezing Original footage taken and being used with kind 
permission of LJM Productions Pty. Ltd.,/Wild Candy Pty. Ltd. Authentic t-shirts http:/... +Type: video +URL: http://www.youtube.com/watch?v=FzRH3iTQPrk +VideoWidth: 480 +VideoHeight: 360 +``` + +### Supported types + +* `string` +* `bool` +* `float64` +* `int64` +* `time.Time` +* `url.URL` + diff --git a/vendor/github.com/keighl/metabolize/metabolize.go b/vendor/github.com/keighl/metabolize/metabolize.go new file mode 100644 index 000000000..6e7186254 --- /dev/null +++ b/vendor/github.com/keighl/metabolize/metabolize.go @@ -0,0 +1,153 @@ +package metabolize + +import ( + "fmt" + "golang.org/x/net/html" + "io" + "net/url" + "reflect" + "strconv" + "strings" + "time" +) + +const ( + TagName = `meta` + htmlRegion = `head` + htmlTag = `meta` +) + +var ( + NotStructError = fmt.Errorf(`Destination is not a struct`) +) + +type MetaData map[string]string + +type Meta struct { + Title string `meta:og:title` + Desc string `meta:og:image` +} + +func Metabolize(doc io.Reader, obj interface{}) error { + data, err := ParseDocument(doc) + if err != nil { + return err + } + return Decode(data, obj) +} + +func Decode(data MetaData, obj interface{}) error { + elem := reflect.ValueOf(obj).Elem() + if elem.Kind() != reflect.Struct { + return NotStructError + } + + for i := 0; i < elem.NumField(); i++ { + field := elem.Type().Field(i) + + fieldValue := elem.FieldByName(field.Name) + if !fieldValue.IsValid() { + continue + } + if !fieldValue.CanSet() { + continue + } + + tag := field.Tag.Get(TagName) + if tag == "" { + continue + } + + tags := strings.Split(tag, ",") + for _, tagItem := range tags { + if data[tagItem] == "" { + continue + } + + if fieldValue.Kind() == reflect.String { + val := string(data[tagItem]) + fieldValue.SetString(val) + } + + if fieldValue.Kind() == reflect.Bool { + val, err := strconv.ParseBool(data[tagItem]) + if err != nil { + continue + } + fieldValue.SetBool(val) + } + + if fieldValue.Kind() == reflect.Float64 { + val, err := 
strconv.ParseFloat(data[tagItem], 64) + if err != nil { + continue + } + fieldValue.SetFloat(val) + } + + if fieldValue.Kind() == reflect.Int64 { + val, err := strconv.ParseInt(data[tagItem], 0, 64) + if err != nil { + continue + } + fieldValue.SetInt(val) + } + + if field.Type.Name() == "URL" { + val, err := url.Parse(data[tagItem]) + if err != nil { + continue + } + fieldValue.Set(reflect.ValueOf(*val)) + } + + if field.Type.Name() == "Time" { + val, err := time.Parse(time.RFC3339, data[tagItem]) + if err != nil { + continue + } + fieldValue.Set(reflect.ValueOf(val)) + } + } + } + + return nil +} + +func ParseDocument(doc io.Reader) (MetaData, error) { + data := MetaData{} + tokenizer := html.NewTokenizer(doc) + for { + tt := tokenizer.Next() + if tt == html.ErrorToken { + if tokenizer.Err() == io.EOF { + return data, nil + } + return nil, tokenizer.Err() + } + + token := tokenizer.Token() + + if token.Type == html.EndTagToken && token.Data == htmlRegion { + return data, nil + } + + if token.Data == htmlTag { + var property, content string + for _, attr := range token.Attr { + switch attr.Key { + case "property", "name": + property = strings.ToLower(attr.Val) + case "content": + content = attr.Val + } + } + + if property != "" { + data[strings.TrimSpace(property)] = content + } + + } + } + return data, nil +} diff --git a/vendor/modules.txt b/vendor/modules.txt index 8468f1027..479f8589e 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -191,6 +191,8 @@ github.com/jbenet/goprocess/periodic github.com/jinzhu/copier # github.com/karalabe/usb v0.0.0-20191104083709-911d15fe12a9 github.com/karalabe/usb +# github.com/keighl/metabolize v0.0.0-20150915210303-97ab655d4034 +github.com/keighl/metabolize # github.com/kilic/bls12-381 v0.0.0-20200607163746-32e1441c8a9f github.com/kilic/bls12-381 # github.com/koron/go-ssdp v0.0.0-20191105050749-2e1c40ed0b5d