Wim 04567c765e
Add support for markdown to HTML conversion (matrix). Closes #663 (#670)
This uses our own gomatrix lib with the SendHTML function which
adds HTML to formatted_body in matrix.
golang-commonmark is used to convert markdown into valid HTML.
2019-01-06 22:25:19 +01:00

265 lines
4.3 KiB
Go

// Copyright 2015 The Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package markdown
import (
"bytes"
"regexp"
"strings"
"unicode"
"gitlab.com/golang-commonmark/html"
"gitlab.com/golang-commonmark/mdurl"
"gitlab.com/golang-commonmark/puny"
)
func runeIsSpace(r rune) bool {
return r == ' ' || r == '\t'
}
func byteIsSpace(b byte) bool {
return b == ' ' || b == '\t'
}
func isLetter(b byte) bool {
return b >= 'a' && b <= 'z' || b >= 'A' && b <= 'Z'
}
func isUppercaseLetter(b byte) bool {
return b >= 'A' && b <= 'Z'
}
func mdpunct(b byte) bool {
return strings.IndexByte("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", b) != -1
}
func isMdAsciiPunct(r rune) bool {
if r > 0x7e {
return false
}
return mdpunct(byte(r))
}
func recodeHostnameFor(proto string) bool {
switch proto {
case "http", "https", "mailto":
return true
}
return false
}
func normalizeLink(url string) string {
parsed, err := mdurl.Parse(url)
if err != nil {
return ""
}
if parsed.Host != "" && (parsed.Scheme == "" || recodeHostnameFor(parsed.Scheme)) {
parsed.Host = puny.ToASCII(parsed.Host)
}
parsed.Scheme = parsed.RawScheme
return mdurl.Encode(parsed.String())
}
func normalizeLinkText(url string) string {
parsed, err := mdurl.Parse(url)
if err != nil {
return ""
}
if parsed.Host != "" && (parsed.Scheme == "" || recodeHostnameFor(parsed.Scheme)) {
parsed.Host = puny.ToUnicode(parsed.Host)
}
parsed.Scheme = parsed.RawScheme
return mdurl.Decode(parsed.String())
}
var badProtos = []string{"file", "javascript", "vbscript"}
var rGoodData = regexp.MustCompile(`^data:image/(gif|png|jpeg|webp);`)
func removeSpecial(s string) string {
i := 0
for i < len(s) && !(s[i] <= 0x20 || s[i] == 0x7f) {
i++
}
if i >= len(s) {
return s
}
buf := make([]byte, len(s))
j := 0
for i := 0; i < len(s); i++ {
if !(s[i] <= 0x20 || s[i] == 0x7f) {
buf[j] = s[i]
j++
}
}
return string(buf[:j])
}
func validateLink(url string) bool {
str := strings.TrimSpace(url)
str = strings.ToLower(str)
if strings.IndexByte(str, ':') >= 0 {
proto := strings.SplitN(str, ":", 2)[0]
proto = removeSpecial(proto)
for _, p := range badProtos {
if proto == p {
return false
}
}
if proto == "data" && !rGoodData.MatchString(str) {
return false
}
}
return true
}
func unescapeAll(s string) string {
anyChanges := false
i := 0
for i < len(s)-1 {
b := s[i]
if b == '\\' {
if mdpunct(s[i+1]) {
anyChanges = true
break
}
} else if b == '&' {
if e, n := html.ParseEntity(s[i:]); n > 0 && e != html.BadEntity {
anyChanges = true
break
}
}
i++
}
if !anyChanges {
return s
}
buf := make([]byte, len(s))
copy(buf[:i], s)
j := i
for i < len(s) {
b := s[i]
if b == '\\' {
if i+1 < len(s) {
b = s[i+1]
if mdpunct(b) {
buf[j] = b
j++
} else {
buf[j] = '\\'
j++
buf[j] = b
j++
}
i += 2
continue
}
} else if b == '&' {
if e, n := html.ParseEntity(s[i:]); n > 0 && e != html.BadEntity {
if len(e) > n && len(buf) == len(s) {
newBuf := make([]byte, cap(buf)*2)
copy(newBuf[:j], buf)
buf = newBuf
}
j += copy(buf[j:], e)
i += n
continue
}
}
buf[j] = b
j++
i++
}
return string(buf[:j])
}
func normalizeInlineCode(s string) string {
if s == "" {
return ""
}
byteFuckery := false
i := 0
for i < len(s)-1 {
b := s[i]
if b == '\n' {
byteFuckery = true
break
}
if b == ' ' {
i++
b = s[i]
if b == ' ' || b == '\n' {
i--
byteFuckery = true
break
}
}
i++
}
if !byteFuckery {
return strings.TrimSpace(s)
}
buf := make([]byte, len(s))
copy(buf[:i], s)
buf[i] = ' '
i++
j := i
lastSpace := true
for i < len(s) {
b := s[i]
switch b {
case ' ', '\n':
if lastSpace {
break
}
buf[j] = ' '
lastSpace = true
j++
default:
buf[j] = b
lastSpace = false
j++
}
i++
}
return string(bytes.TrimSpace(buf[:j]))
}
func normalizeReference(s string) string {
var buf bytes.Buffer
lastSpace := false
for _, r := range s {
if unicode.IsSpace(r) {
if !lastSpace {
buf.WriteByte(' ')
lastSpace = true
}
continue
}
buf.WriteRune(unicode.ToLower(r))
lastSpace = false
}
return string(bytes.TrimSpace(buf.Bytes()))
}