147 lines
2.8 KiB
Go
Raw Normal View History

// Copyright 2015 The Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package mdurl
import (
"errors"
"strings"
)
// ErrMissingScheme is the error Parse reports when the URL it is given
// begins with a colon, i.e. there is nothing in front of the scheme separator.
var ErrMissingScheme = errors.New("missing protocol scheme")

// slashedProtocol is the set of lower-case schemes for which Parse only
// looks for an authority (auth, host, port) when the scheme is followed by
// "//". Any other non-empty scheme gets host parsing even without slashes.
var slashedProtocol = map[string]bool{
	"file":   true,
	"ftp":    true,
	"gopher": true,
	"http":   true,
	"https":  true,
}
// split cuts s at the first occurrence of the byte c.
//
// When c is present it returns the text before it, the text after it
// (the separator itself is dropped) and true. When c is absent it returns
// s unchanged, an empty string and false.
func split(s string, c byte) (string, string, bool) {
	if pos := strings.IndexByte(s, c); pos >= 0 {
		return s[:pos], s[pos+1:], true
	}
	return s, "", false
}
// findScheme locates the scheme prefix of s.
//
// It returns the index of the ':' that terminates the scheme, or 0 when s
// has no scheme: s is empty, its first byte is rejected by letter, a byte
// outside the allowed set appears before any ':', or there is no ':' at all.
// If s begins with ':' it returns ErrMissingScheme.
func findScheme(s string) (int, error) {
	// Bytes permitted after the first character of a scheme.
	const schemeChars = "+-.0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"

	switch {
	case len(s) == 0:
		return 0, nil
	case s[0] == ':':
		return 0, ErrMissingScheme
	case !letter(s[0]):
		// NOTE(review): letter is defined elsewhere; presumably it accepts
		// ASCII letters only — confirm against its definition.
		return 0, nil
	}

	for pos, n := 1, len(s); pos < n; pos++ {
		ch := s[pos]
		if ch == ':' {
			return pos, nil
		}
		if strings.IndexByte(schemeChars, ch) < 0 {
			return 0, nil
		}
	}
	return 0, nil
}
// Parse parses rawurl into a URL structure.
//
// The input is consumed left to right: scheme, optional "//", auth, host,
// port, then fragment and query; whatever is left becomes the path. No
// component is decoded or validated beyond what is needed to find its
// boundaries. The only error returned is the one reported by findScheme
// (ErrMissingScheme when rawurl starts with a colon); in that case the
// returned *URL is nil.
func Parse(rawurl string) (*URL, error) {
n, err := findScheme(rawurl)
if err != nil {
return nil, err
}
var url URL
// rest is the not-yet-consumed tail of the input; each step below trims it.
rest := rawurl
hostless := false
if n > 0 {
// Keep the scheme both as written (RawScheme) and lower-cased (Scheme),
// and skip past the ':' separator.
url.RawScheme = rest[:n]
url.Scheme, rest = strings.ToLower(rest[:n]), rest[n+1:]
// "javascript:" URLs never get slash or host parsing; everything after
// the colon ends up in path/query/fragment.
if url.Scheme == "javascript" {
hostless = true
}
}
if !hostless && strings.HasPrefix(rest, "//") {
url.Slashes, rest = true, rest[2:]
}
// Parse an authority section when "//" was present, or when there is a
// scheme that is not listed in slashedProtocol.
if !hostless && (url.Slashes || (url.Scheme != "" && !slashedProtocol[url.Scheme])) {
// Find the last '@' at or before the first '#', '/' or '?' (or anywhere
// in rest when none of those occur) by scanning backwards.
hostEnd := strings.IndexAny(rest, "#/?")
atSign := -1
i := hostEnd
if i == -1 {
i = len(rest) - 1
}
for i >= 0 {
if rest[i] == '@' {
atSign = i
break
}
i--
}
if atSign != -1 {
// Everything before that '@' is stored verbatim as Auth.
url.Auth, rest = rest[:atSign], rest[atSign+1:]
}
// The host runs up to the first byte from this terminator set, or to the
// end of rest when none is present.
hostEnd = strings.IndexAny(rest, "\t\r\n \"#%'/;<>?\\^`{|}")
if hostEnd == -1 {
hostEnd = len(rest)
}
// A ':' directly in front of the terminator is left out of the host and
// stays at the front of the remaining input.
if hostEnd > 0 && hostEnd < len(rest) && rest[hostEnd-1] == ':' {
hostEnd--
}
host := rest[:hostEnd]
if len(host) > 1 {
// len(host) == hostEnd here, so this is the last byte of host.
b := host[hostEnd-1]
// NOTE(review): digit is defined elsewhere; presumably it accepts ASCII
// '0'-'9' only — confirm against its definition.
if digit(b) {
// Walk back over the trailing run of digits; if it is preceded by ':'
// that run is the port. Any other byte means there is no port.
for i := len(host) - 2; i >= 0; i-- {
b := host[i]
if b == ':' {
url.Host, url.Port = host[:i], host[i+1:]
break
}
if !digit(b) {
break
}
}
} else if b == ':' {
// Trailing ':' with no port digits: drop it from the host; it remains at
// the start of the unconsumed input because hostEnd moves back as well.
host = host[:hostEnd-1]
hostEnd--
}
}
// No port was split off above, so the whole candidate is the host.
if url.Port == "" {
url.Host = host
}
rest = rest[hostEnd:]
if ipv6 := len(url.Host) > 2 &&
url.Host[0] == '[' &&
url.Host[len(url.Host)-1] == ']'; ipv6 {
// Bracketed host: store it without the brackets and flag it as IPv6.
url.Host = url.Host[1 : len(url.Host)-1]
url.IPv6 = true
} else if i := strings.IndexByte(url.Host, ':'); i >= 0 {
// An unbracketed host may not contain ':'; cut it at the first colon and
// push the remainder back in front of rest.
url.Host, rest = url.Host[:i], url.Host[i:]+rest
}
}
// The fragment is cut at the first '#'; the query is then cut at the first
// '?' of what precedes it, and the remainder is the path.
rest, url.Fragment, url.HasFragment = split(rest, '#')
url.Path, url.RawQuery, url.HasQuery = split(rest, '?')
return &url, nil
}