package html import ( "bytes" "fmt" "io" "regexp" "sort" "strconv" "strings" "github.com/gomarkdown/markdown/ast" ) // Flags control optional behavior of HTML renderer. type Flags int // IDTag is the tag used for tag identification, it defaults to "id", some renderers // may with to override this an use "anchor". var IDTag = "id" // HTML renderer configuration options. const ( FlagsNone Flags = 0 SkipHTML Flags = 1 << iota // Skip preformatted HTML blocks SkipImages // Skip embedded images SkipLinks // Skip all links Safelink // Only link to trusted protocols NofollowLinks // Only link with rel="nofollow" NoreferrerLinks // Only link with rel="noreferrer" HrefTargetBlank // Add a blank target CompletePage // Generate a complete HTML page UseXHTML // Generate XHTML output instead of HTML FootnoteReturnLinks // Generate a link at the end of a footnote to return to the source Smartypants // Enable smart punctuation substitutions SmartypantsFractions // Enable smart fractions (with Smartypants) SmartypantsDashes // Enable smart dashes (with Smartypants) SmartypantsLatexDashes // Enable LaTeX-style dashes (with Smartypants) SmartypantsAngledQuotes // Enable angled double quotes (with Smartypants) for double quotes rendering SmartypantsQuotesNBSP // Enable « French guillemets » (with Smartypants) TOC // Generate a table of contents CommonFlags Flags = Smartypants | SmartypantsFractions | SmartypantsDashes | SmartypantsLatexDashes ) var ( htmlTagRe = regexp.MustCompile("(?i)^" + htmlTag) ) const ( htmlTag = "(?:" + openTag + "|" + closeTag + "|" + htmlComment + "|" + processingInstruction + "|" + declaration + "|" + cdata + ")" closeTag = "" + tagName + "\\s*[>]" openTag = "<" + tagName + attribute + "*" + "\\s*/?>" attribute = "(?:" + "\\s+" + attributeName + attributeValueSpec + "?)" attributeValue = "(?:" + unquotedValue + "|" + singleQuotedValue + "|" + doubleQuotedValue + ")" attributeValueSpec = "(?:" + "\\s*=" + "\\s*" + attributeValue + ")" attributeName = "[a-zA-Z_:][a-zA-Z0-9:._-]*" cdata = "" declaration = "]*>" doubleQuotedValue = "\"[^\"]*\"" htmlComment = "|" processingInstruction = "[<][?].*?[?][>]" singleQuotedValue = "'[^']*'" tagName = "[A-Za-z][A-Za-z0-9-]*" unquotedValue = "[^\"'=<>`\\x00-\\x20]+" ) // RenderNodeFunc allows reusing most of Renderer logic and replacing // rendering of some nodes. If it returns false, Renderer.RenderNode // will execute its logic. If it returns true, Renderer.RenderNode will // skip rendering this node and will return WalkStatus type RenderNodeFunc func(w io.Writer, node ast.Node, entering bool) (ast.WalkStatus, bool) // RendererOptions is a collection of supplementary parameters tweaking // the behavior of various parts of HTML renderer. type RendererOptions struct { // Prepend this text to each relative URL. AbsolutePrefix string // Add this text to each footnote anchor, to ensure uniqueness. FootnoteAnchorPrefix string // Show this text inside the tag for a footnote return link, if the // FootnoteReturnLinks flag is enabled. If blank, the string // [return] is used. FootnoteReturnLinkContents string // If set, add this text to the front of each Heading ID, to ensure // uniqueness. HeadingIDPrefix string // If set, add this text to the back of each Heading ID, to ensure uniqueness. HeadingIDSuffix string Title string // Document title (used if CompletePage is set) CSS string // Optional CSS file URL (used if CompletePage is set) Icon string // Optional icon file URL (used if CompletePage is set) Head []byte // Optional head data injected in the section (used if CompletePage is set) Flags Flags // Flags allow customizing this renderer's behavior // if set, called at the start of RenderNode(). Allows replacing // rendering of some nodes RenderNodeHook RenderNodeFunc // Comments is a list of comments the renderer should detect when // parsing code blocks and detecting callouts. Comments [][]byte } // Renderer implements Renderer interface for HTML output. // // Do not create this directly, instead use the NewRenderer function. type Renderer struct { opts RendererOptions closeTag string // how to end singleton tags: either " />" or ">" // Track heading IDs to prevent ID collision in a single generation. headingIDs map[string]int lastOutputLen int disableTags int sr *SPRenderer documentMatter ast.DocumentMatters // keep track of front/main/back matter. } // NewRenderer creates and configures an Renderer object, which // satisfies the Renderer interface. func NewRenderer(opts RendererOptions) *Renderer { // configure the rendering engine closeTag := ">" if opts.Flags&UseXHTML != 0 { closeTag = " />" } if opts.FootnoteReturnLinkContents == "" { opts.FootnoteReturnLinkContents = `[return]` } return &Renderer{ opts: opts, closeTag: closeTag, headingIDs: make(map[string]int), sr: NewSmartypantsRenderer(opts.Flags), } } func isHTMLTag(tag []byte, tagname string) bool { found, _ := findHTMLTagPos(tag, tagname) return found } // Look for a character, but ignore it when it's in any kind of quotes, it // might be JavaScript func skipUntilCharIgnoreQuotes(html []byte, start int, char byte) int { inSingleQuote := false inDoubleQuote := false inGraveQuote := false i := start for i < len(html) { switch { case html[i] == char && !inSingleQuote && !inDoubleQuote && !inGraveQuote: return i case html[i] == '\'': inSingleQuote = !inSingleQuote case html[i] == '"': inDoubleQuote = !inDoubleQuote case html[i] == '`': inGraveQuote = !inGraveQuote } i++ } return start } func findHTMLTagPos(tag []byte, tagname string) (bool, int) { i := 0 if i < len(tag) && tag[0] != '<' { return false, -1 } i++ i = skipSpace(tag, i) if i < len(tag) && tag[i] == '/' { i++ } i = skipSpace(tag, i) j := 0 for ; i < len(tag); i, j = i+1, j+1 { if j >= len(tagname) { break } if strings.ToLower(string(tag[i]))[0] != tagname[j] { return false, -1 } } if i == len(tag) { return false, -1 } rightAngle := skipUntilCharIgnoreQuotes(tag, i, '>') if rightAngle >= i { return true, rightAngle } return false, -1 } func isRelativeLink(link []byte) (yes bool) { // a tag begin with '#' if link[0] == '#' { return true } // link begin with '/' but not '//', the second maybe a protocol relative link if len(link) >= 2 && link[0] == '/' && link[1] != '/' { return true } // only the root '/' if len(link) == 1 && link[0] == '/' { return true } // current directory : begin with "./" if bytes.HasPrefix(link, []byte("./")) { return true } // parent directory : begin with "../" if bytes.HasPrefix(link, []byte("../")) { return true } return false } func (r *Renderer) ensureUniqueHeadingID(id string) string { for count, found := r.headingIDs[id]; found; count, found = r.headingIDs[id] { tmp := fmt.Sprintf("%s-%d", id, count+1) if _, tmpFound := r.headingIDs[tmp]; !tmpFound { r.headingIDs[id] = count + 1 id = tmp } else { id = id + "-1" } } if _, found := r.headingIDs[id]; !found { r.headingIDs[id] = 0 } return id } func (r *Renderer) addAbsPrefix(link []byte) []byte { if r.opts.AbsolutePrefix != "" && isRelativeLink(link) && link[0] != '.' { newDest := r.opts.AbsolutePrefix if link[0] != '/' { newDest += "/" } newDest += string(link) return []byte(newDest) } return link } func appendLinkAttrs(attrs []string, flags Flags, link []byte) []string { if isRelativeLink(link) { return attrs } var val []string if flags&NofollowLinks != 0 { val = append(val, "nofollow") } if flags&NoreferrerLinks != 0 { val = append(val, "noreferrer") } if flags&HrefTargetBlank != 0 { attrs = append(attrs, `target="_blank"`) } if len(val) == 0 { return attrs } attr := fmt.Sprintf("rel=%q", strings.Join(val, " ")) return append(attrs, attr) } func isMailto(link []byte) bool { return bytes.HasPrefix(link, []byte("mailto:")) } func needSkipLink(flags Flags, dest []byte) bool { if flags&SkipLinks != 0 { return true } return flags&Safelink != 0 && !isSafeLink(dest) && !isMailto(dest) } func isSmartypantable(node ast.Node) bool { switch node.GetParent().(type) { case *ast.Link, *ast.CodeBlock, *ast.Code: return false } return true } func appendLanguageAttr(attrs []string, info []byte) []string { if len(info) == 0 { return attrs } endOfLang := bytes.IndexAny(info, "\t ") if endOfLang < 0 { endOfLang = len(info) } s := `class="language-` + string(info[:endOfLang]) + `"` return append(attrs, s) } func (r *Renderer) outTag(w io.Writer, name string, attrs []string) { s := name if len(attrs) > 0 { s += " " + strings.Join(attrs, " ") } io.WriteString(w, s+">") r.lastOutputLen = 1 } func footnoteRef(prefix string, node *ast.Link) string { urlFrag := prefix + string(slugify(node.Destination)) nStr := strconv.Itoa(node.NoteID) anchor := `` + nStr + `` return `` + anchor + `` } func footnoteItem(prefix string, slug []byte) string { return `
")
if !(isListItem(para.Parent) && ast.GetNextNode(para) == nil) {
r.cr(w)
}
}
func (r *Renderer) paragraph(w io.Writer, para *ast.Paragraph, entering bool) {
if skipParagraphTags(para) {
return
}
if entering {
r.paragraphEnter(w, para)
} else {
r.paragraphExit(w, para)
}
}
func (r *Renderer) image(w io.Writer, node *ast.Image, entering bool) {
if entering {
r.imageEnter(w, node)
} else {
r.imageExit(w, node)
}
}
func (r *Renderer) code(w io.Writer, node *ast.Code) {
r.outs(w, "")
EscapeHTML(w, node.Literal)
r.outs(w, "
")
}
func (r *Renderer) htmlBlock(w io.Writer, node *ast.HTMLBlock) {
if r.opts.Flags&SkipHTML != 0 {
return
}
r.cr(w)
r.out(w, node.Literal)
r.cr(w)
}
func (r *Renderer) headingEnter(w io.Writer, nodeData *ast.Heading) {
var attrs []string
var class string
// TODO(miek): add helper functions for coalescing these classes.
if nodeData.IsTitleblock {
class = "title"
}
if nodeData.IsSpecial {
if class != "" {
class += " special"
} else {
class = "special"
}
}
if class != "" {
attrs = []string{`class="` + class + `"`}
}
if nodeData.HeadingID != "" {
id := r.ensureUniqueHeadingID(nodeData.HeadingID)
if r.opts.HeadingIDPrefix != "" {
id = r.opts.HeadingIDPrefix + id
}
if r.opts.HeadingIDSuffix != "" {
id = id + r.opts.HeadingIDSuffix
}
attrID := `id="` + id + `"`
attrs = append(attrs, attrID)
}
attrs = append(attrs, BlockAttrs(nodeData)...)
r.cr(w)
r.outTag(w, headingOpenTagFromLevel(nodeData.Level), attrs)
}
func (r *Renderer) headingExit(w io.Writer, heading *ast.Heading) {
r.outs(w, headingCloseTagFromLevel(heading.Level))
if !(isListItem(heading.Parent) && ast.GetNextNode(heading) == nil) {
r.cr(w)
}
}
func (r *Renderer) heading(w io.Writer, node *ast.Heading, entering bool) {
if entering {
r.headingEnter(w, node)
} else {
r.headingExit(w, node)
}
}
func (r *Renderer) horizontalRule(w io.Writer, node *ast.HorizontalRule) {
r.cr(w)
r.outHRTag(w, BlockAttrs(node))
r.cr(w)
}
func (r *Renderer) listEnter(w io.Writer, nodeData *ast.List) {
// TODO: attrs don't seem to be set
var attrs []string
if nodeData.IsFootnotesList {
r.outs(w, "\n
")
code := tagWithAttributes("")
r.outs(w, "
")
if !isListItem(codeBlock.Parent) {
r.cr(w)
}
}
func (r *Renderer) caption(w io.Writer, caption *ast.Caption, entering bool) {
if entering {
r.outs(w, "") case *ast.Aside: tag := tagWithAttributes("