// Copyright © 2011 Russ Ross . // Distributed under the Simplified BSD License. // HTMLRenderer converts AST of parsed markdown document into HTML text package markdown import ( "bytes" "fmt" "io" "regexp" "strings" ) // HTMLFlags control optional behavior of HTML renderer. type HTMLFlags int // HTML renderer configuration options. const ( HTMLFlagsNone HTMLFlags = 0 SkipHTML HTMLFlags = 1 << iota // Skip preformatted HTML blocks SkipImages // Skip embedded images SkipLinks // Skip all links Safelink // Only link to trusted protocols NofollowLinks // Only link with rel="nofollow" NoreferrerLinks // Only link with rel="noreferrer" HrefTargetBlank // Add a blank target CompletePage // Generate a complete HTML page UseXHTML // Generate XHTML output instead of HTML FootnoteReturnLinks // Generate a link at the end of a footnote to return to the source Smartypants // Enable smart punctuation substitutions SmartypantsFractions // Enable smart fractions (with Smartypants) SmartypantsDashes // Enable smart dashes (with Smartypants) SmartypantsLatexDashes // Enable LaTeX-style dashes (with Smartypants) SmartypantsAngledQuotes // Enable angled double quotes (with Smartypants) for double quotes rendering SmartypantsQuotesNBSP // Enable « French guillemets » (with Smartypants) TOC // Generate a table of contents CommonHTMLFlags HTMLFlags = Smartypants | SmartypantsFractions | SmartypantsDashes | SmartypantsLatexDashes ) var ( htmlTagRe = regexp.MustCompile("(?i)^" + htmlTag) ) const ( htmlTag = "(?:" + openTag + "|" + closeTag + "|" + htmlComment + "|" + processingInstruction + "|" + declaration + "|" + cdata + ")" closeTag = "]" openTag = "<" + tagName + attribute + "*" + "\\s*/?>" attribute = "(?:" + "\\s+" + attributeName + attributeValueSpec + "?)" attributeValue = "(?:" + unquotedValue + "|" + singleQuotedValue + "|" + doubleQuotedValue + ")" attributeValueSpec = "(?:" + "\\s*=" + "\\s*" + attributeValue + ")" attributeName = "[a-zA-Z_:][a-zA-Z0-9:._-]*" cdata = "" declaration = "]*>" doubleQuotedValue = "\"[^\"]*\"" htmlComment = "|" processingInstruction = "[<][?].*?[?][>]" singleQuotedValue = "'[^']*'" tagName = "[A-Za-z][A-Za-z0-9-]*" unquotedValue = "[^\"'=<>`\\x00-\\x20]+" ) // HTMLRendererParameters is a collection of supplementary parameters tweaking // the behavior of various parts of HTML renderer. type HTMLRendererParameters struct { // Prepend this text to each relative URL. AbsolutePrefix string // Add this text to each footnote anchor, to ensure uniqueness. FootnoteAnchorPrefix string // Show this text inside the tag for a footnote return link, if the // HTML_FOOTNOTE_RETURN_LINKS flag is enabled. If blank, the string // [return] is used. FootnoteReturnLinkContents string // If set, add this text to the front of each Heading ID, to ensure // uniqueness. HeadingIDPrefix string // If set, add this text to the back of each Heading ID, to ensure uniqueness. HeadingIDSuffix string Title string // Document title (used if CompletePage is set) CSS string // Optional CSS file URL (used if CompletePage is set) Icon string // Optional icon file URL (used if CompletePage is set) Flags HTMLFlags // Flags allow customizing this renderer's behavior } // HTMLRenderer implements Renderer interface for HTML output. // // Do not create this directly, instead use the NewHTMLRenderer function. type HTMLRenderer struct { params HTMLRendererParameters closeTag string // how to end singleton tags: either " />" or ">" // Track heading IDs to prevent ID collision in a single generation. headingIDs map[string]int lastOutputLen int disableTags int sr *SPRenderer } const ( xhtmlClose = " />" htmlClose = ">" ) // NewHTMLRenderer creates and configures an HTMLRenderer object, which // satisfies the Renderer interface. func NewHTMLRenderer(params HTMLRendererParameters) *HTMLRenderer { // configure the rendering engine closeTag := htmlClose if params.Flags&UseXHTML != 0 { closeTag = xhtmlClose } if params.FootnoteReturnLinkContents == "" { params.FootnoteReturnLinkContents = `[return]` } return &HTMLRenderer{ params: params, closeTag: closeTag, headingIDs: make(map[string]int), sr: NewSmartypantsRenderer(params.Flags), } } func isHTMLTag(tag []byte, tagname string) bool { found, _ := findHTMLTagPos(tag, tagname) return found } // Look for a character, but ignore it when it's in any kind of quotes, it // might be JavaScript func skipUntilCharIgnoreQuotes(html []byte, start int, char byte) int { inSingleQuote := false inDoubleQuote := false inGraveQuote := false i := start for i < len(html) { switch { case html[i] == char && !inSingleQuote && !inDoubleQuote && !inGraveQuote: return i case html[i] == '\'': inSingleQuote = !inSingleQuote case html[i] == '"': inDoubleQuote = !inDoubleQuote case html[i] == '`': inGraveQuote = !inGraveQuote } i++ } return start } func findHTMLTagPos(tag []byte, tagname string) (bool, int) { i := 0 if i < len(tag) && tag[0] != '<' { return false, -1 } i++ i = skipSpace(tag, i) if i < len(tag) && tag[i] == '/' { i++ } i = skipSpace(tag, i) j := 0 for ; i < len(tag); i, j = i+1, j+1 { if j >= len(tagname) { break } if strings.ToLower(string(tag[i]))[0] != tagname[j] { return false, -1 } } if i == len(tag) { return false, -1 } rightAngle := skipUntilCharIgnoreQuotes(tag, i, '>') if rightAngle >= i { return true, rightAngle } return false, -1 } func isRelativeLink(link []byte) (yes bool) { // a tag begin with '#' if link[0] == '#' { return true } // link begin with '/' but not '//', the second maybe a protocol relative link if len(link) >= 2 && link[0] == '/' && link[1] != '/' { return true } // only the root '/' if len(link) == 1 && link[0] == '/' { return true } // current directory : begin with "./" if bytes.HasPrefix(link, []byte("./")) { return true } // parent directory : begin with "../" if bytes.HasPrefix(link, []byte("../")) { return true } return false } func (r *HTMLRenderer) ensureUniqueHeadingID(id string) string { for count, found := r.headingIDs[id]; found; count, found = r.headingIDs[id] { tmp := fmt.Sprintf("%s-%d", id, count+1) if _, tmpFound := r.headingIDs[tmp]; !tmpFound { r.headingIDs[id] = count + 1 id = tmp } else { id = id + "-1" } } if _, found := r.headingIDs[id]; !found { r.headingIDs[id] = 0 } return id } func (r *HTMLRenderer) addAbsPrefix(link []byte) []byte { if r.params.AbsolutePrefix != "" && isRelativeLink(link) && link[0] != '.' { newDest := r.params.AbsolutePrefix if link[0] != '/' { newDest += "/" } newDest += string(link) return []byte(newDest) } return link } func appendLinkAttrs(attrs []string, flags HTMLFlags, link []byte) []string { if isRelativeLink(link) { return attrs } val := []string{} if flags&NofollowLinks != 0 { val = append(val, "nofollow") } if flags&NoreferrerLinks != 0 { val = append(val, "noreferrer") } if flags&HrefTargetBlank != 0 { attrs = append(attrs, "target=\"_blank\"") } if len(val) == 0 { return attrs } attr := fmt.Sprintf("rel=%q", strings.Join(val, " ")) return append(attrs, attr) } func isMailto(link []byte) bool { return bytes.HasPrefix(link, []byte("mailto:")) } func needSkipLink(flags HTMLFlags, dest []byte) bool { if flags&SkipLinks != 0 { return true } return flags&Safelink != 0 && !isSafeLink(dest) && !isMailto(dest) } func isSmartypantable(node *Node) bool { switch node.Parent.Data.(type) { case *LinkData, *CodeBlockData, *CodeData: return false } return true } func appendLanguageAttr(attrs []string, info []byte) []string { if len(info) == 0 { return attrs } endOfLang := bytes.IndexAny(info, "\t ") if endOfLang < 0 { endOfLang = len(info) } return append(attrs, fmt.Sprintf("class=\"language-%s\"", info[:endOfLang])) } func (r *HTMLRenderer) tag(w io.Writer, name string, attrs []string) { io.WriteString(w, name) if len(attrs) > 0 { w.Write(spaceBytes) io.WriteString(w, strings.Join(attrs, " ")) } w.Write(gtBytes) r.lastOutputLen = 1 } func footnoteRef(prefix string, node *LinkData) string { urlFrag := prefix + string(slugify(node.Destination)) anchor := fmt.Sprintf(`%d`, urlFrag, node.NoteID) return fmt.Sprintf(`%s`, urlFrag, anchor) } func footnoteItem(prefix string, slug []byte) string { return fmt.Sprintf(`
  • `, prefix, slug) } func footnoteReturnLink(prefix, returnLink string, slug []byte) string { const format = ` %s` return fmt.Sprintf(format, prefix, slug, returnLink) } func itemOpenCR(node *Node) bool { if node.Prev == nil { return false } ld := node.Parent.Data.(*ListData) return !ld.Tight && ld.ListFlags&ListTypeDefinition == 0 } func skipParagraphTags(node *Node) bool { parent := node.Parent grandparent := parent.Parent if grandparent == nil || !isListData(grandparent.Data) { return false } isParentTerm := isListItemTerm(parent) grandparentListData := grandparent.Data.(*ListData) tightOrTerm := grandparentListData.Tight || isParentTerm return tightOrTerm } func cellAlignment(align CellAlignFlags) string { switch align { case TableAlignmentLeft: return "left" case TableAlignmentRight: return "right" case TableAlignmentCenter: return "center" default: return "" } } func (r *HTMLRenderer) out(w io.Writer, d []byte) { r.lastOutputLen = len(d) if r.disableTags > 0 { d = htmlTagRe.ReplaceAll(d, []byte{}) } w.Write(d) } func (r *HTMLRenderer) outs(w io.Writer, s string) { r.lastOutputLen = len(s) if r.disableTags > 0 { s = htmlTagRe.ReplaceAllString(s, "") } io.WriteString(w, s) } func (r *HTMLRenderer) cr(w io.Writer) { if r.lastOutputLen > 0 { r.out(w, nlBytes) } } var ( nlBytes = []byte{'\n'} gtBytes = []byte{'>'} spaceBytes = []byte{' '} ) func headingOpenTagFromLevel(level int) string { switch level { case 1: return "" case 2: return "" case 3: return "" case 4: return "" case 5: return "" default: return "" } } func (r *HTMLRenderer) outHRTag(w io.Writer) { if r.params.Flags&UseXHTML == 0 { r.out(w, []byte("
    ")) } else { r.out(w, []byte("
    ")) } } func (r *HTMLRenderer) text(w io.Writer, node *Node, nodeData *TextData) { if r.params.Flags&Smartypants != 0 { var tmp bytes.Buffer escapeHTML(&tmp, node.Literal) r.sr.Process(w, tmp.Bytes()) } else { if isLinkData(node.Parent.Data) { escLink(w, node.Literal) } else { escapeHTML(w, node.Literal) } } } func (r *HTMLRenderer) hardBreak(w io.Writer, node *Node, nodeData *HardbreakData) { s := "
    " if r.params.Flags&UseXHTML != 0 { s = "
    " } r.outs(w, s) r.cr(w) } func (r *HTMLRenderer) openOrCloseTag(w io.Writer, isOpen bool, openTag string, closeTag string) { if isOpen { r.outs(w, openTag) } else { r.outs(w, closeTag) } } func (r *HTMLRenderer) crOpenOrCloseTag(w io.Writer, isOpen bool, openTag string, closeTag string) { if isOpen { r.cr(w) r.outs(w, openTag) } else { r.outs(w, closeTag) r.cr(w) } } func (r *HTMLRenderer) span(w io.Writer, node *Node, nodeData *HTMLSpanData) { if r.params.Flags&SkipHTML != 0 { return } r.out(w, node.Literal) } func (r *HTMLRenderer) link(w io.Writer, node *Node, nodeData *LinkData, entering bool) { var attrs []string // mark it but don't link it if it is not a safe link: no smartypants dest := nodeData.Destination if needSkipLink(r.params.Flags, dest) { r.openOrCloseTag(w, entering, "", "") return } if !entering { if nodeData.NoteID == 0 { r.out(w, []byte("")) } return } // entering dest = r.addAbsPrefix(dest) var hrefBuf bytes.Buffer hrefBuf.WriteString("href=\"") escLink(&hrefBuf, dest) hrefBuf.WriteByte('"') attrs = append(attrs, hrefBuf.String()) if nodeData.NoteID != 0 { r.outs(w, footnoteRef(r.params.FootnoteAnchorPrefix, nodeData)) return } attrs = appendLinkAttrs(attrs, r.params.Flags, dest) if len(nodeData.Title) > 0 { var titleBuff bytes.Buffer titleBuff.WriteString("title=\"") escapeHTML(&titleBuff, nodeData.Title) titleBuff.WriteByte('"') attrs = append(attrs, titleBuff.String()) } r.tag(w, "`)) } } func (r *HTMLRenderer) paragraphEnter(w io.Writer, node *Node, nodeData *ParagraphData) { // TODO: untangle this clusterfuck about when the newlines need // to be added and when not. if node.Prev != nil { switch node.Prev.Data.(type) { case *HTMLBlockData, *ListData, *ParagraphData, *HeadingData, *CodeBlockData, *BlockQuoteData, *HorizontalRuleData: r.cr(w) } } if isBlockQuoteData(node.Parent.Data) && node.Prev == nil { r.cr(w) } r.out(w, []byte("

    ")) } func (r *HTMLRenderer) paragraphExit(w io.Writer, node *Node, nodeData *ParagraphData) { r.outs(w, "

    ") if !(isListItemData(node.Parent.Data) && node.Next == nil) { r.cr(w) } } func (r *HTMLRenderer) paragraph(w io.Writer, node *Node, nodeData *ParagraphData, entering bool) { if skipParagraphTags(node) { return } if entering { r.paragraphEnter(w, node, nodeData) } else { r.paragraphExit(w, node, nodeData) } } func (r *HTMLRenderer) image(w io.Writer, node *Node, nodeData *ImageData, entering bool) { if entering { r.imageEnter(w, node, nodeData) } else { r.imageExit(w, node, nodeData) } } func (r *HTMLRenderer) code(w io.Writer, node *Node, nodeData *CodeData) { r.outs(w, "") escapeHTML(w, node.Literal) r.outs(w, "") } func (r *HTMLRenderer) htmlBlock(w io.Writer, node *Node, nodeData *HTMLBlockData) { if r.params.Flags&SkipHTML != 0 { return } r.cr(w) r.out(w, node.Literal) r.cr(w) } func (r *HTMLRenderer) heading(w io.Writer, node *Node, nodeData *HeadingData, entering bool) { if !entering { closeTag := headingCloseTagFromLevel(nodeData.Level) r.outs(w, closeTag) if !(isListItemData(node.Parent.Data) && node.Next == nil) { r.cr(w) } return } // entering var attrs []string if nodeData.IsTitleblock { attrs = append(attrs, `class="title"`) } if nodeData.HeadingID != "" { id := r.ensureUniqueHeadingID(nodeData.HeadingID) if r.params.HeadingIDPrefix != "" { id = r.params.HeadingIDPrefix + id } if r.params.HeadingIDSuffix != "" { id = id + r.params.HeadingIDSuffix } attrID := `id="` + id + `"` attrs = append(attrs, attrID) } r.cr(w) openTag := headingOpenTagFromLevel(nodeData.Level) r.tag(w, openTag, attrs) } func (r *HTMLRenderer) horizontalRule(w io.Writer) { r.cr(w) r.outHRTag(w) r.cr(w) } func (r *HTMLRenderer) listEnter(w io.Writer, node *Node, nodeData *ListData) { // TODO: attrs don't seem to be set var attrs []string openTag := "\n\n") r.outHRTag(w) r.cr(w) } r.cr(w) if isListItemData(node.Parent.Data) { grand := node.Parent.Parent if isListTight(grand.Data) { r.cr(w) } } r.tag(w, openTag, attrs) r.cr(w) } func (r *HTMLRenderer) listExit(w io.Writer, node *Node, nodeData *ListData) { closeTag := "" if nodeData.ListFlags&ListTypeOrdered != 0 { closeTag = "" } if nodeData.ListFlags&ListTypeDefinition != 0 { closeTag = "" } r.outs(w, closeTag) //cr(w) //if node.parent.Type != Item { // cr(w) //} if isListItemData(node.Parent.Data) && node.Next != nil { r.cr(w) } if isDocumentData(node.Parent.Data) || isBlockQuoteData(node.Parent.Data) { r.cr(w) } if nodeData.IsFootnotesList { r.outs(w, "\n\n") } } func (r *HTMLRenderer) list(w io.Writer, node *Node, nodeData *ListData, entering bool) { if entering { r.listEnter(w, node, nodeData) } else { r.listExit(w, node, nodeData) } } func (r *HTMLRenderer) listItem(w io.Writer, node *Node, nodeData *ListItemData, entering bool) { if entering { openTag := "
  • " if nodeData.ListFlags&ListTypeDefinition != 0 { openTag = "
    " } if nodeData.ListFlags&ListTypeTerm != 0 { openTag = "
    " } if itemOpenCR(node) { r.cr(w) } if nodeData.RefLink != nil { slug := slugify(nodeData.RefLink) r.outs(w, footnoteItem(r.params.FootnoteAnchorPrefix, slug)) return } r.outs(w, openTag) } else { closeTag := "
  • " if nodeData.ListFlags&ListTypeDefinition != 0 { closeTag = "" } if nodeData.ListFlags&ListTypeTerm != 0 { closeTag = "" } if nodeData.RefLink != nil { slug := slugify(nodeData.RefLink) if r.params.Flags&FootnoteReturnLinks != 0 { r.outs(w, footnoteReturnLink(r.params.FootnoteAnchorPrefix, r.params.FootnoteReturnLinkContents, slug)) } } r.outs(w, closeTag) r.cr(w) } } func (r *HTMLRenderer) codeBlock(w io.Writer, node *Node, nodeData *CodeBlockData) { var attrs []string attrs = appendLanguageAttr(attrs, nodeData.Info) r.cr(w) r.outs(w, "
    ")
    	r.tag(w, "")
    	r.outs(w, "
    ") if !isListItemData(node.Parent.Data) { r.cr(w) } } func (r *HTMLRenderer) tableCell(w io.Writer, node *Node, nodeData *TableCellData, entering bool) { if !entering { closeTag := "" if nodeData.IsHeader { closeTag = "" } r.outs(w, closeTag) r.cr(w) return } // entering var attrs []string openTag := "") // XXX: this is to adhere to a rather silly test. Should fix test. if node.FirstChild == nil { r.cr(w) } } else { r.outs(w, "") r.cr(w) } } // RenderNode is a default renderer of a single node of a syntax tree. For // block nodes it will be called twice: first time with entering=true, second // time with entering=false, so that it could know when it's working on an open // tag and when on close. It writes the result to w. // // The return value is a way to tell the calling walker to adjust its walk // pattern: e.g. it can terminate the traversal by returning Terminate. Or it // can ask the walker to skip a subtree of this node by returning SkipChildren. // The typical behavior is to return GoToNext, which asks for the usual // traversal to the next node. func (r *HTMLRenderer) RenderNode(w io.Writer, node *Node, entering bool) WalkStatus { switch nodeData := node.Data.(type) { case *TextData: r.text(w, node, nodeData) case *SoftbreakData: r.cr(w) // TODO: make it configurable via out(renderer.softbreak) case *HardbreakData: r.hardBreak(w, node, nodeData) case *EmphData: r.openOrCloseTag(w, entering, "", "") case *StrongData: r.openOrCloseTag(w, entering, "", "") case *DelData: r.openOrCloseTag(w, entering, "", "") case *HTMLSpanData: r.span(w, node, nodeData) case *LinkData: r.link(w, node, nodeData, entering) case *ImageData: if r.params.Flags&SkipImages != 0 { return SkipChildren } r.image(w, node, nodeData, entering) case *CodeData: r.code(w, node, nodeData) case *DocumentData: // do nothing case *ParagraphData: r.paragraph(w, node, nodeData, entering) case *BlockQuoteData: r.crOpenOrCloseTag(w, entering, "
    ", "
    ") case *HTMLBlockData: r.htmlBlock(w, node, nodeData) case *HeadingData: r.heading(w, node, nodeData, entering) case *HorizontalRuleData: r.horizontalRule(w) case *ListData: r.list(w, node, nodeData, entering) case *ListItemData: r.listItem(w, node, nodeData, entering) case *CodeBlockData: r.codeBlock(w, node, nodeData) case *TableData: r.crOpenOrCloseTag(w, entering, "", "
    ") case *TableCellData: r.tableCell(w, node, nodeData, entering) case *TableHeadData: r.crOpenOrCloseTag(w, entering, "", "") case *TableBodyData: r.tableBody(w, node, nodeData, entering) case *TableRowData: r.crOpenOrCloseTag(w, entering, "", "") default: //panic("Unknown node type " + node.Type.String()) panic(fmt.Sprintf("Unknown node type %T", node.Data)) } return GoToNext } // RenderHeader writes HTML document preamble and TOC if requested. func (r *HTMLRenderer) RenderHeader(w io.Writer, ast *Node) { r.writeDocumentHeader(w) if r.params.Flags&TOC != 0 { r.writeTOC(w, ast) } } // RenderFooter writes HTML document footer. func (r *HTMLRenderer) RenderFooter(w io.Writer, ast *Node) { if r.params.Flags&CompletePage == 0 { return } io.WriteString(w, "\n\n\n") } func (r *HTMLRenderer) writeDocumentHeader(w io.Writer) { if r.params.Flags&CompletePage == 0 { return } ending := "" if r.params.Flags&UseXHTML != 0 { io.WriteString(w, "\n") io.WriteString(w, "\n") ending = " /" } else { io.WriteString(w, "\n") io.WriteString(w, "\n") } io.WriteString(w, "\n") io.WriteString(w, " ") if r.params.Flags&Smartypants != 0 { r.sr.Process(w, []byte(r.params.Title)) } else { escapeHTML(w, []byte(r.params.Title)) } io.WriteString(w, "\n") io.WriteString(w, " \n") io.WriteString(w, " \n") if r.params.CSS != "" { io.WriteString(w, " \n") } if r.params.Icon != "" { io.WriteString(w, " \n") } io.WriteString(w, "\n") io.WriteString(w, "\n\n") } func (r *HTMLRenderer) writeTOC(w io.Writer, ast *Node) { buf := bytes.Buffer{} inHeading := false tocLevel := 0 headingCount := 0 ast.WalkFunc(func(node *Node, entering bool) WalkStatus { if nodeData, ok := node.Data.(*HeadingData); ok && !nodeData.IsTitleblock { inHeading = entering if entering { nodeData.HeadingID = fmt.Sprintf("toc_%d", headingCount) if nodeData.Level == tocLevel { buf.WriteString("\n\n
  • ") } else if nodeData.Level < tocLevel { for nodeData.Level < tocLevel { tocLevel-- buf.WriteString("
  • \n") } buf.WriteString("\n\n
  • ") } else { for nodeData.Level > tocLevel { tocLevel++ buf.WriteString("\n") } if buf.Len() > 0 { io.WriteString(w, "\n") } r.lastOutputLen = buf.Len() }