fix: Handle all different token types that the parser can emit (d'oh).

This commit is contained in:
Martin Probst 2014-05-01 20:55:53 +02:00
parent b44be78459
commit f9b7593e65
2 changed files with 18 additions and 1 deletions

View File

@ -201,6 +201,16 @@ func TestRawHtmlTag(t *testing.T) {
"<iframe src=http://ha.ckers.org/scriptlet.html <",
// The hyperlink gets linkified, the <iframe> gets escaped
"<p>&lt;iframe src=<a href=\"http://ha.ckers.org/scriptlet.html\">http://ha.ckers.org/scriptlet.html</a> &lt;</p>\n",
// Additional token types: SelfClosing, Comment, DocType.
"<br/>",
"<p><br></p>\n",
"<!-- Comment -->",
"<!-- Comment -->\n",
"<!DOCTYPE test>",
"<p>&lt;!DOCTYPE test&gt;</p>\n",
}
doTestsInlineParam(t, tests, 0, HTML_SKIP_STYLE|HTML_SANITIZE_OUTPUT)
}

View File

@ -64,7 +64,7 @@ func sanitizeHtmlSafe(input []byte) []byte {
case html.TextToken:
// Text is written escaped.
wr.WriteString(tokenizer.Token().String())
case html.StartTagToken:
case html.SelfClosingTagToken, html.StartTagToken:
// HTML tags are escaped unless whitelisted.
tag, hasAttributes := tokenizer.TagName()
tagName := string(tag)
@ -105,7 +105,14 @@ func sanitizeHtmlSafe(input []byte) []byte {
} else {
wr.WriteString(html.EscapeString(string(tokenizer.Raw())))
}
case html.CommentToken:
// Comments are not really expected, but harmless.
wr.Write(tokenizer.Raw())
case html.DoctypeToken:
// Escape DOCTYPEs; entities etc. can be dangerous.
wr.WriteString(html.EscapeString(string(tokenizer.Raw())))
default:
tokenizer.Token()
panic(fmt.Errorf("Unexpected token type %v", t))
}
}