mirror of
https://github.com/status-im/markdown.git
synced 2025-02-23 00:28:25 +00:00
Merge pull request #75 from mprobst/sanitize_test
Avoid raw mode parsing so that tags like <script> don't cause escaping
This commit is contained in:
commit
643477a051
@ -135,7 +135,7 @@ func TestRawHtmlTag(t *testing.T) {
|
||||
"<p><a>xss link</a></p>\n",
|
||||
|
||||
`<IMG """><SCRIPT>alert("XSS")</SCRIPT>">`,
|
||||
"<p><img><script>alert(&quot;XSS&quot;)</script>"></p>\n",
|
||||
"<p><img><script>alert("XSS")</script>"></p>\n",
|
||||
|
||||
"<IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>",
|
||||
"<p><img></p>\n",
|
||||
@ -182,18 +182,14 @@ func TestRawHtmlTag(t *testing.T) {
|
||||
`<SCRIPT/SRC="http://ha.ckers.org/xss.js"></SCRIPT>`,
|
||||
"<p><script/SRC="http://ha.ckers.org/xss.js"></script></p>\n",
|
||||
|
||||
// HTML5 interprets the <script> tag contents as raw text, thus the end
|
||||
// result has double-escaped &quot;
|
||||
`<<SCRIPT>alert("XSS");//<</SCRIPT>`,
|
||||
"<p><<script>alert(&quot;XSS&quot;);//&lt;</script></p>\n",
|
||||
"<p><<script>alert("XSS");//<</script></p>\n",
|
||||
|
||||
// HTML5 parses the </p> within an unclosed <script> tag as text.
|
||||
// Same for the following tests.
|
||||
"<SCRIPT SRC=http://ha.ckers.org/xss.js?< B >",
|
||||
"<p><script SRC=http://ha.ckers.org/xss.js?< B ></p>\n",
|
||||
"<p><script SRC=http://ha.ckers.org/xss.js?< B ></p>\n",
|
||||
|
||||
"<SCRIPT SRC=//ha.ckers.org/.j>",
|
||||
"<p><script SRC=//ha.ckers.org/.j></p>\n",
|
||||
"<p><script SRC=//ha.ckers.org/.j></p>\n",
|
||||
|
||||
`<IMG SRC="javascript:alert('XSS')"`,
|
||||
"<p><IMG SRC="javascript:alert('XSS')"</p>\n",
|
||||
@ -220,11 +216,23 @@ func TestRawHtmlTag(t *testing.T) {
|
||||
|
||||
func TestQuoteEscaping(t *testing.T) {
|
||||
tests := []string{
|
||||
// Make sure quotes are transported correctly (different entities or
|
||||
// unicode, but correct semantics)
|
||||
"<p>Here are some "quotes".</p>\n",
|
||||
"<p>Here are some "quotes".</p>\n",
|
||||
|
||||
"<p>Here are some “quotes”.</p>\n",
|
||||
"<p>Here are some \u201Cquotes\u201D.</p>\n",
|
||||
|
||||
// Within a <script> tag, content gets parsed by the raw text parsing rules.
|
||||
// This test makes sure we correctly disable those parsing rules and do not
|
||||
// escape e.g. the closing </p>.
|
||||
`Here are <script> some "quotes".`,
|
||||
"<p>Here are <script> some "quotes".</p>\n",
|
||||
|
||||
// Same test for an unknown element that does not switch into raw mode.
|
||||
`Here are <eviltag> some "quotes".`,
|
||||
"<p>Here are <eviltag> some "quotes".</p>\n",
|
||||
}
|
||||
doTestsInlineParam(t, tests, 0, HTML_SKIP_STYLE|HTML_SANITIZE_OUTPUT)
|
||||
}
|
||||
|
@ -107,6 +107,10 @@ func sanitizeHtmlSafe(input []byte) []byte {
|
||||
} else {
|
||||
wr.WriteString(html.EscapeString(string(tokenizer.Raw())))
|
||||
}
|
||||
// Make sure that tags like <script> that switch the parser into raw mode
|
||||
// do not destroy the parse mode for following HTML text (the point is to
|
||||
// escape them anyway). For that, switch off raw mode in the tokenizer.
|
||||
tokenizer.NextIsNotRawText()
|
||||
case html.EndTagToken:
|
||||
// Whitelisted tokens can be written in raw.
|
||||
tag, _ := tokenizer.TagName()
|
||||
|
Loading…
x
Reference in New Issue
Block a user