Improve fence line detection.

Rename isFencedCode to isFenceLine, document it, and add tests. Add support for making the final newline optional; this will be needed in future commits.
2025-02-23 08:38:23 +00:00 · 2016-07-15 14:41:27 -04:00 · 2016-07-15 14:41:27 -04:00 · 0049676599
commit 0049676599
parent 96537c6eaa
2 changed files with 92 additions and 27 deletions
--- a/block.go
+++ b/block.go
@ -559,21 +559,24 @@ func (*parser) isHRule(data []byte) bool {
 	return n >= 3
 }

-func (p *parser) isFencedCode(data []byte, syntax **string, oldmarker string) (skip int, marker string) {
+// isFenceLine checks if there's a fence line (e.g., ``` or ``` go) at the beginning of data,
+// and returns the end index if so, or 0 otherwise. It also returns the marker found.
+// If syntax is not nil, it gets set to the syntax specified in the fence line.
+// A final newline is mandatory to recognize the fence line, unless newlineOptional is true.
+func isFenceLine(data []byte, syntax *string, oldmarker string, newlineOptional bool) (end int, marker string) {
 	i, size := 0, 0
-	skip = 0

 	// skip up to three spaces
 	for i < len(data) && i < 3 && data[i] == ' ' {
 		i++
 	}
-	if i >= len(data) {
-		return
-	}

 	// check for the marker characters: ~ or `
+	if i >= len(data) {
+		return 0, ""
+	}
 	if data[i] != '~' && data[i] != '`' {
-		return
+		return 0, ""
 	}

 	c := data[i]
@ -584,27 +587,28 @@ func (p *parser) isFencedCode(data []byte, syntax **string, oldmarker string) (s
 		i++
 	}

-	if i >= len(data) {
-		return
-	}
-
 	// the marker char must occur at least 3 times
 	if size < 3 {
-		return
+		return 0, ""
 	}
 	marker = string(data[i-size : i])

 	// if this is the end marker, it must match the beginning marker
 	if oldmarker != "" && marker != oldmarker {
-		return
+		return 0, ""
 	}

+	// TODO(shurcooL): It's probably a good idea to simplify the 2 code paths here
+	// into one, always get the syntax, and discard it if the caller doesn't care.
 	if syntax != nil {
 		syn := 0
 		i = skipChar(data, i, ' ')

 		if i >= len(data) {
-			return
+			if newlineOptional && i == len(data) {
+				return i, marker
+			}
+			return 0, ""
 		}

 		syntaxStart := i
@ -619,7 +623,7 @@ func (p *parser) isFencedCode(data []byte, syntax **string, oldmarker string) (s
 			}

 			if i >= len(data) || data[i] != '}' {
-				return
+				return 0, ""
 			}

 			// strip all whitespace at the beginning and the end
@ -641,22 +645,23 @@ func (p *parser) isFencedCode(data []byte, syntax **string, oldmarker string) (s
 			}
 		}

-		language := string(data[syntaxStart : syntaxStart+syn])
-		*syntax = &language
+		*syntax = string(data[syntaxStart : syntaxStart+syn])
 	}

 	i = skipChar(data, i, ' ')
 	if i >= len(data) || data[i] != '\n' {
-		return
+		if newlineOptional && i == len(data) {
+			return i, marker
+		}
+		return 0, ""
 	}

-	skip = i + 1
-	return
+	return i + 1, marker // Take newline into account.
 }

 func (p *parser) fencedCode(out *bytes.Buffer, data []byte, doRender bool) int {
-	var lang *string
-	beg, marker := p.isFencedCode(data, &lang, "")
+	var syntax string
+	beg, marker := isFenceLine(data, &syntax, "", true)
 	if beg == 0 || beg >= len(data) {
 		return 0
 	}
@ -667,7 +672,7 @@ func (p *parser) fencedCode(out *bytes.Buffer, data []byte, doRender bool) int {
 		// safe to assume beg < len(data)

 		// check for the end of the code block
-		fenceEnd, _ := p.isFencedCode(data[beg:], nil, marker)
+		fenceEnd, _ := isFenceLine(data[beg:], nil, marker, true)
 		if fenceEnd != 0 {
 			beg += fenceEnd
 			break
@ -688,11 +693,6 @@ func (p *parser) fencedCode(out *bytes.Buffer, data []byte, doRender bool) int {
 		beg = end
 	}

-	syntax := ""
-	if lang != nil {
-		syntax = *lang
-	}
-
 	if doRender {
 		p.r.BlockCode(out, work.Bytes(), syntax)
 	}
--- a/block_test.go
+++ b/block_test.go
@ -1636,3 +1636,68 @@ func TestCDATA(t *testing.T) {
 `,
 	}, EXTENSION_FENCED_CODE)
 }
+
+func TestIsFenceLine(t *testing.T) {
+	tests := []struct {
+		data            []byte
+		syntaxRequested bool
+		newlineOptional bool
+		wantEnd         int
+		wantMarker      string
+		wantSyntax      string
+	}{
+		{
+			data:    []byte("```"),
+			wantEnd: 0,
+		},
+		{
+			data:       []byte("```\nstuff here\n"),
+			wantEnd:    4,
+			wantMarker: "```",
+		},
+		{
+			data:    []byte("stuff here\n```\n"),
+			wantEnd: 0,
+		},
+		{
+			data:            []byte("```"),
+			newlineOptional: true,
+			wantEnd:         3,
+			wantMarker:      "```",
+		},
+		{
+			data:            []byte("```"),
+			syntaxRequested: true,
+			newlineOptional: true,
+			wantEnd:         3,
+			wantMarker:      "```",
+		},
+		{
+			data:            []byte("``` go"),
+			syntaxRequested: true,
+			newlineOptional: true,
+			wantEnd:         6,
+			wantMarker:      "```",
+			wantSyntax:      "go",
+		},
+	}
+
+	for _, test := range tests {
+		var syntax *string
+		if test.syntaxRequested {
+			syntax = new(string)
+		}
+		end, marker := isFenceLine(test.data, syntax, "```", test.newlineOptional)
+		if got, want := end, test.wantEnd; got != want {
+			t.Errorf("got end %v, want %v", got, want)
+		}
+		if got, want := marker, test.wantMarker; got != want {
+			t.Errorf("got marker %q, want %q", got, want)
+		}
+		if test.syntaxRequested {
+			if got, want := *syntax, test.wantSyntax; got != want {
+				t.Errorf("got syntax %q, want %q", got, want)
+			}
+		}
+	}
+}