From 6e4a912b1196de10c1516ffd6279476f7fb4de81 Mon Sep 17 00:00:00 2001
From: Bernd Ahlers
Date: Fri, 31 Mar 2023 10:51:50 +0200
Subject: [PATCH 1/2] Implement a custom parser for <ac:*/> tags
This replaces the workaround to replace colons in <ac:*/> tags with a
magic string with a custom parser for these tags to parse them as
ast.KindRawHTML.
The custom parser is a stripped down version of goldmark's rawHTMLParser.
---
pkg/mark/ac_tag_parser.go | 112 +++++++++++++++++++++++++++
pkg/mark/markdown.go | 22 ++----
pkg/mark/testdata/macro-include.html | 4 +-
3 files changed, 121 insertions(+), 17 deletions(-)
create mode 100644 pkg/mark/ac_tag_parser.go
diff --git a/pkg/mark/ac_tag_parser.go b/pkg/mark/ac_tag_parser.go
new file mode 100644
index 0000000..763a1ca
--- /dev/null
+++ b/pkg/mark/ac_tag_parser.go
@@ -0,0 +1,112 @@
+package mark
+
+import (
+ "bytes"
+ "github.com/yuin/goldmark/ast"
+ "github.com/yuin/goldmark/parser"
+ "github.com/yuin/goldmark/text"
+ "github.com/yuin/goldmark/util"
+ "regexp"
+)
+
+// NewACTagParser returns an inline parser that parses <ac:*/> tags to ensure that Confluence specific tags are parsed
+// as ast.KindRawHTML so they are not escaped at render time. The parser must be registered with a higher priority
+// than goldmark's linkParser. Otherwise, the linkParser would parse the <ac:*/> tags.
+func NewACTagParser() parser.InlineParser {
+ return &acTagParser{}
+}
+
+// Compile-time check that *acTagParser satisfies parser.InlineParser.
+var _ parser.InlineParser = (*acTagParser)(nil)
+
+// acTagParser is a stripped down version of goldmark's rawHTMLParser.
+// See: https://github.com/yuin/goldmark/blob/master/parser/raw_html.go
+// The struct has no fields; its methods work only on the reader passed to them.
+type acTagParser struct {
+}
+
+// Trigger returns the single byte '<'. Under goldmark's parser.InlineParser contract these are the
+// characters that cause Parse to be called.
+func (s *acTagParser) Trigger() []byte {
+ return []byte{'<'}
+}
+
+// Parse inspects the current line of block and tries to consume one raw HTML construct from it. It returns
+// the node produced by the selected helper, or nil when no branch applies. The parent node argument is ignored.
+//
+// The bytes following the first byte of the line select the strategy:
+//   - alphanumeric: an opening tag, matched against openTagRegexp
+//   - '/' followed by alphanumeric: a closing tag, matched against closeTagRegexp
+//   - '!' followed by 'A'-'Z': a declaration, consumed up to and including closeDecl
+//   - the openCDATA prefix: a CDATA section, consumed up to and including closeCDATA
+func (s *acTagParser) Parse(_ ast.Node, block text.Reader, pc parser.Context) ast.Node {
+ // line[0] is never inspected; presumably it is the '<' announced by Trigger — confirm against goldmark.
+ line, _ := block.PeekLine()
+ if len(line) > 1 && util.IsAlphaNumeric(line[1]) {
+ return s.parseMultiLineRegexp(openTagRegexp, block, pc)
+ }
+ if len(line) > 2 && line[1] == '/' && util.IsAlphaNumeric(line[2]) {
+ return s.parseMultiLineRegexp(closeTagRegexp, block, pc)
+ }
+ if len(line) > 2 && line[1] == '!' && line[2] >= 'A' && line[2] <= 'Z' {
+ return s.parseUntil(block, closeDecl, pc)
+ }
+ if bytes.HasPrefix(line, openCDATA) {
+ return s.parseUntil(block, closeCDATA, pc)
+ }
+ // Anything else is left for other inline parsers.
+ return nil
+}
+
+// tagnamePattern matches the local part of a tag name: a letter followed by letters, digits or '-'.
+var tagnamePattern = `([A-Za-z][A-Za-z0-9-]*)`
+
+// attributePattern matches one attribute including its leading whitespace. The "=value" part is optional and
+// the value may be unquoted, single-quoted or double-quoted.
+var attributePattern = `(?:[\r\n \t]+[a-zA-Z_:][a-zA-Z0-9:._-]*(?:[\r\n \t]*=[\r\n \t]*(?:[^\"'=<>` + "`" + `\x00-\x20]+|'[^']*'|"[^"]*"))?)`
+
+// Only match <ac:*/> tags. Both expressions are anchored at the start of the remaining input.
+// NOTE(review): the literals below were reconstructed from goldmark's raw_html.go after the angle-bracketed
+// text was lost from this copy of the patch — confirm against the upstream commit.
+var openTagRegexp = regexp.MustCompile("^<ac:" + tagnamePattern + attributePattern + `*[ \t]*/?>`)
+var closeTagRegexp = regexp.MustCompile("^</ac:" + tagnamePattern + `\s*>`)
+
+// Delimiters used by Parse and parseUntil for CDATA sections and declarations.
+var openCDATA = []byte("<![CDATA[")
+var closeCDATA = []byte("]]>")
+var closeDecl = []byte(">")
+
+// parseUntil collects input from the current reader position up to and including the first occurrence of
+// closer, possibly spanning several lines, and returns it as a raw HTML node. If the input ends before closer
+// is found, the reader is rewound to where it started and nil is returned. The parser.Context is unused.
+func (s *acTagParser) parseUntil(block text.Reader, closer []byte, _ parser.Context) ast.Node {
+ // Remember the starting position so a failed search leaves the reader untouched.
+ savedLine, savedSegment := block.Position()
+ node := ast.NewRawHTML()
+ for {
+ line, segment := block.PeekLine()
+ if line == nil {
+ // No more lines: closer was never found.
+ break
+ }
+ index := bytes.Index(line, closer)
+ if index > -1 {
+ // Keep only the part of this line that ends right after closer, and move the reader past it.
+ node.Segments.Append(segment.WithStop(segment.Start + index + len(closer)))
+ block.Advance(index + len(closer))
+ return node
+ }
+ // closer is not on this line: take the whole line and continue with the next one.
+ node.Segments.Append(segment)
+ block.AdvanceLine()
+ }
+ block.SetPosition(savedLine, savedSegment)
+ return nil
+}
+
+// parseMultiLineRegexp tries to match reg at the current reader position. On success it returns a raw HTML
+// node with one segment per line covered by the match and leaves the reader just after the match; otherwise
+// it returns nil. The parser.Context is unused.
+func (s *acTagParser) parseMultiLineRegexp(reg *regexp.Regexp, block text.Reader, _ parser.Context) ast.Node {
+ sline, ssegment := block.Position()
+ if block.Match(reg) {
+ node := ast.NewRawHTML()
+ // The position after a successful Match is taken as the end of the match; the reader is then rewound
+ // so the matched range can be walked line by line.
+ eline, esegment := block.Position()
+ block.SetPosition(sline, ssegment)
+ for {
+ line, segment := block.PeekLine()
+ if line == nil {
+ break
+ }
+ l, _ := block.Position()
+ // The first line starts where the match started rather than at the segment returned by PeekLine.
+ start := segment.Start
+ if l == sline {
+ start = ssegment.Start
+ }
+ // The last line stops where the match ended rather than at the end of the line.
+ end := segment.Stop
+ if l == eline {
+ end = esegment.Start
+ }
+
+ node.Segments.Append(text.NewSegment(start, end))
+ if l == eline {
+ // Consume only the matched part of the final line and stop.
+ block.Advance(end - start)
+ break
+ } else {
+ block.AdvanceLine()
+ }
+ }
+ return node
+ }
+ return nil
+}
diff --git a/pkg/mark/markdown.go b/pkg/mark/markdown.go
index 2ff2b71..3c44ab8 100644
--- a/pkg/mark/markdown.go
+++ b/pkg/mark/markdown.go
@@ -430,22 +430,9 @@ func (r *ConfluenceRenderer) renderCodeBlock(writer util.BufWriter, source []byt
return ast.WalkContinue, nil
}
-// compileMarkdown will replace tags like <ac:rich-text-body> with escaped
-// equivalent, because goldmark markdown parser replaces that tags with
-// <a href="ac:rich-text-body">ac:rich-text-body</a> because of the autolink
-// rule.
func CompileMarkdown(markdown []byte, stdlib *stdlib.Lib) string {
log.Tracef(nil, "rendering markdown:\n%s", string(markdown))
- colon := []byte("---bf-COLON---")
-
- tags := regexp.MustCompile(`?ac:[^>]+>`)
-
- for _, match := range tags.FindAll(markdown, -1) {
- // Replace the colon in all "<ac:*>" tags with the colon bytes to avoid having Goldmark escape the HTML output.
- markdown = bytes.ReplaceAll(markdown, match, bytes.ReplaceAll(match, []byte(":"), colon))
- }
-
converter := goldmark.New(
goldmark.WithExtensions(
extension.GFM,
@@ -461,6 +448,12 @@ func CompileMarkdown(markdown []byte, stdlib *stdlib.Lib) string {
html.WithUnsafe(),
))
+ converter.Parser().AddOptions(parser.WithInlineParsers(
+ // Must be registered with a higher priority than goldmark's linkParser to make sure goldmark doesn't parse
+ // the <ac:*/> tags.
+ util.Prioritized(NewACTagParser(), 199),
+ ))
+
converter.Renderer().AddOptions(renderer.WithNodeRenderers(
util.Prioritized(NewConfluenceRenderer(stdlib), 100),
))
@@ -472,8 +465,7 @@ func CompileMarkdown(markdown []byte, stdlib *stdlib.Lib) string {
panic(err)
}
- // Restore all the colons we previously replaced.
- html := bytes.ReplaceAll(buf.Bytes(), colon, []byte(":"))
+ html := buf.Bytes()
log.Tracef(nil, "rendered markdown to html:\n%s", string(html))
diff --git a/pkg/mark/testdata/macro-include.html b/pkg/mark/testdata/macro-include.html
index 55f1496..8d8ba05 100644
--- a/pkg/mark/testdata/macro-include.html
+++ b/pkg/mark/testdata/macro-include.html
@@ -1,6 +1,6 @@
bar
-
+
true
Attention
This is an info!
-
\ No newline at end of file
+
From 80d906417c230c86e539e3765b36df6a6ae37af8 Mon Sep 17 00:00:00 2001
From: Bernd Ahlers
Date: Fri, 31 Mar 2023 11:09:58 +0200
Subject: [PATCH 2/2] Fix custom link renderer and add tests for Confluence
links
Since we now have a custom parser for <ac:*/> tags, the custom link
renderer added an additional </a> tag at the end of each internal
Confluence link.
Add tests for internal links and add an example for internal links with
spaces in page titles to the README file.
---
README.md | 2 ++
pkg/mark/markdown.go | 3 +--
pkg/mark/testdata/links.html | 4 ++++
pkg/mark/testdata/links.md | 8 ++++++++
4 files changed, 15 insertions(+), 2 deletions(-)
diff --git a/README.md b/README.md
index 78cb49a..c8f3b8f 100644
--- a/README.md
+++ b/README.md
@@ -517,6 +517,8 @@ See task MYJIRA-123.
This is a [link to an existing confluence page](ac:Pagetitle)
And this is how to link when the linktext is the same as the [Pagetitle](ac:)
+
+Link to a [page title with space](<ac:With Multiple Words>)
```
### Add width for an image
diff --git a/pkg/mark/markdown.go b/pkg/mark/markdown.go
index 3c44ab8..680375d 100644
--- a/pkg/mark/markdown.go
+++ b/pkg/mark/markdown.go
@@ -276,9 +276,8 @@ func (r *ConfluenceRenderer) renderLink(writer util.BufWriter, source []byte, no
if err != nil {
return ast.WalkStop, err
}
-
- return ast.WalkSkipChildren, nil
}
+ return ast.WalkSkipChildren, nil
}
return r.goldmarkRenderLink(writer, source, node, entering)
}
diff --git a/pkg/mark/testdata/links.html b/pkg/mark/testdata/links.html
index 3b1f468..625f00e 100644
--- a/pkg/mark/testdata/links.html
+++ b/pkg/mark/testdata/links.html
@@ -1,5 +1,9 @@
Use https://example.com
Use aaa
+Use
+Use
+Use
+Use
Use footnotes link