2024-09-26 15:24:39 +02:00
|
|
|
package page
|
2020-11-30 09:47:46 +02:00
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
"bytes"
|
|
|
|
|
"fmt"
|
2025-12-08 21:32:28 +01:00
|
|
|
"net/http"
|
2020-11-30 09:47:46 +02:00
|
|
|
"net/url"
|
|
|
|
|
"os"
|
|
|
|
|
"path/filepath"
|
|
|
|
|
"regexp"
|
2025-12-08 21:32:28 +01:00
|
|
|
"strings"
|
2020-11-30 09:47:46 +02:00
|
|
|
|
2024-09-26 15:24:39 +02:00
|
|
|
"github.com/kovetskiy/mark/confluence"
|
|
|
|
|
"github.com/kovetskiy/mark/metadata"
|
2020-12-04 00:28:52 +03:00
|
|
|
"github.com/reconquest/karma-go"
|
|
|
|
|
"github.com/reconquest/pkg/log"
|
2020-11-30 09:47:46 +02:00
|
|
|
)
|
|
|
|
|
|
2020-12-04 00:28:52 +03:00
|
|
|
// LinkSubstitution describes a single link rewrite to apply to a markdown
// document: every "](From)" occurrence is to be replaced with "](To)".
type LinkSubstitution struct {
	// From is the original relative link target as written in the markdown.
	From string
	// To is the resolved Confluence URL that should replace From.
	To string
}
|
|
|
|
|
|
|
|
|
|
// markdownLink is a parsed markdown link target, split into its components
// by parseLinks (e.g. "page.md#section" -> filename "page.md", hash "section").
type markdownLink struct {
	// full is the entire link target as captured from the markdown.
	full string
	// filename is the file part of the target (before any "#").
	filename string
	// hash is the fragment part after "#", empty when absent.
	hash string
}
|
|
|
|
|
|
|
|
|
|
func ResolveRelativeLinks(
|
|
|
|
|
api *confluence.API,
|
2024-09-26 15:24:39 +02:00
|
|
|
meta *metadata.Meta,
|
2020-11-30 09:47:46 +02:00
|
|
|
markdown []byte,
|
|
|
|
|
base string,
|
2023-03-20 22:54:11 +01:00
|
|
|
spaceFromCli string,
|
2023-03-20 19:19:31 +01:00
|
|
|
titleFromH1 bool,
|
2025-08-29 14:37:59 +02:00
|
|
|
titleFromFilename bool,
|
2023-08-09 13:06:31 +02:00
|
|
|
parents []string,
|
2024-09-30 21:00:49 -04:00
|
|
|
titleAppendGeneratedHash bool,
|
2020-12-04 00:28:52 +03:00
|
|
|
) ([]LinkSubstitution, error) {
|
|
|
|
|
matches := parseLinks(string(markdown))
|
|
|
|
|
|
2025-12-19 12:34:29 +01:00
|
|
|
// If the user didn't provide --space, inherit the current document's space so
|
|
|
|
|
// relative links can be resolved within the same space.
|
|
|
|
|
spaceForLinks := spaceFromCli
|
|
|
|
|
if spaceForLinks == "" && meta != nil {
|
|
|
|
|
spaceForLinks = meta.Space
|
|
|
|
|
}
|
|
|
|
|
|
2020-12-04 00:28:52 +03:00
|
|
|
links := []LinkSubstitution{}
|
|
|
|
|
for _, match := range matches {
|
|
|
|
|
log.Tracef(
|
|
|
|
|
nil,
|
|
|
|
|
"found a relative link: full=%s filename=%s hash=%s",
|
|
|
|
|
match.full,
|
|
|
|
|
match.filename,
|
|
|
|
|
match.hash,
|
|
|
|
|
)
|
2025-12-19 12:34:29 +01:00
|
|
|
resolved, err := resolveLink(api, base, match, spaceForLinks, titleFromH1, titleFromFilename, parents, titleAppendGeneratedHash)
|
2020-12-04 00:28:52 +03:00
|
|
|
if err != nil {
|
|
|
|
|
return nil, karma.Format(err, "resolve link: %q", match.full)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if resolved == "" {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
links = append(links, LinkSubstitution{
|
|
|
|
|
From: match.full,
|
|
|
|
|
To: resolved,
|
|
|
|
|
})
|
2020-11-30 09:47:46 +02:00
|
|
|
}
|
|
|
|
|
|
2020-12-04 00:28:52 +03:00
|
|
|
return links, nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func resolveLink(
|
|
|
|
|
api *confluence.API,
|
|
|
|
|
base string,
|
|
|
|
|
link markdownLink,
|
2025-12-19 12:34:29 +01:00
|
|
|
spaceForLinks string,
|
2023-03-20 19:19:31 +01:00
|
|
|
titleFromH1 bool,
|
2025-08-29 14:37:59 +02:00
|
|
|
titleFromFilename bool,
|
2023-08-09 13:06:31 +02:00
|
|
|
parents []string,
|
2024-09-30 21:00:49 -04:00
|
|
|
titleAppendGeneratedHash bool,
|
2020-12-04 00:28:52 +03:00
|
|
|
) (string, error) {
|
|
|
|
|
var result string
|
|
|
|
|
|
|
|
|
|
if len(link.filename) > 0 {
|
2023-03-22 19:10:19 -04:00
|
|
|
filepath := filepath.Join(base, link.filename)
|
2021-09-11 14:37:45 +03:00
|
|
|
|
2023-03-22 19:10:19 -04:00
|
|
|
log.Tracef(nil, "filepath: %s", filepath)
|
|
|
|
|
stat, err := os.Stat(filepath)
|
2021-09-11 14:37:45 +03:00
|
|
|
if err != nil {
|
|
|
|
|
return "", nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if stat.IsDir() {
|
2020-12-04 00:28:52 +03:00
|
|
|
return "", nil
|
|
|
|
|
}
|
|
|
|
|
|
2023-03-22 19:10:19 -04:00
|
|
|
linkContents, err := os.ReadFile(filepath)
|
|
|
|
|
|
2025-12-08 21:32:28 +01:00
|
|
|
contentType := http.DetectContentType(linkContents)
|
|
|
|
|
// Check if the MIME type starts with "text/"
|
|
|
|
|
if !strings.HasPrefix(contentType, "text/") {
|
|
|
|
|
log.Debugf(nil, "Ignoring link to file %q: detected content type %v", filepath, contentType)
|
2023-03-22 19:10:19 -04:00
|
|
|
return "", nil
|
|
|
|
|
}
|
|
|
|
|
|
2020-12-04 00:28:52 +03:00
|
|
|
if err != nil {
|
2023-03-22 19:10:19 -04:00
|
|
|
return "", karma.Format(err, "read file: %s", filepath)
|
2020-12-04 00:28:52 +03:00
|
|
|
}
|
2020-11-30 09:47:46 +02:00
|
|
|
|
2021-11-08 20:15:59 +06:00
|
|
|
linkContents = bytes.ReplaceAll(
|
|
|
|
|
linkContents,
|
|
|
|
|
[]byte("\r\n"),
|
|
|
|
|
[]byte("\n"),
|
|
|
|
|
)
|
|
|
|
|
|
2020-12-04 00:28:52 +03:00
|
|
|
// This helps to determine if found link points to file that's
|
|
|
|
|
// not markdown or have mark required metadata
|
2025-12-19 12:34:29 +01:00
|
|
|
linkMeta, _, err := metadata.ExtractMeta(linkContents, spaceForLinks, titleFromH1, titleFromFilename, filepath, parents, titleAppendGeneratedHash)
|
2020-12-04 00:28:52 +03:00
|
|
|
if err != nil {
|
|
|
|
|
log.Errorf(
|
|
|
|
|
err,
|
|
|
|
|
"unable to extract metadata from %q; ignoring the relative link",
|
2023-03-22 19:10:19 -04:00
|
|
|
filepath,
|
2020-12-04 00:28:52 +03:00
|
|
|
)
|
2020-11-30 09:47:46 +02:00
|
|
|
|
2020-12-04 00:28:52 +03:00
|
|
|
return "", nil
|
2020-11-30 09:47:46 +02:00
|
|
|
}
|
2020-12-04 00:28:52 +03:00
|
|
|
|
|
|
|
|
if linkMeta == nil {
|
|
|
|
|
return "", nil
|
2020-11-30 09:47:46 +02:00
|
|
|
}
|
|
|
|
|
|
2023-03-20 19:19:31 +01:00
|
|
|
log.Tracef(
|
|
|
|
|
nil,
|
|
|
|
|
"extracted metadata: space=%s title=%s",
|
|
|
|
|
linkMeta.Space,
|
|
|
|
|
linkMeta.Title,
|
|
|
|
|
)
|
|
|
|
|
|
2020-12-04 00:28:52 +03:00
|
|
|
result, err = getConfluenceLink(api, linkMeta.Space, linkMeta.Title)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return "", karma.Format(
|
|
|
|
|
err,
|
|
|
|
|
"find confluence page: %s / %s / %s",
|
2023-03-22 19:10:19 -04:00
|
|
|
filepath,
|
2020-12-04 00:28:52 +03:00
|
|
|
linkMeta.Space,
|
|
|
|
|
linkMeta.Title,
|
|
|
|
|
)
|
2020-11-30 09:47:46 +02:00
|
|
|
}
|
|
|
|
|
|
2020-12-04 00:28:52 +03:00
|
|
|
if result == "" {
|
|
|
|
|
return "", nil
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if len(link.hash) > 0 {
|
|
|
|
|
result = result + "#" + link.hash
|
2020-11-30 09:47:46 +02:00
|
|
|
}
|
2020-12-04 00:28:52 +03:00
|
|
|
|
|
|
|
|
return result, nil
|
2020-11-30 09:47:46 +02:00
|
|
|
}
|
|
|
|
|
|
2020-12-04 00:28:52 +03:00
|
|
|
func SubstituteLinks(markdown []byte, links []LinkSubstitution) []byte {
|
2020-11-30 09:47:46 +02:00
|
|
|
for _, link := range links {
|
2020-12-04 00:28:52 +03:00
|
|
|
if link.From == link.To {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
log.Tracef(nil, "substitute link: %q -> %q", link.From, link.To)
|
|
|
|
|
|
2020-11-30 09:47:46 +02:00
|
|
|
markdown = bytes.ReplaceAll(
|
|
|
|
|
markdown,
|
2020-12-04 00:28:52 +03:00
|
|
|
[]byte(fmt.Sprintf("](%s)", link.From)),
|
|
|
|
|
[]byte(fmt.Sprintf("](%s)", link.To)),
|
2020-11-30 09:47:46 +02:00
|
|
|
)
|
|
|
|
|
}
|
2020-12-04 00:28:52 +03:00
|
|
|
|
2020-11-30 09:47:46 +02:00
|
|
|
return markdown
|
|
|
|
|
}
|
|
|
|
|
|
2020-12-04 00:28:52 +03:00
|
|
|
func parseLinks(markdown string) []markdownLink {
|
2023-04-26 08:02:35 +02:00
|
|
|
// Matches links but not inline images
|
2024-07-25 23:03:21 +02:00
|
|
|
re := regexp.MustCompile(`[^\!]\[.+\]\((([^\)#]+)?#?([^\)]+)?)\)`)
|
2020-12-04 00:28:52 +03:00
|
|
|
matches := re.FindAllStringSubmatch(markdown, -1)
|
|
|
|
|
|
|
|
|
|
links := make([]markdownLink, len(matches))
|
|
|
|
|
for i, match := range matches {
|
|
|
|
|
links[i] = markdownLink{
|
|
|
|
|
full: match[1],
|
|
|
|
|
filename: match[2],
|
|
|
|
|
hash: match[3],
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return links
|
2020-11-30 09:47:46 +02:00
|
|
|
}
|
|
|
|
|
|
2023-04-18 15:06:16 +02:00
|
|
|
// getConfluenceLink build (to be) link for Confluence, and tries to verify from
|
2020-12-04 00:28:52 +03:00
|
|
|
// API if there's real link available
|
2021-09-11 14:37:45 +03:00
|
|
|
func getConfluenceLink(
|
|
|
|
|
api *confluence.API,
|
|
|
|
|
space, title string,
|
|
|
|
|
) (string, error) {
|
2021-03-31 17:49:01 +01:00
|
|
|
page, err := api.FindPage(space, title, "page")
|
2020-11-30 09:47:46 +02:00
|
|
|
if err != nil {
|
2020-12-04 00:28:52 +03:00
|
|
|
return "", karma.Format(err, "api: find page")
|
|
|
|
|
}
|
2025-12-19 12:34:29 +01:00
|
|
|
if page == nil {
|
|
|
|
|
// Without a page ID there is no stable way to produce
|
|
|
|
|
// /wiki/spaces/<space>/pages/<id>/<name>.
|
|
|
|
|
return "", nil
|
|
|
|
|
}
|
2020-12-04 00:28:52 +03:00
|
|
|
|
2025-12-19 12:34:29 +01:00
|
|
|
// Confluence Cloud web UI URLs can be returned either as a path ("/wiki/..." or
|
|
|
|
|
// "/ex/confluence/<cloudId>/wiki/...") or as a full absolute URL.
|
|
|
|
|
absolute, err := makeAbsoluteConfluenceWebUIURL(api.BaseURL, page.Links.Full)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return "", karma.Format(err, "build confluence webui URL")
|
2020-11-30 09:47:46 +02:00
|
|
|
}
|
|
|
|
|
|
2025-12-19 12:34:29 +01:00
|
|
|
return absolute, nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func makeAbsoluteConfluenceWebUIURL(baseURL string, webui string) (string, error) {
|
|
|
|
|
if webui == "" {
|
|
|
|
|
return "", nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
u, err := url.Parse(webui)
|
2024-07-27 19:52:32 +02:00
|
|
|
if err != nil {
|
2025-12-19 12:34:29 +01:00
|
|
|
return "", err
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
path := normalizeConfluenceWebUIPath(u.Path)
|
|
|
|
|
if path == "" {
|
|
|
|
|
return "", nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// If Confluence returns an absolute URL, trust its host/scheme.
|
|
|
|
|
if u.Scheme != "" && u.Host != "" {
|
|
|
|
|
baseURL = u.Scheme + "://" + u.Host
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
baseURL = strings.TrimSuffix(baseURL, "/")
|
|
|
|
|
if !strings.HasPrefix(path, "/") {
|
|
|
|
|
path = "/" + path
|
2024-07-27 19:52:32 +02:00
|
|
|
}
|
2025-12-19 12:34:29 +01:00
|
|
|
|
|
|
|
|
result := baseURL + path
|
|
|
|
|
if u.RawQuery != "" {
|
|
|
|
|
result += "?" + u.RawQuery
|
|
|
|
|
}
|
|
|
|
|
if u.Fragment != "" {
|
|
|
|
|
result += "#" + u.Fragment
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return result, nil
|
2020-11-30 09:47:46 +02:00
|
|
|
}
|
2025-12-18 15:55:39 +01:00
|
|
|
|
|
|
|
|
// exConfluencePathRegexp matches Confluence Cloud "experience" paths of the
// form "/ex/confluence/<cloudId>/wiki/..." and captures the canonical
// "/wiki/..." suffix. Compiled once at package level instead of per call.
var exConfluencePathRegexp = regexp.MustCompile(`^/ex/confluence/[^/]+(/wiki/.*)$`)

// normalizeConfluenceWebUIPath rewrites Confluence Cloud "experience" URLs
// ("/ex/confluence/<cloudId>/wiki/..."), to canonical wiki paths ("/wiki/...").
// Paths that do not match the experience form are returned unchanged.
func normalizeConfluenceWebUIPath(path string) string {
	if path == "" {
		return path
	}

	match := exConfluencePathRegexp.FindStringSubmatch(path)
	if len(match) == 2 {
		return match[1]
	}

	return path
}
|