mark/metadata/metadata.go

200 lines
4.5 KiB
Go
Raw Permalink Normal View History

2024-09-26 15:24:39 +02:00
package metadata
2019-04-08 22:12:00 +03:00
import (
"bufio"
"bytes"
"crypto/sha256"
"fmt"
2019-04-08 22:12:00 +03:00
"regexp"
"strings"
2019-04-19 10:31:41 +03:00
2020-11-03 17:12:51 +03:00
"github.com/reconquest/pkg/log"
2019-04-19 10:31:41 +03:00
)
2019-04-08 22:12:00 +03:00
// Names of the metadata headers recognised by ExtractMeta. Header names read
// from a document are title-cased before being compared against these values.
const (
	// HeaderParent adds one entry to Meta.Parents; it may be repeated.
	HeaderParent = `Parent`
	// HeaderSpace sets Meta.Space.
	HeaderSpace = `Space`
	// HeaderType sets Meta.Type.
	HeaderType = `Type`
	// HeaderTitle sets Meta.Title.
	HeaderTitle = `Title`
	// HeaderLayout sets Meta.Layout.
	HeaderLayout = `Layout`
	// HeaderEmoji sets Meta.Emoji.
	HeaderEmoji = `Emoji`
	// HeaderAttachment adds one entry to Meta.Attachments; it may be repeated.
	HeaderAttachment = `Attachment`
	// HeaderLabel adds one entry to Meta.Labels; it may be repeated.
	HeaderLabel = `Label`
	// HeaderInclude is accepted by ExtractMeta but not stored in Meta;
	// includes are parsed by a different function.
	HeaderInclude = `Include`
	// HeaderSidebar sets Meta.Sidebar and switches Meta.Layout to "article".
	HeaderSidebar = `Sidebar`
	// ContentAppearance is the header that selects Meta.ContentAppearance.
	ContentAppearance = `Content-Appearance`
)
// Meta holds the metadata that ExtractMeta collects from the header comments
// at the top of a document.
type Meta struct {
	// Parents lists the parent pages, in the order they were collected.
	Parents []string
	// Space is taken from the Space header or, when that is absent, from the
	// value supplied on the command line.
	Space string
	// Type is taken from the Type header; ExtractMeta defaults it to "page".
	Type string
	// Title is taken from the Title header or, on request, from the
	// document's H1 heading.
	Title string
	// Layout is taken from the Layout header; a Sidebar header sets it to
	// "article".
	Layout string
	// Sidebar is taken from the Sidebar header.
	Sidebar string
	// Emoji is taken from the Emoji header.
	Emoji string
	// Attachments collects the values of every Attachment header.
	Attachments []string
	// Labels collects the values of every Label header.
	Labels []string
	// ContentAppearance is either FullWidthContentAppearance or
	// FixedContentAppearance once populated by ExtractMeta.
	ContentAppearance string
}
// FullWidthContentAppearance is the content appearance ExtractMeta falls back
// to whenever the Content-Appearance header is absent or is anything other
// than FixedContentAppearance.
const FullWidthContentAppearance = "full-width"

// FixedContentAppearance is the only Content-Appearance header value that
// switches a page away from the full-width default.
const FixedContentAppearance = "fixed"
2019-08-08 23:41:26 +03:00
var (
	// reHeaderPatternV2 matches a single-line `<!-- Name: value -->` header
	// comment. Group 1 is the header name (everything up to the first colon)
	// and group 2 is the raw value, which may still carry trailing whitespace.
	reHeaderPatternV2 = regexp.MustCompile(`<!--\s*([^:]+):\s*(.*)\s*-->`)

	// reHeaderPatternMacro matches a line that opens a `<!-- Macro: ...`
	// comment, whether or not the comment is closed on the same line.
	reHeaderPatternMacro = regexp.MustCompile(`<!-- Macro: .*`)
)
func ExtractMeta(data []byte, spaceFromCli string, titleFromH1 bool, parents []string, titleAppendGeneratedHash bool) (*Meta, []byte, error) {
2019-05-01 16:58:28 +03:00
var (
2019-08-08 23:41:26 +03:00
meta *Meta
offset int
2019-05-01 16:58:28 +03:00
)
2019-04-08 22:12:00 +03:00
scanner := bufio.NewScanner(bytes.NewBuffer(data))
for scanner.Scan() {
line := scanner.Text()
if err := scanner.Err(); err != nil {
2019-08-08 23:41:26 +03:00
return nil, nil, err
2019-04-08 22:12:00 +03:00
}
2019-08-13 19:48:11 +03:00
offset += len(line) + 1
2019-08-08 23:41:26 +03:00
matches := reHeaderPatternV2.FindStringSubmatch(line)
2019-04-08 22:12:00 +03:00
if matches == nil {
2024-07-25 23:48:48 +02:00
matches = reHeaderPatternMacro.FindStringSubmatch(line)
// If we have a match, then we started reading a macro.
// We want to keep it in the document for it to be read by ExtractMacros
if matches != nil {
offset -= len(line) + 1
2019-05-01 16:58:28 +03:00
}
2024-07-25 23:48:48 +02:00
break
2019-04-08 22:12:00 +03:00
}
if meta == nil {
meta = &Meta{}
meta.Type = "page" // Default if not specified
meta.ContentAppearance = FullWidthContentAppearance // Default to full-width for backwards compatibility
2019-04-08 22:12:00 +03:00
}
//nolint:staticcheck
2019-04-08 22:12:00 +03:00
header := strings.Title(matches[1])
2019-04-19 10:31:41 +03:00
var value string
if len(matches) > 1 {
value = strings.TrimSpace(matches[2])
}
2019-04-08 22:12:00 +03:00
switch header {
case HeaderParent:
2019-04-19 10:31:41 +03:00
meta.Parents = append(meta.Parents, value)
2019-04-08 22:12:00 +03:00
case HeaderSpace:
meta.Space = strings.TrimSpace(value)
2019-04-08 22:12:00 +03:00
case HeaderType:
meta.Type = strings.TrimSpace(value)
2019-04-08 22:12:00 +03:00
case HeaderTitle:
2019-04-19 10:31:41 +03:00
meta.Title = strings.TrimSpace(value)
2019-04-08 22:12:00 +03:00
case HeaderLayout:
2019-04-19 10:31:41 +03:00
meta.Layout = strings.TrimSpace(value)
2021-06-17 14:56:27 -04:00
case HeaderSidebar:
meta.Layout = "article"
meta.Sidebar = strings.TrimSpace(value)
case HeaderEmoji:
meta.Emoji = strings.TrimSpace(value)
2019-04-19 10:31:41 +03:00
case HeaderAttachment:
meta.Attachments = append(meta.Attachments, value)
2019-04-08 22:12:00 +03:00
2021-01-04 13:08:58 +02:00
case HeaderLabel:
meta.Labels = append(meta.Labels, value)
case HeaderInclude:
// Includes are parsed by a different func
continue
case ContentAppearance:
if strings.TrimSpace(value) == FixedContentAppearance {
meta.ContentAppearance = FixedContentAppearance
} else {
meta.ContentAppearance = FullWidthContentAppearance
}
2019-04-08 22:12:00 +03:00
default:
2019-04-19 10:31:41 +03:00
log.Errorf(
nil,
`encountered unknown header %q line: %#v`,
2019-04-08 22:12:00 +03:00
header,
line,
)
continue
}
}
2023-03-20 22:54:11 +01:00
if titleFromH1 || spaceFromCli != "" {
if meta == nil {
meta = &Meta{}
}
if meta.Type == "" {
meta.Type = "page"
}
if meta.ContentAppearance == "" {
meta.ContentAppearance = FullWidthContentAppearance // Default to full-width for backwards compatibility
}
2023-03-20 22:54:11 +01:00
if titleFromH1 && meta.Title == "" {
meta.Title = ExtractDocumentLeadingH1(data)
}
2023-03-20 22:54:11 +01:00
if spaceFromCli != "" && meta.Space == "" {
meta.Space = spaceFromCli
}
}
2019-04-08 22:12:00 +03:00
if meta == nil {
2019-08-08 23:41:26 +03:00
return nil, data, nil
2019-04-08 22:12:00 +03:00
}
2023-08-09 13:06:31 +02:00
// Prepend parent pages that are defined via the cli flag
if len(parents) > 0 && parents[0] != "" {
meta.Parents = append(parents, meta.Parents...)
}
// deterministically generate a hash from the page's parents, space, and title
if titleAppendGeneratedHash {
path := strings.Join(append(meta.Parents, meta.Space, meta.Title), "/")
pathHash := sha256.Sum256([]byte(path))
// postfix is an 8-character hexadecimal string representation of the first 4 out of 32 bytes of the hash
meta.Title = fmt.Sprintf("%s - %x", meta.Title, pathHash[0:4])
log.Debugf(
nil,
"appended hash to page title: %s",
meta.Title,
)
}
2019-08-13 19:48:11 +03:00
return meta, data[offset:], nil
2019-04-08 22:12:00 +03:00
}
2024-09-26 15:24:39 +02:00
// reDocumentLeadingH1 matches a level-one ATX heading: a line that starts
// with at most three spaces of indentation, a single '#', and at least one
// space or tab. Group 1 is the rest of the line.
var reDocumentLeadingH1 = regexp.MustCompile(`(?m)^ {0,3}#[ \t]+(.*)$`)

// ExtractDocumentLeadingH1 returns the text of the first H1 heading
// (`# Title`) found in markdown, with surrounding whitespace and any trailing
// carriage return removed. It returns "" when the document has no H1.
//
// Only a '#' at the start of a line counts, so deeper headings such as
// `## Section` and '#' characters in the middle of a line are ignored. The
// heading does not need to be followed by a newline. The contents of fenced
// code blocks are not treated specially.
func ExtractDocumentLeadingH1(markdown []byte) string {
	groups := reDocumentLeadingH1.FindSubmatch(markdown)
	if groups == nil {
		return ""
	}

	return string(bytes.TrimSpace(groups[1]))
}