add vendoring

2022-11-16 12:08:51 +02:00
parent 14751cbf3a
commit c1d33fe3cb
1104 changed files with 759066 additions and 0 deletions
--- a/vendor/maunium.net/go/mautrix/format/htmlparser.go
+++ b/vendor/maunium.net/go/mautrix/format/htmlparser.go
@@ -0,0 +1,365 @@
+// Copyright (c) 2020 Tulir Asokan
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+package format
+
+import (
+	"fmt"
+	"math"
+	"strconv"
+	"strings"
+
+	"golang.org/x/net/html"
+
+	"maunium.net/go/mautrix/id"
+)
+
+type Context map[string]interface{}
+type TextConverter func(string, Context) string
+type SpoilerConverter func(text, reason string, ctx Context) string
+type LinkConverter func(text, href string, ctx Context) string
+type ColorConverter func(text, fg, bg string, ctx Context) string
+type CodeBlockConverter func(code, language string, ctx Context) string
+type PillConverter func(displayname, mxid, eventID string, ctx Context) string
+
+func DefaultPillConverter(displayname, mxid, eventID string, _ Context) string {
+	switch {
+	case len(mxid) == 0, mxid[0] == '@':
+		// User link, always just show the displayname
+		return displayname
+	case len(eventID) > 0:
+		// Event ID link, always just show the link
+		return fmt.Sprintf("https://matrix.to/#/%s/%s", mxid, eventID)
+	case mxid[0] == '!' && displayname == mxid:
+		// Room ID link with no separate display text, just show the link
+		return fmt.Sprintf("https://matrix.to/#/%s", mxid)
+	case mxid[0] == '#':
+		// Room alias link, just show the alias
+		return mxid
+	default:
+		// Other link (e.g. room ID link with display text), show text and link
+		return fmt.Sprintf("%s (https://matrix.to/#/%s)", displayname, mxid)
+	}
+}
+
+// HTMLParser is a somewhat customizable Matrix HTML parser.
+type HTMLParser struct {
+	PillConverter           PillConverter
+	TabsToSpaces            int
+	Newline                 string
+	HorizontalLine          string
+	BoldConverter           TextConverter
+	ItalicConverter         TextConverter
+	StrikethroughConverter  TextConverter
+	UnderlineConverter      TextConverter
+	LinkConverter           LinkConverter
+	SpoilerConverter        SpoilerConverter
+	ColorConverter          ColorConverter
+	MonospaceBlockConverter CodeBlockConverter
+	MonospaceConverter      TextConverter
+	TextConverter           TextConverter
+}
+
+// TaggedString is a string that also contains a HTML tag.
+type TaggedString struct {
+	string
+	tag string
+}
+
+func (parser *HTMLParser) maybeGetAttribute(node *html.Node, attribute string) (string, bool) {
+	for _, attr := range node.Attr {
+		if attr.Key == attribute {
+			return attr.Val, true
+		}
+	}
+	return "", false
+}
+
+func (parser *HTMLParser) getAttribute(node *html.Node, attribute string) string {
+	val, _ := parser.maybeGetAttribute(node, attribute)
+	return val
+}
+
+// Digits counts the number of digits in a non-negative integer.
+func Digits(num int) int {
+	return int(math.Floor(math.Log10(float64(num))) + 1)
+}
+
+func (parser *HTMLParser) listToString(node *html.Node, stripLinebreak bool, ctx Context) string {
+	ordered := node.Data == "ol"
+	taggedChildren := parser.nodeToTaggedStrings(node.FirstChild, stripLinebreak, ctx)
+	counter := 1
+	indentLength := 0
+	if ordered {
+		start := parser.getAttribute(node, "start")
+		if len(start) > 0 {
+			counter, _ = strconv.Atoi(start)
+		}
+
+		longestIndex := (counter - 1) + len(taggedChildren)
+		indentLength = Digits(longestIndex)
+	}
+	indent := strings.Repeat(" ", indentLength+2)
+	var children []string
+	for _, child := range taggedChildren {
+		if child.tag != "li" {
+			continue
+		}
+		var prefix string
+		// TODO make bullets and numbering configurable
+		if ordered {
+			indexPadding := indentLength - Digits(counter)
+			prefix = fmt.Sprintf("%d. %s", counter, strings.Repeat(" ", indexPadding))
+		} else {
+			prefix = "* "
+		}
+		str := prefix + child.string
+		counter++
+		parts := strings.Split(str, "\n")
+		for i, part := range parts[1:] {
+			parts[i+1] = indent + part
+		}
+		str = strings.Join(parts, "\n")
+		children = append(children, str)
+	}
+	return strings.Join(children, "\n")
+}
+
+func (parser *HTMLParser) basicFormatToString(node *html.Node, stripLinebreak bool, ctx Context) string {
+	str := parser.nodeToTagAwareString(node.FirstChild, stripLinebreak, ctx)
+	switch node.Data {
+	case "b", "strong":
+		if parser.BoldConverter != nil {
+			return parser.BoldConverter(str, ctx)
+		}
+		return fmt.Sprintf("**%s**", str)
+	case "i", "em":
+		if parser.ItalicConverter != nil {
+			return parser.ItalicConverter(str, ctx)
+		}
+		return fmt.Sprintf("_%s_", str)
+	case "s", "del", "strike":
+		if parser.StrikethroughConverter != nil {
+			return parser.StrikethroughConverter(str, ctx)
+		}
+		return fmt.Sprintf("~~%s~~", str)
+	case "u", "ins":
+		if parser.UnderlineConverter != nil {
+			return parser.UnderlineConverter(str, ctx)
+		}
+	case "tt", "code":
+		if parser.MonospaceConverter != nil {
+			return parser.MonospaceConverter(str, ctx)
+		}
+		return fmt.Sprintf("`%s`", str)
+	}
+	return str
+}
+
+func (parser *HTMLParser) spanToString(node *html.Node, stripLinebreak bool, ctx Context) string {
+	str := parser.nodeToTagAwareString(node.FirstChild, stripLinebreak, ctx)
+	if node.Data == "span" {
+		reason, isSpoiler := parser.maybeGetAttribute(node, "data-mx-spoiler")
+		if isSpoiler {
+			if parser.SpoilerConverter != nil {
+				str = parser.SpoilerConverter(str, reason, ctx)
+			} else if len(reason) > 0 {
+				str = fmt.Sprintf("||%s|%s||", reason, str)
+			} else {
+				str = fmt.Sprintf("||%s||", str)
+			}
+		}
+	}
+	if parser.ColorConverter != nil {
+		fg := parser.getAttribute(node, "data-mx-color")
+		if len(fg) == 0 && node.Data == "font" {
+			fg = parser.getAttribute(node, "color")
+		}
+		bg := parser.getAttribute(node, "data-mx-bg-color")
+		if len(bg) > 0 || len(fg) > 0 {
+			str = parser.ColorConverter(str, fg, bg, ctx)
+		}
+	}
+	return str
+}
+
+func (parser *HTMLParser) headerToString(node *html.Node, stripLinebreak bool, ctx Context) string {
+	children := parser.nodeToStrings(node.FirstChild, stripLinebreak, ctx)
+	length := int(node.Data[1] - '0')
+	prefix := strings.Repeat("#", length) + " "
+	return prefix + strings.Join(children, "")
+}
+
+func (parser *HTMLParser) blockquoteToString(node *html.Node, stripLinebreak bool, ctx Context) string {
+	str := parser.nodeToTagAwareString(node.FirstChild, stripLinebreak, ctx)
+	childrenArr := strings.Split(strings.TrimSpace(str), "\n")
+	// TODO make blockquote prefix configurable
+	for index, child := range childrenArr {
+		childrenArr[index] = "> " + child
+	}
+	return strings.Join(childrenArr, "\n")
+}
+
+func (parser *HTMLParser) linkToString(node *html.Node, stripLinebreak bool, ctx Context) string {
+	str := parser.nodeToTagAwareString(node.FirstChild, stripLinebreak, ctx)
+	href := parser.getAttribute(node, "href")
+	if len(href) == 0 {
+		return str
+	}
+	if parser.PillConverter != nil {
+		parsedMatrix, err := id.ParseMatrixURIOrMatrixToURL(href)
+		if err == nil && parsedMatrix != nil {
+			return parser.PillConverter(str, parsedMatrix.PrimaryIdentifier(), parsedMatrix.SecondaryIdentifier(), ctx)
+		}
+	}
+	if parser.LinkConverter != nil {
+		return parser.LinkConverter(str, href, ctx)
+	} else if str == href {
+		return str
+	}
+	return fmt.Sprintf("%s (%s)", str, href)
+}
+
+func (parser *HTMLParser) tagToString(node *html.Node, stripLinebreak bool, ctx Context) string {
+	switch node.Data {
+	case "blockquote":
+		return parser.blockquoteToString(node, stripLinebreak, ctx)
+	case "ol", "ul":
+		return parser.listToString(node, stripLinebreak, ctx)
+	case "h1", "h2", "h3", "h4", "h5", "h6":
+		return parser.headerToString(node, stripLinebreak, ctx)
+	case "br":
+		return parser.Newline
+	case "b", "strong", "i", "em", "s", "strike", "del", "u", "ins", "tt", "code":
+		return parser.basicFormatToString(node, stripLinebreak, ctx)
+	case "span", "font":
+		return parser.spanToString(node, stripLinebreak, ctx)
+	case "a":
+		return parser.linkToString(node, stripLinebreak, ctx)
+	case "p":
+		return parser.nodeToTagAwareString(node.FirstChild, stripLinebreak, ctx)
+	case "hr":
+		return parser.HorizontalLine
+	case "pre":
+		var preStr, language string
+		if node.FirstChild != nil && node.FirstChild.Type == html.ElementNode && node.FirstChild.Data == "code" {
+			class := parser.getAttribute(node.FirstChild, "class")
+			if strings.HasPrefix(class, "language-") {
+				language = class[len("language-"):]
+			}
+			preStr = parser.nodeToString(node.FirstChild.FirstChild, false, ctx)
+		} else {
+			preStr = parser.nodeToString(node.FirstChild, false, ctx)
+		}
+		if parser.MonospaceBlockConverter != nil {
+			return parser.MonospaceBlockConverter(preStr, language, ctx)
+		}
+		if len(preStr) == 0 || preStr[len(preStr)-1] != '\n' {
+			preStr += "\n"
+		}
+		return fmt.Sprintf("```%s\n%s```", language, preStr)
+	default:
+		return parser.nodeToTagAwareString(node.FirstChild, stripLinebreak, ctx)
+	}
+}
+
+func (parser *HTMLParser) singleNodeToString(node *html.Node, stripLinebreak bool, ctx Context) TaggedString {
+	switch node.Type {
+	case html.TextNode:
+		if stripLinebreak {
+			node.Data = strings.Replace(node.Data, "\n", "", -1)
+		}
+		if parser.TextConverter != nil {
+			node.Data = parser.TextConverter(node.Data, ctx)
+		}
+		return TaggedString{node.Data, "text"}
+	case html.ElementNode:
+		return TaggedString{parser.tagToString(node, stripLinebreak, ctx), node.Data}
+	case html.DocumentNode:
+		return TaggedString{parser.nodeToTagAwareString(node.FirstChild, stripLinebreak, ctx), "html"}
+	default:
+		return TaggedString{"", "unknown"}
+	}
+}
+
+func (parser *HTMLParser) nodeToTaggedStrings(node *html.Node, stripLinebreak bool, ctx Context) (strs []TaggedString) {
+	for ; node != nil; node = node.NextSibling {
+		strs = append(strs, parser.singleNodeToString(node, stripLinebreak, ctx))
+	}
+	return
+}
+
+var BlockTags = []string{"p", "h1", "h2", "h3", "h4", "h5", "h6", "ol", "ul", "pre", "blockquote", "div", "hr", "table"}
+
+func (parser *HTMLParser) isBlockTag(tag string) bool {
+	for _, blockTag := range BlockTags {
+		if tag == blockTag {
+			return true
+		}
+	}
+	return false
+}
+
+func (parser *HTMLParser) nodeToTagAwareString(node *html.Node, stripLinebreak bool, ctx Context) string {
+	strs := parser.nodeToTaggedStrings(node, stripLinebreak, ctx)
+	var output strings.Builder
+	for _, str := range strs {
+		tstr := str.string
+		if parser.isBlockTag(str.tag) {
+			tstr = fmt.Sprintf("\n%s\n", tstr)
+		}
+		output.WriteString(tstr)
+	}
+	return strings.TrimSpace(output.String())
+}
+
+func (parser *HTMLParser) nodeToStrings(node *html.Node, stripLinebreak bool, ctx Context) (strs []string) {
+	for ; node != nil; node = node.NextSibling {
+		strs = append(strs, parser.singleNodeToString(node, stripLinebreak, ctx).string)
+	}
+	return
+}
+
+func (parser *HTMLParser) nodeToString(node *html.Node, stripLinebreak bool, ctx Context) string {
+	return strings.Join(parser.nodeToStrings(node, stripLinebreak, ctx), "")
+}
+
+// Parse converts Matrix HTML into text using the settings in this parser.
+func (parser *HTMLParser) Parse(htmlData string, ctx Context) string {
+	if parser.TabsToSpaces >= 0 {
+		htmlData = strings.Replace(htmlData, "\t", strings.Repeat(" ", parser.TabsToSpaces), -1)
+	}
+	node, _ := html.Parse(strings.NewReader(htmlData))
+	return parser.nodeToTagAwareString(node, true, ctx)
+}
+
+// HTMLToText converts Matrix HTML into text with the default settings.
+func HTMLToText(html string) string {
+	return (&HTMLParser{
+		TabsToSpaces:   4,
+		Newline:        "\n",
+		HorizontalLine: "\n---\n",
+		PillConverter:  DefaultPillConverter,
+	}).Parse(html, make(Context))
+}
+
+// HTMLToMarkdown converts Matrix HTML into markdown with the default settings.
+//
+// Currently, the only difference to HTMLToText is how links are formatted.
+func HTMLToMarkdown(html string) string {
+	return (&HTMLParser{
+		TabsToSpaces:   4,
+		Newline:        "\n",
+		HorizontalLine: "\n---\n",
+		PillConverter:  DefaultPillConverter,
+		LinkConverter: func(text, href string, ctx Context) string {
+			if text == href {
+				return text
+			}
+			return fmt.Sprintf("[%s](%s)", text, href)
+		},
+	}).Parse(html, make(Context))
+}