add vendoring

This commit is contained in:
Aine
2022-11-16 12:08:51 +02:00
parent 14751cbf3a
commit c1d33fe3cb
1104 changed files with 759066 additions and 0 deletions

View File

@@ -0,0 +1,64 @@
package coding
import (
"fmt"
"io"
)
// base64CleanerTable classifies the byte values 0-127 (one entry per value, 128 entries in total).
// An entry of -2 marks a byte that is stripped silently (tab, \n, \r, space, equals sign), -1 marks
// a byte that is stripped and reported as an error, and a non-negative entry marks a byte of the
// base64 alphabet (A-Z, a-z, 0-9, '+', '/') that is passed through unchanged.
var base64CleanerTable = []int8{
-1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -2, -1, -1, -2, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -2, -1, -1,
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
-1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
}
// Base64Cleaner improves the tolerance of Go's built-in base64 decoder by stripping out
// characters that would cause decoding to fail.
type Base64Cleaner struct {
// Errors collects one error for every character that Read stripped with a warning.
Errors []error
// r is the underlying reader supplying the raw base64 data.
r io.Reader
// buffer is scratch space that Read fills from r before filtering into the caller's slice.
buffer [1024]byte
}
// Compile-time check that *Base64Cleaner satisfies io.Reader.
var _ io.Reader = &Base64Cleaner{}
// NewBase64Cleaner wraps r in a Base64Cleaner. The returned value implements io.Reader and starts
// out with an empty, non-nil Errors slice.
func NewBase64Cleaner(r io.Reader) *Base64Cleaner {
	bc := new(Base64Cleaner)
	bc.r = r
	bc.Errors = make([]error, 0)
	return bc
}
// Read implements io.Reader. It reads from the underlying reader into an internal scratch buffer
// and copies only the bytes of the base64 alphabet into p.
//
// Tab, \n, \r, space and the equals sign are dropped silently. Every other byte outside the base64
// alphabet, including any byte >= 0x80, is dropped and recorded in bc.Errors.
//
// It returns the number of bytes stored in p together with the error reported by the underlying
// reader. Because bytes may be dropped, n can be 0 with a nil error when everything read in this
// call was stripped.
func (bc *Base64Cleaner) Read(p []byte) (n int, err error) {
	// Read at most min(len(p), len(bc.buffer)) bytes so the filtered output always fits in p.
	size := len(bc.buffer)
	if size > len(p) {
		size = len(p)
	}
	buf := bc.buffer[:size]

	bn, err := bc.r.Read(buf)
	for _, b := range buf[:bn] {
		// The table only covers 7-bit values. Bytes beyond it can never be valid base64, so
		// they are reported rather than being masked onto an unrelated ASCII entry.
		if int(b) >= len(base64CleanerTable) {
			bc.Errors = append(bc.Errors, fmt.Errorf("unexpected %q in base64 stream", b))
			continue
		}
		switch base64CleanerTable[b] {
		case -2:
			// Strip these silently: tab, \n, \r, space, equals sign.
		case -1:
			// Strip these, but warn the client.
			bc.Errors = append(bc.Errors, fmt.Errorf("unexpected %q in base64 stream", b))
		default:
			p[n] = b
			n++
		}
	}
	return n, err
}

View File

@@ -0,0 +1,339 @@
package coding
import (
"bytes"
"fmt"
"io"
"io/ioutil"
"regexp"
"strings"
"github.com/cention-sany/utf7"
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/charmap"
"golang.org/x/text/encoding/japanese"
"golang.org/x/text/encoding/korean"
"golang.org/x/text/encoding/simplifiedchinese"
"golang.org/x/text/encoding/traditionalchinese"
"golang.org/x/text/encoding/unicode"
"golang.org/x/text/transform"
)
// utf8 is the charset label used for UTF-8; NewCharsetReader compares the lowercased charset
// against it to skip conversion.
const utf8 = "utf-8"
// encodings maps lowercase charset labels (including many aliases) to the encoding used to decode
// that charset and to the name of the encoding the label resolves to. Lookups in this file
// lowercase the requested charset before indexing the map.
//
// The table is based on golang.org/x/net/html/charset/table.go; the entries following
// "x-user-defined" carry their own notes where the source provided them.
var encodings = map[string]struct {
e encoding.Encoding // encoding whose decoder converts the charset to UTF-8
name string // name of the encoding this label resolves to
}{
"unicode-1-1-utf-8": {encoding.Nop, utf8},
"utf-8": {encoding.Nop, utf8},
"utf8": {encoding.Nop, utf8},
"utf-7": {utf7.UTF7, "utf-7"},
"utf7": {utf7.UTF7, "utf-7"},
"866": {charmap.CodePage866, "ibm866"},
"cp866": {charmap.CodePage866, "ibm866"},
"csibm866": {charmap.CodePage866, "ibm866"},
"ibm866": {charmap.CodePage866, "ibm866"},
"csisolatin2": {charmap.ISO8859_2, "iso-8859-2"},
"iso-8859-2": {charmap.ISO8859_2, "iso-8859-2"},
"iso-ir-101": {charmap.ISO8859_2, "iso-8859-2"},
"iso8859-2": {charmap.ISO8859_2, "iso-8859-2"},
"iso88592": {charmap.ISO8859_2, "iso-8859-2"},
"iso_8859-2": {charmap.ISO8859_2, "iso-8859-2"},
"iso_8859-2:1987": {charmap.ISO8859_2, "iso-8859-2"},
"l2": {charmap.ISO8859_2, "iso-8859-2"},
"latin2": {charmap.ISO8859_2, "iso-8859-2"},
"csisolatin3": {charmap.ISO8859_3, "iso-8859-3"},
"iso-8859-3": {charmap.ISO8859_3, "iso-8859-3"},
"iso-ir-109": {charmap.ISO8859_3, "iso-8859-3"},
"iso8859-3": {charmap.ISO8859_3, "iso-8859-3"},
"iso88593": {charmap.ISO8859_3, "iso-8859-3"},
"iso_8859-3": {charmap.ISO8859_3, "iso-8859-3"},
"iso_8859-3:1988": {charmap.ISO8859_3, "iso-8859-3"},
"l3": {charmap.ISO8859_3, "iso-8859-3"},
"latin3": {charmap.ISO8859_3, "iso-8859-3"},
"csisolatin4": {charmap.ISO8859_4, "iso-8859-4"},
"iso-8859-4": {charmap.ISO8859_4, "iso-8859-4"},
"iso-ir-110": {charmap.ISO8859_4, "iso-8859-4"},
"iso8859-4": {charmap.ISO8859_4, "iso-8859-4"},
"iso88594": {charmap.ISO8859_4, "iso-8859-4"},
"iso_8859-4": {charmap.ISO8859_4, "iso-8859-4"},
"iso_8859-4:1988": {charmap.ISO8859_4, "iso-8859-4"},
"l4": {charmap.ISO8859_4, "iso-8859-4"},
"latin4": {charmap.ISO8859_4, "iso-8859-4"},
"csisolatincyrillic": {charmap.ISO8859_5, "iso-8859-5"},
"cyrillic": {charmap.ISO8859_5, "iso-8859-5"},
"iso-8859-5": {charmap.ISO8859_5, "iso-8859-5"},
"iso-ir-144": {charmap.ISO8859_5, "iso-8859-5"},
"iso8859-5": {charmap.ISO8859_5, "iso-8859-5"},
"iso88595": {charmap.ISO8859_5, "iso-8859-5"},
"iso_8859-5": {charmap.ISO8859_5, "iso-8859-5"},
"iso_8859-5:1988": {charmap.ISO8859_5, "iso-8859-5"},
"arabic": {charmap.ISO8859_6, "iso-8859-6"},
"asmo-708": {charmap.ISO8859_6, "iso-8859-6"},
"csiso88596e": {charmap.ISO8859_6, "iso-8859-6"},
"csiso88596i": {charmap.ISO8859_6, "iso-8859-6"},
"csisolatinarabic": {charmap.ISO8859_6, "iso-8859-6"},
"ecma-114": {charmap.ISO8859_6, "iso-8859-6"},
"iso-8859-6": {charmap.ISO8859_6, "iso-8859-6"},
"iso-8859-6-e": {charmap.ISO8859_6, "iso-8859-6"},
"iso-8859-6-i": {charmap.ISO8859_6, "iso-8859-6"},
"iso-ir-127": {charmap.ISO8859_6, "iso-8859-6"},
"iso8859-6": {charmap.ISO8859_6, "iso-8859-6"},
"iso88596": {charmap.ISO8859_6, "iso-8859-6"},
"iso_8859-6": {charmap.ISO8859_6, "iso-8859-6"},
"iso_8859-6:1987": {charmap.ISO8859_6, "iso-8859-6"},
"csisolatingreek": {charmap.ISO8859_7, "iso-8859-7"},
"ecma-118": {charmap.ISO8859_7, "iso-8859-7"},
"elot_928": {charmap.ISO8859_7, "iso-8859-7"},
"greek": {charmap.ISO8859_7, "iso-8859-7"},
"greek8": {charmap.ISO8859_7, "iso-8859-7"},
"iso-8859-7": {charmap.ISO8859_7, "iso-8859-7"},
"iso-ir-126": {charmap.ISO8859_7, "iso-8859-7"},
"iso8859-7": {charmap.ISO8859_7, "iso-8859-7"},
"iso88597": {charmap.ISO8859_7, "iso-8859-7"},
"iso_8859-7": {charmap.ISO8859_7, "iso-8859-7"},
"iso_8859-7:1987": {charmap.ISO8859_7, "iso-8859-7"},
"sun_eu_greek": {charmap.ISO8859_7, "iso-8859-7"},
"csiso88598e": {charmap.ISO8859_8, "iso-8859-8"},
"csisolatinhebrew": {charmap.ISO8859_8, "iso-8859-8"},
"hebrew": {charmap.ISO8859_8, "iso-8859-8"},
"iso-8859-8": {charmap.ISO8859_8, "iso-8859-8"},
"iso-8859-8-e": {charmap.ISO8859_8, "iso-8859-8"},
"iso-ir-138": {charmap.ISO8859_8, "iso-8859-8"},
"iso8859-8": {charmap.ISO8859_8, "iso-8859-8"},
"iso88598": {charmap.ISO8859_8, "iso-8859-8"},
"iso_8859-8": {charmap.ISO8859_8, "iso-8859-8"},
"iso_8859-8:1988": {charmap.ISO8859_8, "iso-8859-8"},
"visual": {charmap.ISO8859_8, "iso-8859-8"},
"csiso88598i": {charmap.ISO8859_8, "iso-8859-8-i"},
"iso-8859-8-i": {charmap.ISO8859_8, "iso-8859-8-i"},
"logical": {charmap.ISO8859_8, "iso-8859-8-i"},
"csisolatin6": {charmap.ISO8859_10, "iso-8859-10"},
"iso-8859-10": {charmap.ISO8859_10, "iso-8859-10"},
"iso-ir-157": {charmap.ISO8859_10, "iso-8859-10"},
"iso8859-10": {charmap.ISO8859_10, "iso-8859-10"},
"iso885910": {charmap.ISO8859_10, "iso-8859-10"},
"l6": {charmap.ISO8859_10, "iso-8859-10"},
"latin6": {charmap.ISO8859_10, "iso-8859-10"},
"iso-8859-13": {charmap.ISO8859_13, "iso-8859-13"},
"iso8859-13": {charmap.ISO8859_13, "iso-8859-13"},
"iso885913": {charmap.ISO8859_13, "iso-8859-13"},
"iso-8859-14": {charmap.ISO8859_14, "iso-8859-14"},
"iso8859-14": {charmap.ISO8859_14, "iso-8859-14"},
"iso885914": {charmap.ISO8859_14, "iso-8859-14"},
"csisolatin9": {charmap.ISO8859_15, "iso-8859-15"},
"iso-8859-15": {charmap.ISO8859_15, "iso-8859-15"},
"iso8859-15": {charmap.ISO8859_15, "iso-8859-15"},
"iso885915": {charmap.ISO8859_15, "iso-8859-15"},
"iso_8859-15": {charmap.ISO8859_15, "iso-8859-15"},
"l9": {charmap.ISO8859_15, "iso-8859-15"},
"iso-8859-16": {charmap.ISO8859_16, "iso-8859-16"},
"cskoi8r": {charmap.KOI8R, "koi8-r"},
"koi": {charmap.KOI8R, "koi8-r"},
"koi8": {charmap.KOI8R, "koi8-r"},
"koi8-r": {charmap.KOI8R, "koi8-r"},
"koi8_r": {charmap.KOI8R, "koi8-r"},
"koi8-u": {charmap.KOI8U, "koi8-u"},
"csmacintosh": {charmap.Macintosh, "macintosh"},
"mac": {charmap.Macintosh, "macintosh"},
"macintosh": {charmap.Macintosh, "macintosh"},
"x-mac-roman": {charmap.Macintosh, "macintosh"},
"dos-874": {charmap.Windows874, "windows-874"},
"iso-8859-11": {charmap.Windows874, "windows-874"},
"iso8859-11": {charmap.Windows874, "windows-874"},
"iso885911": {charmap.Windows874, "windows-874"},
"tis-620": {charmap.Windows874, "windows-874"},
"windows-874": {charmap.Windows874, "windows-874"},
"cp1250": {charmap.Windows1250, "windows-1250"},
"windows-1250": {charmap.Windows1250, "windows-1250"},
"x-cp1250": {charmap.Windows1250, "windows-1250"},
"cp1251": {charmap.Windows1251, "windows-1251"},
"windows-1251": {charmap.Windows1251, "windows-1251"},
"x-cp1251": {charmap.Windows1251, "windows-1251"},
"ansi_x3.4-1968": {charmap.Windows1252, "windows-1252"},
"ascii": {charmap.Windows1252, "windows-1252"},
"cp1252": {charmap.Windows1252, "windows-1252"},
"cp819": {charmap.Windows1252, "windows-1252"},
"csisolatin1": {charmap.Windows1252, "windows-1252"},
"ibm819": {charmap.Windows1252, "windows-1252"},
"iso-8859-1": {charmap.ISO8859_1, "iso-8859-1"},
"iso-ir-100": {charmap.Windows1252, "windows-1252"},
"iso8859-1": {charmap.ISO8859_1, "iso-8859-1"},
"iso8859_1": {charmap.ISO8859_1, "iso-8859-1"},
"iso88591": {charmap.ISO8859_1, "iso-8859-1"},
"iso_8859-1": {charmap.ISO8859_1, "iso-8859-1"},
"iso_8859-1:1987": {charmap.ISO8859_1, "iso-8859-1"},
"l1": {charmap.Windows1252, "windows-1252"},
"latin1": {charmap.Windows1252, "windows-1252"},
"us-ascii": {charmap.Windows1252, "windows-1252"},
"windows-1252": {charmap.Windows1252, "windows-1252"},
"x-cp1252": {charmap.Windows1252, "windows-1252"},
"cp1253": {charmap.Windows1253, "windows-1253"},
"windows-1253": {charmap.Windows1253, "windows-1253"},
"x-cp1253": {charmap.Windows1253, "windows-1253"},
"cp1254": {charmap.Windows1254, "windows-1254"},
"csisolatin5": {charmap.Windows1254, "windows-1254"},
"iso-8859-9": {charmap.Windows1254, "windows-1254"},
"iso-ir-148": {charmap.Windows1254, "windows-1254"},
"iso8859-9": {charmap.Windows1254, "windows-1254"},
"iso88599": {charmap.Windows1254, "windows-1254"},
"iso_8859-9": {charmap.Windows1254, "windows-1254"},
"iso_8859-9:1989": {charmap.Windows1254, "windows-1254"},
"l5": {charmap.Windows1254, "windows-1254"},
"latin5": {charmap.Windows1254, "windows-1254"},
"windows-1254": {charmap.Windows1254, "windows-1254"},
"x-cp1254": {charmap.Windows1254, "windows-1254"},
"cp1255": {charmap.Windows1255, "windows-1255"},
"windows-1255": {charmap.Windows1255, "windows-1255"},
"x-cp1255": {charmap.Windows1255, "windows-1255"},
"cp1256": {charmap.Windows1256, "windows-1256"},
"windows-1256": {charmap.Windows1256, "windows-1256"},
"x-cp1256": {charmap.Windows1256, "windows-1256"},
"cp1257": {charmap.Windows1257, "windows-1257"},
"windows-1257": {charmap.Windows1257, "windows-1257"},
"x-cp1257": {charmap.Windows1257, "windows-1257"},
"cp1258": {charmap.Windows1258, "windows-1258"},
"windows-1258": {charmap.Windows1258, "windows-1258"},
"x-cp1258": {charmap.Windows1258, "windows-1258"},
"x-mac-cyrillic": {charmap.MacintoshCyrillic, "x-mac-cyrillic"},
"x-mac-ukrainian": {charmap.MacintoshCyrillic, "x-mac-cyrillic"},
"chinese": {simplifiedchinese.GBK, "gbk"},
"csgb2312": {simplifiedchinese.GBK, "gbk"},
"csiso58gb231280": {simplifiedchinese.GBK, "gbk"},
"gb2312": {simplifiedchinese.GBK, "gbk"},
"gb_2312": {simplifiedchinese.GBK, "gbk"},
"gb_2312-80": {simplifiedchinese.GBK, "gbk"},
"gbk": {simplifiedchinese.GBK, "gbk"},
"iso-ir-58": {simplifiedchinese.GBK, "gbk"},
"x-gbk": {simplifiedchinese.GBK, "gbk"},
"gb18030": {simplifiedchinese.GB18030, "gb18030"},
"hz-gb-2312": {simplifiedchinese.HZGB2312, "hz-gb-2312"},
"big5": {traditionalchinese.Big5, "big5"},
"big5-hkscs": {traditionalchinese.Big5, "big5"},
"cn-big5": {traditionalchinese.Big5, "big5"},
"csbig5": {traditionalchinese.Big5, "big5"},
"x-x-big5": {traditionalchinese.Big5, "big5"},
"cseucpkdfmtjapanese": {japanese.EUCJP, "euc-jp"},
"euc-jp": {japanese.EUCJP, "euc-jp"},
"x-euc-jp": {japanese.EUCJP, "euc-jp"},
"csiso2022jp": {japanese.ISO2022JP, "iso-2022-jp"},
"iso-2022-jp": {japanese.ISO2022JP, "iso-2022-jp"},
"csshiftjis": {japanese.ShiftJIS, "shift_jis"},
"ms_kanji": {japanese.ShiftJIS, "shift_jis"},
"shift-jis": {japanese.ShiftJIS, "shift_jis"},
"shift_jis": {japanese.ShiftJIS, "shift_jis"},
"sjis": {japanese.ShiftJIS, "shift_jis"},
"windows-31j": {japanese.ShiftJIS, "shift_jis"},
"x-sjis": {japanese.ShiftJIS, "shift_jis"},
"cseuckr": {korean.EUCKR, "euc-kr"},
"csksc56011987": {korean.EUCKR, "euc-kr"},
"euc-kr": {korean.EUCKR, "euc-kr"},
"iso-ir-149": {korean.EUCKR, "euc-kr"},
"korean": {korean.EUCKR, "euc-kr"},
"ks_c_5601-1987": {korean.EUCKR, "euc-kr"},
"ks_c_5601-1989": {korean.EUCKR, "euc-kr"},
"ksc5601": {korean.EUCKR, "euc-kr"},
"ksc_5601": {korean.EUCKR, "euc-kr"},
"windows-949": {korean.EUCKR, "euc-kr"},
"csiso2022kr": {encoding.Replacement, "replacement"},
"iso-2022-kr": {encoding.Replacement, "replacement"},
"iso-2022-cn": {encoding.Replacement, "replacement"},
"iso-2022-cn-ext": {encoding.Replacement, "replacement"},
"utf-16be": {unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM), "utf-16be"},
"utf-16": {unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM), "utf-16le"},
"utf-16le": {unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM), "utf-16le"},
"x-user-defined": {charmap.XUserDefined, "x-user-defined"},
"iso646-us": {charmap.Windows1252, "windows-1252"}, // ISO646 isn't us-ascii but 1991 version is.
"iso: western": {charmap.Windows1252, "windows-1252"}, // same as iso-8859-1
"we8iso8859p1": {charmap.Windows1252, "windows-1252"}, // same as iso-8859-1
"cp936": {simplifiedchinese.GBK, "gbk"}, // same as gb2312
"cp850": {charmap.CodePage850, "cp850"},
"cp-850": {charmap.CodePage850, "cp850"},
"ibm850": {charmap.CodePage850, "cp850"},
"136": {traditionalchinese.Big5, "big5"}, // same as chinese big5
"cp932": {japanese.ShiftJIS, "shift_jis"},
"8859-1": {charmap.Windows1252, "windows-1252"},
"8859_1": {charmap.Windows1252, "windows-1252"},
"8859-2": {charmap.ISO8859_2, "iso-8859-2"},
"8859_2": {charmap.ISO8859_2, "iso-8859-2"},
"8859-3": {charmap.ISO8859_3, "iso-8859-3"},
"8859_3": {charmap.ISO8859_3, "iso-8859-3"},
"8859-4": {charmap.ISO8859_4, "iso-8859-4"},
"8859_4": {charmap.ISO8859_4, "iso-8859-4"},
"8859-5": {charmap.ISO8859_5, "iso-8859-5"},
"8859_5": {charmap.ISO8859_5, "iso-8859-5"},
"8859-6": {charmap.ISO8859_6, "iso-8859-6"},
"8859_6": {charmap.ISO8859_6, "iso-8859-6"},
"8859-7": {charmap.ISO8859_7, "iso-8859-7"},
"8859_7": {charmap.ISO8859_7, "iso-8859-7"},
"8859-8": {charmap.ISO8859_8, "iso-8859-8"},
"8859_8": {charmap.ISO8859_8, "iso-8859-8"},
"8859-10": {charmap.ISO8859_10, "iso-8859-10"},
"8859_10": {charmap.ISO8859_10, "iso-8859-10"},
"8859-13": {charmap.ISO8859_13, "iso-8859-13"},
"8859_13": {charmap.ISO8859_13, "iso-8859-13"},
"8859-14": {charmap.ISO8859_14, "iso-8859-14"},
"8859_14": {charmap.ISO8859_14, "iso-8859-14"},
"8859-15": {charmap.ISO8859_15, "iso-8859-15"},
"8859_15": {charmap.ISO8859_15, "iso-8859-15"},
"8859-16": {charmap.ISO8859_16, "iso-8859-16"},
"8859_16": {charmap.ISO8859_16, "iso-8859-16"},
"utf8mb4": {encoding.Nop, "utf-8"}, // emojis, but golang can handle it directly
"238": {charmap.Windows1250, "windows-1250"},
}
// metaTagCharsetRegexp matches, case-insensitively, a "<meta" tag followed on the same line by a
// charset declaration, capturing the charset label in the named group "charset".
var metaTagCharsetRegexp = regexp.MustCompile(
	`(?i)<meta.*charset="?\s*(?P<charset>[a-zA-Z0-9_.:-]+)\s*"?`)

// metaTagCharsetIndex is the submatch position of the "charset" group; it is resolved by init.
var metaTagCharsetIndex int

// init looks up the position of the "charset" capture group once, so that matching code can index
// submatch results directly.
func init() {
	names := metaTagCharsetRegexp.SubexpNames()
	for idx := 0; idx < len(names); idx++ {
		if names[idx] != "charset" {
			continue
		}
		metaTagCharsetIndex = idx
		return
	}
}
// ConvertToUTF8String decodes textBytes, which are encoded in the named charset, and returns the
// result as a UTF-8 string. The charset label is matched case-insensitively; an unknown label or a
// decoding failure yields an empty string and an error.
func ConvertToUTF8String(charset string, textBytes []byte) (string, error) {
	entry, found := encodings[strings.ToLower(charset)]
	if !found {
		return "", fmt.Errorf("unsupported charset %q", charset)
	}

	decoding := transform.NewReader(bytes.NewReader(textBytes), entry.e.NewDecoder())
	converted, err := ioutil.ReadAll(decoding)
	if err != nil {
		return "", err
	}
	return string(converted), nil
}
// NewCharsetReader returns a reader that converts input from the named charset into UTF-8. Its
// signature matches the CharsetReader factory defined by Go's mime.WordDecoder.
//
// The charset label is matched case-insensitively. Input declared as "utf-8" is returned
// untouched; an unknown label results in a nil reader and an error.
//
// This function is similar to: https://godoc.org/golang.org/x/net/html/charset#NewReaderLabel
func NewCharsetReader(charset string, input io.Reader) (io.Reader, error) {
	label := strings.ToLower(charset)
	if label == utf8 {
		return input, nil
	}

	entry, found := encodings[label]
	if !found {
		return nil, fmt.Errorf("unsupported charset %q", charset)
	}
	return transform.NewReader(input, entry.e.NewDecoder()), nil
}
// FindCharsetInHTML looks for charset in the HTML meta tag (v4.01 and v5).
//
// It returns the charset label captured by the first match of metaTagCharsetRegexp in html, or an
// empty string when there is no match. Only the first match is needed, so the search stops there
// instead of collecting every match in the document.
func FindCharsetInHTML(html string) string {
	if match := metaTagCharsetRegexp.FindStringSubmatch(html); match != nil {
		return match[metaTagCharsetIndex]
	}
	return ""
}

View File

@@ -0,0 +1,135 @@
package coding
import (
"fmt"
"io"
"mime"
"strings"
)
// NewExtMimeDecoder returns a mime.WordDecoder whose CharsetReader is NewCharsetReader, which lets
// it decode the additional charsets that function supports.
func NewExtMimeDecoder() *mime.WordDecoder {
	decoder := new(mime.WordDecoder)
	decoder.CharsetReader = NewCharsetReader
	return decoder
}
// DecodeExtHeader decodes a single header line (per RFC 2047, aka Message Header Extensions) with
// the decoder from NewExtMimeDecoder. The input is returned unchanged when it contains no "=?"
// marker or when decoding fails.
func DecodeExtHeader(input string) string {
	// Only bother the decoder when an encoded-word marker is present.
	if strings.Contains(input, "=?") {
		if decoded, err := NewExtMimeDecoder().DecodeHeader(input); err == nil {
			return decoded
		}
	}
	return input
}
// RFC2047Decode returns a decoded string if the input uses RFC2047 encoding, otherwise it returns
// the input with CR and LF characters replaced by spaces.
//
// Decoding is repeated until rfc2047Recurse reports that nothing more can be decoded. When at
// least one pass decoded something and the result contains an "=", the piece that follows the
// first "=" is wrapped in double quotes unless it already starts/ends with one.
//
// RFC2047 Example: `=?UTF-8?B?bmFtZT0iw7DCn8KUwoo=?=`
func RFC2047Decode(s string) string {
	// Convert CR/LF to spaces.
	s = strings.Map(func(r rune) rune {
		switch r {
		case '\n', '\r':
			return ' '
		}
		return r
	}, s)

	// Keep decoding for as long as each pass makes progress.
	decodedAny := false
	for {
		next, err := rfc2047Recurse(s)
		s = next
		if err != nil {
			break
		}
		decodedAny = true
	}
	if !decodedAny {
		return s
	}

	parts := strings.SplitAfter(s, "=")
	if len(parts) < 2 {
		return s
	}

	// Add quotes as needed.
	value := parts[1]
	if !strings.HasPrefix(value, "\"") {
		value = fmt.Sprintf("\"%s", value)
	}
	if !strings.HasSuffix(value, "\"") {
		value = fmt.Sprintf("%s\"", value)
	}
	parts[1] = value
	return strings.Join(parts, "")
}
// rfc2047Recurse performs one decoding pass over s. It returns io.EOF together with a string when
// there is nothing (more) to decode: either s holds no "?Q?"/"?B?" marker (checked
// case-insensitively), or neither s nor its repaired form from fixRFC2047String decodes to
// something different from s. A nil error means the returned string differs from s.
func rfc2047Recurse(s string) (string, error) {
	upper := strings.ToUpper(s)
	if !(strings.Contains(upper, "?Q?") || strings.Contains(upper, "?B?")) {
		return s, io.EOF
	}

	if decoded := DecodeExtHeader(s); decoded != s {
		return decoded, nil
	}

	// Straight decoding changed nothing; retry after stripping stray whitespace.
	repaired := DecodeExtHeader(fixRFC2047String(s))
	if repaired == s {
		return repaired, io.EOF
	}
	return repaired, nil
}
// fixRFC2047String drops '\n', '\r' and ' ' characters that occur inside an RFC2047 encoded word,
// copying every other character through unchanged.
//
// An encoded word is considered open once "=?" has been seen, and closed by an '=' that arrives
// after three further '?' separators have been counted. The separator counter is never reset, and
// a '?' that directly follows an '=' is always taken as the start of an encoded word rather than
// counted as a separator.
func fixRFC2047String(s string) string {
	var (
		out         strings.Builder
		insideWord  bool // currently between "=?" and the closing '='
		afterEquals bool // the previous character was an '=' that did not close a word
		separators  int  // number of '?' seen that were not directly preceded by '='
	)

	for _, r := range s {
		if r == '\n' || r == '\r' || r == ' ' {
			afterEquals = false
			if !insideWord {
				out.WriteRune(r)
			}
			continue
		}

		switch r {
		case '=':
			if separators == 3 {
				insideWord = false
			} else {
				afterEquals = true
			}
		case '?':
			if afterEquals {
				insideWord = true
			} else {
				separators++
			}
			afterEquals = false
		default:
			afterEquals = false
		}
		out.WriteRune(r)
	}
	return out.String()
}

View File

@@ -0,0 +1,26 @@
package coding
import (
"net/url"
"strings"
)
// FromIDHeader decodes a Content-ID or Message-ID header value (RFC 2392) into a utf-8 string.
// Leading '<' and trailing '>' characters are removed and the remainder is URL query-unescaped;
// if unescaping fails the trimmed value is returned as is.
// Example: "<foo%3fbar+baz>" becomes "foo?bar baz".
func FromIDHeader(v string) string {
	if len(v) == 0 {
		return v
	}

	trimmed := strings.TrimRight(strings.TrimLeft(v, "<"), ">")
	decoded, err := url.QueryUnescape(trimmed)
	if err != nil {
		return trimmed
	}
	return decoded
}
// ToIDHeader encodes a utf-8 string as a Content-ID or Message-ID header value (RFC 2392): the
// value is URL query-escaped, except that '@' is kept literal, and wrapped in angle brackets.
func ToIDHeader(v string) string {
	escaped := url.QueryEscape(v)
	escaped = strings.Replace(escaped, "%40", "@", -1)
	return "<" + escaped + ">"
}

View File

@@ -0,0 +1,161 @@
package coding
import (
"bufio"
"fmt"
"io"
)
// QPCleaner scans quoted printable content for invalid characters and encodes them so that
// Go's quoted-printable decoder does not abort with an error.
type QPCleaner struct {
in *bufio.Reader // buffered source of the raw quoted-printable data
overflow []byte // output bytes that did not fit into the caller's buffer during a previous Read
lineLen int // number of bytes written to the current output line so far
}
// MaxQPLineLen is the maximum line length we allow before inserting `=\r\n`. Prevents buffer
// overflows in mime/quotedprintable.Reader. The length is counted in output bytes.
const MaxQPLineLen = 1024
// Package-level helpers for QPCleaner.
var (
_ io.Reader = &QPCleaner{} // Assert QPCleaner implements io.Reader.
escapedEquals = []byte("=3D") // QP encoded value of an equals sign.
lineBreak = []byte("=\r\n") // Soft line break written when a line reaches MaxQPLineLen.
)
// NewQPCleaner wraps r in a buffered reader and returns a QPCleaner that reads from it. The
// cleaner starts with no pending overflow and a line length of zero.
func NewQPCleaner(r io.Reader) *QPCleaner {
	// overflow and lineLen are left at their zero values (nil and 0).
	cleaner := new(QPCleaner)
	cleaner.in = bufio.NewReader(r)
	return cleaner
}
// Read implements io.Reader. It copies quoted-printable data from the underlying reader into dest,
// rewriting anything Go's quoted-printable decoder would reject:
//
//   - an '=' that is not followed by two hex digits or a line break is emitted as "=3D";
//   - bytes below ' ' (other than tab, CR and LF) or above '~' are emitted as "=XX";
//   - a soft line break ("=\r\n") is inserted once a line reaches MaxQPLineLen bytes.
//
// Output that does not fit into dest is kept in qp.overflow and handed out first by the next
// call. n is always the number of bytes stored in dest, also when err is non-nil. io.EOF is only
// returned once the underlying reader has no more bytes to read.
func (qp *QPCleaner) Read(dest []byte) (n int, err error) {
	destLen := len(dest)

	if len(qp.overflow) > 0 {
		// Copy bytes that didn't fit into dest buffer during previous read.
		n = copy(dest, qp.overflow)
		qp.overflow = qp.overflow[n:]
	}

	// writeByte outputs a single byte, space for which will have already been ensured by the loop
	// condition. Updates counters.
	writeByte := func(in byte) {
		dest[n] = in
		n++
		qp.lineLen++
	}

	// safeWriteByte outputs a single byte, storing overflow for next read. Updates counters.
	safeWriteByte := func(in byte) {
		if n < destLen {
			dest[n] = in
			n++
		} else {
			qp.overflow = append(qp.overflow, in)
		}
		qp.lineLen++
	}

	// writeBytes outputs multiple bytes, storing overflow for next read. Updates counters.
	writeBytes := func(in []byte) {
		nc := copy(dest[n:], in)
		if nc < len(in) {
			// Stash unwritten bytes into overflow.
			qp.overflow = append(qp.overflow, in[nc:]...)
		}
		n += nc
		qp.lineLen += len(in)
	}

	// ensureLineLen ensures there is room to write `requested` bytes, preventing a line break being
	// inserted in the middle of the escaped string. The requested count is in addition to the
	// byte that was already reserved for this loop iteration.
	ensureLineLen := func(requested int) {
		if qp.lineLen+requested >= MaxQPLineLen {
			writeBytes(lineBreak)
			qp.lineLen = 0
		}
	}

	// Loop over bytes in qp.in ByteReader while there is space in dest.
	for n < destLen {
		var b byte
		b, err = qp.in.ReadByte()
		if err != nil {
			return n, err
		}

		if qp.lineLen >= MaxQPLineLen {
			writeBytes(lineBreak)
			qp.lineLen = 0
			if n == destLen {
				// The soft line break used up the rest of dest. Push b back so that the next
				// call processes it instead of silently dropping it.
				if err = qp.in.UnreadByte(); err != nil {
					return n, err
				}
				break
			}
		}

		switch {
		// Pass valid hex bytes through, otherwise escapes the equals symbol.
		case b == '=':
			ensureLineLen(2)
			// Keep the look-ahead error local: hitting EOF while peeking only means that fewer
			// than two bytes follow, not that this Read has reached the end of its output.
			hexBytes, peekErr := qp.in.Peek(2)
			if peekErr != nil && peekErr != io.EOF {
				return n, peekErr
			}
			if validHexBytes(hexBytes) {
				safeWriteByte(b)
			} else {
				writeBytes(escapedEquals)
			}

		// Valid special character.
		case b == '\t':
			writeByte(b)

		// Valid special characters that reset line length.
		case b == '\r' || b == '\n':
			writeByte(b)
			qp.lineLen = 0

		// Invalid characters, render as quoted-printable.
		case b < ' ' || '~' < b:
			ensureLineLen(2)
			writeBytes([]byte(fmt.Sprintf("=%02X", b)))

		// Acceptable characters.
		default:
			writeByte(b)
		}
	}

	return n, err
}
// validHexByte reports whether b is an ASCII hexadecimal digit (0-9, A-F or a-f).
func validHexByte(b byte) bool {
	switch {
	case b >= '0' && b <= '9':
		return true
	case b >= 'A' && b <= 'F':
		return true
	case b >= 'a' && b <= 'f':
		return true
	}
	return false
}
// validHexBytes reports whether v, the bytes following an equals sign, completes a valid
// quoted-printable sequence: a soft line break ("\n" or "\r\n") or two hexadecimal digits. Only
// the first two bytes of v are examined.
func validHexBytes(v []byte) bool {
	switch {
	case len(v) == 0:
		return false
	case v[0] == '\n':
		// Soft line break.
		return true
	case len(v) < 2:
		return false
	case v[0] == '\r' && v[1] == '\n':
		// Soft line break.
		return true
	}
	return validHexByte(v[0]) && validHexByte(v[1])
}

View File

@@ -0,0 +1,88 @@
package stringutil
import (
"bytes"
"net/mail"
"strings"
)
// JoinAddress formats a slice of Address structs such that they can be used in a To or Cc header.
func JoinAddress(addrs []mail.Address) string {
if len(addrs) == 0 {
return ""
}
buf := &bytes.Buffer{}
for i, a := range addrs {
if i > 0 {
_, _ = buf.WriteString(", ")
}
_, _ = buf.WriteString(a.String())
}
return buf.String()
}
// EnsureCommaDelimitedAddresses is used by AddressList to ensure that address lists are properly
// delimited.
//
// Runs of whitespace are first collapsed to single spaces. Then, outside of double-quoted
// sections and once an '@' has been seen, a ';' is replaced by ',' (or dropped when it is the last
// character) and a ',' is inserted in front of a space. A ',' already present is kept.
func EnsureCommaDelimitedAddresses(s string) string {
	// This normalizes the whitespace, but may interfere with CFWS (comments with folding whitespace)
	// RFC-5322 3.4.0:
	//      because some legacy implementations interpret the comment,
	//      comments generally SHOULD NOT be used in address fields
	//      to avoid confusing such implementations.
	s = strings.Join(strings.Fields(s), " ")

	var (
		out        strings.Builder
		quoted     bool // inside a double-quoted section
		escaping   bool // the previous character was a backslash inside quotes
		pastAtSign bool // an '@' was seen and no delimiter has followed it yet
	)

	for pos, r := range s {
		switch {
		case escaping:
			// The escaped character is copied verbatim.
			escaping = false
		case r == '"':
			quoted = !quoted
		case quoted:
			if r == '\\' {
				escaping = true
			}
		case r == '@':
			pastAtSign = true
		case pastAtSign && r == ';':
			pastAtSign = false
			if pos != len(s)-1 {
				out.WriteRune(',')
			}
			// The semicolon itself is never copied; a trailing one is simply omitted.
			continue
		case pastAtSign && r == ',':
			pastAtSign = false
		case pastAtSign && r == ' ':
			pastAtSign = false
			out.WriteRune(',')
		}
		out.WriteRune(r)
	}
	return out.String()
}

View File

@@ -0,0 +1,39 @@
package stringutil
// FindUnquoted returns the byte indexes at which v occurs in s outside of runs delimited by quote,
// or an empty slice if there are none. The string is scanned byte by byte.
//
// A quote that directly follows an unpaired escape character does not open or close a run.
// Occurrences of v seen after a quote that is never closed are reported as well.
func FindUnquoted(s string, v rune, quote rune) []int {
	var (
		afterEscape bool
		insideQuote bool
	)
	found := make([]int, 0)
	pending := make([]int, 0) // hits inside the currently open quoted run

	for pos := 0; pos < len(s); pos++ {
		switch c := rune(s[pos]); {
		case c == escape:
			afterEscape = !afterEscape // escape can escape itself.
		case c == quote:
			if afterEscape {
				afterEscape = false
				break
			}
			insideQuote = !insideQuote
			if !insideQuote {
				pending = pending[:0] // drop possible indices inside quoted segment
			}
		case c == v:
			afterEscape = false
			if insideQuote {
				pending = append(pending, pos)
			} else {
				found = append(found, pos)
			}
		default:
			afterEscape = false
		}
	}

	return append(found, pending...)
}

View File

@@ -0,0 +1,45 @@
package stringutil
// escape is the backslash character; FindUnquoted treats it as the escape character when scanning
// for quotes.
const escape = '\\'
// SplitUnquoted cuts s at every occurrence of sep that lies outside a quoted run and returns the
// pieces between those separators; the separators themselves are not included.
//
// If s contains no such sep, the result is a slice of length 1 whose only element is s.
func SplitUnquoted(s string, sep rune, quote rune) []string {
	const keepSeparator = false
	return splitUnquoted(s, sep, quote, keepSeparator)
}
// SplitAfterUnquoted cuts s after every occurrence of sep that lies outside a quoted run and
// returns the pieces; each piece except the last ends with its separator.
//
// If s contains no such sep, the result is a slice of length 1 whose only element is s.
func SplitAfterUnquoted(s string, sep rune, quote rune) []string {
	const keepSeparator = true
	return splitUnquoted(s, sep, quote, keepSeparator)
}
// splitUnquoted splits s at the positions FindUnquoted reports for sep. With preserveSep set, the
// byte at each position stays attached to the end of the piece before it; otherwise it is left
// out. When no position is reported, s is returned as the only element.
func splitUnquoted(s string, sep rune, quote rune, preserveSep bool) []string {
	cuts := FindUnquoted(s, sep, quote)
	if len(cuts) == 0 {
		return []string{s}
	}

	// Number of bytes at the cut position to keep with the preceding piece.
	keep := 0
	if preserveSep {
		keep = 1
	}

	pieces := make([]string, 0, len(cuts)+1)
	from := 0
	for _, cut := range cuts {
		pieces = append(pieces, s[from:cut+keep])
		from = cut + 1
	}
	pieces = append(pieces, s[from:])
	return pieces
}

View File

@@ -0,0 +1,24 @@
package stringutil
import (
"fmt"
"math/rand"
"sync"
"time"
)
// uuidRand is the pseudo-random source used by UUID; it is seeded with the current time when the
// package is initialized.
var uuidRand = rand.New(rand.NewSource(time.Now().UnixNano()))

// uuidMutex serializes access to uuidRand.
var uuidMutex = &sync.Mutex{}

// UUID generates a random UUID according to RFC 4122, formatted as five groups of lowercase
// hexadecimal digits (8-4-4-4-12) separated by hyphens.
func UUID() string {
	var raw [16]byte

	uuidMutex.Lock()
	_, _ = uuidRand.Read(raw[:])
	uuidMutex.Unlock()

	// version 4 (pseudo-random); see section 4.1.3
	raw[6] = raw[6]&^0xf0 | 0x40
	// variant bits; see section 4.1.1
	raw[8] = raw[8]&^0xc0 | 0x80

	timeLow, timeMid, timeHi := raw[0:4], raw[4:6], raw[6:8]
	clockSeq, node := raw[8:10], raw[10:]
	return fmt.Sprintf("%x-%x-%x-%x-%x", timeLow, timeMid, timeHi, clockSeq, node)
}

View File

@@ -0,0 +1,36 @@
package stringutil
// Wrap concatenates strs and returns the result as a byte slice, breaking lines at spaces or tabs
// once a line has reached max bytes. Each break replaces the whitespace byte it occurs on with
// "\r\n" followed by a single space. Input shorter than max is returned unwrapped, and a line
// without any space or tab is left unbroken.
func Wrap(max int, strs ...string) []byte {
	text := []byte{}
	for i := range strs {
		text = append(text, strs[i]...)
	}
	if len(text) < max {
		// Doesn't need to be wrapped
		return text
	}

	wrapped := []byte{}
	var (
		lastSpace = -1 // index of the last space/tab seen on the current line
		flushed   = -1 // index of the last input byte already accounted for in wrapped
		lineLen   = 0  // length of the current line
	)
	for pos := 0; pos < len(text); pos++ {
		lineLen++
		if c := text[pos]; c == ' ' || c == '\t' {
			lastSpace = pos
		}
		if lineLen < max || lastSpace < 0 {
			continue
		}

		wrapped = append(wrapped, text[flushed+1:lastSpace]...)
		wrapped = append(wrapped, "\r\n "...)
		flushed = lastSpace // Jump over the space we broke on
		lineLen = 1         // Count leading space above
		// Rewind
		pos = flushed + 1
		lastSpace = -1
	}

	return append(wrapped, text[flushed+1:]...)
}