Files
postmoogle/vendor/github.com/jhillyerd/enmime/header.go
2022-11-16 12:08:51 +02:00

243 lines
7.2 KiB
Go

package enmime
import (
"bufio"
"bytes"
"fmt"
"mime"
"net/mail"
"net/textproto"
"strings"
"github.com/jhillyerd/enmime/internal/coding"
"github.com/jhillyerd/enmime/internal/stringutil"
"github.com/jhillyerd/enmime/mediatype"
"github.com/pkg/errors"
)
// MIME-related string literals: disposition values, media types, transfer encodings, header
// field names and header parameter names.
const (
	// Content-Disposition values.
	cdAttachment = "attachment"
	cdInline     = "inline"

	// Content-Type values. ctMultipartPrefix is the leading part shared by the multipart/* types.
	ctAppOctetStream   = "application/octet-stream"
	ctMultipartAltern  = "multipart/alternative"
	ctMultipartMixed   = "multipart/mixed"
	ctMultipartPrefix  = "multipart/"
	ctMultipartRelated = "multipart/related"
	ctTextPlain        = "text/plain"
	ctTextHTML         = "text/html"

	// Content-Transfer-Encoding values.
	cte7Bit            = "7bit"
	cte8Bit            = "8bit"
	cteBase64          = "base64"
	cteBinary          = "binary"
	cteQuotedPrintable = "quoted-printable"

	// Header field names. Note that hnContentEncoding names the Content-Transfer-Encoding
	// header, not Content-Encoding.
	hnContentDisposition = "Content-Disposition"
	hnContentEncoding    = "Content-Transfer-Encoding"
	hnContentID          = "Content-ID"
	hnContentType        = "Content-Type"
	hnMIMEVersion        = "MIME-Version"

	// Header parameter names.
	hpBoundary = "boundary"
	hpCharset  = "charset"
	hpFile     = "file"
	hpFilename = "filename"
	hpName     = "name"
	hpModDate  = "modification-date"

	// utf8 is the lowercase charset label for UTF-8.
	utf8 = "utf-8"
)
// AddressHeaders is the set of SMTP header names whose values contain email addresses; it is
// used by Envelope.AddressList(). Every key must be written entirely in lowercase.
var AddressHeaders = map[string]bool{
	// Originator, destination and delivery fields.
	"from":         true,
	"sender":       true,
	"reply-to":     true,
	"to":           true,
	"cc":           true,
	"bcc":          true,
	"delivered-to": true,

	// Resent-* counterparts.
	"resent-from":     true,
	"resent-sender":   true,
	"resent-reply-to": true,
	"resent-to":       true,
	"resent-cc":       true,
	"resent-bcc":      true,
}
// ParseAddressList returns a mail.Address slice with RFC 2047 encoded names converted to UTF-8.
// It is more tolerant of malformed headers than the ParseAddressList func provided in Go's net/mail
// package:
//
//   - When net/mail reports "expected comma", the list is passed through
//     stringutil.EnsureCommaDelimitedAddresses and parsed a second time.
//   - Every parsed name and address is additionally run through the less strict
//     coding.DecodeExtHeader decoder. This also applies to the result of the comma-repair
//     retry, so both paths return equally decoded addresses.
//
// A list in which net/mail finds no address yields mail.ErrHeaderNotPresent. Any other parse
// error, including one from the retry, is returned unchanged with a nil slice.
func ParseAddressList(list string) ([]*mail.Address, error) {
	parser := mail.AddressParser{WordDecoder: coding.NewExtMimeDecoder()}

	ret, err := parser.ParseList(list)
	if err != nil {
		// net/mail does not export these errors, so they can only be told apart by message.
		switch err.Error() {
		case "mail: expected comma":
			// Attempt to add commas and parse again; on success fall through to the shared
			// decoding pass below instead of returning the raw result.
			ret, err = parser.ParseList(stringutil.EnsureCommaDelimitedAddresses(list))
			if err != nil {
				return nil, err
			}
		case "mail: no address":
			return nil, mail.ErrHeaderNotPresent
		default:
			return nil, err
		}
	}

	for _, addr := range ret {
		// try to additionally decode with less strict decoder
		addr.Name = coding.DecodeExtHeader(addr.Name)
		addr.Address = coding.DecodeExtHeader(addr.Address)
	}

	return ret, nil
}
// Terminology from RFC 2047:
// encoded-word: the entire =?charset?encoding?encoded-text?= string
// charset: the character set portion of the encoded word
// encoding: the character encoding type used for the encoded-text
// encoded-text: the text we are decoding
// ParseMediaType is a more tolerant implementation of Go's mime.ParseMediaType function. It is
// the exported entry point for mediatype.Parse and returns that function's results unchanged.
//
// Tolerances accounted for:
//   - Missing ';' between content-type and media parameters
//   - Repeating media parameters
//   - Unquoted values in media parameters containing 'tspecials' characters
func ParseMediaType(ctype string) (mtype string, params map[string]string, invalidParams []string,
	err error) {
	mtype, params, invalidParams, err = mediatype.Parse(ctype)
	return mtype, params, invalidParams, err
}
// readHeader reads a block of SMTP or MIME headers from r and returns a textproto.MIMEHeader.
//
// Lines are pulled from r one at a time, repaired where possible, and collected in a buffer
// that is finally handed to textproto.Reader.ReadMIMEHeader(). Parse problems found while
// collecting are reported through p.addError / p.addWarning.
//
// NOTE(review): an error from reading r is not returned; it only ends header collection, the
// same way a blank line does. The sole error that reaches the caller is the one from the final
// ReadMIMEHeader() call, wrapped with errors.WithStack.
func readHeader(r *bufio.Reader, p *Part) (textproto.MIMEHeader, error) {
// buf holds the massaged output for textproto.Reader.ReadMIMEHeader()
buf := &bytes.Buffer{}
tp := textproto.NewReader(r)
// firstHeader stays true until the first header line has been written to buf; it suppresses
// the CRLF that otherwise terminates the previous header line.
firstHeader := true
for {
// Pull out each line of the headers as a temporary slice s
s, err := tp.ReadLineBytes()
if err != nil {
// Any read error (not just end of input) stops collection; the CRLF written here
// terminates the last header line collected so far. err itself is discarded.
buf.Write([]byte{'\r', '\n'})
break
}
firstColon := bytes.IndexByte(s, ':')
// firstSpace is only ever compared against 0, i.e. "does the line start with whitespace".
firstSpace := bytes.IndexAny(s, " \t\n\r")
if firstSpace == 0 {
// Starts with space: continuation. Fold it onto the current header line, separated by a
// single space, with the surrounding whitespace trimmed.
buf.WriteByte(' ')
buf.Write(textproto.TrimBytes(s))
continue
}
if firstColon == 0 {
// Can't parse line starting with colon: skip
p.addError(ErrorMalformedHeader, "Header line %q started with a colon", s)
continue
}
if firstColon > 0 {
// Contains a colon, treat as a new header line
if !firstHeader {
// New Header line, end the previous
buf.Write([]byte{'\r', '\n'})
}
// Behavior change in net/textproto package in Golang 1.12.10 and 1.13.1:
// A space preceding the first colon in a header line is no longer handled
// automatically due to CVE-2019-16276 which takes advantage of this
// particular violation of RFC-7230 to exploit HTTP/1.1
//
// Only a single space directly in front of the first colon is removed here.
if bytes.Contains(s[:firstColon+1], []byte{' ', ':'}) {
s = bytes.Replace(s, []byte{' ', ':'}, []byte{':'}, 1)
}
s = textproto.TrimBytes(s)
buf.Write(s)
firstHeader = false
} else {
// No colon: potential non-indented continuation
if len(s) > 0 {
// Attempt to detect and repair a non-indented continuation of previous line.
// The line is appended untrimmed after a single space and a warning is recorded.
buf.WriteByte(' ')
buf.Write(s)
p.addWarning(ErrorMalformedHeader, "Continued line %q was not indented", s)
} else {
// Empty line, finish header parsing. This CRLF terminates the last header line.
buf.Write([]byte{'\r', '\n'})
break
}
}
}
// Final CRLF: together with the one written before leaving the loop it forms the blank line
// that ends the header block for ReadMIMEHeader().
buf.Write([]byte{'\r', '\n'})
tr := textproto.NewReader(bufio.NewReader(buf))
header, err := tr.ReadMIMEHeader()
return header, errors.WithStack(err)
}
// decodeToUTF8Base64Header decodes a MIME header per RFC 2047 and re-encodes it with
// mime.BEncoding using the UTF-8 charset (the "=?UTF-8?b?...?=" form).
//
// The input is split on whitespace. Each token longer than four bytes that contains "=?" is
// decoded with coding.DecodeExtHeader and passed to the encoder; all other tokens are kept as
// they are. The tokens are then joined with single spaces. Input without any "=?" is returned
// untouched.
func decodeToUTF8Base64Header(input string) string {
	if !strings.Contains(input, "=?") {
		// Nothing that could be an encoded-word: skip the scan entirely.
		return input
	}

	// The standard library inspects such strings incrementally and its "skipSpace" step only
	// trims leading spaces and tabs. This function instead relies on whitespace being present
	// between tokens, not on what the next expected rune is.
	//
	// To resolve #112 with the least change, quotedDisplayName inserts the missing space for
	// the one case reported there: a quoted display-name followed, without whitespace, by an
	// addr-spec.
	words := strings.FieldsFunc(quotedDisplayName(input), whiteSpaceRune)
	for i, word := range words {
		if len(word) <= 4 || !strings.Contains(word, "=?") {
			// Not a candidate encoded-word; leave the token as split.
			continue
		}

		// Set aside one enclosing parenthesis on each side; they must not be encoded.
		var opening, closing string
		if strings.HasPrefix(word, "(") {
			opening = "("
			word = word[1:]
		}
		if strings.HasSuffix(word, ")") {
			closing = ")"
			word = word[:len(word)-1]
		}

		decoded := coding.DecodeExtHeader(word)
		words[i] = opening + mime.BEncoding.Encode("UTF-8", decoded) + closing
	}

	// Return space separated tokens
	return strings.Join(words, " ")
}
// quotedDisplayName separates a leading quoted display-name from whatever follows it. When s
// begins with a double quote, a single space is inserted immediately after the last double
// quote found in s; otherwise s is returned unchanged.
func quotedDisplayName(s string) string {
	if len(s) == 0 || s[0] != '"' {
		return s
	}

	// s starts with a quote, so LastIndexByte always finds one and cut is at least 1.
	cut := strings.LastIndexByte(s, '"') + 1
	quoted, rest := s[:cut], s[cut:]
	return fmt.Sprintf("%s %s", quoted, rest)
}
// whiteSpaceRune reports whether r is an RFC-822 linear-white-space character: space, tab,
// carriage return or line feed. It is passed to strings.FieldsFunc.
func whiteSpaceRune(r rune) bool {
	switch r {
	case ' ', '\t', '\r', '\n':
		return true
	}
	return false
}