346 lines
9.8 KiB
Go
346 lines
9.8 KiB
Go
package enmime
|
|
|
|
import (
|
|
"fmt"
|
|
"io"
|
|
"mime"
|
|
"net/mail"
|
|
"net/textproto"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/jaytaylor/html2text"
|
|
"github.com/jhillyerd/enmime/internal/coding"
|
|
"github.com/jhillyerd/enmime/mediatype"
|
|
"github.com/pkg/errors"
|
|
)
|
|
|
|
// Envelope is a simplified wrapper for MIME email messages.
|
|
type Envelope struct {
|
|
Text string // The plain text portion of the message
|
|
HTML string // The HTML portion of the message
|
|
Root *Part // The top-level Part
|
|
Attachments []*Part // All parts having a Content-Disposition of attachment
|
|
Inlines []*Part // All parts having a Content-Disposition of inline
|
|
// All non-text parts that were not placed in Attachments or Inlines, such as multipart/related
|
|
// content.
|
|
OtherParts []*Part
|
|
Errors []*Error // Errors encountered while parsing
|
|
header *textproto.MIMEHeader // Header from original message
|
|
}
|
|
|
|
// GetHeaderKeys returns a list of header keys seen in this message. Get
|
|
// individual headers with `GetHeader(name)`
|
|
func (e *Envelope) GetHeaderKeys() (headers []string) {
|
|
if e.header == nil {
|
|
return
|
|
}
|
|
for key := range *e.header {
|
|
headers = append(headers, key)
|
|
}
|
|
return headers
|
|
}
|
|
|
|
// GetHeader processes the specified header for RFC 2047 encoded words and returns the result as a
|
|
// UTF-8 string
|
|
func (e *Envelope) GetHeader(name string) string {
|
|
if e.header == nil {
|
|
return ""
|
|
}
|
|
return coding.DecodeExtHeader(e.header.Get(name))
|
|
}
|
|
|
|
// GetHeaderValues processes the specified header for RFC 2047 encoded words and returns all existing
|
|
// values as a list of UTF-8 strings
|
|
func (e *Envelope) GetHeaderValues(name string) []string {
|
|
if e.header == nil {
|
|
return []string{}
|
|
}
|
|
|
|
rawValues := (*e.header)[textproto.CanonicalMIMEHeaderKey(name)]
|
|
values := make([]string, 0, len(rawValues))
|
|
for _, v := range rawValues {
|
|
values = append(values, coding.DecodeExtHeader(v))
|
|
}
|
|
return values
|
|
}
|
|
|
|
// SetHeader sets given header name to the given value.
|
|
// If the header exists already, all existing values are replaced.
|
|
func (e *Envelope) SetHeader(name string, value []string) error {
|
|
if name == "" {
|
|
return fmt.Errorf("provide non-empty header name")
|
|
}
|
|
|
|
for i, v := range value {
|
|
if i == 0 {
|
|
e.header.Set(name, mime.BEncoding.Encode("utf-8", v))
|
|
continue
|
|
}
|
|
e.header.Add(name, mime.BEncoding.Encode("utf-8", v))
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// AddHeader appends given header value to header name without changing existing values.
|
|
// If the header does not exist already, it will be created.
|
|
func (e *Envelope) AddHeader(name string, value string) error {
|
|
if name == "" {
|
|
return fmt.Errorf("provide non-empty header name")
|
|
}
|
|
|
|
e.header.Add(name, mime.BEncoding.Encode("utf-8", value))
|
|
return nil
|
|
}
|
|
|
|
// DeleteHeader deletes given header.
|
|
func (e *Envelope) DeleteHeader(name string) error {
|
|
if name == "" {
|
|
return fmt.Errorf("provide non-empty header name")
|
|
}
|
|
|
|
e.header.Del(name)
|
|
return nil
|
|
}
|
|
|
|
// AddressList returns a mail.Address slice with RFC 2047 encoded names converted to UTF-8
|
|
func (e *Envelope) AddressList(key string) ([]*mail.Address, error) {
|
|
if e.header == nil {
|
|
return nil, fmt.Errorf("no headers available")
|
|
}
|
|
if !AddressHeaders[strings.ToLower(key)] {
|
|
return nil, fmt.Errorf("%s is not an address header", key)
|
|
}
|
|
|
|
return ParseAddressList(e.header.Get(key))
|
|
}
|
|
|
|
// Date parses the Date header field.
|
|
func (e *Envelope) Date() (time.Time, error) {
|
|
hdr := e.GetHeader("Date")
|
|
if hdr == "" {
|
|
return time.Time{}, mail.ErrHeaderNotPresent
|
|
}
|
|
return mail.ParseDate(hdr)
|
|
}
|
|
|
|
// Clone returns a clone of the current Envelope
|
|
func (e *Envelope) Clone() *Envelope {
|
|
if e == nil {
|
|
return nil
|
|
}
|
|
|
|
newEnvelope := &Envelope{
|
|
e.Text,
|
|
e.HTML,
|
|
e.Root.Clone(nil),
|
|
e.Attachments,
|
|
e.Inlines,
|
|
e.OtherParts,
|
|
e.Errors,
|
|
e.header,
|
|
}
|
|
return newEnvelope
|
|
}
|
|
|
|
// ReadEnvelope is a wrapper around ReadParts and EnvelopeFromPart. It parses the content of the
|
|
// provided reader into an Envelope, downconverting HTML to plain text if needed, and sorting the
|
|
// attachments, inlines and other parts into their respective slices. Errors are collected from all
|
|
// Parts and placed into the Envelope.Errors slice.
|
|
// Uses default parser.
|
|
func ReadEnvelope(r io.Reader) (*Envelope, error) {
|
|
return defaultParser.ReadEnvelope(r)
|
|
}
|
|
|
|
// ReadEnvelope is the same as ReadEnvelope, but respects parser configurations.
|
|
func (p Parser) ReadEnvelope(r io.Reader) (*Envelope, error) {
|
|
// Read MIME parts from reader
|
|
root, err := p.ReadParts(r)
|
|
if err != nil {
|
|
return nil, errors.WithMessage(err, "Failed to ReadParts")
|
|
}
|
|
return p.EnvelopeFromPart(root)
|
|
}
|
|
|
|
// EnvelopeFromPart uses the provided Part tree to build an Envelope, downconverting HTML to plain
|
|
// text if needed, and sorting the attachments, inlines and other parts into their respective
|
|
// slices. Errors are collected from all Parts and placed into the Envelopes Errors slice.
|
|
func EnvelopeFromPart(root *Part) (*Envelope, error) {
|
|
return defaultParser.EnvelopeFromPart(root)
|
|
}
|
|
|
|
// EnvelopeFromPart is the same as EnvelopeFromPart, but respects parser configurations.
|
|
func (p Parser) EnvelopeFromPart(root *Part) (*Envelope, error) {
|
|
e := &Envelope{
|
|
Root: root,
|
|
header: &root.Header,
|
|
}
|
|
|
|
if detectMultipartMessage(root, p.multipartWOBoundaryAsSinglePart) {
|
|
// Multi-part message (message with attachments, etc)
|
|
if err := parseMultiPartBody(root, e); err != nil {
|
|
return nil, err
|
|
}
|
|
} else {
|
|
if detectBinaryBody(root) {
|
|
// Attachment only, no text
|
|
if root.Disposition == cdInline {
|
|
e.Inlines = append(e.Inlines, root)
|
|
} else {
|
|
e.Attachments = append(e.Attachments, root)
|
|
}
|
|
} else {
|
|
// Only text, no attachments
|
|
if err := parseTextOnlyBody(root, e); err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
}
|
|
|
|
// Down-convert HTML to text if necessary
|
|
if e.Text == "" && e.HTML != "" {
|
|
// We always warn when this happens
|
|
e.Root.addWarning(
|
|
ErrorPlainTextFromHTML,
|
|
"Message did not contain a text/plain part")
|
|
var err error
|
|
if e.Text, err = html2text.FromString(e.HTML); err != nil {
|
|
e.Text = "" // Down-conversion shouldn't fail
|
|
p := e.Root.BreadthMatchFirst(matchHTMLBodyPart)
|
|
p.addError(ErrorPlainTextFromHTML, "Failed to downconvert HTML: %v", err)
|
|
}
|
|
}
|
|
|
|
// Copy part errors into Envelope.
|
|
if e.Root != nil {
|
|
_ = e.Root.DepthMatchAll(func(part *Part) bool {
|
|
// Using DepthMatchAll to traverse all parts, don't care about result.
|
|
for i := range part.Errors {
|
|
// Range index is needed to get the correct address, because range value points to
|
|
// a locally scoped variable.
|
|
e.Errors = append(e.Errors, part.Errors[i])
|
|
}
|
|
return false
|
|
})
|
|
}
|
|
|
|
return e, nil
|
|
}
|
|
|
|
// parseTextOnlyBody parses a plain text message in root that has MIME-like headers, but
|
|
// only contains a single part - no boundaries, etc. The result is placed in e.
|
|
func parseTextOnlyBody(root *Part, e *Envelope) error {
|
|
// Determine character set
|
|
var charset string
|
|
var isHTML bool
|
|
if ctype := root.Header.Get(hnContentType); ctype != "" {
|
|
if mediatype, mparams, _, err := mediatype.Parse(ctype); err == nil {
|
|
isHTML = (mediatype == ctTextHTML)
|
|
if mparams[hpCharset] != "" {
|
|
charset = mparams[hpCharset]
|
|
}
|
|
}
|
|
}
|
|
|
|
// Read transcoded text
|
|
if isHTML {
|
|
rawHTML := string(root.Content)
|
|
// Note: Empty e.Text will trigger html2text conversion
|
|
e.HTML = rawHTML
|
|
if charset == "" {
|
|
// Search for charset in HTML metadata
|
|
if charset = coding.FindCharsetInHTML(rawHTML); charset != "" {
|
|
// Found charset in HTML
|
|
if convHTML, err := coding.ConvertToUTF8String(charset, root.Content); err == nil {
|
|
// Successful conversion
|
|
e.HTML = convHTML
|
|
} else {
|
|
// Conversion failed
|
|
root.addWarning(ErrorCharsetConversion, err.Error())
|
|
}
|
|
}
|
|
// Converted from charset in HTML
|
|
return nil
|
|
}
|
|
} else {
|
|
e.Text = string(root.Content)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// parseMultiPartBody parses a multipart message in root. The result is placed in e.
|
|
func parseMultiPartBody(root *Part, e *Envelope) error {
|
|
// Parse top-level multipart
|
|
ctype := root.Header.Get(hnContentType)
|
|
mediatype, params, _, err := mediatype.Parse(ctype)
|
|
if err != nil {
|
|
return fmt.Errorf("unable to parse media type: %v", err)
|
|
}
|
|
if !strings.HasPrefix(mediatype, ctMultipartPrefix) {
|
|
return fmt.Errorf("unknown mediatype: %v", mediatype)
|
|
}
|
|
boundary := params[hpBoundary]
|
|
if boundary == "" {
|
|
return fmt.Errorf("unable to locate boundary param in Content-Type header")
|
|
}
|
|
|
|
// Locate text body
|
|
if mediatype == ctMultipartAltern {
|
|
p := root.BreadthMatchFirst(func(p *Part) bool {
|
|
return p.ContentType == ctTextPlain && p.Disposition != cdAttachment
|
|
})
|
|
if p != nil {
|
|
e.Text = string(p.Content)
|
|
}
|
|
} else {
|
|
// multipart is of a mixed type
|
|
parts := root.DepthMatchAll(func(p *Part) bool {
|
|
return p.ContentType == ctTextPlain && p.Disposition != cdAttachment
|
|
})
|
|
for i, p := range parts {
|
|
if i > 0 {
|
|
e.Text += "\n--\n"
|
|
}
|
|
e.Text += string(p.Content)
|
|
}
|
|
}
|
|
|
|
// Locate HTML body
|
|
p := root.DepthMatchFirst(matchHTMLBodyPart)
|
|
if p != nil {
|
|
e.HTML += string(p.Content)
|
|
}
|
|
|
|
// Locate attachments
|
|
e.Attachments = root.BreadthMatchAll(func(p *Part) bool {
|
|
return p.Disposition == cdAttachment || p.ContentType == ctAppOctetStream
|
|
})
|
|
|
|
// Locate inlines
|
|
e.Inlines = root.BreadthMatchAll(func(p *Part) bool {
|
|
return p.Disposition == cdInline && !strings.HasPrefix(p.ContentType, ctMultipartPrefix)
|
|
})
|
|
|
|
// Locate others parts not considered in attachments or inlines
|
|
e.OtherParts = root.BreadthMatchAll(func(p *Part) bool {
|
|
if strings.HasPrefix(p.ContentType, ctMultipartPrefix) {
|
|
return false
|
|
}
|
|
if p.Disposition != "" {
|
|
return false
|
|
}
|
|
if p.ContentType == ctAppOctetStream {
|
|
return false
|
|
}
|
|
return p.ContentType != ctTextPlain && p.ContentType != ctTextHTML
|
|
})
|
|
|
|
return nil
|
|
}
|
|
|
|
// Used by Part matchers to locate the HTML body. Not inlined because it's used in multiple places.
|
|
func matchHTMLBodyPart(p *Part) bool {
|
|
return p.ContentType == ctTextHTML && p.Disposition != cdAttachment
|
|
}
|