upgrade deps; rewrite smtp session

This commit is contained in:
Aine
2024-02-19 22:55:14 +02:00
parent 10213cc7d7
commit a01720da00
277 changed files with 106832 additions and 7641 deletions

View File

@@ -57,6 +57,25 @@ if err != nil {
}
```
### Validating that the public suffix is ICANN managed ###
Checks whether the public suffix is managed by the Internet Corporation for Assigned Names and Numbers (ICANN).
If it is not, an error is returned and the public suffix is privately managed. For example, foo.org and foo.co.uk are ICANN domains, while foo.dyndns.org and foo.blogspot.co.uk are private domains. More information on [public suffixes here](https://godoc.org/golang.org/x/net/publicsuffix).
```go
import "github.com/mcnijman/go-emailaddress"
email, err := emailaddress.Parse("foo@bar.com")
if err != nil {
fmt.Println("invalid email")
}
err = email.ValidateIcanSuffix()
if err != nil {
fmt.Println("not an icann suffix")
}
```
### Finding emails ###
This will look for emails in a byte array (ie text or an html response).
@@ -64,7 +83,7 @@ This will look for emails in a byte array (ie text or an html response).
```go
import "github.com/mcnijman/go-emailaddress"
text := []byte(`Send me an email at foo@bar.com.`)
text := []byte(`Send me an email at foo@bar.com or foo@domain.fakesuffix.`)
validateHost := false
emails := emailaddress.Find(text, validateHost)
@@ -73,6 +92,7 @@ for _, e := range emails {
fmt.Println(e)
}
// foo@bar.com
// foo@domain.fakesuffix
```
As RFC 5322 is really broad this method will likely match images and urls that contain
@@ -81,7 +101,7 @@ the '@' character (ie. !--logo@2x.png). For more reliable results, you can use t
```go
import "github.com/mcnijman/go-emailaddress"
text := []byte(`Send me an email at foo@bar.com or fake@domain.foobar.`)
text := []byte(`Send me an email at foo@domain.com or foo@domain.fakesuffix.`)
validateHost := false
emails := emailaddress.FindWithIcannSuffix(text, validateHost)

View File

@@ -9,7 +9,7 @@ addresses. This library is tested for Go v1.9 and above.
go get -u github.com/mcnijman/go-emailaddress
Local validation
# Local validation
Parse and validate the email locally using RFC 5322 regex, note that when err == nil it doesn't
necessarily mean the email address actually exists.
@@ -26,7 +26,7 @@ necessarily mean the email address actually exists.
fmt.Println(email) // foo@bar.com
fmt.Println(email.String()) // foo@bar.com
Host validation
# Host validation
Host validation will first attempt to resolve the domain and then verify if we can start a mail
transaction with the host. This is relatively slow as it will contact the host several times.
@@ -44,7 +44,7 @@ Note that when err == nil it doesn't necessarily mean the email address actually
fmt.Println("invalid host")
}
Finding emails
# Finding emails
This will look for emails in a byte array (ie text or an html response).
@@ -74,7 +74,6 @@ the '@' character (ie. !--logo@2x.png). For more reliable results, you can use t
fmt.Println(e)
}
// foo@bar.com
*/
package emailaddress
@@ -92,9 +91,13 @@ var (
// rfc5322 is a RFC 5322 regex, as per: https://stackoverflow.com/a/201378/5405453.
// Note that this can't verify that the address is an actual working email address.
// Use ValidateHost as a starter and/or send them one :-).
rfc5322 = "(?i)(?:[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*|\"(?:[\\x01-\\x08\\x0b\\x0c\\x0e-\\x1f\\x21\\x23-\\x5b\\x5d-\\x7f]|\\\\[\\x01-\\x09\\x0b\\x0c\\x0e-\\x7f])*\")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\\[(?:(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9]))\\.){3}(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9])|[a-z0-9-]*[a-z0-9]:(?:[\\x01-\\x08\\x0b\\x0c\\x0e-\\x1f\\x21-\\x5a\\x53-\\x7f]|\\\\[\\x01-\\x09\\x0b\\x0c\\x0e-\\x7f])+)\\])"
validEmailRegexp = regexp.MustCompile(fmt.Sprintf("^%s*$", rfc5322))
findEmailRegexp = regexp.MustCompile(rfc5322)
rfc5322 = "(?i)(?:[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*|\"(?:[\\x01-\\x08\\x0b\\x0c\\x0e-\\x1f\\x21\\x23-\\x5b\\x5d-\\x7f]|\\\\[\\x01-\\x09\\x0b\\x0c\\x0e-\\x7f])*\")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\\[(?:(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9]))\\.){3}(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9])|[a-z0-9-]*[a-z0-9]:(?:[\\x01-\\x08\\x0b\\x0c\\x0e-\\x1f\\x21-\\x5a\\x53-\\x7f]|\\\\[\\x01-\\x09\\x0b\\x0c\\x0e-\\x7f])+)\\])"
validRfc5322Regexp = regexp.MustCompile(fmt.Sprintf("^%s*$", rfc5322))
findRfc5322Regexp = regexp.MustCompile(rfc5322)
// findCommonRegexp is a stricter regex than the RFC 5322 and matches emails that
// are more likely to be real.
findCommonRegexp = regexp.MustCompile("(?i)([A-Z0-9._%+-]+@[A-Z0-9.-]+\\.[A-Z]{2,24})")
)
// EmailAddress is a structure that stores the address local-part@domain parts.
@@ -117,11 +120,11 @@ func (e EmailAddress) String() string {
// ValidateHost will test if the email address is actually reachable. It will first try to resolve
// the host and then start a mail transaction.
//
// It returns the lookup error when no host can be found for e.Domain; otherwise it returns
// whatever the transaction attempt against that host returns.
func (e EmailAddress) ValidateHost() error {
// NOTE(review): this view is a rendered commit diff, so the removed and the added form of a
// changed statement appear back to back. The lowercase lookupHost call appears to be the
// pre-commit line and the exported LookupHost call below it its replacement.
host, err := lookupHost(e.Domain)
host, err := LookupHost(e.Domain)
if err != nil {
return err
}
// Same pairing here: tryHost looks like the pre-commit call, TryHost the post-commit one.
return tryHost(host, e)
return TryHost(host, e)
}
// ValidateIcanSuffix will test if the public suffix of the domain is managed by ICANN using
@@ -136,12 +139,30 @@ func (e EmailAddress) ValidateIcanSuffix() error {
return nil
}
// Find uses the RFC 5322 regex to match, parse and validate any email addresses found in a string.
// If the validateHost boolean is true it will call the validate host for every email address
// encounterd. As RFC 5322 is really broad this method will likely match images and urls that
// contain the '@' character.
// Find uses a stricter regex than RFC 5322 and matches emails that are more likely to be
// real. Because that regex is stricter than the RFC 5322 spec, it can miss emails that are
// real, but it will more likely give better results. See examples in the tests.
func Find(haystack []byte, validateHost bool) (emails []*EmailAddress) {
// NOTE(review): this view is a rendered commit diff; the two `results` lines below appear to
// be the removed (findEmailRegexp) and the added (findCommonRegexp) form of one statement.
results := findEmailRegexp.FindAll(haystack, -1)
results := findCommonRegexp.FindAll(haystack, -1)
// Every raw match is re-checked by Parse; matches that Parse rejects are dropped silently.
for _, r := range results {
if e, err := Parse(string(r)); err == nil {
if validateHost {
// A failed host check skips this address instead of aborting the whole search.
if err := e.ValidateHost(); err != nil {
continue
}
}
emails = append(emails, e)
}
}
// emails is only ever appended to, so it is still nil when no address qualified.
return emails
}
// FindWithRFC5322 uses the RFC 5322 regex to match, parse and validate any email addresses found in a string.
// If the validateHost boolean is true it will call the validate host for every email address
// encountered. As RFC 5322 is really broad this method will likely match images and urls that
// contain the '@' character.
func FindWithRFC5322(haystack []byte, validateHost bool) (emails []*EmailAddress) {
results := findRfc5322Regexp.FindAll(haystack, -1)
for _, r := range results {
if e, err := Parse(string(r)); err == nil {
if validateHost {
@@ -158,7 +179,7 @@ func Find(haystack []byte, validateHost bool) (emails []*EmailAddress) {
// FindWithIcannSuffix uses the RFC 5322 regex to match, parse and validate any email addresses
// found in a string. It will return emails if its eTLD is managed by the ICANN organization.
// If the validateHost boolean is true it will call the validate host for every email address
// encounterd. As RFC 5322 is really broad this method will likely match images and urls that
// encountered. As RFC 5322 is really broad this method will likely match images and urls that
// contain the '@' character.
func FindWithIcannSuffix(haystack []byte, validateHost bool) (emails []*EmailAddress) {
results := Find(haystack, false)
@@ -178,22 +199,26 @@ func FindWithIcannSuffix(haystack []byte, validateHost bool) (emails []*EmailAdd
// Parse will parse the input and validate the email locally. If you want to validate the host of
// this email address remotely call the ValidateHost method.
//
// It returns a "format is incorrect" error when the input does not match the validation regex
// or when nothing follows the final '@'. On success it returns the address split into
// LocalPart and Domain.
func Parse(email string) (*EmailAddress, error) {
// NOTE(review): this view is a rendered commit diff; the two `if` lines below appear to be the
// removed (validEmailRegexp) and the added (validRfc5322Regexp) form of one condition.
if !validEmailRegexp.MatchString(email) {
if !validRfc5322Regexp.MatchString(email) {
return nil, fmt.Errorf("format is incorrect for %s", email)
}
// Split on the last '@': everything before it becomes LocalPart, everything after it Domain.
i := strings.LastIndexByte(email, '@')
e := &EmailAddress{
LocalPart: email[:i],
Domain: email[i+1:],
}
// Reject an empty domain explicitly. NOTE(review): the validation pattern is built as
// "^%s*$", which looks like it makes the trailing domain group optional, so an input ending
// in '@' could pass the regex — confirm that this is why the extra check exists.
if e.Domain == "" {
return nil, fmt.Errorf("format is incorrect for %s", email)
}
return e, nil
}
// lookupHost first checks if any MX records are available and if not, it will check
// LookupHost first checks if any MX records are available and if not, it will check
// if A records are available as they can resolve email server hosts. An error indicates
// that none of the A or MX records are available.
func lookupHost(domain string) (string, error) {
func LookupHost(domain string) (string, error) {
if mx, err := net.LookupMX(domain); err == nil {
return mx[0].Host, nil
}
@@ -203,9 +228,10 @@ func lookupHost(domain string) (string, error) {
return "", fmt.Errorf("failed finding MX and A records for domain %s", domain)
}
// tryHost will verify if we can start a mail transaction with the host.
func tryHost(host string, e EmailAddress) error {
client, err := smtp.Dial(fmt.Sprintf("%s:%d", host, 25))
// TryHost will verify if we can start a mail transaction with the host. A lot of
// hosts block this method so don't expect much from it.
func TryHost(host string, e EmailAddress) error {
client, err := smtp.Dial(fmt.Sprintf("%s:%d", host, 587))
if err != nil {
return err
}