add !pm stripify option
This commit is contained in:
0
vendor/github.com/kvannotten/mailstrip/.gitignore
generated
vendored
Normal file
0
vendor/github.com/kvannotten/mailstrip/.gitignore
generated
vendored
Normal file
52
vendor/github.com/kvannotten/mailstrip/LICENSE
generated
vendored
Normal file
52
vendor/github.com/kvannotten/mailstrip/LICENSE
generated
vendored
Normal file
@@ -0,0 +1,52 @@
|
||||
All original parts of this library that are not considered derivate work of
|
||||
email_reply_parser:
|
||||
-------------------------------------------------------------------------------
|
||||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2013 Thomson Reuters Global Resources
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
-------------------------------------------------------------------------------
|
||||
|
||||
The content of the fixtures directory (as imported in 78ad5d), as well as the
|
||||
comments are copied from email_reply_parser. Most of the code itself is a
|
||||
line-by-line port, and is therefor likely to be considered a derivate work:
|
||||
-------------------------------------------------------------------------------
|
||||
The MIT License
|
||||
|
||||
Copyright (c) GitHub
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
-------------------------------------------------------------------------------
|
||||
35
vendor/github.com/kvannotten/mailstrip/README.md
generated
vendored
Normal file
35
vendor/github.com/kvannotten/mailstrip/README.md
generated
vendored
Normal file
@@ -0,0 +1,35 @@
|
||||
# mailstrip
|
||||
|
||||
mailstrip is a [Go][2] library that parses email text and strips it of
|
||||
signatures and reply quotes. It is a port of [email\_reply\_parser][1], GitHub's
|
||||
library for parsing email replies.
|
||||
|
||||
## Differences to email_reply_parser
|
||||
|
||||
Most of mailstrip is a line-by-line port of email\_reply\_parser and it passes
|
||||
all tests from the email\_reply\_parser test suite. However, it also implements
|
||||
a few improvements that are not part of email\_reply\_parser:
|
||||
|
||||
* Forwarded fragments are detected and considered to be visible text, see
|
||||
[d321c1][3].
|
||||
* Replies from Yahoo! which lack ">" quote indicators are handled correctly,
|
||||
see [e844d][4].
|
||||
* Alternative quote headers used by gmail are handled correctly, see
|
||||
[7ecb6][5].
|
||||
* Replies from Google Inbox / Gmail that have a quote header in Swedish (and possibly other languages) are handled. See [4128d][6].
|
||||
|
||||
## Documentation
|
||||
|
||||
The API documentation can be found here:
|
||||
http://godoc.org/github.com/kvannotten/mailstrip
|
||||
|
||||
## License
|
||||
|
||||
MIT License. See LICENSE file.
|
||||
|
||||
[1]: https://github.com/github/email_reply_parser
|
||||
[2]: http://golang.org/
|
||||
[3]: https://github.com/kvannotten/mailstrip/commit/d321c10543f77c0beaacb40b04511e619f0652c6
|
||||
[4]: https://github.com/kvannotten/mailstrip/commit/e844df52342787c3cf2e0ebb8850b16e35f7f437
|
||||
[5]: https://github.com/kvannotten/mailstrip/commit/7ecb608981016c5633575cb93abb00e4c7370bcf
|
||||
[6]: https://github.com/kvannotten/mailstrip/commit/4128d1860b0b9477145ac4b4bbf14d1f072f7a4c
|
||||
268
vendor/github.com/kvannotten/mailstrip/mailstrip.go
generated
vendored
Normal file
268
vendor/github.com/kvannotten/mailstrip/mailstrip.go
generated
vendored
Normal file
@@ -0,0 +1,268 @@
|
||||
// Package mailstrip is a Go library that parses email text and strips it of
|
||||
// signatures and reply quotes. It is a port of email_reply_parser,
|
||||
// GitHub's library for parsing email replies.
|
||||
//
|
||||
// see https://github.com/github/email_reply_parser
|
||||
package mailstrip
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"io"
|
||||
"regexp"
|
||||
"strings"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
// Parse parses a plaintext email and returns the results.
|
||||
func Parse(text string) Email {
|
||||
p := &parser{}
|
||||
return p.Parse(text)
|
||||
}
|
||||
|
||||
type parser struct {
|
||||
// This determines if any 'visible' Fragment has been found. Once any
|
||||
// visible Fragment is found, stop looking for hidden ones.
|
||||
foundVisible bool
|
||||
// This instance variable points to the current Fragment. If the matched
|
||||
// line fits, it should be added to this Fragment. Otherwise, finish it and
|
||||
// start a new Fragment.
|
||||
fragment *Fragment
|
||||
// The fragments parsed so far
|
||||
fragments []*Fragment
|
||||
}
|
||||
|
||||
// > I define UNIX as “30 definitions of regular expressions living under one
|
||||
// > roof.”
|
||||
// —Don Knuth
|
||||
//
|
||||
// Porting the Ruby regular expressions from email_reply_parser to Go required
|
||||
// making the following changes:
|
||||
//
|
||||
// - Unlike most regexp flavors I'm familiar with, ^ and $ stand for beginning
|
||||
// and end of line respectively in Ruby. Getting the same behavior in Go
|
||||
// required enabling Go's multiline mode "(?m)" for these expressions.
|
||||
// - Ruby's multiline mode "/m" is the same as Go's "(?s)" flag. Both are used
|
||||
// to make "." match "\n" characters.
|
||||
var (
|
||||
// used to join quote headers that were broken into multiple lines by the
|
||||
// e-mail client. e.g. gmail does that for lines exceeding 80 chars
|
||||
multiLineReplyHeaderRegexps = []*regexp.Regexp{
|
||||
// e.g. On Aug 22, 2011, at 7:37 PM, defunkt<reply@reply.github.com> wrote:
|
||||
regexp.MustCompile("(?sm)^(On\\s(?:.+)wrote:)$"),
|
||||
// e.g. 2013/11/13 John Smith <john@smith.org>
|
||||
regexp.MustCompile("(?sm)^(\\d{4}/\\d{1,2}/\\d{1,2} .*<.+@.+>)$"),
|
||||
}
|
||||
sigRegexp = regexp.MustCompile("(\\d+ swodniW rof >.*<liaM morf tneS|--|__|(?m)\\w-$)|(?m)(^(\\w+\\s*){1,3} " + reverseString("Sent from my") + "$)")
|
||||
fwdRegexp = regexp.MustCompile("(?mi)^--+\\s*" + reverseString("Forwarded message") + "\\s*--+$")
|
||||
quotedRegexp = regexp.MustCompile("(?m)(>+)$")
|
||||
quoteHeaderRegexp = regexp.MustCompile("(?m)^:etorw.*nO$|^.*[0-9]{4}\\s\\.\\w{2,4}\\s\\d{1,2}\\s.{3,4}$|^\\w{3,4}\\s\\d{1,2}\\s\\w{3,4}\\.\\s[0-9]{4}.*$|^>.*\\d{1,2}/\\d{1,2}/\\d{4}$|^(?m)^.*?[0-9]{4}\\s\\.\\w+\\s\\d\\s.*n\\.*$")
|
||||
)
|
||||
|
||||
func (p *parser) Parse(text string) Email {
|
||||
// Normalize line endings.
|
||||
text = strings.Replace(text, "\r\n", "\n", -1)
|
||||
|
||||
// Check for multi-line reply headers. Some clients break up the "On DATE,
|
||||
// NAME <EMAIL> wrote:" line (and similar quote headers) into multiple lines.
|
||||
for _, r := range multiLineReplyHeaderRegexps {
|
||||
if m := r.FindStringSubmatch(text); len(m) == 2 {
|
||||
// Remove all new lines from the reply header.
|
||||
text = strings.Replace(text, m[1], strings.Replace(m[1], "\n", "", -1), -1)
|
||||
}
|
||||
}
|
||||
|
||||
// The text is reversed initially due to the way we check for hidden
|
||||
// fragments.
|
||||
text = reverseString(text)
|
||||
|
||||
// Use the Reader to pull out each line of the email content.
|
||||
reader := bufio.NewReader(strings.NewReader(text))
|
||||
for {
|
||||
line, e := reader.ReadBytes('\n')
|
||||
p.scanLine(strings.TrimRight(string(line), "\n"))
|
||||
if e == io.EOF {
|
||||
break
|
||||
} else if e != nil {
|
||||
// Our underlaying reader is a strings.Reader, which will never return
|
||||
// errors other than io.EOF, so this is merely a sanity check.
|
||||
panic(fmt.Sprintf("Bug: ReadBytes returned an error other than io.EOF: %#v", e))
|
||||
}
|
||||
}
|
||||
|
||||
// Finish up the final fragment. Finishing a fragment will detect any
|
||||
// attributes (hidden, signature, reply), and join each line into a
|
||||
// string.
|
||||
p.finishFragment()
|
||||
|
||||
// Now that parsing is done, reverse the order.
|
||||
reverseFragments(p.fragments)
|
||||
return Email(p.fragments)
|
||||
}
|
||||
|
||||
// scaneLine scans the given line of text and figures out which fragment it
|
||||
// belongs to.
|
||||
func (p *parser) scanLine(line string) {
|
||||
sigMatch := sigRegexp.MatchString(line)
|
||||
|
||||
if !sigMatch {
|
||||
line = strings.TrimLeftFunc(line, unicode.IsSpace)
|
||||
}
|
||||
|
||||
// We're looking for leading `>`'s to see if this line is part of a
|
||||
// quoted Fragment.
|
||||
isQuoted := quotedRegexp.MatchString(line)
|
||||
|
||||
// Mark the current Fragment as a signature if the current line is empty
|
||||
// and the Fragment starts with a common signature indicator.
|
||||
if p.fragment != nil && line == "" {
|
||||
// lastLine is really the first line, since the lines are still reversed
|
||||
// at this point.
|
||||
lastLine := p.fragment.lines[len(p.fragment.lines)-1]
|
||||
if fwdRegexp.MatchString(lastLine) {
|
||||
p.fragment.forwarded = true
|
||||
p.finishFragment()
|
||||
} else if sigRegexp.MatchString(lastLine) {
|
||||
p.fragment.signature = true
|
||||
p.finishFragment()
|
||||
}
|
||||
}
|
||||
|
||||
isQuoteHeader := p.quoteHeader(line)
|
||||
// Yahoo! does not use '>' quote indicator in replies, so if a quote header
|
||||
// suddenly appears in an otherwise unquoted fragment, consider it quoted
|
||||
// now.
|
||||
if p.fragment != nil && isQuoteHeader {
|
||||
p.fragment.quoted = true
|
||||
}
|
||||
|
||||
// If the line matches the current fragment, add it. Note that a common
|
||||
// reply header also counts as part of the quoted Fragment, even though
|
||||
// it doesn't start with `>`.
|
||||
if p.fragment != nil &&
|
||||
((p.fragment.quoted == isQuoted) ||
|
||||
(p.fragment.quoted && (isQuoteHeader || line == ""))) {
|
||||
p.fragment.lines = append(p.fragment.lines, line)
|
||||
|
||||
// Otherwise, finish the fragment and start a new one.
|
||||
} else {
|
||||
p.finishFragment()
|
||||
p.fragment = &Fragment{quoted: isQuoted, lines: []string{line}}
|
||||
}
|
||||
}
|
||||
|
||||
// quoteHeader detects if a given line is a header above a quoted area. It is
|
||||
// only checked for lines preceding quoted regions. Returns true if the line is
|
||||
// a valid header, or false.
|
||||
func (p *parser) quoteHeader(line string) bool {
|
||||
return quoteHeaderRegexp.MatchString(line)
|
||||
}
|
||||
|
||||
// finishFragment builds the fragment string and reverses it, after all lines
|
||||
// have been added. It also checks to see if this Fragment is hidden. The
|
||||
// hidden Fragment check reads from the bottom to the top.
|
||||
//
|
||||
// Any quoted Fragments or signature Fragments are marked hidden if they are
|
||||
// below any visible Fragments. Visible Fragments are expected to contain
|
||||
// original content by the author. If they are below a quoted Fragment, then
|
||||
// the Fragment should be visible to give context to the reply.
|
||||
//
|
||||
// some original text (visible)
|
||||
//
|
||||
// > do you have any two's? (quoted, visible)
|
||||
//
|
||||
// Go fish! (visible)
|
||||
//
|
||||
// > -- > Player 1 (quoted, hidden)
|
||||
//
|
||||
// -- Player 2 (signature, hidden)
|
||||
func (p *parser) finishFragment() {
|
||||
if p.fragment != nil {
|
||||
p.fragment.finish()
|
||||
if !p.foundVisible {
|
||||
if p.fragment.quoted || p.fragment.signature ||
|
||||
strings.TrimSpace(p.fragment.String()) == "" {
|
||||
p.fragment.hidden = true
|
||||
} else {
|
||||
p.foundVisible = true
|
||||
}
|
||||
}
|
||||
p.fragments = append(p.fragments, p.fragment)
|
||||
}
|
||||
p.fragment = nil
|
||||
}
|
||||
|
||||
// reverseString returns s with its runes in reverse order.
func reverseString(s string) string {
	chars := []rune(s)
	last := len(chars) - 1
	for i := 0; i < len(chars)/2; i++ {
		chars[i], chars[last-i] = chars[last-i], chars[i]
	}
	return string(chars)
}
|
||||
|
||||
func reverseFragments(f []*Fragment) {
|
||||
for i, j := 0, len(f)-1; i < j; i, j = i+1, j-1 {
|
||||
f[i], f[j] = f[j], f[i]
|
||||
}
|
||||
}
|
||||
|
||||
// Email contains the parsed contents of an email.
|
||||
type Email []*Fragment
|
||||
|
||||
// String returns the non-Hidden() fragments of the Email.
|
||||
func (e Email) String() string {
|
||||
results := []string{}
|
||||
for _, fragment := range e {
|
||||
if fragment.Hidden() {
|
||||
continue
|
||||
}
|
||||
|
||||
results = append(results, fragment.String())
|
||||
}
|
||||
|
||||
result := strings.Join(results, "\n")
|
||||
result = strings.TrimRightFunc(result, unicode.IsSpace)
|
||||
return result
|
||||
}
|
||||
|
||||
// Fragment contains a parsed section of an email.
|
||||
type Fragment struct {
|
||||
lines []string
|
||||
content string
|
||||
hidden bool
|
||||
signature bool
|
||||
forwarded bool
|
||||
quoted bool
|
||||
}
|
||||
|
||||
// finish builds the string content by joining the lines and reversing them.
|
||||
func (f *Fragment) finish() {
|
||||
f.content = strings.Join(f.lines, "\n")
|
||||
f.lines = nil
|
||||
f.content = reverseString(f.content)
|
||||
}
|
||||
|
||||
// Forwarded returns if the fragment is forwarded or not.
|
||||
func (f *Fragment) Forwarded() bool {
|
||||
return f.forwarded
|
||||
}
|
||||
|
||||
// Signature returns if the fragment is a signature or not.
|
||||
func (f *Fragment) Signature() bool {
|
||||
return f.signature
|
||||
}
|
||||
|
||||
// Signature returns if the fragment is a quote or not.
|
||||
func (f *Fragment) Quoted() bool {
|
||||
return f.quoted
|
||||
}
|
||||
|
||||
// Signature returns if the fragment is considered hidden or not.
|
||||
func (f *Fragment) Hidden() bool {
|
||||
return f.hidden
|
||||
}
|
||||
|
||||
// String returns the content of the fragment.
|
||||
func (f *Fragment) String() string {
|
||||
return f.content
|
||||
}
|
||||
Reference in New Issue
Block a user