add !pm stripify option

This commit is contained in:
Aine
2024-02-26 20:42:37 +02:00
parent ba1a8c8390
commit 271a4a0e31
14 changed files with 442 additions and 13 deletions

0
vendor/github.com/kvannotten/mailstrip/.gitignore generated vendored Normal file
View File

52
vendor/github.com/kvannotten/mailstrip/LICENSE generated vendored Normal file
View File

@@ -0,0 +1,52 @@
All original parts of this library that are not considered derivate work of
email_reply_parser:
-------------------------------------------------------------------------------
The MIT License (MIT)
Copyright (c) 2013 Thomson Reuters Global Resources
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
-------------------------------------------------------------------------------
The content of the fixtures directory (as imported in 78ad5d), as well as the
comments are copied from email_reply_parser. Most of the code itself is a
line-by-line port, and is therefor likely to be considered a derivate work:
-------------------------------------------------------------------------------
The MIT License
Copyright (c) GitHub
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
-------------------------------------------------------------------------------

35
vendor/github.com/kvannotten/mailstrip/README.md generated vendored Normal file
View File

@@ -0,0 +1,35 @@
# mailstrip
mailstrip is a [Go][2] library that parses email text and strips it of
signatures and reply quotes. It is a port of [email\_reply\_parser][1], GitHub's
library for parsing email replies.
## Differences to email_reply_parser
Most of mailstrip is a line-by-line port of email\_reply\_parser and it passes
all tests from the email\_reply\_parser test suite. However, it also implements
a few improvements that are not part of email\_reply\_parser:
* Forwarded fragments are detected and considered to be visible text, see
[d321c1][3].
* Replies from Yahoo! which lack ">" quote indicators are handled correctly,
see [e844d][4].
* Alternative quote headers used by gmail are handled correctly, see
[7ecb6][5]
* Replies from Google Inbox / Gmail that have a quote header in Swedish (and possibly other languages) are handled, see [4128d][6].
## Documentation
The API documentation can be found here:
http://godoc.org/github.com/kvannotten/mailstrip
## License
MIT License. See LICENSE file.
[1]: https://github.com/github/email_reply_parser
[2]: http://golang.org/
[3]: https://github.com/kvannotten/mailstrip/commit/d321c10543f77c0beaacb40b04511e619f0652c6
[4]: https://github.com/kvannotten/mailstrip/commit/e844df52342787c3cf2e0ebb8850b16e35f7f437
[5]: https://github.com/kvannotten/mailstrip/commit/7ecb608981016c5633575cb93abb00e4c7370bcf
[6]: https://github.com/kvannotten/mailstrip/commit/4128d1860b0b9477145ac4b4bbf14d1f072f7a4c

268
vendor/github.com/kvannotten/mailstrip/mailstrip.go generated vendored Normal file
View File

@@ -0,0 +1,268 @@
// Package mailstrip parses email text and strips it of signatures and reply
// quotes. It is a port of email_reply_parser, GitHub's library for parsing
// email replies.
//
// see https://github.com/github/email_reply_parser
package mailstrip
import (
"bufio"
"fmt"
"io"
"regexp"
"strings"
"unicode"
)
// Parse runs a fresh parser over the plaintext email in text and returns the
// resulting fragments as an Email.
func Parse(text string) Email {
	var p parser
	return p.Parse(text)
}
// parser holds the state of a single parsing run over one email body.
type parser struct {
// foundVisible records whether any visible Fragment has been found yet.
// finishFragment only marks fragments as hidden while this is still false.
foundVisible bool
// fragment points to the Fragment currently being built, or is nil when no
// fragment is open. If a scanned line fits, it is added to this Fragment;
// otherwise the Fragment is finished and a new one is started.
fragment *Fragment
// fragments holds the fragments finished so far. They are appended in the
// order they are scanned (bottom of the email first) and reversed at the
// end of Parse.
fragments []*Fragment
}
// > I define UNIX as “30 definitions of regular expressions living under one
// > roof.”
// —Don Knuth
//
// Porting the Ruby regular expressions from email_reply_parser to Go required
// making the following changes:
//
// - Unlike most regexp flavors I'm familiar with, ^ and $ stand for beginning
// and end of line respectively in Ruby. Getting the same behavior in Go
// required enabling Go's multiline mode "(?m)" for these expressions.
// - Ruby's multiline mode "/m" is the same as Go's "(?s)" flag. Both are used
// to make "." match "\n" characters.
var (
// multiLineReplyHeaderRegexps is used to join quote headers that were
// broken into multiple lines by the e-mail client, e.g. gmail does that for
// lines exceeding 80 chars. Parse applies these to the text before it is
// reversed, so the patterns are written in normal reading order.
//
// NOTE(review): with the "s" flag the greedy ".+" can span newlines, so the
// first pattern appears able to match from the first line starting with
// "On " up to the last line ending in "wrote:" - confirm this is intended.
multiLineReplyHeaderRegexps = []*regexp.Regexp{
// e.g. On Aug 22, 2011, at 7:37 PM, defunkt<reply@reply.github.com> wrote:
regexp.MustCompile("(?sm)^(On\\s(?:.+)wrote:)$"),
// e.g. 2013/11/13 John Smith <john@smith.org>
regexp.MustCompile("(?sm)^(\\d{4}/\\d{1,2}/\\d{1,2} .*<.+@.+>)$"),
}
// The remaining expressions are matched by scanLine against lines of the
// reversed text, which is why their literal words are spelled backwards
// (e.g. "tneS" is "Sent") or produced with reverseString.

// sigRegexp matches signature markers: a reversed "Sent from ... Mail ...
// for Windows <digits>" line, "--", "__", a word character followed by "-"
// at the end of the reversed line, or one to three words followed by the
// reversed phrase "Sent from my".
sigRegexp = regexp.MustCompile("(\\d+ swodniW rof >.*<liaM morf tneS|--|__|(?m)\\w-$)|(?m)(^(\\w+\\s*){1,3} " + reverseString("Sent from my") + "$)")
// fwdRegexp matches, case-insensitively, a reversed "Forwarded message"
// divider line surrounded by runs of two or more dashes.
fwdRegexp = regexp.MustCompile("(?mi)^--+\\s*" + reverseString("Forwarded message") + "\\s*--+$")
// quotedRegexp matches one or more ">" at the end of a reversed line, i.e.
// at the start of the line before reversal.
quotedRegexp = regexp.MustCompile("(?m)(>+)$")
// quoteHeaderRegexp matches reversed quote header lines. The first
// alternative, ":etorw.*nO", is "On ... wrote:" backwards; the other
// alternatives presumably cover date-based header formats used by various
// clients - verify against the upstream test fixtures before changing them.
quoteHeaderRegexp = regexp.MustCompile("(?m)^:etorw.*nO$|^.*[0-9]{4}\\s\\.\\w{2,4}\\s\\d{1,2}\\s.{3,4}$|^\\w{3,4}\\s\\d{1,2}\\s\\w{3,4}\\.\\s[0-9]{4}.*$|^>.*\\d{1,2}/\\d{1,2}/\\d{4}$|^(?m)^.*?[0-9]{4}\\s\\.\\w+\\s\\d\\s.*n\\.*$")
)
// Parse splits text into fragments and returns them in their original
// top-to-bottom order.
//
// The text is scanned line by line in reversed form, because hidden
// fragments are detected by reading from the bottom of the email upwards.
func (p *parser) Parse(text string) Email {
	// Treat "\r\n" line endings like plain "\n".
	text = strings.ReplaceAll(text, "\r\n", "\n")

	// Some clients break up the "On DATE, NAME <EMAIL> wrote:" line (and
	// similar quote headers) into multiple lines. For every known header
	// pattern that matches, strip the new lines from the matched header.
	for _, re := range multiLineReplyHeaderRegexps {
		groups := re.FindStringSubmatch(text)
		if len(groups) != 2 {
			continue
		}
		header := groups[1]
		text = strings.ReplaceAll(text, header, strings.ReplaceAll(header, "\n", ""))
	}

	// Feed the reversed text to scanLine one line at a time.
	lines := bufio.NewReader(strings.NewReader(reverseString(text)))
	for done := false; !done; {
		raw, err := lines.ReadBytes('\n')
		p.scanLine(strings.TrimRight(string(raw), "\n"))

		switch err {
		case nil:
			// More input may follow.
		case io.EOF:
			done = true
		default:
			// The underlying reader is a strings.Reader, which never returns
			// errors other than io.EOF, so this is merely a sanity check.
			panic(fmt.Sprintf("Bug: ReadBytes returned an error other than io.EOF: %#v", err))
		}
	}

	// Close the last open fragment so its attributes (hidden, signature,
	// quoted) are detected and its lines are joined into a string.
	p.finishFragment()

	// Fragments were collected bottom-up; restore reading order.
	reverseFragments(p.fragments)
	return Email(p.fragments)
}
// scanLine scans one line of the reversed text and decides whether it belongs
// to the current fragment or starts a new one.
func (p *parser) scanLine(line string) {
	// Leading whitespace is only kept on lines that look like a signature.
	if !sigRegexp.MatchString(line) {
		line = strings.TrimLeftFunc(line, unicode.IsSpace)
	}
	blank := line == ""

	// A match means the line carried leading `>`'s before reversal, i.e. it
	// is part of a quoted Fragment.
	quoted := quotedRegexp.MatchString(line)

	// An empty line closes the current Fragment when that Fragment starts
	// with a forward divider or a common signature indicator.
	if cur := p.fragment; cur != nil && blank {
		// The most recently added line is really the fragment's first line,
		// since the lines are still reversed at this point.
		first := cur.lines[len(cur.lines)-1]
		switch {
		case fwdRegexp.MatchString(first):
			cur.forwarded = true
			p.finishFragment()
		case sigRegexp.MatchString(first):
			cur.signature = true
			p.finishFragment()
		}
	}

	header := p.quoteHeader(line)

	// Re-read the current fragment: the step above may have finished it.
	cur := p.fragment
	if cur == nil {
		p.fragment = &Fragment{quoted: quoted, lines: []string{line}}
		return
	}

	// Yahoo! does not use the '>' quote indicator in replies, so a quote
	// header appearing in an otherwise unquoted fragment turns that fragment
	// into a quoted one.
	if header {
		cur.quoted = true
	}

	// The line joins the current fragment when both agree on being quoted.
	// Quote headers and empty lines also stay inside a quoted fragment, even
	// though they carry no `>`.
	sameKind := cur.quoted == quoted
	staysInQuote := cur.quoted && (header || blank)
	if sameKind || staysInQuote {
		cur.lines = append(cur.lines, line)
		return
	}

	// Otherwise close the current fragment and open a new one with this line.
	p.finishFragment()
	p.fragment = &Fragment{quoted: quoted, lines: []string{line}}
}
// quoteHeader reports whether the given (reversed) line matches
// quoteHeaderRegexp, i.e. looks like a header above a quoted area. scanLine
// calls it for every line it scans.
func (p *parser) quoteHeader(line string) bool {
return quoteHeaderRegexp.MatchString(line)
}
// finishFragment closes the current fragment, if any: it builds the fragment's
// content string, decides whether the fragment is hidden, and appends it to
// the list of parsed fragments. Afterwards no fragment is open.
//
// Because fragments are finished from the bottom of the email to the top,
// quoted, signature and whitespace-only fragments are marked hidden only
// until the first visible fragment has been found. Visible fragments are
// expected to contain original content by the author; a quoted fragment above
// one of them stays visible to give context to the reply.
//
//	some original text (visible)
//
//	> do you have any two's? (quoted, visible)
//
//	Go fish! (visible)
//
//	> --
//	> Player 1 (quoted, hidden)
//
//	--
//	Player 2 (signature, hidden)
func (p *parser) finishFragment() {
	done := p.fragment
	p.fragment = nil
	if done == nil {
		return
	}

	done.finish()

	if !p.foundVisible {
		hideable := done.quoted || done.signature ||
			strings.TrimSpace(done.String()) == ""
		if hideable {
			done.hidden = true
		} else {
			p.foundVisible = true
		}
	}

	p.fragments = append(p.fragments, done)
}
// reverseString returns s with the order of its runes reversed.
func reverseString(s string) string {
	src := []rune(s)
	last := len(src) - 1
	out := make([]rune, len(src))
	for i, r := range src {
		out[last-i] = r
	}
	return string(out)
}
// reverseFragments reverses the order of the elements of f in place.
func reverseFragments(f []*Fragment) {
	n := len(f)
	for i := 0; i < n/2; i++ {
		mirror := n - 1 - i
		f[i], f[mirror] = f[mirror], f[i]
	}
}
// Email is the parsed form of an email: its fragments in reading order.
type Email []*Fragment

// String returns the content of all fragments that are not Hidden(), joined
// by new lines, with trailing whitespace removed.
func (e Email) String() string {
	visible := make([]string, 0, len(e))
	for _, frag := range e {
		if !frag.Hidden() {
			visible = append(visible, frag.String())
		}
	}
	return strings.TrimRightFunc(strings.Join(visible, "\n"), unicode.IsSpace)
}
// Fragment is one parsed section of an email.
type Fragment struct {
	// lines holds the fragment's lines while it is being built; finish
	// clears it again.
	lines []string
	// content is the fragment's text as built by finish.
	content string
	// Classification flags, exposed through the Hidden, Signature,
	// Forwarded and Quoted methods.
	hidden, signature, forwarded, quoted bool
}
// finish turns the collected lines into the fragment's content: the lines are
// joined with new lines and the result is reversed. The line buffer is
// released afterwards.
func (f *Fragment) finish() {
	joined := strings.Join(f.lines, "\n")
	f.content = reverseString(joined)
	f.lines = nil
}
// Forwarded reports whether the fragment was marked as forwarded, which the
// parser does when the fragment starts with a "Forwarded message" divider.
func (f *Fragment) Forwarded() bool {
return f.forwarded
}
// Signature reports whether the fragment was marked as a signature, which the
// parser does when the fragment starts with a signature indicator.
func (f *Fragment) Signature() bool {
return f.signature
}
// Quoted reports whether the fragment is a quote or not.
func (f *Fragment) Quoted() bool {
return f.quoted
}
// Hidden reports whether the fragment is considered hidden or not. Hidden
// fragments are left out of Email.String().
func (f *Fragment) Hidden() bool {
return f.hidden
}
// String returns the content of the fragment, as built by finish.
func (f *Fragment) String() string {
return f.content
}