Fix POP mail parsing in multipart bounce e-mails.
This was originally authored by @stevesavanna in #707. This commit contains changes and refactors that could not be pushed to the original PR. Changes from #707 - Don't ignore bounce mails missing campaign / subscriber UUIDs. The original behaviour falls back to looking up subscribers by e-mail. - Refactor repetetive header.get + regexp conditions per header into a simpler lookup map. - Trim e-mail header values of `\r`. Closes #707, #763 Co-authored-by: stevesavanna <steven@savannacorp.com>
This commit is contained in:
parent
a7145511fd
commit
73e4c1cf28
2 changed files with 74 additions and 27 deletions
|
@ -2,7 +2,9 @@ package mailbox
|
|||
|
||||
import (
|
||||
"encoding/json"
|
||||
"io"
|
||||
"regexp"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/emersion/go-message"
|
||||
|
@ -17,9 +19,24 @@ type POP struct {
|
|||
client *pop3.Client
|
||||
}
|
||||
|
||||
type bounceHeaders struct {
|
||||
Header string
|
||||
Regexp *regexp.Regexp
|
||||
}
|
||||
|
||||
var (
|
||||
reCampUUID = regexp.MustCompile(`(?m)(?m:^` + models.EmailHeaderCampaignUUID + `:\s+?)([a-z0-9\-]{36})`)
|
||||
reSubUUID = regexp.MustCompile(`(?m)(?m:^` + models.EmailHeaderSubscriberUUID + `:\s+?)([a-z0-9\-]{36})`)
|
||||
// List of header to look for in the e-mail body, regexp to fall back to if the header is empty.
|
||||
headerLookups = []bounceHeaders{
|
||||
{models.EmailHeaderCampaignUUID, regexp.MustCompile(`(?m)(?:^` + models.EmailHeaderCampaignUUID + `:\s+?)([a-z0-9\-]{36})`)},
|
||||
{models.EmailHeaderSubscriberUUID, regexp.MustCompile(`(?m)(?:^` + models.EmailHeaderSubscriberUUID + `:\s+?)([a-z0-9\-]{36})`)},
|
||||
{models.EmailHeaderDate, regexp.MustCompile(`(?m)(?:^` + models.EmailHeaderDate + `:\s+?)([\w,\,\ ,:,+,-]*(?:\(?:\w*\))?)`)},
|
||||
{models.EmailHeaderFrom, regexp.MustCompile(`(?m)(?:^` + models.EmailHeaderFrom + `:\s+?)(.*)`)},
|
||||
{models.EmailHeaderSubject, regexp.MustCompile(`(?m)(?:^` + models.EmailHeaderSubject + `:\s+?)(.*)`)},
|
||||
{models.EmailHeaderMessageId, regexp.MustCompile(`(?m)(?:^` + models.EmailHeaderMessageId + `:\s+?)(.*)`)},
|
||||
{models.EmailHeaderDeliveredTo, regexp.MustCompile(`(?m)(?:^` + models.EmailHeaderDeliveredTo + `:\s+?)(.*)`)},
|
||||
}
|
||||
|
||||
reHdrReceived = regexp.MustCompile(`(?m)(?:^` + models.EmailHeaderReceived + `:\s+?)(.*)`)
|
||||
)
|
||||
|
||||
// NewPOP returns a new instance of the POP mailbox client.
|
||||
|
@ -81,29 +98,51 @@ func (p *POP) Scan(limit int, ch chan models.Bounce) error {
|
|||
return err
|
||||
}
|
||||
|
||||
// Check if the identifiers are available in the parsed message.
|
||||
var (
|
||||
campUUID = m.Header.Get(models.EmailHeaderCampaignUUID)
|
||||
subUUID = m.Header.Get(models.EmailHeaderSubscriberUUID)
|
||||
)
|
||||
h := m
|
||||
|
||||
// If they are not, try to extract them from the message body.
|
||||
if campUUID == "" {
|
||||
if u := reCampUUID.FindSubmatch(b.Bytes()); len(u) == 2 {
|
||||
campUUID = string(u[1])
|
||||
}
|
||||
}
|
||||
if subUUID == "" {
|
||||
if u := reSubUUID.FindSubmatch(b.Bytes()); len(u) == 2 {
|
||||
subUUID = string(u[1])
|
||||
// If this is a multipart message, find the last part.
|
||||
if mr := m.MultipartReader(); mr != nil {
|
||||
for {
|
||||
part, err := mr.NextPart()
|
||||
if err == io.EOF {
|
||||
break
|
||||
} else if err != nil {
|
||||
return err
|
||||
}
|
||||
h = part
|
||||
}
|
||||
}
|
||||
|
||||
if campUUID == "" || subUUID == "" {
|
||||
continue
|
||||
// Reset the "unread portion" pointer of the message buffer.
|
||||
// If you don't do this, you can't read the entire body because the pointer will not point to the beginning.
|
||||
b, _ = c.RetrRaw(id)
|
||||
|
||||
// Lookup headers in the e-mail. If a header isn't found, fall back to regexp lookups.
|
||||
hdr := make(map[string]string, 7)
|
||||
for _, l := range headerLookups {
|
||||
v := h.Header.Get(l.Header)
|
||||
|
||||
// Not in the header. Try regexp.
|
||||
if v == "" {
|
||||
if m := l.Regexp.FindAllSubmatch(b.Bytes(), -1); m != nil {
|
||||
v = string(m[len(m)-1][1])
|
||||
}
|
||||
}
|
||||
|
||||
hdr[l.Header] = strings.TrimSpace(v)
|
||||
}
|
||||
|
||||
date, _ := time.Parse("Mon, 02 Jan 2006 15:04:05 -0700", m.Header.Get("Date"))
|
||||
// Received is a []string header.
|
||||
msgReceived := h.Header.Map()[models.EmailHeaderReceived]
|
||||
if len(msgReceived) == 0 {
|
||||
if u := reHdrReceived.FindAllSubmatch(b.Bytes(), -1); u != nil {
|
||||
for i := 0; i < len(u); i++ {
|
||||
msgReceived = append(msgReceived, string(u[i][1]))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
date, _ := time.Parse("Mon, 02 Jan 2006 15:04:05 -0700", hdr[models.EmailHeaderDate])
|
||||
if date.IsZero() {
|
||||
date = time.Now()
|
||||
}
|
||||
|
@ -116,21 +155,21 @@ func (p *POP) Scan(limit int, ch chan models.Bounce) error {
|
|||
DeliveredTo string `json:"delivered_to"`
|
||||
Received []string `json:"received"`
|
||||
}{
|
||||
From: m.Header.Get("From"),
|
||||
Subject: m.Header.Get("Subject"),
|
||||
MessageID: m.Header.Get("Message-Id"),
|
||||
DeliveredTo: m.Header.Get("Delivered-To"),
|
||||
Received: m.Header.Map()["Received"],
|
||||
From: hdr[models.EmailHeaderFrom],
|
||||
Subject: hdr[models.EmailHeaderSubject],
|
||||
MessageID: hdr[models.EmailHeaderMessageId],
|
||||
DeliveredTo: hdr[models.EmailHeaderDeliveredTo],
|
||||
Received: msgReceived,
|
||||
})
|
||||
|
||||
select {
|
||||
case ch <- models.Bounce{
|
||||
Type: "hard",
|
||||
CampaignUUID: campUUID,
|
||||
SubscriberUUID: subUUID,
|
||||
CampaignUUID: hdr[models.EmailHeaderCampaignUUID],
|
||||
SubscriberUUID: hdr[models.EmailHeaderSubscriberUUID],
|
||||
Source: p.opt.Host,
|
||||
CreatedAt: date,
|
||||
Meta: json.RawMessage(meta),
|
||||
Meta: meta,
|
||||
}:
|
||||
default:
|
||||
}
|
||||
|
|
|
@ -69,6 +69,14 @@ const (
|
|||
EmailHeaderSubscriberUUID = "X-Listmonk-Subscriber"
|
||||
EmailHeaderCampaignUUID = "X-Listmonk-Campaign"
|
||||
|
||||
// Standard e-mail headers.
|
||||
EmailHeaderDate = "Date"
|
||||
EmailHeaderFrom = "From"
|
||||
EmailHeaderSubject = "Subject"
|
||||
EmailHeaderMessageId = "Message-Id"
|
||||
EmailHeaderDeliveredTo = "Delivered-To"
|
||||
EmailHeaderReceived = "Received"
|
||||
|
||||
BounceTypeHard = "hard"
|
||||
BounceTypeSoft = "soft"
|
||||
)
|
||||
|
|
Loading…
Reference in a new issue