8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23 package org.openjdk.skara.mailinglist;
24
25 import org.openjdk.skara.email.*;
26
27 import java.io.*;
28 import java.nio.charset.StandardCharsets;
29 import java.time.format.DateTimeFormatter;
30 import java.util.*;
31 import java.util.function.Function;
32 import java.util.logging.Logger;
33 import java.util.regex.Pattern;
34 import java.util.stream.Collectors;
35
36 public class Mbox {
37 private final static Logger log = Logger.getLogger("org.openjdk.skara.mailinglist");
38
39 private final static Pattern mboxMessagePattern = Pattern.compile(
40 "^(From (?:.(?!^\\R^From ))*)", Pattern.MULTILINE | Pattern.DOTALL);
41 private final static DateTimeFormatter ctimeFormat = DateTimeFormatter.ofPattern(
42 "EEE LLL dd HH:mm:ss yyyy", Locale.US);
43 private final static Pattern fromStringEncodePattern = Pattern.compile("^(>*From )", Pattern.MULTILINE);
44 private final static Pattern fromStringDecodePattern = Pattern.compile("^>(>*From )", Pattern.MULTILINE);
45 private final static Pattern encodeQuotedPrintablePattern = Pattern.compile("([^\\x00-\\x7f]+)");
46 private final static Pattern decodedQuotedPrintablePattern = Pattern.compile("=\\?utf-8\\?b\\?(.*?)\\?=");
47
48 private static List<Email> splitMbox(String mbox) {
49 // Initial split
50 var messages = mboxMessagePattern.matcher(mbox).results()
51 .map(match -> match.group(1))
52 .filter(message -> message.length() > 0)
53 .map(Mbox::decodeFromStrings)
54 .map(Mbox::decodeQuotedPrintable)
55 .collect(Collectors.toList());
56
57 // Pipermail can occasionally fail to encode 'From ' in message bodies, try to handle this
58 var messageBuilder = new StringBuilder();
59 var parsedMails = new ArrayList<Email>();
60 Collections.reverse(messages);
61 for (var message : messages) {
62 messageBuilder.insert(0, message);
63 try {
64 var email = Email.parse(messageBuilder.toString());
65 parsedMails.add(email);
66 messageBuilder.setLength(0);
67 } catch (RuntimeException ignored) {
68 }
69 }
70
71 Collections.reverse(parsedMails);
72 return parsedMails;
73 }
74
75 private static String encodeFromStrings(String body) {
76 var fromStringMatcher = fromStringEncodePattern.matcher(body);
77 return fromStringMatcher.replaceAll(">$1");
78 }
79
80 private static String decodeFromStrings(String body) {
81 var fromStringMatcher = fromStringDecodePattern.matcher(body);
82 return fromStringMatcher.replaceAll("$1");
83 }
84
85 private static String encodeQuotedPrintable(String raw) {
86 var quoteMatcher = encodeQuotedPrintablePattern.matcher(raw);
87 return quoteMatcher.replaceAll(mo -> "=?utf-8?b?" + Base64.getEncoder().encodeToString(String.valueOf(mo.group(1)).getBytes(StandardCharsets.UTF_8)) + "?=");
88 }
89
90 private static String decodeQuotedPrintable(String raw) {
91 var quotedMatcher = decodedQuotedPrintablePattern.matcher(raw);
92 return quotedMatcher.replaceAll(mo -> new String(Base64.getDecoder().decode(mo.group(1)), StandardCharsets.UTF_8));
93 }
94
95 public static List<Conversation> parseMbox(String mbox) {
96 var emails = splitMbox(mbox);
97 var idToMail = emails.stream().collect(Collectors.toMap(Email::id, Function.identity(), (a, b) -> a));
98 var idToConversation = idToMail.values().stream()
99 .filter(email -> !email.hasHeader("In-Reply-To"))
100 .collect(Collectors.toMap(Email::id, Conversation::new));
101
102 for (var email : emails) {
103 if (email.hasHeader("In-Reply-To")) {
104 var inReplyTo = EmailAddress.parse(email.headerValue("In-Reply-To"));
105 if (!idToMail.containsKey(inReplyTo)) {
106 log.info("Can't find parent: " + inReplyTo + " - discarding");
107 } else {
108 var parent = idToMail.get(inReplyTo);
109 if (!idToConversation.containsKey(inReplyTo)) {
110 log.info("Can't find conversation: " + inReplyTo + " - discarding");
111 } else {
112 var conversation = idToConversation.get(inReplyTo);
113 conversation.addReply(parent, email);
114 idToConversation.put(email.id(), conversation);
115 }
116 }
117 }
118 }
119
120 return idToConversation.values().stream()
121 .distinct()
122 .collect(Collectors.toList());
123 }
124
125 public static String fromMail(Email mail) {
126 var mboxString = new StringWriter();
127 var mboxMail = new PrintWriter(mboxString);
128
129 mboxMail.println();
130 mboxMail.println("From " + mail.sender().address() + " " + mail.date().format(ctimeFormat));
131 mboxMail.println("From: " + mail.author().toObfuscatedString());
132 if (!mail.author().equals(mail.sender())) {
133 mboxMail.println("Sender: " + mail.sender().toObfuscatedString());
134 }
135 if (!mail.recipients().isEmpty()) {
136 mboxMail.println("To: " + mail.recipients().stream()
137 .map(EmailAddress::toString)
138 .collect(Collectors.joining(", ")));
139 }
140 mboxMail.println("Date: " + mail.date().format(DateTimeFormatter.RFC_1123_DATE_TIME));
141 mboxMail.println("Subject: " + mail.subject());
142 mboxMail.println("Message-Id: " + mail.id());
143 mail.headers().forEach(header -> mboxMail.println(header + ": " + mail.headerValue(header)));
144 mboxMail.println();
145 mboxMail.println(encodeFromStrings(mail.body()));
146
147 return encodeQuotedPrintable(mboxString.toString());
148 }
149 }
|
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23 package org.openjdk.skara.mailinglist;
24
25 import org.openjdk.skara.email.*;
26
27 import java.io.*;
28 import java.time.format.DateTimeFormatter;
29 import java.util.*;
30 import java.util.function.Function;
31 import java.util.logging.Logger;
32 import java.util.regex.Pattern;
33 import java.util.stream.Collectors;
34
35 public class Mbox {
36 private final static Logger log = Logger.getLogger("org.openjdk.skara.mailinglist");
37
38 private final static Pattern mboxMessagePattern = Pattern.compile(
39 "^(From (?:.(?!^\\R^From ))*)", Pattern.MULTILINE | Pattern.DOTALL);
40 private final static DateTimeFormatter ctimeFormat = DateTimeFormatter.ofPattern(
41 "EEE LLL dd HH:mm:ss yyyy", Locale.US);
42 private final static Pattern fromStringEncodePattern = Pattern.compile("^(>*From )", Pattern.MULTILINE);
43 private final static Pattern fromStringDecodePattern = Pattern.compile("^>(>*From )", Pattern.MULTILINE);
44
45 private static List<Email> splitMbox(String mbox) {
46 // Initial split
47 var messages = mboxMessagePattern.matcher(mbox).results()
48 .map(match -> match.group(1))
49 .filter(message -> message.length() > 0)
50 .map(Mbox::decodeFromStrings)
51 .collect(Collectors.toList());
52
53 // Pipermail can occasionally fail to encode 'From ' in message bodies, try to handle this
54 var messageBuilder = new StringBuilder();
55 var parsedMails = new ArrayList<Email>();
56 Collections.reverse(messages);
57 for (var message : messages) {
58 messageBuilder.insert(0, message);
59 try {
60 var email = Email.parse(messageBuilder.toString());
61 parsedMails.add(email);
62 messageBuilder.setLength(0);
63 } catch (RuntimeException ignored) {
64 }
65 }
66
67 Collections.reverse(parsedMails);
68 return parsedMails;
69 }
70
71 private static String encodeFromStrings(String body) {
72 var fromStringMatcher = fromStringEncodePattern.matcher(body);
73 return fromStringMatcher.replaceAll(">$1");
74 }
75
76 private static String decodeFromStrings(String body) {
77 var fromStringMatcher = fromStringDecodePattern.matcher(body);
78 return fromStringMatcher.replaceAll("$1");
79 }
80
81 public static List<Conversation> parseMbox(String mbox) {
82 var emails = splitMbox(mbox);
83 var idToMail = emails.stream().collect(Collectors.toMap(Email::id, Function.identity(), (a, b) -> a));
84 var idToConversation = idToMail.values().stream()
85 .filter(email -> !email.hasHeader("In-Reply-To"))
86 .collect(Collectors.toMap(Email::id, Conversation::new));
87
88 for (var email : emails) {
89 if (email.hasHeader("In-Reply-To")) {
90 var inReplyTo = EmailAddress.parse(email.headerValue("In-Reply-To"));
91 if (!idToMail.containsKey(inReplyTo)) {
92 log.info("Can't find parent: " + inReplyTo + " - discarding");
93 } else {
94 var parent = idToMail.get(inReplyTo);
95 if (!idToConversation.containsKey(inReplyTo)) {
96 log.info("Can't find conversation: " + inReplyTo + " - discarding");
97 } else {
98 var conversation = idToConversation.get(inReplyTo);
99 conversation.addReply(parent, email);
100 idToConversation.put(email.id(), conversation);
101 }
102 }
103 }
104 }
105
106 return idToConversation.values().stream()
107 .distinct()
108 .collect(Collectors.toList());
109 }
110
111 public static String fromMail(Email mail) {
112 var mboxString = new StringWriter();
113 var mboxMail = new PrintWriter(mboxString);
114
115 mboxMail.println();
116 mboxMail.println("From " + mail.sender().address() + " " + mail.date().format(ctimeFormat));
117 mboxMail.println("From: " + MimeText.encode(mail.author().toObfuscatedString()));
118 if (!mail.author().equals(mail.sender())) {
119 mboxMail.println("Sender: " + MimeText.encode(mail.sender().toObfuscatedString()));
120 }
121 if (!mail.recipients().isEmpty()) {
122 mboxMail.println("To: " + mail.recipients().stream()
123 .map(EmailAddress::toString)
124 .map(MimeText::encode)
125 .collect(Collectors.joining(", ")));
126 }
127 mboxMail.println("Date: " + mail.date().format(DateTimeFormatter.RFC_1123_DATE_TIME));
128 mboxMail.println("Subject: " + MimeText.encode(mail.subject()));
129 mboxMail.println("Message-Id: " + mail.id());
130 mail.headers().forEach(header -> mboxMail.println(header + ": " + MimeText.encode(mail.headerValue(header))));
131 mboxMail.println();
132 mboxMail.println(encodeFromStrings(MimeText.encode(mail.body())));
133
134 return mboxString.toString();
135 }
136 }
|