1 /* 2 * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 package org.openjdk.skara.mailinglist; 24 25 import org.openjdk.skara.email.*; 26 27 import java.io.*; 28 import java.time.format.DateTimeFormatter; 29 import java.util.*; 30 import java.util.function.Function; 31 import java.util.logging.Logger; 32 import java.util.regex.Pattern; 33 import java.util.stream.Collectors; 34 35 public class Mbox { 36 private final static Logger log = Logger.getLogger("org.openjdk.skara.mailinglist"); 37 38 private final static Pattern mboxMessagePattern = Pattern.compile( 39 "^(From (?:.(?!^\\R^From ))*)", Pattern.MULTILINE | Pattern.DOTALL); 40 private final static DateTimeFormatter ctimeFormat = DateTimeFormatter.ofPattern( 41 "EEE LLL dd HH:mm:ss yyyy", Locale.US); 42 private final static Pattern fromStringEncodePattern = Pattern.compile("^(>*From )", Pattern.MULTILINE); 43 private final static Pattern fromStringDecodePattern = Pattern.compile("^>(>*From )", Pattern.MULTILINE); 44 45 private static List<Email> splitMbox(String mbox) { 46 // Initial split 47 var messages = mboxMessagePattern.matcher(mbox).results() 48 .map(match -> match.group(1)) 49 .filter(message -> message.length() > 0) 50 .map(Mbox::decodeFromStrings) 51 .collect(Collectors.toList()); 52 53 // Pipermail can occasionally fail to encode 'From ' in message bodies, try to handle this 54 var messageBuilder = new StringBuilder(); 55 var parsedMails = new ArrayList<Email>(); 56 Collections.reverse(messages); 57 for (var message : messages) { 58 messageBuilder.insert(0, message); 59 try { 60 var email = Email.parse(messageBuilder.toString()); 61 parsedMails.add(email); 62 messageBuilder.setLength(0); 63 } catch (RuntimeException ignored) { 64 } 65 } 66 67 Collections.reverse(parsedMails); 68 return parsedMails; 69 } 70 71 private static String encodeFromStrings(String body) { 72 var fromStringMatcher = fromStringEncodePattern.matcher(body); 73 return fromStringMatcher.replaceAll(">$1"); 74 } 75 76 private static String decodeFromStrings(String body) { 77 var fromStringMatcher = fromStringDecodePattern.matcher(body); 78 return fromStringMatcher.replaceAll("$1"); 79 } 80 81 public static List<Conversation> parseMbox(String mbox) { 82 var emails = splitMbox(mbox); 83 var idToMail = emails.stream().collect(Collectors.toMap(Email::id, Function.identity(), (a, b) -> a)); 84 var idToConversation = idToMail.values().stream() 85 .filter(email -> !email.hasHeader("In-Reply-To")) 86 .collect(Collectors.toMap(Email::id, Conversation::new)); 87 88 for (var email : emails) { 89 if (email.hasHeader("In-Reply-To")) { 90 var inReplyTo = EmailAddress.parse(email.headerValue("In-Reply-To")); 91 if (!idToMail.containsKey(inReplyTo)) { 92 log.info("Can't find parent: " + inReplyTo + " - discarding"); 93 } else { 94 var parent = idToMail.get(inReplyTo); 95 if (!idToConversation.containsKey(inReplyTo)) { 96 log.info("Can't find conversation: " + inReplyTo + " - discarding"); 97 } else { 98 var conversation = idToConversation.get(inReplyTo); 99 conversation.addReply(parent, email); 100 idToConversation.put(email.id(), conversation); 101 } 102 } 103 } 104 } 105 106 return idToConversation.values().stream() 107 .distinct() 108 .collect(Collectors.toList()); 109 } 110 111 public static String fromMail(Email mail) { 112 var mboxString = new StringWriter(); 113 var mboxMail = new PrintWriter(mboxString); 114 115 mboxMail.println(); 116 mboxMail.println("From " + mail.sender().address() + " " + mail.date().format(ctimeFormat)); 117 mboxMail.println("From: " + MimeText.encode(mail.author().toObfuscatedString())); 118 if (!mail.author().equals(mail.sender())) { 119 mboxMail.println("Sender: " + MimeText.encode(mail.sender().toObfuscatedString())); 120 } 121 if (!mail.recipients().isEmpty()) { 122 mboxMail.println("To: " + mail.recipients().stream() 123 .map(EmailAddress::toString) 124 .map(MimeText::encode) 125 .collect(Collectors.joining(", "))); 126 } 127 mboxMail.println("Date: " + mail.date().format(DateTimeFormatter.RFC_1123_DATE_TIME)); 128 mboxMail.println("Subject: " + MimeText.encode(mail.subject())); 129 mboxMail.println("Message-Id: " + mail.id()); 130 mail.headers().forEach(header -> mboxMail.println(header + ": " + MimeText.encode(mail.headerValue(header)))); 131 mboxMail.println(); 132 mboxMail.println(encodeFromStrings(MimeText.encode(mail.body()))); 133 134 return mboxString.toString(); 135 } 136 }