/*
 * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
package org.openjdk.skara.mailinglist;

import org.openjdk.skara.email.*;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.time.format.DateTimeFormatter;
import java.util.*;
import java.util.function.Function;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

/**
 * Converts between mbox formatted text and {@link Email} / {@link Conversation} objects.
 */
public class Mbox {
    private static final Logger log = Logger.getLogger("org.openjdk.skara.mailinglist");

    // A message starts at a line beginning with "From " and extends up to (not including)
    // a blank line that is immediately followed by another line beginning with "From ".
    private static final Pattern mboxMessagePattern = Pattern.compile(
            "^(From (?:.(?!^\\R^From ))*)", Pattern.MULTILINE | Pattern.DOTALL);
    // Timestamp layout used on the "From " separator line written by fromMail().
    private static final DateTimeFormatter ctimeFormat = DateTimeFormatter.ofPattern(
            "EEE LLL dd HH:mm:ss yyyy", Locale.US);
    // Lines starting with "From " (optionally preceded by '>' characters) get one more '>' when encoded...
    private static final Pattern fromStringEncodePattern = Pattern.compile("^(>*From )", Pattern.MULTILINE);
    // ...and lose exactly one leading '>' when decoded.
    private static final Pattern fromStringDecodePattern = Pattern.compile("^>(>*From )", Pattern.MULTILINE);
    // Runs of characters outside the 7-bit ASCII range.
    private static final Pattern encodeQuotedPrintablePattern = Pattern.compile("([^\\x00-\\x7f]+)");
    // Encoded words of the form =?utf-8?b?<base64>?=. The charset and encoding tokens are
    // matched case-insensitively since they are frequently written in upper case.
    private static final Pattern decodedQuotedPrintablePattern = Pattern.compile(
            "=\\?utf-8\\?b\\?(.*?)\\?=", Pattern.CASE_INSENSITIVE);

    /**
     * Splits raw mbox text into individual messages and parses each of them.
     * <p>
     * Fragments that cannot be parsed on their own are joined with the fragment preceding
     * them and parsing is retried, since an unescaped "From " line inside a message body
     * causes the initial split to cut that message in two.
     *
     * @param mbox the raw mbox contents
     * @return the messages that could be parsed, in the order they appear in {@code mbox}
     */
    private static List<Email> splitMbox(String mbox) {
        // Initial split
        var messages = mboxMessagePattern.matcher(mbox).results()
                                         .map(match -> match.group(1))
                                         .filter(message -> !message.isEmpty())
                                         .map(Mbox::decodeFromStrings)
                                         .map(Mbox::decodeQuotedPrintable)
                                         .collect(Collectors.toList());

        // Pipermail can occasionally fail to encode 'From ' in message bodies, try to handle this.
        // Walk the fragments back to front, prepending each one to whatever has not parsed yet.
        var messageBuilder = new StringBuilder();
        var parsedMails = new ArrayList<Email>();
        Collections.reverse(messages);
        for (var message : messages) {
            messageBuilder.insert(0, message);
            try {
                var email = Email.parse(messageBuilder.toString());
                parsedMails.add(email);
                messageBuilder.setLength(0);
            } catch (RuntimeException ignored) {
                // Deliberately ignored: keep the text accumulated so far and retry once the
                // preceding fragment has been prepended.
            }
        }

        if (messageBuilder.length() > 0) {
            // Whatever is left never became parseable, even when combined with everything before it.
            log.warning("Discarding " + messageBuilder.length() + " characters of unparsable mbox content");
        }

        Collections.reverse(parsedMails);
        return parsedMails;
    }

    /**
     * Prefixes every line starting with "From " (after any number of '>') with an additional '>'.
     *
     * @param body the text to escape
     * @return the escaped text
     */
    private static String encodeFromStrings(String body) {
        var fromStringMatcher = fromStringEncodePattern.matcher(body);
        return fromStringMatcher.replaceAll(">$1");
    }

    /**
     * Reverses {@link #encodeFromStrings(String)} by removing one leading '>' from every line
     * where '>' characters are followed by "From ".
     *
     * @param body the text to unescape
     * @return the unescaped text
     */
    private static String decodeFromStrings(String body) {
        var fromStringMatcher = fromStringDecodePattern.matcher(body);
        return fromStringMatcher.replaceAll("$1");
    }

    /**
     * Replaces every run of non-ASCII characters with {@code =?utf-8?b?<base64>?=}, where the
     * payload is the Base64 encoding of the run's UTF-8 bytes.
     *
     * @param raw the text to encode
     * @return text in which all non-ASCII runs have been replaced
     */
    private static String encodeQuotedPrintable(String raw) {
        var quoteMatcher = encodeQuotedPrintablePattern.matcher(raw);
        // The replacement consists solely of the fixed prefix/suffix and Base64 characters, so it
        // contains no '$' or '\' that replaceAll could misinterpret.
        return quoteMatcher.replaceAll(mo -> "=?utf-8?b?"
                + Base64.getEncoder().encodeToString(mo.group(1).getBytes(StandardCharsets.UTF_8))
                + "?=");
    }

    /**
     * Replaces every {@code =?utf-8?b?<base64>?=} token with the UTF-8 text its payload decodes to.
     * Tokens whose payload is not valid Base64 are left unchanged.
     *
     * @param raw the text to decode
     * @return the decoded text
     */
    private static String decodeQuotedPrintable(String raw) {
        var quotedMatcher = decodedQuotedPrintablePattern.matcher(raw);
        return quotedMatcher.replaceAll(mo -> {
            String replacement;
            try {
                replacement = new String(Base64.getDecoder().decode(mo.group(1)), StandardCharsets.UTF_8);
            } catch (IllegalArgumentException e) {
                // Not valid Base64 - keep the token as it is rather than failing the whole mbox
                replacement = mo.group();
            }
            // replaceAll treats '$' and '\' in the returned string as group references / escapes,
            // so the decoded text has to be quoted in order to be inserted literally.
            return Matcher.quoteReplacement(replacement);
        });
    }

    /**
     * Parses mbox text and groups the contained mails into conversations.
     * <p>
     * Every mail without an {@code In-Reply-To} header starts a conversation. A mail with that
     * header is added as a reply to the conversation of the mail it refers to; if that mail, or a
     * conversation for it, is not known at the time the reply is processed, the reply is discarded.
     * When several mails share the same id, the first one encountered is used for lookups.
     *
     * @param mbox the raw mbox contents
     * @return the distinct conversations found
     */
    public static List<Conversation> parseMbox(String mbox) {
        var emails = splitMbox(mbox);
        var idToMail = emails.stream().collect(Collectors.toMap(Email::id, Function.identity(), (a, b) -> a));
        var idToConversation = idToMail.values().stream()
                                       .filter(email -> !email.hasHeader("In-Reply-To"))
                                       .collect(Collectors.toMap(Email::id, Conversation::new));

        for (var email : emails) {
            if (!email.hasHeader("In-Reply-To")) {
                continue;
            }
            var inReplyTo = EmailAddress.parse(email.headerValue("In-Reply-To"));
            var parent = idToMail.get(inReplyTo);
            if (parent == null) {
                log.info("Can't find parent: " + inReplyTo + " - discarding");
                continue;
            }
            var conversation = idToConversation.get(inReplyTo);
            if (conversation == null) {
                log.info("Can't find conversation: " + inReplyTo + " - discarding");
                continue;
            }
            conversation.addReply(parent, email);
            // Register the reply as well, so that later replies to it end up in the same conversation
            idToConversation.put(email.id(), conversation);
        }

        // Several ids can map to the same conversation - return each conversation only once
        return idToConversation.values().stream()
                               .distinct()
                               .collect(Collectors.toList());
    }

    /**
     * Serializes a mail as a single mbox entry.
     * <p>
     * The entry consists of an empty line, the "From " separator line, the From / Sender (only if
     * different from the author) / To (only if there are recipients) / Date / Subject / Message-Id
     * headers, all additional headers of the mail, an empty line and finally the body with
     * "From " lines escaped. Non-ASCII characters in the result are encoded as
     * {@code =?utf-8?b?<base64>?=}.
     *
     * @param mail the mail to serialize
     * @return the mbox representation of {@code mail}
     */
    public static String fromMail(Email mail) {
        var mboxString = new StringWriter();
        var mboxMail = new PrintWriter(mboxString);

        mboxMail.println();
        mboxMail.println("From " + mail.sender().address() + " " + mail.date().format(ctimeFormat));
        mboxMail.println("From: " + mail.author().toObfuscatedString());
        if (!mail.author().equals(mail.sender())) {
            mboxMail.println("Sender: " + mail.sender().toObfuscatedString());
        }
        if (!mail.recipients().isEmpty()) {
            mboxMail.println("To: " + mail.recipients().stream()
                                          .map(EmailAddress::toString)
                                          .collect(Collectors.joining(", ")));
        }
        mboxMail.println("Date: " + mail.date().format(DateTimeFormatter.RFC_1123_DATE_TIME));
        mboxMail.println("Subject: " + mail.subject());
        mboxMail.println("Message-Id: " + mail.id());
        mail.headers().forEach(header -> mboxMail.println(header + ": " + mail.headerValue(header)));
        mboxMail.println();
        mboxMail.println(encodeFromStrings(mail.body()));
        mboxMail.flush();

        return encodeQuotedPrintable(mboxString.toString());
    }
}