1 /*
  2  * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.
  8  *
  9  * This code is distributed in the hope that it will be useful, but WITHOUT
 10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 12  * version 2 for more details (a copy is included in the LICENSE file that
 13  * accompanied this code).
 14  *
 15  * You should have received a copy of the GNU General Public License version
 16  * 2 along with this work; if not, write to the Free Software Foundation,
 17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 18  *
 19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 20  * or visit www.oracle.com if you need additional information or have any
 21  * questions.
 22  */
 23 package org.openjdk.skara.mailinglist;
 24 
 25 import org.openjdk.skara.email.*;
 26 
 27 import java.io.*;
 28 import java.time.format.DateTimeFormatter;
 29 import java.util.*;
 30 import java.util.function.Function;
 31 import java.util.logging.Logger;
 32 import java.util.regex.Pattern;
 33 import java.util.stream.Collectors;
 34 
 35 public class Mbox {
 36     private final static Logger log = Logger.getLogger("org.openjdk.skara.mailinglist");
 37 
 38     private final static Pattern mboxMessagePattern = Pattern.compile(
 39             "^(From (?:.(?!^\\R^From ))*)", Pattern.MULTILINE | Pattern.DOTALL);
 40     private final static DateTimeFormatter ctimeFormat = DateTimeFormatter.ofPattern(
 41             "EEE LLL dd HH:mm:ss yyyy", Locale.US);
 42     private final static Pattern fromStringEncodePattern = Pattern.compile("^(>*From )", Pattern.MULTILINE);
 43     private final static Pattern fromStringDecodePattern = Pattern.compile("^>(>*From )", Pattern.MULTILINE);
 44 
 45     private static List<Email> splitMbox(String mbox) {
 46         // Initial split
 47         var messages = mboxMessagePattern.matcher(mbox).results()
 48                                          .map(match -> match.group(1))
 49                                          .filter(message -> message.length() > 0)
 50                                          .map(Mbox::decodeFromStrings)
 51                                          .collect(Collectors.toList());
 52 
 53         // Pipermail can occasionally fail to encode 'From ' in message bodies, try to handle this
 54         var messageBuilder = new StringBuilder();
 55         var parsedMails = new ArrayList<Email>();
 56         Collections.reverse(messages);
 57         for (var message : messages) {
 58             messageBuilder.insert(0, message);
 59             try {
 60                 var email = Email.parse(messageBuilder.toString());
 61                 parsedMails.add(email);
 62                 messageBuilder.setLength(0);
 63             } catch (RuntimeException ignored) {
 64             }
 65         }
 66 
 67         Collections.reverse(parsedMails);
 68         return parsedMails;
 69     }
 70 
 71     private static String encodeFromStrings(String body) {
 72         var fromStringMatcher = fromStringEncodePattern.matcher(body);
 73         return fromStringMatcher.replaceAll(">$1");
 74     }
 75 
 76     private static String decodeFromStrings(String body) {
 77         var fromStringMatcher = fromStringDecodePattern.matcher(body);
 78         return fromStringMatcher.replaceAll("$1");
 79     }
 80 
 81     public static List<Conversation> parseMbox(String mbox) {
 82         var emails = splitMbox(mbox);
 83         var idToMail = emails.stream().collect(Collectors.toMap(Email::id, Function.identity(), (a, b) -> a));
 84         var idToConversation = idToMail.values().stream()
 85                                        .filter(email -> !email.hasHeader("In-Reply-To"))
 86                                        .collect(Collectors.toMap(Email::id, Conversation::new));
 87 
 88         for (var email : emails) {
 89             if (email.hasHeader("In-Reply-To")) {
 90                 var inReplyTo = EmailAddress.parse(email.headerValue("In-Reply-To"));
 91                 if (!idToMail.containsKey(inReplyTo)) {
 92                     log.info("Can't find parent: " + inReplyTo + " - discarding");
 93                 } else {
 94                     var parent = idToMail.get(inReplyTo);
 95                     if (!idToConversation.containsKey(inReplyTo)) {
 96                         log.info("Can't find conversation: " + inReplyTo + " - discarding");
 97                     } else {
 98                         var conversation = idToConversation.get(inReplyTo);
 99                         conversation.addReply(parent, email);
100                         idToConversation.put(email.id(), conversation);
101                     }
102                 }
103             }
104         }
105 
106         return idToConversation.values().stream()
107                                .distinct()
108                                .collect(Collectors.toList());
109     }
110 
111     public static String fromMail(Email mail) {
112         var mboxString = new StringWriter();
113         var mboxMail = new PrintWriter(mboxString);
114 
115         mboxMail.println();
116         mboxMail.println("From " + mail.sender().address() + "  " + mail.date().format(ctimeFormat));
117         mboxMail.println("From: " + MimeText.encode(mail.author().toObfuscatedString()));
118         if (!mail.author().equals(mail.sender())) {
119             mboxMail.println("Sender: " + MimeText.encode(mail.sender().toObfuscatedString()));
120         }
121         if (!mail.recipients().isEmpty()) {
122             mboxMail.println("To: " + mail.recipients().stream()
123                                           .map(EmailAddress::toString)
124                                           .map(MimeText::encode)
125                                           .collect(Collectors.joining(", ")));
126         }
127         mboxMail.println("Date: " + mail.date().format(DateTimeFormatter.RFC_1123_DATE_TIME));
128         mboxMail.println("Subject: " + MimeText.encode(mail.subject()));
129         mboxMail.println("Message-Id: " + mail.id());
130         mail.headers().forEach(header -> mboxMail.println(header + ": " + MimeText.encode(mail.headerValue(header))));
131         mboxMail.println();
132         mboxMail.println(encodeFromStrings(MimeText.encode(mail.body())));
133 
134         return mboxString.toString();
135     }
136 }