RemoveMalformedComments.java

package pro.verron.officestamper.preset.preprocessors.malformedcomments;

import org.docx4j.TraversalUtil;
import org.docx4j.openpackaging.exceptions.Docx4JException;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.docx4j.openpackaging.parts.WordprocessingML.CommentsPart;
import org.docx4j.wml.*;
import org.jvnet.jaxb2_commons.ppp.Child;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import pro.verron.officestamper.api.OfficeStamperException;
import pro.verron.officestamper.api.PreProcessor;
import pro.verron.officestamper.utils.WmlUtils;

import java.math.BigInteger;
import java.util.*;

import static java.util.stream.Collectors.toSet;

public class RemoveMalformedComments
        implements PreProcessor {
    private static final Logger log = LoggerFactory.getLogger(RemoveMalformedComments.class);

    @Override public void process(WordprocessingMLPackage document) {
        var commentElements = WmlUtils.extractCommentElements(document);

        var commentIds = new ArrayList<BigInteger>(commentElements.size());
        var openedCommentsIds = new ArrayDeque<BigInteger>();
        for (Child commentElement : commentElements) {
            if (commentElement instanceof CommentRangeStart crs) {
                var lastOpenedCommentId = crs.getId();
                assert lastOpenedCommentId != null;
                log.debug("Comment {} opened.", lastOpenedCommentId);
                commentIds.add(lastOpenedCommentId);
                openedCommentsIds.add(lastOpenedCommentId);
            }
            else if (commentElement instanceof CommentRangeEnd cre) {
                var lastClosedCommentId = cre.getId();
                assert lastClosedCommentId != null;
                log.debug("Comment {} closed.", lastClosedCommentId);
                commentIds.add(lastClosedCommentId);

                var lastOpenedCommentId = openedCommentsIds.pollLast();
                if (!lastClosedCommentId.equals(lastOpenedCommentId)) {
                    log.debug("Comment {} is closing just after comment {} starts.",
                            lastClosedCommentId,
                            lastOpenedCommentId);
                    throw new OfficeStamperException("Cannot figure which comment contains the other !");
                }
            }
            else if (commentElement instanceof R.CommentReference cr) {
                var commentId = cr.getId();
                assert commentId != null;
                log.debug("Comment {} referenced.", commentId);
                commentIds.add(commentId);
            }
        }

        log.debug("These comments have been opened, but never closed: {}", openedCommentsIds);
        var malformedCommentIds = new ArrayList<>(openedCommentsIds);

        var mainDocumentPart = document.getMainDocumentPart();
        Set<BigInteger> writtenCommentsId = Optional.ofNullable(mainDocumentPart.getCommentsPart())
                                                    .map(RemoveMalformedComments::tryGetCommentsPart)
                                                    .map(Comments::getComment)
                                                    .orElse(Collections.emptyList())
                                                    .stream()
                                                    .filter(c -> !isEmpty(c))
                                                    .map(CTMarkup::getId)
                                                    .collect(toSet());

        commentIds.removeAll(writtenCommentsId);

        log.debug("These comments have been referenced in body, but have no related content: {}", commentIds);
        malformedCommentIds.addAll(commentIds);

        var crVisitor = new CommentReferenceRemoverVisitor(malformedCommentIds);
        var crsVisitor = new CommentRangeStartRemoverVisitor(malformedCommentIds);
        var creVisitor = new CommentRangeEndRemoverVisitor(malformedCommentIds);
        TraversalUtil.visit(document, true, List.of(crVisitor, crsVisitor, creVisitor));
        crVisitor.run();
        crsVisitor.run();
        creVisitor.run();
    }

    private static Comments tryGetCommentsPart(CommentsPart commentsPart) {
        try {
            return commentsPart.getContents();
        } catch (Docx4JException e) {
            throw new OfficeStamperException(e);
        }
    }

    private static boolean isEmpty(Comments.Comment c) {
        var content = c.getContent();
        return content == null || content.isEmpty();
    }

}