| 1 | package pro.verron.officestamper.preset.preprocessors.malformedcomments; | |
| 2 | ||
| 3 | import org.docx4j.TraversalUtil; | |
| 4 | import org.docx4j.openpackaging.exceptions.Docx4JException; | |
| 5 | import org.docx4j.openpackaging.packages.WordprocessingMLPackage; | |
| 6 | import org.docx4j.openpackaging.parts.WordprocessingML.CommentsPart; | |
| 7 | import org.docx4j.wml.*; | |
| 8 | import org.jvnet.jaxb2_commons.ppp.Child; | |
| 9 | import org.slf4j.Logger; | |
| 10 | import org.slf4j.LoggerFactory; | |
| 11 | import pro.verron.officestamper.api.OfficeStamperException; | |
| 12 | import pro.verron.officestamper.api.PreProcessor; | |
| 13 | import pro.verron.officestamper.utils.wml.WmlUtils; | |
| 14 | ||
| 15 | import java.math.BigInteger; | |
| 16 | import java.util.ArrayDeque; | |
| 17 | import java.util.ArrayList; | |
| 18 | import java.util.List; | |
| 19 | import java.util.Optional; | |
| 20 | ||
| 21 | import static java.util.Collections.emptyList; | |
| 22 | import static java.util.stream.Collectors.toSet; | |
| 23 | ||
| 24 | /// This pre-processor removes malformed comments from a WordprocessingMLPackage document. | |
| 25 | /// | |
| 26 | /// Malformed comments are those that: | |
| 27 | /// | |
| 28 | /// - Are opened but never closed (unbalanced comments) | |
| 29 | /// - Are referenced in the document body but have no corresponding comment content | |
| 30 | /// | |
| 31 | /// | |
| 32 | /// The processor traverses all comment-related elements in the document, validates their structure, and removes any | |
| 33 | /// malformed comment references, range starts, and range ends. | |
| 34 | /// | |
| 35 | /// @author Joseph Verron | |
| 36 | /// @version ${version} | |
| 37 | public class RemoveMalformedComments | |
| 38 | implements PreProcessor { | |
| 39 | private static final Logger log = LoggerFactory.getLogger(RemoveMalformedComments.class); | |
| 40 | ||
| 41 | @Override | |
| 42 | public void process(WordprocessingMLPackage document) { | |
| 43 | var commentElements = WmlUtils.extractCommentElements(document); | |
| 44 | ||
| 45 | var commentIds = new ArrayList<BigInteger>(commentElements.size()); | |
| 46 | var openedCommentsIds = new ArrayDeque<BigInteger>(); | |
| 47 | for (Child commentElement : commentElements) { | |
| 48 | switch (commentElement) { | |
| 49 | case CommentRangeStart crs -> { | |
| 50 | var lastOpenedCommentId = crs.getId(); | |
| 51 | assert lastOpenedCommentId != null; | |
| 52 | log.trace("Comment {} opened.", lastOpenedCommentId); | |
| 53 | commentIds.add(lastOpenedCommentId); | |
| 54 | openedCommentsIds.add(lastOpenedCommentId); | |
| 55 | } | |
| 56 | case CommentRangeEnd cre -> { | |
| 57 | var lastClosedCommentId = cre.getId(); | |
| 58 | assert lastClosedCommentId != null; | |
| 59 | log.trace("Comment {} closed.", lastClosedCommentId); | |
| 60 | commentIds.add(lastClosedCommentId); | |
| 61 | ||
| 62 | var lastOpenedCommentId = openedCommentsIds.pollLast(); | |
| 63 |
1
1. process : negated conditional → KILLED |
if (!lastClosedCommentId.equals(lastOpenedCommentId)) { |
| 64 | log.debug("Comment {} is closing just after comment {} starts.", | |
| 65 | lastClosedCommentId, | |
| 66 | lastOpenedCommentId); | |
| 67 | throw new OfficeStamperException("Cannot figure which comment contains the other !"); | |
| 68 | } | |
| 69 | } | |
| 70 | case R.CommentReference cr -> { | |
| 71 | var commentId = cr.getId(); | |
| 72 | assert commentId != null; | |
| 73 | log.trace("Comment {} referenced.", commentId); | |
| 74 | commentIds.add(commentId); | |
| 75 | } | |
| 76 | default -> { /* Do Nothing */ } | |
| 77 | } | |
| 78 | } | |
| 79 | ||
| 80 | log.debug("These comments have been opened, but never closed: {}", openedCommentsIds); | |
| 81 | var malformedCommentIds = new ArrayList<>(openedCommentsIds); | |
| 82 | ||
| 83 | var mainDocumentPart = document.getMainDocumentPart(); | |
| 84 | var writtenCommentsId = Optional.ofNullable(mainDocumentPart.getCommentsPart()) | |
| 85 | .map(RemoveMalformedComments::tryGetCommentsPart) | |
| 86 | .map(Comments::getComment) | |
| 87 | .orElse(emptyList()) | |
| 88 | .stream() | |
| 89 |
2
1. lambda$process$0 : replaced boolean return with true for pro/verron/officestamper/preset/preprocessors/malformedcomments/RemoveMalformedComments::lambda$process$0 → TIMED_OUT 2. lambda$process$0 : negated conditional → KILLED |
.filter(c -> !isEmpty(c)) |
| 90 | .map(CTMarkup::getId) | |
| 91 | .collect(toSet()); | |
| 92 | ||
| 93 | commentIds.removeAll(writtenCommentsId); | |
| 94 | ||
| 95 | if (!commentIds.isEmpty()) log.debug("Comments referenced in body, without related content: {}", commentIds); | |
| 96 | malformedCommentIds.addAll(commentIds); | |
| 97 | ||
| 98 | var crVisitor = new CommentReferenceRemoverVisitor(malformedCommentIds); | |
| 99 | var crsVisitor = new CommentRangeStartRemoverVisitor(malformedCommentIds); | |
| 100 | var creVisitor = new CommentRangeEndRemoverVisitor(malformedCommentIds); | |
| 101 |
1
1. process : removed call to org/docx4j/TraversalUtil::visit → TIMED_OUT |
TraversalUtil.visit(document, true, List.of(crVisitor, crsVisitor, creVisitor)); |
| 102 |
1
1. process : removed call to pro/verron/officestamper/preset/preprocessors/malformedcomments/CommentReferenceRemoverVisitor::run → TIMED_OUT |
crVisitor.run(); |
| 103 |
1
1. process : removed call to pro/verron/officestamper/preset/preprocessors/malformedcomments/CommentRangeStartRemoverVisitor::run → TIMED_OUT |
crsVisitor.run(); |
| 104 |
1
1. process : removed call to pro/verron/officestamper/preset/preprocessors/malformedcomments/CommentRangeEndRemoverVisitor::run → TIMED_OUT |
creVisitor.run(); |
| 105 | } | |
| 106 | ||
| 107 | private static Comments tryGetCommentsPart(CommentsPart commentsPart) { | |
| 108 | try { | |
| 109 |
1
1. tryGetCommentsPart : replaced return value with null for pro/verron/officestamper/preset/preprocessors/malformedcomments/RemoveMalformedComments::tryGetCommentsPart → KILLED |
return commentsPart.getContents(); |
| 110 | } catch (Docx4JException e) { | |
| 111 | throw new OfficeStamperException(e); | |
| 112 | } | |
| 113 | } | |
| 114 | ||
| 115 | private static boolean isEmpty(Comments.Comment c) { | |
| 116 | var content = c.getContent(); | |
| 117 |
3
1. isEmpty : negated conditional → KILLED 2. isEmpty : replaced boolean return with true for pro/verron/officestamper/preset/preprocessors/malformedcomments/RemoveMalformedComments::isEmpty → KILLED 3. isEmpty : negated conditional → KILLED |
return content == null || content.isEmpty(); |
| 118 | } | |
| 119 | } | |
Mutations | ||
| 63 |
1.1 |
|
| 89 |
1.1 2.2 |
|
| 101 |
1.1 |
|
| 102 |
1.1 |
|
| 103 |
1.1 |
|
| 104 |
1.1 |
|
| 109 |
1.1 |
|
| 117 |
1.1 2.2 3.3 |