// MergeSameStyleRuns.java
package pro.verron.officestamper.preset.preprocessors.similarrun;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.docx4j.wml.ContentAccessor;
import pro.verron.officestamper.api.PreProcessor;
import pro.verron.officestamper.utils.wml.WmlUtils;
import java.util.LinkedHashSet;
/// Pre-processor that collapses each group of similarly styled runs into the group's first run.
///
/// The groups are discovered by a [SimilarRunVisitor] that is walked over the whole document. For every group, the
/// first run is kept in place and becomes the merge target; every other run of the group is detached from the
/// content list of the first run's parent, and its content is appended to the target.
///
/// Content is accumulated in an insertion-ordered set, so the original order is kept, while any element that
/// compares equal (per `equals`/`hashCode`) to one already collected is stored only once.
///
/// @author Joseph Verron
public class MergeSameStyleRuns
        implements PreProcessor {

    /// Visits the document to collect groups of similar runs, then folds each group into its first run.
    ///
    /// @param document the package whose runs are merged in place
    @Override
    public void process(WordprocessingMLPackage document) {
        var collector = new SimilarRunVisitor();
        WmlUtils.visitDocument(document, collector);

        for (var group : collector.getSimilarStyleRuns()) {
            // The first run of the group survives and receives everything else.
            var target = group.getFirst();
            var targetContent = target.getContent();
            // NOTE(review): only the first run's parent is consulted; this presumes every run of the
            // group is a sibling under that same parent - confirm against SimilarRunVisitor.
            var siblings = ((ContentAccessor) target.getParent()).getContent();
            var merged = new LinkedHashSet<>(targetContent);

            for (int index = 1; index < group.size(); index++) {
                var absorbed = group.get(index);
                merged.addAll(absorbed.getContent());
                siblings.remove(absorbed);
            }

            // Swap the target's content for the accumulated one.
            // NOTE(review): moved elements are not re-parented here; verify whether later processing
            // relies on their parent reference.
            targetContent.clear();
            targetContent.addAll(merged);
        }
    }
}