DocxImageExtractor.java

package pro.verron.officestamper.core;

import jakarta.xml.bind.JAXBElement;
import org.docx4j.dml.Graphic;
import org.docx4j.dml.picture.Pic;
import org.docx4j.dml.wordprocessingDrawing.Inline;
import org.docx4j.openpackaging.exceptions.Docx4JException;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.docx4j.wml.Drawing;
import org.docx4j.wml.R;
import pro.verron.officestamper.api.OfficeStamperException;

import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;

/**
 * Extracts images from a docx document.
 *
 * @author Joseph Verron
 * @version ${version}
 * @since 1.4.7
 */
public class DocxImageExtractor {

    private final WordprocessingMLPackage wordprocessingMLPackage;

    /**
     * Creates a new image extractor for the given docx document.
     *
     * @param wordprocessingMLPackage the docx document to extract images from.
     */
    public DocxImageExtractor(WordprocessingMLPackage wordprocessingMLPackage) {
        this.wordprocessingMLPackage = wordprocessingMLPackage;
    }

    /**
     * Extract an image bytes from an embedded image run.
     *
     * @param run run containing the embedded drawing.
     */
    byte[] getRunDrawingData(R run) {
        String imageRelId = getPic(run).getBlipFill()
                                       .getBlip()
                                       .getEmbed();
        String imageRelPartName = getImageRelPartName(imageRelId);
        long size = getImageSize(imageRelPartName);
        InputStream stream = getImageStream(imageRelPartName);
        return streamToByteArray(size, stream);
    }

    private static Pic getPic(R run) {
        for (Object runContent : run.getContent()) {
            if (!(runContent instanceof JAXBElement<?> runElement)) break;
            if (!(runElement.getValue() instanceof Drawing drawing)) break;
            Graphic graphic = getInlineGraphic(drawing);
            return graphic.getGraphicData()
                          .getPic();
        }
        throw new OfficeStamperException("Run drawing not found !");
    }

    private String getImageRelPartName(String imageRelId) {
        // TODO: find a better way to find image rel part name in source part store
        return wordprocessingMLPackage.getMainDocumentPart()
                                      .getRelationshipsPart()
                                      .getPart(imageRelId)
                                      .getPartName()
                                      .getName()
                                      .substring(1);
    }

    private long getImageSize(String imageRelPartName) {
        try {
            return wordprocessingMLPackage.getSourcePartStore()
                                          .getPartSize(imageRelPartName);
        } catch (Docx4JException e) {
            throw new OfficeStamperException(e);
        }
    }

    private InputStream getImageStream(String imageRelPartName) {
        try {
            return wordprocessingMLPackage.getSourcePartStore()
                                          .loadPart(imageRelPartName);
        } catch (Docx4JException e) {
            throw new OfficeStamperException(e);
        }
    }

    /**
     * Converts an InputStream to a byte array.
     *
     * @param size expected size of the byte array.
     * @param is   input stream to read data from.
     *
     * @return the data from the input stream.
     */
    private static byte[] streamToByteArray(long size, InputStream is) {
        if (size > Integer.MAX_VALUE) throw new OfficeStamperException("Image size exceeds maximum allowed (2GB)");

        int intSize = (int) size;
        byte[] data = new byte[intSize];
        int numRead = tryRead(is, data);
        return Arrays.copyOfRange(data, 0, numRead);
    }

    /**
     * Extract an inline graphic from a drawing.
     *
     * @param drawing the drawing containing the graphic.
     */
    private static Graphic getInlineGraphic(Drawing drawing) {
        var anchorOrInline = drawing.getAnchorOrInline();
        if (anchorOrInline.isEmpty()) throw new OfficeStamperException("Anchor or Inline is empty !");
        if (anchorOrInline.getFirst() instanceof Inline inline) return inline.getGraphic();
        throw new OfficeStamperException("Don't know how to process anchor !");
    }

    private static int tryRead(InputStream is, byte[] data) {
        try {
            return is.read(data);
        } catch (IOException e) {
            throw new OfficeStamperException(e);
        }
    }

    /**
     * Extract the name of the image from an embedded image run.
     *
     * @param run run containing the embedded drawing.
     *
     * @return a {@link String} object
     */
    public String getRunDrawingFilename(R run) {
        return getPic(run).getNvPicPr()
                          .getCNvPr()
                          .getName();
    }

    /**
     * Extract the content type of the image from an embedded image run.
     *
     * @param run run containing the embedded drawing.
     *
     * @return a {@link String} object
     */
    public String getRunDrawingAltText(R run) {
        return getPic(run).getNvPicPr()
                          .getCNvPr()
                          .getDescr();
    }

    /**
     * Extract the width of the image from an embedded image run.
     *
     * @param run run containing the embedded drawing.
     *
     * @return a {@link Integer} object
     */
    public Integer getRunDrawingMaxWidth(R run) {
        return (int) getPic(run).getSpPr()
                                .getXfrm()
                                .getExt()
                                .getCx();
    }
}