XlsxIterator.java

package pro.verron.officestamper.utils.sml;

import org.docx4j.openpackaging.exceptions.Docx4JException;
import org.docx4j.openpackaging.packages.SpreadsheetMLPackage;
import org.docx4j.openpackaging.parts.SpreadsheetML.WorksheetPart;
import org.docx4j.wml.ContentAccessor;
import org.docx4j.wml.SdtBlock;
import org.docx4j.wml.SdtRun;
import org.jspecify.annotations.Nullable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xlsx4j.sml.Row;
import org.xlsx4j.sml.Sheet;
import pro.verron.officestamper.utils.UtilsException;
import pro.verron.officestamper.utils.iterator.ResetableIterator;

import java.util.*;
import java.util.function.Supplier;

import static org.docx4j.XmlUtils.unwrap;

/// XlsxIterator is an [ResetableIterator] implementation that iterates through the elements of an Excel spreadsheet
/// document.
///
/// This iterator handles various XML elements found in an Excel file, including:
///
///     - [Sheet] - worksheet elements that contain rows of data
///     - [Row] - individual rows containing cells
///     - [ContentAccessor] - elements that can contain other content
///     - [SdtRun] and [SdtBlock] - structured document tags
///
/// The iterator automatically traverses the hierarchical structure of the spreadsheet, unwrapping nested elements and
/// providing a flat iteration interface over all contained objects.
///
/// @author Joseph Verron
/// @version ${version}
/// @since 3.0
public class XlsxIterator
        implements ResetableIterator<Object> {

    private static final Logger log = LoggerFactory.getLogger(XlsxIterator.class);
    private final Supplier<Iterator<?>> supplier;
    private final SpreadsheetMLPackage spreadsheet;
    private Queue<Iterator<?>> iteratorQueue;
    private @Nullable Object next;


    /// Constructs a new XlsxIterator for iterating through the elements of a SpreadsheetML package.
    ///
    /// @param spreadsheet the SpreadsheetMLPackage to iterate through
    ///
    /// @throws UtilsException if there's an error accessing the workbook contents
    public XlsxIterator(SpreadsheetMLPackage spreadsheet) {
        this.spreadsheet = spreadsheet;
        try {
            supplier = spreadsheet.getWorkbookPart()
                                  .getContents()
                                  .getSheets()
                                  .getSheet()::iterator;
        } catch (Docx4JException e) {
            throw new UtilsException(e);
        }
        var startingIterator = supplier.get();
        this.iteratorQueue = Collections.asLifoQueue(new ArrayDeque<>());
        this.iteratorQueue.add(startingIterator);
        this.next = startingIterator.hasNext() ? unwrap(startingIterator.next()) : null;
    }

    @Override
    public void reset() {
        var startingIterator = supplier.get();
        this.iteratorQueue = Collections.asLifoQueue(new ArrayDeque<>());
        this.iteratorQueue.add(startingIterator);
        this.next = startingIterator.hasNext() ? unwrap(startingIterator.next()) : null;
    }

    @Override
    public boolean hasNext() {
        return next != null;
    }

    @Override
    public Object next() {
        if (next == null) throw new NoSuchElementException("No more elements to iterate");

        var result = next;
        next = null;
        switch (result) {
            case ContentAccessor contentAccessor -> {
                var content = contentAccessor.getContent();
                iteratorQueue.add(content.iterator());
            }
            case SdtRun sdtRun -> {
                var sdtContent = sdtRun.getSdtContent();
                var content = sdtContent.getContent();
                iteratorQueue.add(content.iterator());
            }
            case SdtBlock sdtBlock -> {
                var sdtContent = sdtBlock.getSdtContent();
                var content = sdtContent.getContent();
                iteratorQueue.add(content.iterator());
            }
            case Sheet sheet -> {
                List<Row> content;
                try {
                    var sheetId = sheet.getId();
                    var relationshipsPart = spreadsheet.getWorkbookPart()
                                                       .getRelationshipsPart();
                    var part = relationshipsPart.getPart(sheetId);
                    content = ((WorksheetPart) part).getContents()
                                                    .getSheetData()
                                                    .getRow();
                } catch (Docx4JException e) {
                    throw new UtilsException(e);
                }
                iteratorQueue.add(content.iterator());
            }
            case Row row -> {
                var content = row.getC();
                iteratorQueue.add(content.iterator());
            }
            default -> log.debug("Unknown type: {}", result.getClass());
        }
        while (!iteratorQueue.isEmpty() && next == null) {
            var nextIterator = iteratorQueue.poll();
            if (nextIterator.hasNext()) {
                next = unwrap(nextIterator.next());
                iteratorQueue.add(nextIterator);
            }
        }
        return result;
    }
}