DocxToAsciiDoc.java

package pro.verron.officestamper.asciidoc;

import jakarta.xml.bind.JAXBElement;
import org.docx4j.TextUtils;
import org.docx4j.openpackaging.exceptions.Docx4JException;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.docx4j.openpackaging.parts.WordprocessingML.StyleDefinitionsPart;
import org.docx4j.wml.*;

import java.io.StringWriter;

/// Minimal DOCX → AsciiDoc text extractor used by tests. This intentionally mirrors a subset of the legacy Stringifier
/// formatting for:
///  - Paragraphs
///  - Tables (|=== fences, each cell prefixed with '|')
///  - Basic inline text extraction
///
/// More advanced features (headers/footers, breaks, styles) can be added incrementally as needed by tests.
final class DocxToAsciiDoc {
    /// Not instantiable: this class only exposes static helpers.
    private DocxToAsciiDoc() {}

    /// Renders the top-level content of the main document part as text.
    ///
    /// Every paragraph is followed by a blank line, every table is rendered as a `|===` fenced block, and any other
    /// top-level element is skipped.
    ///
    /// @param pkg     the package whose main document part is rendered
    /// @param dialect the output dialect, forwarded to paragraph rendering
    /// @return the concatenated rendering of all supported top-level elements
    static String compile(WordprocessingMLPackage pkg, AsciiDocDialect dialect) {
        var out = new StringBuilder();
        var mainPart = pkg.getMainDocumentPart();
        StyleDefinitionsPart styleDefs = mainPart.getStyleDefinitionsPart(false);
        for (Object child : mainPart.getContent()) {
            switch (unwrap(child)) {
                case P paragraph -> out.append(stringifyParagraph(paragraph, styleDefs, dialect))
                                       .append("\n\n");
                case Tbl table -> out.append(stringifyTable(table));
                // Anything else, including a null value, contributes nothing to the output.
                case null, default -> { }
            }
        }
        return out.toString();
    }

    /// Strips one level of `JAXBElement` wrapping.
    ///
    /// @param o a content object, possibly wrapped in a `JAXBElement`
    /// @return the wrapped value when `o` is a `JAXBElement`, otherwise `o` itself
    private static Object unwrap(Object o) {
        if (o instanceof JAXBElement<?> wrapper) {
            return wrapper.getValue();
        }
        return o;
    }

    /// Renders one paragraph according to the requested dialect.
    ///
    /// The COMPAT dialect renders the runs with their property markers and then applies the paragraph style; every
    /// other dialect currently falls back to the raw extracted text.
    ///
    /// @param p       the paragraph to render
    /// @param styles  the style definitions used to resolve style names, may be null
    /// @param dialect the output dialect
    /// @return the rendered paragraph, without a trailing paragraph separator
    private static String stringifyParagraph(P p, StyleDefinitionsPart styles, AsciiDocDialect dialect) {
        if (dialect != AsciiDocDialect.COMPAT) {
            // ADOC (initial simple): just raw text for now
            return extractText(p);
        }
        return applyParagraphStyle(stringifyRuns(p), p.getPPr(), styles);
    }

    /// Renders a table as a `|===` fenced block.
    ///
    /// Each cell of each row is written as `|` followed by the trimmed cell text and a blank line. Content that is
    /// not a row, or not a cell within a row, is skipped.
    ///
    /// @param tbl the table to render
    /// @return the fenced table block, ending with a newline
    private static String stringifyTable(Tbl tbl) {
        var out = new StringBuilder("|===\n");
        for (Object rowItem : tbl.getContent()) {
            if (unwrap(rowItem) instanceof Tr row) {
                for (Object cellItem : row.getContent()) {
                    if (unwrap(cellItem) instanceof Tc cell) {
                        String trimmed = extractText(cell).trim();
                        out.append("|")
                           .append(trimmed)
                           .append("\n\n");
                    }
                }
            }
        }
        return out.append("|===\n")
                  .toString();
    }

    /// Renders the direct children of a paragraph in the COMPAT notation.
    ///
    /// A run with non-empty content is emitted verbatim, or as `❬text❘{props}❭` when its run properties yield a
    /// property string. Breaks and tabs that are direct children of the paragraph are rendered as well; every other
    /// child is ignored.
    ///
    /// @param p the paragraph whose children are rendered
    /// @return the concatenated rendering, empty when nothing was rendered
    private static String stringifyRuns(P p) {
        var sb = new StringBuilder();
        for (Object o : p.getContent()) {
            Object v = unwrap(o);
            if (v instanceof R r) {
                String inner = stringifyRunContent(r);
                if (inner.isEmpty()) continue;
                String rpr = stringifyRPr(r.getRPr());
                if (rpr != null) {
                    sb.append("❬")
                      .append(inner)
                      .append("❘")
                      .append(rpr)
                      .append("❭");
                }
                else {
                    sb.append(inner);
                }
            }
            else if (v instanceof Br br) {
                // The break type may be null; `case null` routes it to the plain line break.
                sb.append(switch (br.getType()) {
                    case PAGE -> "\n[page-break]\n<<<\n";
                    case COLUMN -> "\n[col-break]\n<<<\n";
                    case null, default -> "<br/>\n";
                });
            }
            else if (v instanceof R.Tab) {
                // `v` has already been unwrapped, so the tab is matched directly. The former test for a
                // JAXBElement on the unwrapped value could only match a doubly wrapped element.
                sb.append("\t");
            }
        }
        return sb.toString();
    }

    /// Applies paragraph-level decoration to already rendered paragraph text.
    ///
    /// When the paragraph references a style whose name can be resolved and decorated, the decorated text replaces
    /// the input. When the paragraph properties carry section properties with a non-empty rendering, a section-break
    /// marker is appended after the content.
    ///
    /// @param text   the rendered paragraph content
    /// @param ppr    the paragraph properties, may be null
    /// @param styles the style definitions used to resolve the style id to a name, may be null
    /// @return the decorated text, or `text` unchanged when no decoration applies
    private static String applyParagraphStyle(String text, PPr ppr, StyleDefinitionsPart styles) {
        if (ppr == null) {
            return text;
        }

        String styled = text;
        var pStyle = ppr.getPStyle();
        String styleId = pStyle == null ? null : pStyle.getVal();
        if (styleId != null && styles != null) {
            String styleName = styles.getNameForStyleID(styleId);
            String decorated = styleName == null ? null : decorateWithStyle(styleName, text);
            if (decorated != null) {
                styled = decorated;
            }
        }

        // Section break marker after paragraph content
        SectPr sectPr = ppr.getSectPr();
        if (sectPr != null) {
            String sect = stringifySectPr(sectPr);
            if (!sect.isEmpty()) {
                styled += "\n[section-break, " + sect + "]\n<<<";
            }
        }
        return styled;
    }

    /// Extracts the text of a paragraph through `TextUtils.extractText`.
    ///
    /// @param p the paragraph to read
    /// @return the text written by the extractor
    /// @throws IllegalStateException when the extraction fails; the original exception is kept as the cause
    private static String extractText(P p) {
        var buffer = new StringWriter();
        try {
            TextUtils.extractText(p, buffer);
        } catch (Docx4JException e) {
            throw new IllegalStateException("Failed to extract text from paragraph", e);
        } catch (Exception e) {
            throw new IllegalStateException("Failed to extract text from paragraph, before docx4j version 1.5.1", e);
        }
        return buffer.toString();
    }

    /// Extracts the text of a table cell.
    ///
    /// The text of each paragraph directly inside the cell is followed by a blank line; the concatenation is then
    /// trimmed. Cell content that is not a paragraph is ignored.
    ///
    /// @param tc the table cell to read
    /// @return the trimmed text of the cell's paragraphs, empty when the cell has none
    private static String extractText(Tc tc) {
        return tc.getContent()
                 .stream()
                 .map(item -> unwrap(item))
                 .filter(P.class::isInstance)
                 .map(P.class::cast)
                 .map(paragraph -> extractText(paragraph) + "\n\n")
                 .collect(java.util.stream.Collectors.joining())
                 .trim();
    }

    /// Renders the content of a single run as text.
    ///
    /// Text nodes contribute their value, tabs become a tab character, and breaks become a page-break marker, a
    /// column-break marker, or `<br/>` followed by a newline. Any other run child is ignored.
    ///
    /// @param r the run whose children are rendered
    /// @return the concatenated rendering, empty when the run holds no supported content
    private static String stringifyRunContent(R r) {
        var out = new StringBuilder();
        for (Object child : r.getContent()) {
            Object value = unwrap(child);
            if (value instanceof Text text) {
                out.append(text.getValue());
            }
            else if (value instanceof R.Tab) {
                out.append("\t");
            }
            else if (value instanceof Br br) {
                // The break type may be null (presumably when the break carries no explicit type). A switch over
                // an enum throws NullPointerException on null unless a `case null` label is present, so null is
                // routed to the plain line break, matching how breaks are rendered at paragraph level.
                out.append(switch (br.getType()) {
                    case PAGE -> "\n[page-break]\n<<<\n";
                    case COLUMN -> "\n[col-break]\n<<<\n";
                    case null, default -> "<br/>\n";
                });
            }
        }
        return out.toString();
    }

    /// Renders the supported run properties as `{key=value,...}` with keys in alphabetical order.
    ///
    /// Supported keys are `b` (bold enabled), `i` (italic enabled) and `vertAlign` (vertical alignment value).
    ///
    /// @param rPr the run properties, may be null
    /// @return the property string, or null when `rPr` is null or none of the supported properties is set
    private static String stringifyRPr(RPr rPr) {
        if (rPr == null) {
            return null;
        }

        // Entries are appended in alphabetical key order (b, i, vertAlign), which is the order a sorted map
        // would produce.
        var entries = new java.util.ArrayList<String>(3);

        var bold = rPr.getB();
        if (bold != null && bold.isVal()) {
            entries.add("b=true");
        }

        var italic = rPr.getI();
        if (italic != null && italic.isVal()) {
            entries.add("i=true");
        }

        var vertAlign = rPr.getVertAlign();
        if (vertAlign != null && vertAlign.getVal() != null) {
            entries.add("vertAlign=" + vertAlign.getVal()
                                                .value());
        }

        return entries.isEmpty() ? null : "{" + String.join(",", entries) + "}";
    }

    /// Matches a style name containing "heading" followed by a level digit from 1 to 6, ignoring case.
    private static final java.util.regex.Pattern HEADING_STYLE =
            java.util.regex.Pattern.compile("(?i)heading\\s*([1-6])");

    /// Highest number of `=` markers emitted in front of a heading.
    private static final int MAX_TITLE_MARKERS = 6;

    /// Prefixes paragraph text with the AsciiDoc title marker matching a Word style name.
    ///
    /// "Title" (case-insensitive, surrounding whitespace ignored) maps to `= `. A name containing `heading N` maps
    /// to N + 1 equals signs, so `heading 1` yields `== ` and stays distinct from the document title. The marker
    /// count is capped at six, which means headings 5 and 6 share the same marker.
    ///
    /// @param styleName the resolved style name, may be null
    /// @param text      the already rendered paragraph text
    /// @return the decorated text terminated by a newline, or null when the style is neither a title nor a heading
    private static String decorateWithStyle(String styleName, String text) {
        String name = styleName == null ? "" : styleName.trim();
        if (name.equalsIgnoreCase("Title")) return "= " + text + "\n";
        java.util.regex.Matcher m = HEADING_STYLE.matcher(name);
        if (m.find()) {
            int lvl = Integer.parseInt(m.group(1));
            // Heading N sits one level below the document title, hence N + 1 markers.
            String prefix = "=".repeat(Math.min(lvl + 1, MAX_TITLE_MARKERS));
            return prefix + " " + text + "\n";
        }
        return null;
    }

    /// Renders the supported section properties as a nested `{key=value,...}` string with sorted keys.
    ///
    /// Supported groups are `docGrid` (line pitch), `pgMar` (page margins) and `pgSz` (page size); a group is only
    /// emitted when at least one of its values is set.
    ///
    /// @param sectPr the section properties to render
    /// @return the rendered properties, or an empty string when no supported value is set
    private static String stringifySectPr(SectPr sectPr) {
        var groups = new java.util.TreeMap<String, String>();

        // docGrid
        var grid = sectPr.getDocGrid();
        if (grid != null && grid.getLinePitch() != null) {
            groups.put("docGrid", "{linePitch=" + grid.getLinePitch() + "}");
        }

        // pgMar
        SectPr.PgMar margins = sectPr.getPgMar();
        if (margins != null) {
            var marginProps = new java.util.TreeMap<String, String>();
            putIfPresent(marginProps, "bottom", margins.getBottom());
            putIfPresent(marginProps, "footer", margins.getFooter());
            putIfPresent(marginProps, "gutter", margins.getGutter());
            putIfPresent(marginProps, "header", margins.getHeader());
            putIfPresent(marginProps, "left", margins.getLeft());
            putIfPresent(marginProps, "right", margins.getRight());
            putIfPresent(marginProps, "top", margins.getTop());
            if (!marginProps.isEmpty()) {
                groups.put("pgMar", joinSorted(marginProps));
            }
        }

        // pgSz
        SectPr.PgSz size = sectPr.getPgSz();
        if (size != null) {
            var sizeProps = new java.util.TreeMap<String, String>();
            putIfPresent(sizeProps, "h", size.getH());
            putIfPresent(sizeProps, "orient", size.getOrient());
            putIfPresent(sizeProps, "w", size.getW());
            if (!sizeProps.isEmpty()) {
                groups.put("pgSz", joinSorted(sizeProps));
            }
        }

        return groups.isEmpty() ? "" : joinSorted(groups);
    }

    /// Stores the string form of `value` under `key` unless `value` is null.
    private static void putIfPresent(java.util.Map<String, String> target, String key, Object value) {
        if (value != null) {
            target.put(key, String.valueOf(value));
        }
    }

    /// Joins the entries of a sorted map as `{k1=v1,k2=v2}` in key order.
    private static String joinSorted(java.util.SortedMap<String, String> entries) {
        var joiner = new java.util.StringJoiner(",", "{", "}");
        entries.forEach((key, value) -> joiner.add(key + "=" + value));
        return joiner.toString();
    }
}