| 1 | package pro.verron.officestamper.asciidoc; | |
| 2 | ||
| 3 | import org.asciidoctor.Asciidoctor; | |
| 4 | import org.asciidoctor.Options; | |
| 5 | import org.asciidoctor.ast.*; | |
| 6 | import org.asciidoctor.ast.Block; | |
| 7 | import org.asciidoctor.ast.Cell; | |
| 8 | import org.asciidoctor.ast.Row; | |
| 9 | import org.asciidoctor.ast.Table; | |
| 10 | ||
| 11 | import java.util.ArrayList; | |
| 12 | import java.util.Collection; | |
| 13 | import java.util.List; | |
| 14 | import java.util.stream.Stream; | |
| 15 | ||
| 16 | import static pro.verron.officestamper.asciidoc.AsciiDocModel.*; | |
| 17 | ||
| 18 | /// Parser based on AsciidoctorJ producing an [AsciiDocModel]. | |
| 19 | /// | |
| 20 | /// Supported subset mapped into our model: | |
| 21 | /// - Headings: document title (if present) and sections (levels 1..6) | |
| 22 | /// - Paragraphs: paragraph blocks | |
| 23 | /// - Inline emphasis: <code>*bold*</code> and <code>_italic_</code> via a lightweight inline parser | |
| 24 | public final class AsciiDocParser { | |
| 25 | ||
| 26 | private AsciiDocParser() { | |
| 27 | // utility | |
| 28 | } | |
| 29 | ||
| 30 | /// Parses the given AsciiDoc string into a model using AsciidoctorJ AST traversal. | |
| 31 | /// | |
| 32 | /// Notes: | |
| 33 | /// - If the document has a header/title (e.g. a leading "= Title"), it is emitted as a level-1 Heading. | |
| 34 | /// - Section levels are offset by +1 when a document title is present to preserve the perceived hierarchy of the | |
| 35 | /// previous homemade parser where "= Title" was treated as a heading, not a special header. | |
| 36 | /// | |
| 37 | /// @param asciidoc source text | |
| 38 | /// | |
| 39 | /// @return parsed model | |
| 40 | public static AsciiDocModel parse(String asciidoc) { | |
| 41 | var blocks = new ArrayList<AsciiDocModel.Block>(); | |
| 42 |
2
1. parse : negated conditional → KILLED 2. parse : negated conditional → KILLED |
if (asciidoc == null || asciidoc.isBlank()) { |
| 43 |
1
1. parse : replaced return value with null for pro/verron/officestamper/asciidoc/AsciiDocParser::parse → KILLED |
return AsciiDocModel.of(blocks); |
| 44 | } | |
| 45 | ||
| 46 | try (Asciidoctor engine = Asciidoctor.Factory.create()) { | |
| 47 | Options options = Options.builder() | |
| 48 | .sourcemap(true) | |
| 49 | .build(); | |
| 50 | Document doc = engine.load(asciidoc, options); | |
| 51 | ||
| 52 | for (StructuralNode child : doc.getBlocks()) { | |
| 53 |
1
1. parse : removed call to pro/verron/officestamper/asciidoc/AsciiDocParser::traverse → KILLED |
traverse(child, blocks); |
| 54 | } | |
| 55 | } | |
| 56 | ||
| 57 |
1
1. parse : replaced return value with null for pro/verron/officestamper/asciidoc/AsciiDocParser::parse → KILLED |
return AsciiDocModel.of(blocks); |
| 58 | } | |
| 59 | ||
| 60 | private static void traverse(StructuralNode node, List<AsciiDocModel.Block> out) { | |
| 61 | switch (node) { | |
| 62 | case Section section -> { | |
| 63 | int lvl = section.getLevel(); | |
| 64 |
4
1. traverse : changed conditional boundary → SURVIVED 2. traverse : negated conditional → KILLED 3. traverse : changed conditional boundary → KILLED 4. traverse : negated conditional → KILLED |
if (lvl >= 1 && lvl <= 6) { |
| 65 | out.add(new Heading(lvl, parseInlines(section.getTitle()))); | |
| 66 | } | |
| 67 | for (StructuralNode b : section.getBlocks()) { | |
| 68 |
1
1. traverse : removed call to pro/verron/officestamper/asciidoc/AsciiDocParser::traverse → KILLED |
traverse(b, out); |
| 69 | } | |
| 70 | } | |
| 71 | case Table table -> { | |
| 72 | List<AsciiDocModel.Row> rows = extractTableRowsViaReflection(table); | |
| 73 |
1
1. traverse : negated conditional → KILLED |
if (!rows.isEmpty()) { |
| 74 | out.add(new AsciiDocModel.Table(rows)); | |
| 75 | } | |
| 76 | // If extraction failed, continue traversal into children to salvage paragraphs | |
| 77 | } | |
| 78 | case PhraseNode phraseNode -> out.add(new Paragraph(parseInlines(phraseNode.getText()))); | |
| 79 |
1
1. traverse : negated conditional → KILLED |
case Block block when "simple".equals(block.getContentModel()) -> |
| 80 | out.add(new Paragraph(parseInlines(String.join("\n", block.getLines())))); | |
| 81 | default -> { | |
| 82 | // Recurse into other container nodes to keep paragraphs found within | |
| 83 | List<StructuralNode> children = node.getBlocks(); | |
| 84 |
1
1. traverse : negated conditional → NO_COVERAGE |
if (children != null) { |
| 85 |
1
1. traverse : removed call to pro/verron/officestamper/asciidoc/AsciiDocParser::traverse → NO_COVERAGE |
for (StructuralNode c : children) traverse(c, out); |
| 86 | } | |
| 87 | } | |
| 88 | } | |
| 89 | } | |
| 90 | ||
| 91 | private static List<Inline> parseInlines(String text) { | |
| 92 | // Stack-based inline parser with simple tokens for '*', '_', text, and escapes. | |
| 93 | // Non-overlapping nesting is allowed; crossing markers are treated as plain text. | |
| 94 | var root = new Frame(FrameType.ROOT); | |
| 95 | var stack = new ArrayList<Frame>(); | |
| 96 | stack.add(root); | |
| 97 | ||
| 98 |
2
1. parseInlines : negated conditional → KILLED 2. parseInlines : negated conditional → KILLED |
if (text == null || text.isEmpty()) { |
| 99 |
1
1. parseInlines : replaced return value with Collections.emptyList for pro/verron/officestamper/asciidoc/AsciiDocParser::parseInlines → NO_COVERAGE |
return root.children; |
| 100 | } | |
| 101 | ||
| 102 |
2
1. parseInlines : negated conditional → KILLED 2. parseInlines : changed conditional boundary → KILLED |
for (int i = 0; i < text.length(); i++) { |
| 103 | char c = text.charAt(i); | |
| 104 | ||
| 105 | // Escapes for '*', '_', and '\\' | |
| 106 |
1
1. parseInlines : negated conditional → SURVIVED |
if (c == '\\') { |
| 107 |
3
1. parseInlines : changed conditional boundary → NO_COVERAGE 2. parseInlines : negated conditional → NO_COVERAGE 3. parseInlines : Replaced integer addition with subtraction → NO_COVERAGE |
if (i + 1 < text.length()) { |
| 108 |
1
1. parseInlines : Replaced integer addition with subtraction → NO_COVERAGE |
char next = text.charAt(i + 1); |
| 109 |
3
1. parseInlines : negated conditional → NO_COVERAGE 2. parseInlines : negated conditional → NO_COVERAGE 3. parseInlines : negated conditional → NO_COVERAGE |
if (next == '*' || next == '_' || next == '\\') { |
| 110 | stack.getLast().text.append(next); | |
| 111 |
1
1. parseInlines : Changed increment from 1 to -1 → NO_COVERAGE |
i++; |
| 112 | continue; | |
| 113 | } | |
| 114 | } | |
| 115 | // Lone backslash | |
| 116 | stack.getLast().text.append(c); | |
| 117 | continue; | |
| 118 | } | |
| 119 | ||
| 120 |
2
1. parseInlines : negated conditional → KILLED 2. parseInlines : negated conditional → KILLED |
if (c == '*' || c == '_') { |
| 121 |
1
1. parseInlines : negated conditional → NO_COVERAGE |
FrameType type = (c == '*') ? FrameType.BOLD : FrameType.ITALIC; |
| 122 | Frame top = stack.getLast(); | |
| 123 |
1
1. parseInlines : negated conditional → NO_COVERAGE |
if (top.type == type) { |
| 124 | // Close current frame | |
| 125 |
1
1. parseInlines : removed call to pro/verron/officestamper/asciidoc/AsciiDocParser$Frame::flushTextToChildren → NO_COVERAGE |
top.flushTextToChildren(); |
| 126 |
1
1. parseInlines : negated conditional → NO_COVERAGE |
Inline node = (type == FrameType.BOLD) ? new Bold(top.children) : new Italic(top.children); |
| 127 | stack.removeLast(); | |
| 128 | Frame parent = stack.getLast(); | |
| 129 | parent.children.add(node); | |
| 130 | } | |
| 131 |
3
1. parseInlines : negated conditional → NO_COVERAGE 2. parseInlines : negated conditional → NO_COVERAGE 3. parseInlines : negated conditional → NO_COVERAGE |
else if (top.type == FrameType.BOLD || top.type == FrameType.ITALIC || top.type == FrameType.ROOT) { |
| 132 | // Open new frame | |
| 133 | Frame f = new Frame(type); | |
| 134 | stack.add(f); | |
| 135 | } | |
| 136 | else { | |
| 137 | // Should not happen | |
| 138 | stack.getLast().text.append(c); | |
| 139 | } | |
| 140 | continue; | |
| 141 | } | |
| 142 | ||
| 143 | // Detect literal |TAB| token -> emit a Tab inline | |
| 144 |
9
1. parseInlines : changed conditional boundary → NO_COVERAGE 2. parseInlines : negated conditional → NO_COVERAGE 3. parseInlines : Replaced integer addition with subtraction → NO_COVERAGE 4. parseInlines : negated conditional → NO_COVERAGE 5. parseInlines : Replaced integer addition with subtraction → NO_COVERAGE 6. parseInlines : negated conditional → NO_COVERAGE 7. parseInlines : Replaced integer addition with subtraction → NO_COVERAGE 8. parseInlines : negated conditional → SURVIVED 9. parseInlines : Replaced integer addition with subtraction → NO_COVERAGE |
if (c == '|' && i + 4 < text.length() && text.charAt(i + 1) == 'T' && text.charAt(i + 2) == 'A' |
| 145 |
3
1. parseInlines : negated conditional → NO_COVERAGE 2. parseInlines : Replaced integer addition with subtraction → NO_COVERAGE 3. parseInlines : negated conditional → NO_COVERAGE |
&& text.charAt(i + 3) == 'B' && text.charAt(i + 4) == '|') { |
| 146 | // Flush any pending text | |
| 147 | stack.getLast() | |
| 148 |
1
1. parseInlines : removed call to pro/verron/officestamper/asciidoc/AsciiDocParser$Frame::flushTextToChildren → NO_COVERAGE |
.flushTextToChildren(); |
| 149 | stack.getLast().children.add(new Tab()); | |
| 150 |
1
1. parseInlines : Changed increment from 4 to -4 → NO_COVERAGE |
i += 4; |
| 151 | continue; | |
| 152 | } | |
| 153 | ||
| 154 | // Regular char | |
| 155 | stack.getLast().text.append(c); | |
| 156 | } | |
| 157 | ||
| 158 | // Unwind: any unclosed frames become literal markers + content as plain text in parent | |
| 159 |
2
1. parseInlines : negated conditional → KILLED 2. parseInlines : changed conditional boundary → KILLED |
while (stack.size() > 1) { |
| 160 | Frame unfinished = stack.removeLast(); | |
| 161 |
1
1. parseInlines : negated conditional → NO_COVERAGE |
char marker = unfinished.type == FrameType.BOLD ? '*' : '_'; |
| 162 |
1
1. parseInlines : removed call to pro/verron/officestamper/asciidoc/AsciiDocParser$Frame::flushTextToChildren → NO_COVERAGE |
unfinished.flushTextToChildren(); |
| 163 | // Build literal: marker + children as text + (no closing marker since it is missing) | |
| 164 | StringBuilder literal = new StringBuilder(); | |
| 165 | literal.append(marker); | |
| 166 | for (Inline in : unfinished.children) { | |
| 167 | literal.append(in.text()); | |
| 168 | } | |
| 169 | stack.getLast().text.append(literal); | |
| 170 | } | |
| 171 | ||
| 172 | // Flush remainder text on root | |
| 173 |
1
1. parseInlines : removed call to pro/verron/officestamper/asciidoc/AsciiDocParser$Frame::flushTextToChildren → KILLED |
root.flushTextToChildren(); |
| 174 |
1
1. parseInlines : replaced return value with Collections.emptyList for pro/verron/officestamper/asciidoc/AsciiDocParser::parseInlines → KILLED |
return root.children; |
| 175 | } | |
| 176 | ||
| 177 | private static List<AsciiDocModel.Row> extractTableRowsViaReflection(Table table) { | |
| 178 | var header = table.getHeader() | |
| 179 | .stream() | |
| 180 | .map(AsciiDocParser::convertRowReflective) | |
| 181 | .toList(); | |
| 182 | var body = table.getBody() | |
| 183 | .stream() | |
| 184 | .map(AsciiDocParser::convertRowReflective) | |
| 185 | .toList(); | |
| 186 | var footer = table.getFooter() | |
| 187 | .stream() | |
| 188 | .map(AsciiDocParser::convertRowReflective) | |
| 189 | .toList(); | |
| 190 |
1
1. extractTableRowsViaReflection : replaced return value with Collections.emptyList for pro/verron/officestamper/asciidoc/AsciiDocParser::extractTableRowsViaReflection → KILLED |
return Stream.of(header, body, footer) |
| 191 | .flatMap(Collection::stream) | |
| 192 | .toList(); | |
| 193 | } | |
| 194 | ||
| 195 | private static AsciiDocModel.Row convertRowReflective(Row row) { | |
| 196 | ||
| 197 |
1
1. convertRowReflective : replaced return value with null for pro/verron/officestamper/asciidoc/AsciiDocParser::convertRowReflective → KILLED |
return new AsciiDocModel.Row(row.getCells() |
| 198 | .stream() | |
| 199 | .map(AsciiDocParser::convertCell) | |
| 200 | .toList()); | |
| 201 | ||
| 202 | } | |
| 203 | ||
| 204 | private static AsciiDocModel.Cell convertCell(Cell cell) { | |
| 205 |
1
1. convertCell : replaced return value with null for pro/verron/officestamper/asciidoc/AsciiDocParser::convertCell → KILLED |
return new AsciiDocModel.Cell(parseInlines(cell.getText())); |
| 206 | } | |
| 207 | ||
| 208 | private enum FrameType { | |
| 209 | ROOT, | |
| 210 | BOLD, | |
| 211 | ITALIC | |
| 212 | } | |
| 213 | ||
| 214 | private static final class Frame { | |
| 215 | final FrameType type; | |
| 216 | final List<Inline> children = new ArrayList<>(); | |
| 217 | final StringBuilder text = new StringBuilder(); | |
| 218 | ||
| 219 | Frame(FrameType type) {this.type = type;} | |
| 220 | ||
| 221 | void flushTextToChildren() { | |
| 222 |
1
1. flushTextToChildren : negated conditional → KILLED |
if (!text.isEmpty()) { |
| 223 | children.add(new Text(text.toString())); | |
| 224 |
1
1. flushTextToChildren : removed call to java/lang/StringBuilder::setLength → SURVIVED |
text.setLength(0); |
| 225 | } | |
| 226 | } | |
| 227 | } | |
| 228 | } | |
Mutations | ||
| 42 |
1.1 2.2 |
|
| 43 |
1.1 |
|
| 53 |
1.1 |
|
| 57 |
1.1 |
|
| 64 |
1.1 2.2 3.3 4.4 |
|
| 68 |
1.1 |
|
| 73 |
1.1 |
|
| 79 |
1.1 |
|
| 84 |
1.1 |
|
| 85 |
1.1 |
|
| 98 |
1.1 2.2 |
|
| 99 |
1.1 |
|
| 102 |
1.1 2.2 |
|
| 106 |
1.1 |
|
| 107 |
1.1 2.2 3.3 |
|
| 108 |
1.1 |
|
| 109 |
1.1 2.2 3.3 |
|
| 111 |
1.1 |
|
| 120 |
1.1 2.2 |
|
| 121 |
1.1 |
|
| 123 |
1.1 |
|
| 125 |
1.1 |
|
| 126 |
1.1 |
|
| 131 |
1.1 2.2 3.3 |
|
| 144 |
1.1 2.2 3.3 4.4 5.5 6.6 7.7 8.8 9.9 |
|
| 145 |
1.1 2.2 3.3 |
|
| 148 |
1.1 |
|
| 150 |
1.1 |
|
| 159 |
1.1 2.2 |
|
| 161 |
1.1 |
|
| 162 |
1.1 |
|
| 173 |
1.1 |
|
| 174 |
1.1 |
|
| 190 |
1.1 |
|
| 197 |
1.1 |
|
| 205 |
1.1 |
|
| 222 |
1.1 |
|
| 224 |
1.1 |