Skip to content

Commit a4c5b69

Browse files
committed
Do not repeatedly concatenate lines when parsing diffs
Because string concatenation has to allocate a new string for each allocation the time required for concatenating `n` lines is `O(n^2)` (with a constant depending on the line length). This can be optimised to `O(n)` (this is the purpose of `StringBuilder` after all). In this case a list of lines is used because this avoids problems with line ending encoding (which is currently handled by always using "\r\n" internally). This also makes it easier to insert symbols at the start of the line (for example plus or minus for diff types). The use sites of the `DiffNode` label are refactored to prevent duplicate work (for example joining lines and immediately splitting them again).
1 parent 32c9dea commit a4c5b69

6 files changed

Lines changed: 51 additions & 25 deletions

File tree

src/main/java/org/variantsync/diffdetective/diff/difftree/DiffNode.java

Lines changed: 36 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import org.variantsync.diffdetective.util.fide.FixTrueFalse;
1010

1111
import java.util.*;
12+
import java.util.stream.Collectors;
1213
import java.util.function.Function;
1314

1415
import static org.variantsync.diffdetective.util.fide.FormulaUtils.negate;
@@ -29,7 +30,7 @@ public class DiffNode {
2930
private final DiffLineNumber to = DiffLineNumber.Invalid();
3031

3132
private Node featureMapping;
32-
private String label;
33+
private List<String> lines;
3334

3435
/**
3536
* The parent {@link DiffNode} before the edit.
@@ -59,13 +60,20 @@ private DiffNode() {
5960
public DiffNode(DiffType diffType, CodeType codeType,
6061
DiffLineNumber fromLines, DiffLineNumber toLines,
6162
Node featureMapping, String label) {
63+
this(diffType, codeType, fromLines, toLines, featureMapping,
64+
new ArrayList<String>(Arrays.asList(StringUtils.LINEBREAK_REGEX.split(label, -1))));
65+
}
66+
67+
public DiffNode(DiffType diffType, CodeType codeType,
68+
DiffLineNumber fromLines, DiffLineNumber toLines,
69+
Node featureMapping, List<String> lines) {
6270
this();
6371
this.diffType = diffType;
6472
this.codeType = codeType;
6573
this.from.set(fromLines);
6674
this.to.set(toLines);
6775
this.featureMapping = featureMapping;
68-
this.label = label;
76+
this.lines = lines;
6977
}
7078

7179
/**
@@ -79,20 +87,33 @@ public static DiffNode createRoot() {
7987
new DiffLineNumber(1, 1, 1),
8088
DiffLineNumber.Invalid(),
8189
FixTrueFalse.True,
82-
""
90+
new ArrayList()
8391
);
8492
}
8593

8694
public static DiffNode createCode(DiffType diffType, DiffLineNumber fromLines, DiffLineNumber toLines, String code) {
8795
return new DiffNode(diffType, CodeType.CODE, fromLines, toLines, null, code);
8896
}
8997

90-
public void setLabel(final String label) {
91-
this.label = label;
98+
public static DiffNode createCode(DiffType diffType, DiffLineNumber fromLines, DiffLineNumber toLines, List<String> lines) {
99+
return new DiffNode(diffType, CodeType.CODE, fromLines, toLines, null, lines);
100+
}
101+
102+
public void addLines(final List<String> lines) {
103+
this.lines.addAll(lines);
104+
}
105+
106+
public List<String> getLines() {
107+
return lines;
92108
}
93109

94110
public String getLabel() {
95-
return label;
111+
return lines.stream().collect(Collectors.joining(StringUtils.LINEBREAK));
112+
}
113+
114+
public void setLabel(String label) {
115+
lines.clear();
116+
Collections.addAll(lines, StringUtils.LINEBREAK_REGEX.split(label, -1));
96117
}
97118

98119
/**
@@ -680,7 +701,7 @@ public int getID() {
680701
return id;
681702
}
682703

683-
public static DiffNode fromID(final int id) {
704+
public static DiffNode fromID(final int id, String label) {
684705
// lowest 8 bits
685706
final int lowestBitsMask = (1 << ID_OFFSET) - 1;
686707

@@ -694,7 +715,7 @@ public static DiffNode fromID(final int id) {
694715
new DiffLineNumber(fromInDiff, DiffLineNumber.InvalidLineNumber, DiffLineNumber.InvalidLineNumber),
695716
DiffLineNumber.Invalid(),
696717
null,
697-
""
718+
label
698719
);
699720
}
700721

@@ -737,12 +758,12 @@ public void assertSemanticConsistency() {
737758
}
738759
}
739760

740-
public static String toTextDiffLine(final DiffType diffType, final String text) {
741-
return diffType.symbol + StringUtils.LINEBREAK_REGEX.matcher(text).replaceAll(StringUtils.LINEBREAK + diffType.symbol);
761+
public static String toTextDiffLine(final DiffType diffType, final List<String> lines) {
762+
return lines.stream().collect(Collectors.joining(StringUtils.LINEBREAK + diffType.symbol, diffType.symbol, ""));
742763
}
743764

744765
public String toTextDiffLine() {
745-
return toTextDiffLine(diffType, this.getLabel());
766+
return toTextDiffLine(diffType, lines);
746767
}
747768

748769
public String toTextDiff() {
@@ -758,9 +779,10 @@ public String toTextDiff() {
758779
diff.append(child.toTextDiff());
759780
}
760781

782+
// Add endif after macro
761783
if (isMacro()) {
762784
diff
763-
.append(toTextDiffLine(this.diffType, CodeType.ENDIF.asMacroText()))
785+
.append(toTextDiffLine(this.diffType, List.of(CodeType.ENDIF.asMacroText())))
764786
.append(StringUtils.LINEBREAK);
765787
}
766788

@@ -786,11 +808,11 @@ public boolean equals(Object o) {
786808
if (this == o) return true;
787809
if (o == null || getClass() != o.getClass()) return false;
788810
DiffNode diffNode = (DiffNode) o;
789-
return isMultilineMacro == diffNode.isMultilineMacro && diffType == diffNode.diffType && codeType == diffNode.codeType && from.equals(diffNode.from) && to.equals(diffNode.to) && Objects.equals(featureMapping, diffNode.featureMapping) && label.equals(diffNode.label);
811+
return isMultilineMacro == diffNode.isMultilineMacro && diffType == diffNode.diffType && codeType == diffNode.codeType && from.equals(diffNode.from) && to.equals(diffNode.to) && Objects.equals(featureMapping, diffNode.featureMapping) && lines.equals(diffNode.lines);
790812
}
791813

792814
@Override
793815
public int hashCode() {
794-
return Objects.hash(diffType, codeType, isMultilineMacro, from, to, featureMapping, label);
816+
return Objects.hash(diffType, codeType, isMultilineMacro, from, to, featureMapping, lines);
795817
}
796818
}

src/main/java/org/variantsync/diffdetective/diff/difftree/parse/DiffNodeParser.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
import org.variantsync.diffdetective.diff.difftree.DiffType;
88
import org.variantsync.diffdetective.feature.CPPAnnotationParser;
99

10+
import java.util.ArrayList;
11+
1012
public record DiffNodeParser(CPPAnnotationParser annotationParser) {
1113
public static final DiffNodeParser Default = new DiffNodeParser(CPPAnnotationParser.Default);
1214

@@ -28,10 +30,12 @@ public DiffNode fromDiffLine(String diffLine) throws IllFormedAnnotationExceptio
2830
featureMapping = annotationParser.parseDiffLine(diffLine);
2931
}
3032

33+
ArrayList lines = new ArrayList();
34+
lines.add(label);
3135
return new DiffNode(
3236
diffType, codeType,
3337
DiffLineNumber.Invalid(), DiffLineNumber.Invalid(),
3438
featureMapping,
35-
label);
39+
lines);
3640
}
3741
}

src/main/java/org/variantsync/diffdetective/diff/difftree/parse/DiffTreeParser.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -79,9 +79,8 @@ public static DiffResult<DiffTree> createDiffTree(
7979
beforeStack.push(root);
8080
afterStack.push(root);
8181

82-
String line;
83-
for (int i = 0; (line = fullDiff.readLine()) != null; i++) {
84-
final String currentLine = line; // Has to be final, because it's used in a lambda.
82+
String currentLine;
83+
for (int i = 0; (currentLine = fullDiff.readLine()) != null; i++) {
8584
final DiffType diffType = DiffType.ofDiffLine(currentLine);
8685

8786
// count line numbers
@@ -133,7 +132,7 @@ public static DiffResult<DiffTree> createDiffTree(
133132
// collapse multiple code lines
134133
if (lastCode != null) {
135134
if (collapseMultipleCodeLines && newNode.isCode() && lastCode.diffType.equals(newNode.diffType)) {
136-
lastCode.setLabel(lastCode.getLabel() + StringUtils.LINEBREAK + newNode.getLabel());
135+
lastCode.addLines(newNode.getLines());
137136
continue;
138137
} else {
139138
lastCode = endCodeBlock(lastCode, lastLineNo);
@@ -149,6 +148,7 @@ public static DiffResult<DiffTree> createDiffTree(
149148
if (newNode.isCode()) {
150149
lastCode = newNode;
151150
} else if (newNode.isEndif()) {
151+
final String currentLineFinal = currentLine;
152152
diffType.matchBeforeAfter(beforeStack, afterStack,
153153
stack -> {
154154
// Set corresponding line of now closed annotation.
@@ -160,7 +160,7 @@ public static DiffResult<DiffTree> createDiffTree(
160160
popIf(stack);
161161

162162
if (stack.isEmpty()) {
163-
errorPropagation.accept(DiffError.ENDIF_WITHOUT_IF, "ENDIF without IF at line \"" + currentLine + "\"!");
163+
errorPropagation.accept(DiffError.ENDIF_WITHOUT_IF, "ENDIF without IF at line \"" + currentLineFinal + "\"!");
164164
}
165165
});
166166
if (error.get() != null) { return error.get(); }

src/main/java/org/variantsync/diffdetective/diff/difftree/serialize/nodeformat/DiffNodeLabelFormat.java

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,7 @@ public interface DiffNodeLabelFormat extends LinegraphFormat {
1919
* @return The corresponding {@link DiffNode}
2020
*/
2121
default DiffNode fromLabelAndId(final String lineGraphNodeLabel, final int nodeId) {
22-
final DiffNode diffNode = DiffNode.fromID(nodeId);
23-
diffNode.setLabel(lineGraphNodeLabel);
24-
return diffNode;
22+
return DiffNode.fromID(nodeId, lineGraphNodeLabel);
2523
}
2624

2725
/**

src/main/java/org/variantsync/diffdetective/diff/difftree/transform/CollapseNestedNonEditedMacros.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,11 +113,13 @@ private static void collapseChain(Stack<DiffNode> chain) {
113113
final DiffNode beforeParent = head.getBeforeParent();
114114
final DiffNode afterParent = head.getAfterParent();
115115

116+
ArrayList lines = new ArrayList();
117+
lines.add("$Collapsed Nested Annotations$");
116118
final DiffNode merged = new DiffNode(
117119
DiffType.NON, CodeType.IF,
118120
head.getFromLine(), head.getToLine(),
119121
new And(featureMappings.toArray()),
120-
"$Collapsed Nested Annotations$");
122+
lines);
121123

122124
head.drop();
123125
merged.stealChildrenOf(end);

src/main/java/org/variantsync/diffdetective/diff/difftree/transform/Starfold.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ public void mergeArms(final DiffNode starRoot, Time time, final DiffType targetD
7777
targetDiffType,
7878
DiffLineNumber.Invalid(),
7979
DiffLineNumber.Invalid(),
80-
starArms.stream().map(DiffNode::getLabel).collect(Collectors.joining(StringUtils.LINEBREAK))
80+
starArms.stream().flatMap(node -> node.getLines().stream()).toList()
8181
),
8282
targetIndex,
8383
time

0 commit comments

Comments
 (0)