Skip to content

Commit 952464f

Browse files
ibbempmbittner
authored and committed
feat: improve the source tracking to reduce Source.Unknown
1 parent 23698b8 commit 952464f

14 files changed

Lines changed: 86 additions & 72 deletions

File tree

src/main/java/org/variantsync/diffdetective/diff/git/GitDiffer.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,11 @@
2121
import org.variantsync.diffdetective.variation.DiffLinesLabel;
2222
import org.variantsync.diffdetective.variation.diff.VariationDiff;
2323
import org.variantsync.diffdetective.variation.diff.parse.VariationDiffParser;
24+
import org.variantsync.diffdetective.variation.tree.source.GitSource;
2425

2526
import java.io.*;
2627
import java.nio.charset.StandardCharsets;
28+
import java.nio.file.Path;
2729
import java.util.ArrayList;
2830
import java.util.List;
2931
import java.util.Optional;
@@ -252,6 +254,7 @@ private static CommitDiffResult getPatchDiffs(
252254

253255
final VariationDiff<DiffLinesLabel> variationDiff = VariationDiffParser.createVariationDiff(
254256
fullDiff,
257+
new GitSource(repository, childCommit.getId().name(), Path.of(filename)),
255258
repository.getParseOptions().variationDiffParseOptions()
256259
);
257260

src/main/java/org/variantsync/diffdetective/examplesearch/ExampleFinder.java

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -106,14 +106,10 @@ private boolean checkIfExample(Analysis analysis, String localDiff) {
106106
// We do not want a variationDiff for the entire file but only for the local change to have a small example.
107107
final VariationDiff<DiffLinesLabel> localTree;
108108
try {
109-
localTree = VariationDiff.fromDiff(localDiff, new VariationDiffParseOptions(annotationParser, true, true));
109+
localTree = VariationDiff.fromDiff(localDiff, Source.findFirst(variationDiff, GitPatch.class), new VariationDiffParseOptions(annotationParser, true, true));
110110
// Not every local diff can be parsed to a VariationDiff because diffs are unaware of the underlying language (i.e., CPP).
111111
// We want only running examples whose diffs describe entire diff trees for easier understanding.
112-
if (isGoodExample.test(localTree)) {
113-
GitPatch variationDiffSource = Source.findFirst(variationDiff, GitPatch.class);
114-
Assert.assertNotNull(variationDiffSource);
115-
localTree.setSource(variationDiffSource.shallowClone());
116-
} else {
112+
if (!isGoodExample.test(localTree)) {
117113
return false;
118114
}
119115
} catch (DiffParseException e) {

src/main/java/org/variantsync/diffdetective/experiments/thesis_bm/ConstructionValidation.java

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import java.io.IOException;
66
import java.io.OutputStreamWriter;
77
import java.io.Writer;
8+
import java.nio.file.Path;
89
import java.util.HashMap;
910
import java.util.HashSet;
1011
import java.util.LinkedHashMap;
@@ -42,6 +43,7 @@
4243
import org.variantsync.diffdetective.variation.diff.Time;
4344
import org.variantsync.diffdetective.variation.diff.filter.VariationDiffFilter;
4445
import org.variantsync.diffdetective.variation.diff.parse.VariationDiffParser;
46+
import org.variantsync.diffdetective.variation.tree.source.GitSource;
4547
import org.variantsync.functjonal.category.InplaceSemigroup;
4648
import org.variantsync.functjonal.map.MergeMap;
4749

@@ -375,6 +377,7 @@ private void counts(VariationDiff<DiffLinesLabel> tree, VariationDiffStatistics
375377
}
376378

377379
private VariationDiff<DiffLinesLabel> parseVariationTree(Analysis analysis, RevCommit commit) throws IOException, DiffParseException {
380+
String fileName = analysis.getCurrentPatch().getFileName(AFTER);
378381
try (BufferedReader afterFile =
379382
new BufferedReader(
380383
/*
@@ -386,10 +389,14 @@ private VariationDiff<DiffLinesLabel> parseVariationTree(Analysis analysis, RevC
386389
GitDiffer.getBeforeFullFile(
387390
analysis.getRepository(),
388391
commit,
389-
analysis.getCurrentPatch().getFileName(AFTER)),
392+
fileName),
390393
0xfeff)) // BOM, same as GitDiffer.BOM_PATTERN
391394
) {
392-
return VariationDiffParser.createVariationTree(afterFile, analysis.getRepository().getParseOptions().variationDiffParseOptions());
395+
return VariationDiffParser.createVariationTree(
396+
afterFile,
397+
new GitSource(analysis.getRepository(), commit.getId().name(), Path.of(fileName)),
398+
analysis.getRepository().getParseOptions().variationDiffParseOptions()
399+
);
393400
}
394401
}
395402

src/main/java/org/variantsync/diffdetective/experiments/thesis_es/UnparseAnalysis.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,7 @@ public static String parseUnparseTree(String text, VariationDiffParseOptions opt
226226
public static String parseUnparseDiff(String textDiff, VariationDiffParseOptions option) {
227227
String temp = "b";
228228
try {
229-
VariationDiff<DiffLinesLabel> diff = VariationDiff.fromDiff(textDiff, option);
229+
VariationDiff<DiffLinesLabel> diff = VariationDiff.fromDiff(textDiff, Source.Unknown, option);
230230
temp = VariationUnparser.unparseDiff(diff);
231231
} catch (Exception e) {
232232
e.printStackTrace();

src/main/java/org/variantsync/diffdetective/variation/diff/VariationDiff.java

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
import org.variantsync.diffdetective.variation.diff.construction.JGitDiff;
2121
import org.variantsync.diffdetective.variation.diff.parse.VariationDiffParseOptions;
2222
import org.variantsync.diffdetective.variation.diff.parse.VariationDiffParser;
23-
import org.variantsync.diffdetective.variation.diff.source.PatchString;
2423
import org.variantsync.diffdetective.variation.diff.traverse.VariationDiffTraversal;
2524
import org.variantsync.diffdetective.variation.diff.traverse.VariationDiffVisitor;
2625
import org.variantsync.diffdetective.variation.tree.VariationTree;
@@ -91,9 +90,7 @@ public VariationDiff(DiffNode<L> root, Source source) {
9190
*/
9291
public static VariationDiff<DiffLinesLabel> fromFile(final Path p, VariationDiffParseOptions parseOptions) throws IOException, DiffParseException {
9392
try (BufferedReader file = Files.newBufferedReader(p)) {
94-
final VariationDiff<DiffLinesLabel> tree = VariationDiffParser.createVariationDiff(file, parseOptions);
95-
tree.setSource(new FileSource(p));
96-
return tree;
93+
return VariationDiffParser.createVariationDiff(file, new FileSource(p), parseOptions);
9794
}
9895
}
9996

@@ -103,13 +100,13 @@ public static VariationDiff<DiffLinesLabel> fromFile(final Path p, VariationDiff
103100
* So just lines preceded by "+", "-", or " " are expected.
104101
* @param diff The diff as text. Lines should be separated by a newline character. Each line should be preceded by either "+", "-", or " ".
105102
* @param parseOptions {@link VariationDiffParseOptions} for the parsing process.
103+
* @param source the {@link Source} of {@code diff}
106104
* @return The parsed VariationDiff. Parse failures are reported by throwing a {@link DiffParseException}.
107105
* @throws DiffParseException if {@code diff} couldn't be parsed
108106
*/
109-
public static VariationDiff<DiffLinesLabel> fromDiff(final String diff, final VariationDiffParseOptions parseOptions) throws DiffParseException {
110-
final VariationDiff<DiffLinesLabel> d;
107+
public static VariationDiff<DiffLinesLabel> fromDiff(final String diff, final Source source, final VariationDiffParseOptions parseOptions) throws DiffParseException {
111108
try {
112-
d = VariationDiffParser.createVariationDiff(diff, parseOptions);
109+
return VariationDiffParser.createVariationDiff(diff, source, parseOptions);
113110
} catch (DiffParseException e) {
114111
Logger.error("""
115112
Could not parse diff:
@@ -119,8 +116,6 @@ public static VariationDiff<DiffLinesLabel> fromDiff(final String diff, final Va
119116
diff);
120117
throw e;
121118
}
122-
d.setSource(new PatchString(diff));
123-
return d;
124119
}
125120

126121
/**
@@ -164,7 +159,7 @@ public static Result<VariationDiff<DiffLinesLabel>, List<DiffError>> fromPatch(f
164159

165160
/**
166161
* Create a VariationDiff from two given text files.
167-
* @see #fromLines(String, String, DiffAlgorithm.SupportedAlgorithm, VariationDiffParseOptions)
162+
* @see #fromLines(String, String, Source, Source, DiffAlgorithm.SupportedAlgorithm, VariationDiffParseOptions)
168163
*/
169164
public static VariationDiff<DiffLinesLabel> fromFiles(
170165
final Path beforeFile,
@@ -176,22 +171,24 @@ public static VariationDiff<DiffLinesLabel> fromFiles(
176171
try (BufferedReader b = Files.newBufferedReader(beforeFile);
177172
BufferedReader a = Files.newBufferedReader(afterFile)
178173
) {
179-
return fromLines(IOUtils.toString(b), IOUtils.toString(a), algorithm, options);
174+
return fromLines(IOUtils.toString(b), IOUtils.toString(a), new FileSource(beforeFile), new FileSource(afterFile), algorithm, options);
180175
}
181176
}
182177

183178
/**
184179
* Creates a variation diff from two line-based text inputs.
185180
* This method just forwards to:
186-
* @see JGitDiff#diff(String, String, DiffAlgorithm.SupportedAlgorithm, VariationDiffParseOptions)
181+
* @see JGitDiff#diff(String, String, Source, Source, DiffAlgorithm.SupportedAlgorithm, VariationDiffParseOptions)
187182
*/
188183
public static VariationDiff<DiffLinesLabel> fromLines(
189184
String before,
190185
String after,
186+
Source beforeSource,
187+
Source afterSource,
191188
DiffAlgorithm.SupportedAlgorithm algorithm,
192189
VariationDiffParseOptions options) throws IOException, DiffParseException
193190
{
194-
return JGitDiff.diff(before, after, algorithm, options);
191+
return JGitDiff.diff(before, after, beforeSource, afterSource, algorithm, options);
195192
}
196193

197194
/**

src/main/java/org/variantsync/diffdetective/variation/diff/construction/JGitDiff.java

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
import org.eclipse.jgit.diff.*;
55
import org.variantsync.diffdetective.diff.git.GitDiffer;
66
import org.variantsync.diffdetective.diff.result.DiffParseException;
7+
import org.variantsync.diffdetective.util.CompositeSource;
8+
import org.variantsync.diffdetective.util.Source;
79
import org.variantsync.diffdetective.variation.DiffLinesLabel;
810
import org.variantsync.diffdetective.variation.diff.Time;
911
import org.variantsync.diffdetective.variation.diff.VariationDiff;
@@ -122,9 +124,11 @@ Using our own formatter without diff headers (paired with a maximum context (?))
122124
* Uses JGit to diff the two files using the specified {@code options}, and afterwards, creates the variation diff.
123125
* Creates a variation diff from two line-based text inputs.
124126
* First creates a line-based diff with {@link #textDiff(String, String, DiffAlgorithm.SupportedAlgorithm)}
125-
* and then parses that diff with {@link VariationDiff#fromDiff(String, VariationDiffParseOptions)}.
127+
* and then parses that diff with {@link VariationDiff#fromDiff(String, Source, VariationDiffParseOptions)}.
126128
* @param linesBefore State of annotated lines before the change.
127129
* @param linesAfter State of annotated lines after the change.
130+
* @param sourceBefore the {@link Source} of {@code linesBefore}
131+
* @param sourceAfter the {@link Source} of {@code linesAfter}
128132
* @param algorithm Specification of which algorithm to use for diffing with JGit.
129133
* @param options various options for parsing
130134
* @return A variation diff comprising the changes.
@@ -134,9 +138,15 @@ Using our own formatter without diff headers (paired with a maximum context (?))
134138
public static VariationDiff<DiffLinesLabel> diff(
135139
String linesBefore,
136140
String linesAfter,
141+
Source sourceBefore,
142+
Source sourceAfter,
137143
DiffAlgorithm.SupportedAlgorithm algorithm,
138144
VariationDiffParseOptions options
139145
) throws IOException, DiffParseException {
140-
return VariationDiff.fromDiff(textDiff(linesBefore, linesAfter, algorithm), options);
146+
return VariationDiff.fromDiff(
147+
textDiff(linesBefore, linesAfter, algorithm),
148+
new CompositeSource("JGitDiff.textDiff", sourceBefore, sourceAfter),
149+
options
150+
);
141151
}
142152
}

src/main/java/org/variantsync/diffdetective/variation/diff/parse/VariationDiffParser.java

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import org.variantsync.diffdetective.feature.Annotation;
1818
import org.variantsync.diffdetective.feature.AnnotationType;
1919
import org.variantsync.diffdetective.util.Assert;
20+
import org.variantsync.diffdetective.util.Source;
2021
import org.variantsync.diffdetective.variation.DiffLinesLabel;
2122
import org.variantsync.diffdetective.variation.NodeType;
2223
import org.variantsync.diffdetective.variation.diff.DiffNode;
@@ -117,17 +118,18 @@ public record DiffLine(DiffType diffType, String content) {
117118

118119

119120
/**
120-
* The same as {@link VariationDiffParser#createVariationDiff(BufferedReader, VariationDiffParseOptions)}
121+
* The same as {@link VariationDiffParser#createVariationDiff(BufferedReader, Source, VariationDiffParseOptions)}
121122
* but with the diff given as a single string with line breaks instead of a {@link BufferedReader}.
122123
*
123124
* @throws DiffParseException if {@code fullDiff} couldn't be parsed
124125
*/
125126
public static VariationDiff<DiffLinesLabel> createVariationDiff(
126127
final String fullDiff,
128+
final Source source,
127129
final VariationDiffParseOptions parseOptions
128130
) throws DiffParseException {
129131
try {
130-
return createVariationDiff(new BufferedReader(new StringReader(fullDiff)), parseOptions);
132+
return createVariationDiff(new BufferedReader(new StringReader(fullDiff)), source, parseOptions);
131133
} catch (IOException e) {
132134
throw new AssertionError("No actual IO should be performed because only a StringReader is used");
133135
}
@@ -140,18 +142,20 @@ public static VariationDiff<DiffLinesLabel> createVariationDiff(
140142
* This parsing algorithm is described in detail in Sören Viegener's bachelor's thesis.
141143
*
142144
* @param fullDiff The full diff of a patch obtained from a buffered reader.
145+
* @param source the {@link Source} of {@code fullDiff}
143146
* @param options {@link VariationDiffParseOptions} for the parsing process.
144147
* @return A parsed {@link VariationDiff} upon success or an error indicating why parsing failed.
145148
* @throws IOException when reading from {@code fullDiff} fails.
146149
* @throws DiffParseException if an error in the diff or macro syntax is detected
147150
*/
148151
public static VariationDiff<DiffLinesLabel> createVariationDiff(
149152
BufferedReader fullDiff,
153+
Source source,
150154
final VariationDiffParseOptions options
151155
) throws IOException, DiffParseException {
152156
return new VariationDiffParser(
153157
options
154-
).parse(() -> {
158+
).parse(source, () -> {
155159
String line = fullDiff.readLine();
156160
if (line == null) {
157161
return null;
@@ -162,22 +166,24 @@ public static VariationDiff<DiffLinesLabel> createVariationDiff(
162166

163167
/**
164168
* Parses a variation tree from a source file.
165-
* This method is similar to {@link #createVariationDiff(BufferedReader, VariationDiffParseOptions)}
169+
* This method is similar to {@link #createVariationDiff(BufferedReader, Source, VariationDiffParseOptions)}
166170
* but acts as if all lines were unmodified.
167171
*
168172
* @param file The source code file (not a diff) to be parsed.
173+
* @param source the {@link Source} of {@code file}
169174
* @param options {@link VariationDiffParseOptions} for the parsing process.
170175
* @return A parsed {@link VariationDiff}.
171176
* @throws IOException iff {@code file} throws an {@code IOException}
172177
* @throws DiffParseException if an error in the diff or macro syntax is detected
173178
*/
174179
public static VariationDiff<DiffLinesLabel> createVariationTree(
175180
BufferedReader file,
181+
Source source,
176182
VariationDiffParseOptions options
177183
) throws IOException, DiffParseException {
178184
return new VariationDiffParser(
179185
options
180-
).parse(() -> {
186+
).parse(source, () -> {
181187
String line = file.readLine();
182188
if (line == null) {
183189
return null;
@@ -198,7 +204,7 @@ public static VariationDiff<DiffLinesLabel> createVariationTree(
198204
/**
199205
* Initializes the parse state.
200206
*
201-
* @see #createVariationDiff(BufferedReader, VariationDiffParseOptions)
207+
* @see #createVariationDiff(BufferedReader, Source, VariationDiffParseOptions)
202208
*/
203209
private VariationDiffParser(
204210
VariationDiffParseOptions options
@@ -209,6 +215,7 @@ private VariationDiffParser(
209215
/**
210216
* Parses the line diff supplied by {@code lines}.
211217
*
218+
* @param source the {@link Source} of {@code lines}
212219
* @param lines should supply successive lines of the diff to be parsed, or {@code null} if
213220
* there are no more lines to be parsed.
214221
* @return the parsed {@code VariationDiff}
@@ -217,6 +224,7 @@ private VariationDiffParser(
217224
* is detected
218225
*/
219226
private VariationDiff<DiffLinesLabel> parse(
227+
Source source,
220228
FailableSupplier<DiffLine, IOException> lines
221229
) throws IOException, DiffParseException {
222230
DiffNode<DiffLinesLabel> root = DiffNode.createRoot(new DiffLinesLabel());
@@ -297,7 +305,7 @@ private VariationDiff<DiffLinesLabel> parse(
297305
);
298306
}
299307

300-
return new VariationDiff<>(root);
308+
return new VariationDiff<>(root, source);
301309
}
302310

303311
/**

src/main/java/org/variantsync/diffdetective/variation/diff/view/DiffView.java

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import org.variantsync.diffdetective.experiments.views.Main;
77
import org.variantsync.diffdetective.util.Assert;
88
import org.variantsync.diffdetective.util.CollectionUtils;
9+
import org.variantsync.diffdetective.util.Source;
910
import org.variantsync.diffdetective.variation.DiffLinesLabel;
1011
import org.variantsync.diffdetective.variation.Label;
1112
import org.variantsync.diffdetective.variation.diff.*;
@@ -72,12 +73,19 @@ public static <L extends Label> BiPredicate<Time, Projection<L>> computeWhenNode
7273
private static <L extends Label> VariationDiff<DiffLinesLabel> naive(final VariationDiff<L> d, final Relevance rho, final String[] projectionViewText) throws IOException, DiffParseException {
7374
final VariationDiff<DiffLinesLabel> view;
7475
try {
75-
view = JGitDiff.diff(projectionViewText[0], projectionViewText[1], DiffAlgorithm.SupportedAlgorithm.MYERS, Main.VARIATION_DIFF_PARSE_OPTIONS);
76+
view = JGitDiff.diff(
77+
projectionViewText[0],
78+
projectionViewText[1],
79+
Source.Unknown, // overridden below
80+
Source.Unknown, // overridden below
81+
DiffAlgorithm.SupportedAlgorithm.MYERS,
82+
Main.VARIATION_DIFF_PARSE_OPTIONS
83+
);
7684
} catch (DiffParseException e) {
7785
Logger.error("Could not parse diff obtained with query {} at {}", d.getSource(), rho);
7886
throw e;
7987
}
80-
view.setSource(new ViewSource<>(d, rho));
88+
view.setSource(new ViewSource<>(d, rho, "naive"));
8189

8290
return view;
8391
}
@@ -153,6 +161,7 @@ public static <L extends Label> VariationDiff<L> badgood(final VariationDiff<L>
153161

154162
// unify
155163
final VariationDiff<L> goodDiff = badDiff.toGood();
164+
goodDiff.setSource(new ViewSource<>(d, rho, "badgood"));
156165
goodDiff.assertConsistency();
157166
return goodDiff;
158167
}
@@ -261,7 +270,7 @@ record Edge<L extends Label>(DiffNode<L> childCopy, DiffNode<L> parentInD, Time
261270

262271
// Step 4: Build return value
263272
Assert.assertNotNull(rootCopy[0]);
264-
return new VariationDiff<>(rootCopy[0], new ViewSource<>(d, rho));
273+
return new VariationDiff<>(rootCopy[0], new ViewSource<>(d, rho, "optimized"));
265274
}
266275

267276
/**

src/main/java/org/variantsync/diffdetective/variation/diff/view/ViewSource.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,10 @@
1313
* @param diff The original variation diff on which the variation diff with this source is a view on.
1414
* @param relevance The relevance predicate that was used to create the view.
1515
*/
16-
public record ViewSource<L extends Label>(VariationDiff<L> diff, Relevance relevance) implements Source {
16+
public record ViewSource<L extends Label>(VariationDiff<L> diff, Relevance relevance, String method) implements Source {
1717
@Override
1818
public String getSourceExplanation() {
19-
return "View";
19+
return "view";
2020
}
2121

2222
@Override

0 commit comments

Comments
 (0)