Skip to content

Commit cc6593e

Browse files
feat: exchange extraction of before and after formulas with the choice to simply ignore changes in PC
1 parent 446e1c3 commit cc6593e

12 files changed

Lines changed: 144 additions & 180 deletions

docker-resources/custom.properties

Lines changed: 10 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,24 @@
1-
21
#####################################
32
# Common Extractor Parameters #
43
#####################################
5-
64
# Define the path to the dataset here
75
# You can also delete or add lines to the custom.md
8-
diff-detective.dataset-file = custom.md
9-
6+
diff-detective.dataset-file=custom.md
107
######
118
# Parameters that probably should not be changed
129
######
13-
1410
# Print the ground truth for each commit. Only activate this for the debugging of small datasets.
15-
extraction.print-enabled = false
16-
11+
extraction.print-enabled=false
12+
# Should changes that affect only the presence condition of a line of source code be ignored? If set to true, VEVOS will only consider
13+
# the old presence condition for a line of source code that has otherwise not been changed
14+
extraction.ignore-pc-changes=true
1715
# Path in which diff detective keeps its intermediate results. No need to change this.
18-
diff-detective.output-dir = ground-truth/dd
19-
16+
diff-detective.output-dir=ground-truth/dd
2017
# Path in which diff detective keeps the repos. No need to change this.
21-
diff-detective.repo-storage-dir = ground-truth/REPOS
22-
18+
diff-detective.repo-storage-dir=ground-truth/REPOS
2319
# Path to which the ground truth is saved. Do NOT change this without knowing how this affects the Docker file system interface
24-
extraction.gt-save-dir = ground-truth
25-
20+
extraction.gt-save-dir=ground-truth
2621
# Number of threads to use
27-
diff-detective.num-threads = 128
28-
22+
diff-detective.num-threads=128
2923
# Number of commits to process in a single batch by one thread
30-
diff-detective.batch-size = 8
24+
diff-detective.batch-size=8
Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,18 @@
1-
21
#####################################
32
# Common Extractor Parameters #
43
#####################################
5-
64
# Path to the dataset file
7-
diff-detective.dataset-file = verification.md
8-
5+
diff-detective.dataset-file=verification.md
96
# Print the ground truth for each commit. Only activate this for the debugging of small datasets.
10-
extraction.print-enabled = false
11-
7+
extraction.print-enabled=false
8+
# Should changes that affect only the presence condition of a line of source code be ignored? If set to true, VEVOS will only consider
9+
# the old presence condition for a line of source code that has otherwise not been changed
10+
extraction.ignore-pc-changes=true
1211
# Path in which diff detective keeps its intermediate results. No need to change this.
13-
diff-detective.output-dir = ground-truth/dd
14-
12+
diff-detective.output-dir=ground-truth/dd
1513
# Path in which diff detective keeps the repos. No need to change this.
16-
diff-detective.repo-storage-dir = DiffDetectiveMining
17-
14+
diff-detective.repo-storage-dir=DiffDetectiveMining
1815
# Path to which the ground truth is saved. Do NOT change this without knowing how this affects the Docker file system interface
19-
extraction.gt-save-dir = ground-truth
20-
16+
extraction.gt-save-dir=ground-truth
2117
# Number of commits to process in a single batch by one thread
22-
diff-detective.batch-size = 64
18+
diff-detective.batch-size=64
Lines changed: 10 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,20 @@
1-
21
#####################################
32
# Common Extractor Parameters #
43
#####################################
5-
64
# Path to the dataset file
7-
diff-detective.dataset-file = without_linux.md
8-
5+
diff-detective.dataset-file=without_linux.md
96
# Print the ground truth for each commit. Only activate this for the debugging of small datasets.
10-
extraction.print-enabled = false
11-
7+
extraction.print-enabled=false
8+
# Should changes that affect only the presence condition of a line of source code be ignored? If set to true, VEVOS will only consider
9+
# the old presence condition for a line of source code that has otherwise not been changed
10+
extraction.ignore-pc-changes=true
1211
# Path in which diff detective keeps its intermediate results. No need to change this.
13-
diff-detective.output-dir = ground-truth/dd
14-
12+
diff-detective.output-dir=ground-truth/dd
1513
# Path in which diff detective keeps the repos. No need to change this.
16-
diff-detective.repo-storage-dir = ground-truth/REPOS
17-
14+
diff-detective.repo-storage-dir=ground-truth/REPOS
1815
# Path to which the ground truth is saved. Do NOT change this without knowing how this affects the Docker file system interface
19-
extraction.gt-save-dir = ground-truth
20-
16+
extraction.gt-save-dir=ground-truth
2117
# Number of threads to use
22-
diff-detective.num-threads = 128
23-
18+
diff-detective.num-threads=128
2419
# Number of commits to process in a single batch by one thread
25-
diff-detective.batch-size = 8
20+
diff-detective.batch-size=8

src/main/java/org/variantsync/vevos/extraction/BlockAnnotation.java

Lines changed: 17 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -19,22 +19,17 @@ public final class BlockAnnotation implements Serializable {
1919
private static final Pattern variableEnd = Pattern.compile("}");
2020
private static final Pattern quotation = Pattern.compile("\"");
2121
private static final Pattern semicolon = Pattern.compile(";");
22-
private final FeatureMapping featureMappingBefore;
23-
private final PresenceCondition presenceConditionBefore;
24-
private final FeatureMapping featureMappingAfter;
25-
private final PresenceCondition presenceConditionAfter;
22+
private final FeatureMapping featureMapping;
23+
private final PresenceCondition presenceCondition;
2624
private int lineStartInclusive;
2725
private int lineEndInclusive;
2826

2927
public BlockAnnotation(int lineStartInclusive, int lineEndInclusive,
30-
FeatureMapping featureMappingBefore, PresenceCondition presenceConditionBefore,
31-
FeatureMapping featureMappingAfter, PresenceCondition presenceConditionAfter) {
28+
FeatureMapping featureMapping, PresenceCondition presenceCondition) {
3229
this.lineStartInclusive = lineStartInclusive;
3330
this.lineEndInclusive = lineEndInclusive;
34-
this.featureMappingBefore = featureMappingBefore;
35-
this.presenceConditionBefore = presenceConditionBefore;
36-
this.featureMappingAfter = featureMappingAfter;
37-
this.presenceConditionAfter = presenceConditionAfter;
31+
this.featureMapping = featureMapping;
32+
this.presenceCondition = presenceCondition;
3833
}
3934

4035
public void setLineStartInclusive(int lineStartInclusive) {
@@ -60,47 +55,37 @@ public boolean equals(Object o) {
6055
BlockAnnotation that = (BlockAnnotation) o;
6156
return lineStartInclusive == that.lineStartInclusive
6257
&& lineEndInclusive == that.lineEndInclusive
63-
&& Objects.equals(featureMappingBefore, that.featureMappingBefore)
64-
&& Objects.equals(presenceConditionBefore, that.presenceConditionBefore)
65-
&& Objects.equals(featureMappingAfter, that.featureMappingAfter)
66-
&& Objects.equals(presenceConditionAfter, that.presenceConditionAfter);
58+
&& Objects.equals(featureMapping, that.featureMapping)
59+
&& Objects.equals(presenceCondition, that.presenceCondition);
6760
}
6861

6962
@Override
7063
public int hashCode() {
71-
return Objects.hash(featureMappingBefore, presenceConditionBefore, featureMappingAfter, presenceConditionAfter, lineStartInclusive, lineEndInclusive);
64+
return Objects.hash(featureMapping, presenceCondition, lineStartInclusive, lineEndInclusive);
7265
}
7366

7467
public boolean annotationEquals(BlockAnnotation other) {
75-
return this.featureMappingBefore.equals(other.featureMappingBefore)
76-
&& this.featureMappingAfter.equals(other.featureMappingAfter)
77-
&& this.presenceConditionBefore.equals(other.presenceConditionBefore)
78-
&& this.presenceConditionAfter.equals(other.presenceConditionAfter);
68+
return this.featureMapping.equals(other.featureMapping)
69+
&& this.presenceCondition.equals(other.presenceCondition);
7970
}
8071

8172
public boolean annotationEquals(LineAnnotation other) {
82-
return this.featureMappingBefore.equals(other.featureMappingBefore())
83-
&& this.featureMappingAfter.equals(other.featureMappingAfter())
84-
&& this.presenceConditionBefore.equals(other.presenceConditionBefore())
85-
&& this.presenceConditionAfter.equals(other.presenceConditionAfter());
73+
return this.featureMapping.equals(other.featureMapping())
74+
&& this.presenceCondition.equals(other.presenceCondition());
8675
}
8776

8877
@Override
8978
public String toString() {
90-
return "BlockAnnotation[" +
79+
return "[" +
9180
"lineStartInclusive=" + lineStartInclusive + ", " +
9281
"lineEndExclusive=" + lineEndInclusive + ", " +
93-
"featureMappingBefore=" + featureMappingBefore + ", " +
94-
"presenceConditionBefore=" + presenceConditionBefore + ", " +
95-
"featureMappingAfter=" + featureMappingAfter + ", " +
96-
"presenceConditionAfter=" + presenceConditionAfter + ']';
82+
"featureMapping=" + featureMapping + ", " +
83+
"presenceCondition=" + presenceCondition + ']';
9784
}
9885

9986
public String asCSVLine() {
100-
return "%s;%s;%s;%s;%d;%d".formatted(normalizeCondition(this.featureMappingBefore.mapping()),
101-
normalizeCondition(this.presenceConditionBefore.condition()),
102-
normalizeCondition(this.featureMappingAfter.mapping()),
103-
normalizeCondition(this.presenceConditionAfter.condition()),
87+
return "%s;%s;%d;%d".formatted(normalizeCondition(this.featureMapping.mapping()),
88+
normalizeCondition(this.presenceCondition.condition()),
10489
this.lineStartInclusive,
10590
this.lineEndInclusive);
10691
}

src/main/java/org/variantsync/vevos/extraction/FastExtraction.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,5 @@
11
package org.variantsync.vevos.extraction;
22

3-
import org.eclipse.jgit.api.Git;
4-
import org.eclipse.jgit.api.errors.GitAPIException;
5-
import org.eclipse.jgit.revwalk.RevCommit;
63
import org.tinylog.Logger;
74
import org.variantsync.diffdetective.AnalysisRunner;
85
import org.variantsync.diffdetective.analysis.Analysis;
@@ -15,7 +12,8 @@
1512
import java.io.FileInputStream;
1613
import java.io.IOException;
1714
import java.nio.file.Path;
18-
import java.util.*;
15+
import java.util.List;
16+
import java.util.Properties;
1917
import java.util.function.BiConsumer;
2018
import java.util.function.BiFunction;
2119

@@ -24,6 +22,8 @@ public class FastExtraction {
2422
= "extraction.print-enabled";
2523
public static final String GT_SAVE_DIR
2624
= "extraction.gt-save-dir";
25+
public static final String IGNORE_PC_CHANGES
26+
= "extraction.ignore-pc-changes";
2727
public static final String DATASET_FILE
2828
= "diff-detective.dataset-file";
2929
public static final String DD_OUTPUT_DIR
@@ -158,7 +158,7 @@ private BiConsumer<Repository, Path> buildRunner() {
158158
Path resultsRoot = extractionDir.resolve(repo.getRepositoryName());
159159
boolean printEnabled = Boolean.parseBoolean(this.properties.getProperty(PRINT_ENABLED));
160160

161-
FastPCAnalysis analysis = new FastPCAnalysis(printEnabled, resultsRoot);
161+
FastPCAnalysis analysis = new FastPCAnalysis(printEnabled, resultsRoot, Boolean.parseBoolean(properties.getProperty(IGNORE_PC_CHANGES)));
162162
final BiFunction<Repository, Path, Analysis> AnalysisFactory = (r, out) -> new Analysis(
163163
"PCAnalysis",
164164
List.of(

src/main/java/org/variantsync/vevos/extraction/FastPCAnalysis.java

Lines changed: 34 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -34,13 +34,16 @@ public class FastPCAnalysis implements Analysis.Hooks, PCAnalysis {
3434
private final ConcurrentHashMap<Long, ThreadBatch> threadBatches;
3535
private final Set<String> failedCommits;
3636
private final boolean printEnabled;
37+
38+
private final boolean ignorePCChanges;
3739
private final Path resultsRoot;
3840

39-
public FastPCAnalysis(boolean printEnabled, Path resultsRoot) {
41+
public FastPCAnalysis(boolean printEnabled, Path resultsRoot, boolean ignorePCChanges) {
4042
this.printEnabled = printEnabled;
4143
this.resultsRoot = resultsRoot;
4244
this.threadBatches = new ConcurrentHashMap<>();
4345
this.failedCommits = ConcurrentHashMap.newKeySet();
46+
this.ignorePCChanges = ignorePCChanges;
4447
try {
4548
Files.createDirectories(resultsRoot);
4649
} catch (IOException e) {
@@ -49,9 +52,19 @@ public FastPCAnalysis(boolean printEnabled, Path resultsRoot) {
4952
}
5053
}
5154

52-
private record ThreadBatch(HashMap<String, GroundTruth> groundTruthMapBefore,
53-
HashMap<String, GroundTruth> groundTruthMapAfter) {
54-
55+
/**
56+
* Prints the given ground truth to console.
57+
*
58+
* @param groundTruth GT to print
59+
* @param commitName The id of the commit for which the GT has been calculated
60+
*/
61+
private static void print(GroundTruth groundTruth, String commitName) {
62+
System.out.println();
63+
System.out.printf("***************** %s ******************", commitName);
64+
System.out.println();
65+
for (String file : groundTruth.fileGTs().keySet()) {
66+
System.out.println(groundTruth.get(file));
67+
}
5568
}
5669

5770
@Override
@@ -77,7 +90,7 @@ public void endCommit(Analysis analysis) {
7790
FastPCAnalysis.numProcessed++;
7891
if (FastPCAnalysis.numProcessed % 1_000 == 0) {
7992
Logger.info("End Processing of Commit ({}): {}", FastPCAnalysis.numProcessed,
80-
commit.name());
93+
commit.name());
8194
}
8295
}
8396

@@ -94,9 +107,9 @@ public void endCommit(Analysis analysis) {
94107

95108
// Complete all new or updated file ground truths
96109
GroundTruth groundTruthBefore = groundTruthMapBefore.getOrDefault(commit.getName(),
97-
new GroundTruth(new HashMap<>(), new HashSet<>()));
110+
new GroundTruth(new HashMap<>(), new HashSet<>()));
98111
GroundTruth groundTruthAfter = groundTruthMapAfter.getOrDefault(commit.getName(),
99-
new GroundTruth(new HashMap<>(), new HashSet<>()));
112+
new GroundTruth(new HashMap<>(), new HashSet<>()));
100113
if (groundTruthBefore.isEmpty() && groundTruthAfter.isEmpty()) {
101114
// Return early and do not save any data if both ground truths are empty.
102115
// In this case, no changes have been analyzed, and we are not interested in the commit's
@@ -134,7 +147,7 @@ public void endCommit(Analysis analysis) {
134147
String matchingAsCSVAfter = groundTruthAfter.asMatchingCsvString();
135148

136149
Serde.writeToFile(commitSaveDir.resolve(CODE_VARIABILITY_CSV_BEFORE),
137-
pcAsCSVBefore);
150+
pcAsCSVBefore);
138151
Serde.writeToFile(commitSaveDir.resolve(CODE_VARIABILITY_CSV_AFTER),
139152
pcAsCSVAfter);
140153

@@ -146,10 +159,10 @@ public void endCommit(Analysis analysis) {
146159
Serde.writeToFile(commitSaveDir.resolve(COMMIT_MESSAGE_FILE), commit.getFullMessage());
147160

148161
Optional<String> parentIds = Arrays.stream(commit.getParents()).map(RevCommit::getName)
149-
.reduce((s, s2) -> s + " " + s2);
162+
.reduce((s, s2) -> s + " " + s2);
150163
parentIds.ifPresentOrElse(
151-
s -> Serde.writeToFile(commitSaveDir.resolve(COMMIT_PARENTS_FILE), s),
152-
() -> Serde.writeToFile(commitSaveDir.resolve(COMMIT_PARENTS_FILE), ""));
164+
s -> Serde.writeToFile(commitSaveDir.resolve(COMMIT_PARENTS_FILE), s),
165+
() -> Serde.writeToFile(commitSaveDir.resolve(COMMIT_PARENTS_FILE), ""));
153166

154167
synchronized (FastPCAnalysis.class) {
155168
Serde.appendText(resultsRoot.resolve(SUCCESS_COMMIT_FILE), commit.getName() + "\n");
@@ -186,11 +199,11 @@ public boolean analyzeVariationDiff(Analysis analysis) {
186199
HashMap<String, GroundTruth> groundTruthMapAfter = currentBatch.groundTruthMapAfter;
187200

188201
GroundTruth groundTruthBefore = groundTruthMapBefore.computeIfAbsent(
189-
analysis.getCurrentCommit().getName(),
190-
commit -> new GroundTruth(new HashMap<>(), new HashSet<>()));
202+
analysis.getCurrentCommit().getName(),
203+
commit -> new GroundTruth(new HashMap<>(), new HashSet<>()));
191204
GroundTruth groundTruthAfter = groundTruthMapAfter.computeIfAbsent(
192-
analysis.getCurrentCommit().getName(),
193-
commit -> new GroundTruth(new HashMap<>(), new HashSet<>()));
205+
analysis.getCurrentCommit().getName(),
206+
commit -> new GroundTruth(new HashMap<>(), new HashSet<>()));
194207
// Show.diff(analysis.getCurrentVariationDiff()).showAndAwait();
195208
// Get the ground truth for this file
196209
String fileNameBefore = analysis.getCurrentPatch().getFileName(Time.BEFORE);
@@ -206,26 +219,26 @@ public boolean analyzeVariationDiff(Analysis analysis) {
206219
fileGTBefore = null;
207220
} else {
208221
fileGTBefore = (FileGT.Mutable) groundTruthBefore.computeIfAbsent(fileNameBefore,
209-
k -> new FileGT.Mutable(fileNameBefore));
222+
k -> new FileGT.Mutable(fileNameBefore));
210223
}
211224
final FileGT.Mutable fileGTAfter;
212225
if (changeType == DiffEntry.ChangeType.DELETE) {
213226
fileGTAfter = null;
214227
} else {
215228
fileGTAfter = (FileGT.Mutable) groundTruthAfter.computeIfAbsent(fileNameAfter,
216-
k -> new FileGT.Mutable(fileNameAfter));
229+
k -> new FileGT.Mutable(fileNameAfter));
217230
}
218231

219232
analysis.getCurrentVariationDiff().forAll(node -> {
220233
try {
221234
// Logger.debug("Node: {}", node);
222235
// If the file is not completely new, we consider the before case
223236
if (!(changeType == DiffEntry.ChangeType.ADD)) {
224-
PCAnalysis.analyzeNode(fileGTBefore, node, Time.BEFORE);
237+
PCAnalysis.analyzeNode(fileGTBefore, node, Time.BEFORE, ignorePCChanges);
225238
}
226239
if (!(changeType == DiffEntry.ChangeType.DELETE)) {
227240
// If the file has not been deleted, we consider the after case
228-
PCAnalysis.analyzeNode(fileGTAfter, node, Time.AFTER);
241+
PCAnalysis.analyzeNode(fileGTAfter, node, Time.AFTER, ignorePCChanges);
229242
}
230243
} catch (MatchingException e) {
231244
Logger.error("unhandled exception while analyzing {} -> {} for commit {}.",
@@ -240,20 +253,9 @@ public boolean analyzeVariationDiff(Analysis analysis) {
240253
return true;
241254
}
242255

256+
private record ThreadBatch(HashMap<String, GroundTruth> groundTruthMapBefore,
257+
HashMap<String, GroundTruth> groundTruthMapAfter) {
243258

244-
/**
245-
* Prints the given ground truth to console.
246-
*
247-
* @param groundTruth GT to print
248-
* @param commitName The id of the commit for which the GT has been calculated
249-
*/
250-
private static void print(GroundTruth groundTruth, String commitName) {
251-
System.out.println();
252-
System.out.printf("***************** %s ******************", commitName);
253-
System.out.println();
254-
for (String file : groundTruth.fileGTs().keySet()) {
255-
System.out.println(groundTruth.get(file));
256-
}
257259
}
258260

259261
}

0 commit comments

Comments
 (0)