Skip to content

Commit ab61ffe

Browse files
feat: make the extraction of a code matching optional
1 parent 4624ef9 commit ab61ffe

9 files changed

Lines changed: 37 additions & 15 deletions

File tree

docker-resources/custom.properties

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ extraction.print-enabled=false
1212
# Should changes to only the presence condition of source code be ignored? If set to true, VEVOS will only consider
1313
# the old presence condition for a line of source code that has otherwise not been changed
1414
extraction.ignore-pc-changes=true
15+
# Should the extraction also extract a matching of the code before and after a commit's changes?
16+
extraction.extract-code-matching=false
1517
# Path in which diff detective keeps its intermediate results. No need to change this.
1618
diff-detective.output-dir=ground-truth/dd
1719
# Path in which diff detective keeps the repos. No need to change this.

docker-resources/verification.properties

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ extraction.print-enabled=false
88
# Should changes to only the presence condition of source code be ignored? If set to true, VEVOS will only consider
99
# the old presence condition for a line of source code that has otherwise not been changed
1010
extraction.ignore-pc-changes=true
11+
# Should the extraction also extract a matching of the code before and after a commit's changes?
12+
extraction.extract-code-matching=false
1113
# Path in which diff detective keeps its intermediate results. No need to change this.
1214
diff-detective.output-dir=ground-truth/dd
1315
# Path in which diff detective keeps the repos. No need to change this.

docker-resources/without_linux.properties

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ extraction.print-enabled=false
88
# Should changes to only the presence condition of source code be ignored? If set to true, VEVOS will only consider
99
# the old presence condition for a line of source code that has otherwise not been changed
1010
extraction.ignore-pc-changes=true
11+
# Should the extraction also extract a matching of the code before and after a commit's changes?
12+
extraction.extract-code-matching=false
1113
# Path in which diff detective keeps its intermediate results. No need to change this.
1214
diff-detective.output-dir=ground-truth/dd
1315
# Path in which diff detective keeps the repos. No need to change this.

src/main/java/org/variantsync/vevos/extraction/FastExtraction.java

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ public class FastExtraction {
3434
= "diff-detective.num-threads";
3535
public static final String BATCH_SIZE
3636
= "diff-detective.batch-size";
37+
private static final String EXTRACT_CODE_MATCHING
38+
= "extraction.extract-code-matching";
3739
private final Properties properties;
3840

3941
public FastExtraction(Properties properties) {
@@ -158,7 +160,9 @@ private BiConsumer<Repository, Path> buildRunner() {
158160
Path resultsRoot = extractionDir.resolve(repo.getRepositoryName());
159161
boolean printEnabled = Boolean.parseBoolean(this.properties.getProperty(PRINT_ENABLED));
160162

161-
FastPCAnalysis analysis = new FastPCAnalysis(printEnabled, resultsRoot, Boolean.parseBoolean(properties.getProperty(IGNORE_PC_CHANGES)));
163+
FastPCAnalysis analysis = new FastPCAnalysis(printEnabled, resultsRoot,
164+
Boolean.parseBoolean(properties.getProperty(IGNORE_PC_CHANGES)),
165+
Boolean.parseBoolean(properties.getProperty(EXTRACT_CODE_MATCHING)));
162166
final BiFunction<Repository, Path, Analysis> AnalysisFactory = (r, out) -> new Analysis(
163167
"PCAnalysis",
164168
List.of(
@@ -169,14 +173,14 @@ private BiConsumer<Repository, Path> buildRunner() {
169173
);
170174
final int availableProcessors;
171175
String numThreads = this.properties.getProperty(NUM_THREADS);
172-
if (numThreads == null || numThreads.trim().equals("") || numThreads.trim().equals("0")) {
176+
if (numThreads == null || numThreads.trim().isEmpty() || numThreads.trim().equals("0")) {
173177
availableProcessors = Runtime.getRuntime().availableProcessors();
174178
} else {
175179
availableProcessors = Integer.parseInt(numThreads);
176180
}
177181
final int batchSize;
178182
String configuredSize = this.properties.getProperty(BATCH_SIZE);
179-
if (configuredSize == null || configuredSize.trim().equals("") || configuredSize.trim().equals("0")) {
183+
if (configuredSize == null || configuredSize.trim().isEmpty() || configuredSize.trim().equals("0")) {
180184
batchSize = 256;
181185
} else {
182186
batchSize = Integer.parseInt(configuredSize);

src/main/java/org/variantsync/vevos/extraction/FastPCAnalysis.java

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -37,13 +37,15 @@ public class FastPCAnalysis implements Analysis.Hooks, PCAnalysis {
3737

3838
private final boolean ignorePCChanges;
3939
private final Path resultsRoot;
40+
private final boolean extractCodeMatching;
4041

41-
public FastPCAnalysis(boolean printEnabled, Path resultsRoot, boolean ignorePCChanges) {
42+
public FastPCAnalysis(boolean printEnabled, Path resultsRoot, boolean ignorePCChanges, boolean extractCodeMatching) {
4243
this.printEnabled = printEnabled;
4344
this.resultsRoot = resultsRoot;
4445
this.threadBatches = new ConcurrentHashMap<>();
4546
this.failedCommits = ConcurrentHashMap.newKeySet();
4647
this.ignorePCChanges = ignorePCChanges;
48+
this.extractCodeMatching = extractCodeMatching;
4749
try {
4850
Files.createDirectories(resultsRoot);
4951
} catch (IOException e) {
@@ -143,18 +145,21 @@ public void endCommit(Analysis analysis) {
143145

144146
String pcAsCSVBefore = groundTruthBefore.asPcCsvString();
145147
String pcAsCSVAfter = groundTruthAfter.asPcCsvString();
146-
String matchingAsCSVBefore = groundTruthBefore.asMatchingCsvString();
147-
String matchingAsCSVAfter = groundTruthAfter.asMatchingCsvString();
148148

149149
Serde.writeToFile(commitSaveDir.resolve(CODE_VARIABILITY_CSV_BEFORE),
150150
pcAsCSVBefore);
151151
Serde.writeToFile(commitSaveDir.resolve(CODE_VARIABILITY_CSV_AFTER),
152152
pcAsCSVAfter);
153153

154-
Serde.writeToFile(commitSaveDir.resolve(CODE_MATCHING_CSV_BEFORE),
155-
matchingAsCSVBefore);
156-
Serde.writeToFile(commitSaveDir.resolve(CODE_MATCHING_CSV_AFTER),
157-
matchingAsCSVAfter);
154+
if (extractCodeMatching) {
155+
String matchingAsCSVBefore = groundTruthBefore.asMatchingCsvString();
156+
String matchingAsCSVAfter = groundTruthAfter.asMatchingCsvString();
157+
158+
Serde.writeToFile(commitSaveDir.resolve(CODE_MATCHING_CSV_BEFORE),
159+
matchingAsCSVBefore);
160+
Serde.writeToFile(commitSaveDir.resolve(CODE_MATCHING_CSV_AFTER),
161+
matchingAsCSVAfter);
162+
}
158163

159164
Serde.writeToFile(commitSaveDir.resolve(COMMIT_MESSAGE_FILE), commit.getFullMessage());
160165

src/main/java/org/variantsync/vevos/extraction/FullExtraction.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,14 @@
2323
import java.util.function.BiConsumer;
2424
import java.util.function.BiFunction;
2525

26+
2627
public class FullExtraction {
2728
public static final String PRINT_ENABLED
2829
= "extraction.print-enabled";
2930
public static final String IGNORE_PC_CHANGES
3031
= "extraction.ignore-pc-changes";
32+
public static final String EXTRACT_CODE_MATCHING
33+
= "extraction.extract-code-matching";
3134
public static final String GT_SAVE_DIR
3235
= "extraction.gt-save-dir";
3336
public static final String DATASET_FILE
@@ -176,7 +179,7 @@ public static void quitOnError() {
176179

177180
private BiConsumer<Repository, Path> buildRunner(String diffDetectiveCache) {
178181
return (repo, repoOutputDir) -> {
179-
FullPCAnalysis analysis = new FullPCAnalysis(Path.of(diffDetectiveCache), Boolean.parseBoolean(properties.getProperty(IGNORE_PC_CHANGES)));
182+
FullPCAnalysis analysis = new FullPCAnalysis(Path.of(diffDetectiveCache), Boolean.parseBoolean(properties.getProperty(IGNORE_PC_CHANGES)), Boolean.parseBoolean(properties.getProperty(EXTRACT_CODE_MATCHING)));
180183
final BiFunction<Repository, Path, Analysis> AnalysisFactory = (r, out) -> new Analysis(
181184
"PCAnalysis",
182185
List.of(

src/main/java/org/variantsync/vevos/extraction/FullPCAnalysis.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,14 @@ public class FullPCAnalysis implements Analysis.Hooks, PCAnalysis {
2323
private final Hashtable<String, GroundTruth> groundTruthMap;
2424
private final Path diffDetectiveCache;
2525
private final boolean ignorePCChanges;
26+
// TODO: extract code matching during full analysis (the flag below is currently only stored and has no effect here)
27+
private final boolean extractCodeMatching;
2628

27-
public FullPCAnalysis(Path diffDetectiveCache, boolean ignorePCChanges) {
29+
public FullPCAnalysis(Path diffDetectiveCache, boolean ignorePCChanges, boolean extractCodeMatching) {
2830
this.groundTruthMap = new Hashtable<>();
2931
this.diffDetectiveCache = diffDetectiveCache;
3032
this.ignorePCChanges = ignorePCChanges;
33+
this.extractCodeMatching = extractCodeMatching;
3134
}
3235

3336
@Override

src/main/java/org/variantsync/vevos/extraction/PCAnalysis.java

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ public interface PCAnalysis {
1717
* @param fileGT The ground truth that is modified by analyzing the node
1818
* @param node The node that is to be analyzed
1919
* @param time Whether we should handle the node as before or after the edit
20-
* @param ignorePCChanges
20+
* @param ignorePCChanges Whether changes to only the presence condition should be ignored
2121
*/
2222
static void analyzeNode(FileGT.Mutable fileGT, DiffNode<DiffLinesLabel> node, Time time, boolean ignorePCChanges) throws MatchingException {
2323
if (time == Time.BEFORE && node.diffType == DiffType.ADD) {
@@ -65,9 +65,8 @@ static void analyzeNode(FileGT.Mutable fileGT, DiffNode<DiffLinesLabel> node, Ti
6565
// Grow the root mapping
6666
fileGT.growIfRequired(toLine);
6767

68-
// Insert the matchings
68+
// Insert the matching for artifacts
6969
if (!node.isAnnotation()) {
70-
// set the matching
7170
fileGT.setMatching(currentRange, counterpartRange);
7271
}
7372

src/main/resources/extraction.properties

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
#####################################
44
extraction.print-enabled=false
55
extraction.ignore-pc-changes=true
6+
# Should the extraction also extract a matching of the code before and after a commit's changes?
7+
extraction.extract-code-matching=false
68
extraction.gt-save-dir=/home/alex/data/EXTRACTION/ground-truth
79
#diff-detective.dataset-file = src/main/resources/debug.md
810
diff-detective.dataset-file=src/main/resources/verification.md

0 commit comments

Comments
 (0)