Skip to content

Commit 1f91d72

Browse files
refactor: extract common code to reduce redundancies
1 parent bc31cfe commit 1f91d72

7 files changed

Lines changed: 101 additions & 128 deletions

File tree

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
package org.variantsync.vevos.extraction;
2+
3+
import org.variantsync.diffdetective.AnalysisRunner;
4+
import org.variantsync.diffdetective.datasets.PatchDiffParseOptions;
5+
import org.variantsync.diffdetective.diff.git.DiffFilter;
6+
import org.variantsync.diffdetective.variation.diff.parse.VariationDiffParseOptions;
7+
8+
import java.nio.file.Path;
9+
import java.util.Properties;
10+
11+
public class Config {
12+
public static final String PRINT_ENABLED
13+
= "extraction.print-enabled";
14+
public static final String GT_SAVE_DIR
15+
= "extraction.gt-save-dir";
16+
public static final String IGNORE_PC_CHANGES
17+
= "extraction.ignore-pc-changes";
18+
public static final String DATASET_FILE
19+
= "diff-detective.dataset-file";
20+
public static final String DD_OUTPUT_DIR
21+
= "diff-detective.output-dir";
22+
public static final String REPO_SAVE_DIR
23+
= "diff-detective.repo-storage-dir";
24+
public static final String NUM_THREADS
25+
= "diff-detective.num-threads";
26+
public static final String BATCH_SIZE
27+
= "diff-detective.batch-size";
28+
public static final String EXTRACT_CODE_MATCHING
29+
= "extraction.extract-code-matching";
30+
31+
/**
32+
* Options for the execution of DiffDetective
33+
*
34+
* @param properties The properties loaded by main()
35+
* @return The options instance
36+
*/
37+
public static AnalysisRunner.Options diffdetectiveOptions(Properties properties) {
38+
39+
return new AnalysisRunner.Options(
40+
Path.of(properties.getProperty(REPO_SAVE_DIR)),
41+
Path.of(properties.getProperty(DD_OUTPUT_DIR)),
42+
Path.of(properties.getProperty(DATASET_FILE)),
43+
repo -> {
44+
final PatchDiffParseOptions repoDefault = repo.getParseOptions();
45+
return new PatchDiffParseOptions(
46+
PatchDiffParseOptions.DiffStoragePolicy.DO_NOT_REMEMBER,
47+
new VariationDiffParseOptions(
48+
repoDefault.variationDiffParseOptions().annotationParser(),
49+
false,
50+
false
51+
)
52+
);
53+
},
54+
repo -> new DiffFilter.Builder()
55+
.allowMerge(true)
56+
// TODO: make configurable
57+
.allowedFileExtensions("h", "hpp", "c", "cpp")
58+
.build(),
59+
true,
60+
false
61+
);
62+
}
63+
}

src/main/java/org/variantsync/vevos/extraction/FastGroundTruthExtraction.java

Lines changed: 3 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,7 @@
33
import org.tinylog.Logger;
44
import org.variantsync.diffdetective.AnalysisRunner;
55
import org.variantsync.diffdetective.analysis.Analysis;
6-
import org.variantsync.diffdetective.datasets.PatchDiffParseOptions;
76
import org.variantsync.diffdetective.datasets.Repository;
8-
import org.variantsync.diffdetective.diff.git.DiffFilter;
9-
import org.variantsync.diffdetective.variation.diff.parse.VariationDiffParseOptions;
107
import org.variantsync.vevos.extraction.analysis.FastVariabilityAnalysis;
118

129
import java.io.File;
@@ -18,25 +15,9 @@
1815
import java.util.function.BiConsumer;
1916
import java.util.function.BiFunction;
2017

18+
import static org.variantsync.vevos.extraction.Config.*;
19+
2120
public class FastGroundTruthExtraction {
22-
public static final String PRINT_ENABLED
23-
= "extraction.print-enabled";
24-
public static final String GT_SAVE_DIR
25-
= "extraction.gt-save-dir";
26-
public static final String IGNORE_PC_CHANGES
27-
= "extraction.ignore-pc-changes";
28-
public static final String DATASET_FILE
29-
= "diff-detective.dataset-file";
30-
public static final String DD_OUTPUT_DIR
31-
= "diff-detective.output-dir";
32-
public static final String REPO_SAVE_DIR
33-
= "diff-detective.repo-storage-dir";
34-
public static final String NUM_THREADS
35-
= "diff-detective.num-threads";
36-
public static final String BATCH_SIZE
37-
= "diff-detective.batch-size";
38-
private static final String EXTRACT_CODE_MATCHING
39-
= "extraction.extract-code-matching";
4021
private final Properties properties;
4122

4223
public FastGroundTruthExtraction(Properties properties) {
@@ -62,38 +43,7 @@ public static void main(String[] args) throws IOException {
6243
extraction.run(options);
6344
}
6445

65-
/**
66-
* Options for the execution of DiffDetective
67-
*
68-
* @param properties The properties loaded by main()
69-
* @return The options instance
70-
*/
71-
public static AnalysisRunner.Options diffdetectiveOptions(Properties properties) {
72-
73-
return new AnalysisRunner.Options(
74-
Path.of(properties.getProperty(REPO_SAVE_DIR)),
75-
Path.of(properties.getProperty(DD_OUTPUT_DIR)),
76-
Path.of(properties.getProperty(DATASET_FILE)),
77-
repo -> {
78-
final PatchDiffParseOptions repoDefault = repo.getParseOptions();
79-
return new PatchDiffParseOptions(
80-
PatchDiffParseOptions.DiffStoragePolicy.DO_NOT_REMEMBER,
81-
new VariationDiffParseOptions(
82-
repoDefault.variationDiffParseOptions().annotationParser(),
83-
false,
84-
false
85-
)
86-
);
87-
},
88-
repo -> new DiffFilter.Builder()
89-
.allowMerge(true)
90-
// TODO: make configurable
91-
.allowedFileExtensions("h", "hpp", "c", "cpp")
92-
.build(),
93-
true,
94-
false
95-
);
96-
}
46+
9747

9848
/**
9949
* Parses the file in which the properties are located from the arguments.

src/main/java/org/variantsync/vevos/extraction/FullGroundTruthExtraction.java

Lines changed: 13 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,7 @@
66
import org.tinylog.Logger;
77
import org.variantsync.diffdetective.AnalysisRunner;
88
import org.variantsync.diffdetective.analysis.Analysis;
9-
import org.variantsync.diffdetective.datasets.PatchDiffParseOptions;
109
import org.variantsync.diffdetective.datasets.Repository;
11-
import org.variantsync.diffdetective.diff.git.DiffFilter;
12-
import org.variantsync.diffdetective.variation.diff.parse.VariationDiffParseOptions;
1310
import org.variantsync.vevos.extraction.analysis.FullVariabilityAnalysis;
1411
import org.variantsync.vevos.extraction.gt.GroundTruth;
1512
import org.variantsync.vevos.extraction.io.Serde;
@@ -26,27 +23,11 @@
2623
import java.util.function.BiConsumer;
2724
import java.util.function.BiFunction;
2825

26+
import static org.variantsync.vevos.extraction.Config.*;
27+
import static org.variantsync.vevos.extraction.gt.GroundTruth.*;
28+
2929

3030
public class FullGroundTruthExtraction {
31-
public static final String PRINT_ENABLED
32-
= "extraction.print-enabled";
33-
public static final String IGNORE_PC_CHANGES
34-
= "extraction.ignore-pc-changes";
35-
public static final String EXTRACT_CODE_MATCHING
36-
= "extraction.extract-code-matching";
37-
public static final String GT_SAVE_DIR
38-
= "extraction.gt-save-dir";
39-
public static final String DATASET_FILE
40-
= "diff-detective.dataset-file";
41-
public static final String DD_OUTPUT_DIR
42-
= "diff-detective.output-dir";
43-
public static final String REPO_SAVE_DIR
44-
= "diff-detective.repo-storage-dir";
45-
private final static String SUCCESS_COMMIT_FILE = "SUCCESS_COMMITS.txt";
46-
private static final String COMMIT_PARENTS_FILE = "PARENTS.txt";
47-
private static final String COMMIT_MESSAGE_FILE = "MESSAGE.txt";
48-
private static final String VARIABLES_FILE = "VARIABLES.txt";
49-
private static final String CODE_VARIABILITY_CSV = "code-variability.spl.csv";
5031
private final Properties properties;
5132

5233
public FullGroundTruthExtraction(Properties properties) {
@@ -87,40 +68,7 @@ private static void print(GroundTruth groundTruth, String commitName) {
8768
}
8869
}
8970

90-
/**
91-
* Options for the execution of DiffDetective
92-
*
93-
* @param properties The properties loaded by main()
94-
* @return The options instance
95-
*/
96-
public static AnalysisRunner.Options diffdetectiveOptions(Properties properties) {
97-
98-
return new AnalysisRunner.Options(
99-
Path.of(properties.getProperty(REPO_SAVE_DIR)),
100-
Path.of(properties.getProperty(DD_OUTPUT_DIR)),
101-
Path.of(properties.getProperty(DATASET_FILE)),
102-
repo -> {
103-
final PatchDiffParseOptions repoDefault = repo.getParseOptions();
104-
return new PatchDiffParseOptions(
105-
PatchDiffParseOptions.DiffStoragePolicy.DO_NOT_REMEMBER,
106-
new VariationDiffParseOptions(
107-
repoDefault.variationDiffParseOptions().annotationParser(),
108-
false,
109-
false
110-
)
111-
);
112-
},
113-
repo -> new DiffFilter.Builder()
114-
.allowMerge(true)
115-
// TODO: make configurable
116-
.allowedFileExtensions("h", "hpp", "c", "cpp")
117-
.build(),
118-
true,
119-
false
120-
);
121-
}
122-
123-
/**
71+
/**
12472
* Parses the file in which the properties are located from the arguments.
12573
*
12674
* @param args the arguments to parse
@@ -182,7 +130,7 @@ public static void quitOnError() {
182130

183131
private BiConsumer<Repository, Path> buildRunner(String diffDetectiveCache) {
184132
return (repo, repoOutputDir) -> {
185-
FullVariabilityAnalysis analysis = new FullVariabilityAnalysis(Path.of(diffDetectiveCache), Boolean.parseBoolean(properties.getProperty(IGNORE_PC_CHANGES)), Boolean.parseBoolean(properties.getProperty(EXTRACT_CODE_MATCHING)));
133+
FullVariabilityAnalysis analysis = new FullVariabilityAnalysis(Path.of(diffDetectiveCache), Boolean.parseBoolean(properties.getProperty(IGNORE_PC_CHANGES)));
186134
final BiFunction<Repository, Path, Analysis> AnalysisFactory = (r, out) -> new Analysis(
187135
"PCAnalysis",
188136
List.of(
@@ -292,6 +240,14 @@ private void postprocess(Repository repo, ArrayList<RevCommit> commits, Executor
292240
() -> Serde.writeToFile(commitSaveDir.resolve(COMMIT_PARENTS_FILE), "")));
293241

294242
threadPool.submit(() -> Serde.appendText(resultsRoot.resolve(SUCCESS_COMMIT_FILE), commit.getName() + "\n"));
243+
244+
if (Boolean.parseBoolean(properties.getProperty(EXTRACT_CODE_MATCHING))) {
245+
String matchingAsCSV = completedGroundTruth.asMatchingCsvString();
246+
247+
threadPool.submit(() -> Serde.writeToFile(commitSaveDir.resolve(CODE_MATCHING_CSV),
248+
matchingAsCSV));
249+
}
250+
295251
if (processedCount % 1_000 == 0) {
296252
Logger.info("Saved ground truth for commit {} of {}", processedCount + 1, commits.size());
297253
}

src/main/java/org/variantsync/vevos/extraction/analysis/FastVariabilityAnalysis.java

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -19,21 +19,13 @@
1919
import java.util.*;
2020
import java.util.concurrent.ConcurrentHashMap;
2121

22+
import static org.variantsync.vevos.extraction.gt.GroundTruth.*;
23+
2224
/**
2325
* Extracts ground truths for all repositories in a dataset. The ground truth consists of presence
2426
* conditions for each file, a list of all variables, and commit metadata.
2527
*/
2628
public class FastVariabilityAnalysis implements Analysis.Hooks, VariabilityAnalysis {
27-
private final static String SUCCESS_COMMIT_FILE = "SUCCESS_COMMITS.txt";
28-
private final static String ERROR_COMMIT_FILE = "ERROR_COMMITS.txt";
29-
private final static String EMPTY_COMMIT_FILE = "EMPTY_COMMITS.txt";
30-
private static final String COMMIT_PARENTS_FILE = "PARENTS.txt";
31-
private static final String COMMIT_MESSAGE_FILE = "MESSAGE.txt";
32-
private static final String VARIABLES_FILE = "VARIABLES.txt";
33-
private static final String CODE_VARIABILITY_CSV_BEFORE = "code-variability.before.spl.csv";
34-
private static final String CODE_VARIABILITY_CSV_AFTER = "code-variability.after.spl.csv";
35-
private static final String CODE_MATCHING_CSV_BEFORE = "code-matching.before.spl.csv";
36-
private static final String CODE_MATCHING_CSV_AFTER = "code-matching.after.spl.csv";
3729
public static int numProcessed = 0;
3830
private final ConcurrentHashMap<Long, ThreadBatch> threadBatches;
3931
private final Set<String> failedCommits;

src/main/java/org/variantsync/vevos/extraction/analysis/FullVariabilityAnalysis.java

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,14 +27,11 @@ public class FullVariabilityAnalysis implements Analysis.Hooks, VariabilityAnaly
2727
private final Hashtable<String, GroundTruth> groundTruthMap;
2828
private final Path diffDetectiveCache;
2929
private final boolean ignorePCChanges;
30-
// TODO: extract code matching during full analysis
31-
private final boolean extractCodeMatching;
3230

33-
public FullVariabilityAnalysis(Path diffDetectiveCache, boolean ignorePCChanges, boolean extractCodeMatching) {
31+
public FullVariabilityAnalysis(Path diffDetectiveCache, boolean ignorePCChanges) {
3432
this.groundTruthMap = new Hashtable<>();
3533
this.diffDetectiveCache = diffDetectiveCache;
3634
this.ignorePCChanges = ignorePCChanges;
37-
this.extractCodeMatching = extractCodeMatching;
3835
}
3936

4037
@Override

src/main/java/org/variantsync/vevos/extraction/analysis/VariabilityAnalysis.java

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,10 +54,8 @@ static void analyzeNode(FileGT.Mutable fileGT, DiffNode<DiffLinesLabel> node, Ti
5454
currentRange = node.getLinesAtTime(Time.AFTER);
5555
counterpartRange = node.getLinesAtTime(Time.BEFORE);
5656
}
57-
default -> {
58-
// Because Java cannot assess statically that this case will never occur *sigh*
59-
throw new IllegalStateException();
60-
}
57+
default -> // Because Java cannot assess statically that this case will never occur *sigh*
58+
throw new IllegalStateException();
6159
}
6260
int fromLine = currentRange.fromInclusive();
6361
int toLine = currentRange.toExclusive();

src/main/java/org/variantsync/vevos/extraction/gt/GroundTruth.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,23 @@
1313
*/
1414
public record GroundTruth(HashMap<String, FileGT> fileGTs, Set<String> variables)
1515
implements Serializable {
16+
// Constant file names of the ground truth
17+
public final static String SUCCESS_COMMIT_FILE = "SUCCESS_COMMITS.txt";
18+
public final static String ERROR_COMMIT_FILE = "ERROR_COMMITS.txt";
19+
public final static String EMPTY_COMMIT_FILE = "EMPTY_COMMITS.txt";
20+
public static final String COMMIT_PARENTS_FILE = "PARENTS.txt";
21+
public static final String COMMIT_MESSAGE_FILE = "MESSAGE.txt";
22+
public static final String VARIABLES_FILE = "VARIABLES.txt";
23+
// Used by the full extraction, because there is only one set of PCs for each commit
24+
public static final String CODE_VARIABILITY_CSV = "code-variability.spl.csv";
25+
public static final String CODE_MATCHING_CSV = "code-matching.spl.csv";
26+
// Used by the fast extraction, because each commit has two sets: one for the state before and one for the state after the changes
27+
public static final String CODE_VARIABILITY_CSV_BEFORE = "code-variability.before.spl.csv";
28+
public static final String CODE_VARIABILITY_CSV_AFTER = "code-variability.after.spl.csv";
29+
public static final String CODE_MATCHING_CSV_BEFORE = "code-matching.before.spl.csv";
30+
public static final String CODE_MATCHING_CSV_AFTER = "code-matching.after.spl.csv";
31+
32+
// Patterns for normalizing variables
1633
private static final Pattern variableStart = Pattern.compile("\\$\\{");
1734
private static final Pattern variableEnd = Pattern.compile("}");
1835
private static final Pattern quotation = Pattern.compile("\"");

0 commit comments

Comments
 (0)