Skip to content

Commit c852a31

Browse files
committed
patch statistics on elementary pattern matches + csv export
1 parent 64ee7dd commit c852a31

5 files changed

Lines changed: 100 additions & 5 deletions

File tree

src/main/java/org/variantsync/diffdetective/analysis/CommitHistoryAnalysisTask.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import org.variantsync.diffdetective.datasets.Repository;
77
import org.variantsync.diffdetective.diff.GitDiffer;
88
import org.variantsync.diffdetective.diff.difftree.serialize.DiffTreeLineGraphExportOptions;
9+
import org.variantsync.diffdetective.util.CSV;
910
import org.variantsync.diffdetective.util.IO;
1011
import org.variantsync.diffdetective.util.StringUtils;
1112

@@ -16,6 +17,7 @@
1617

1718
public abstract class CommitHistoryAnalysisTask implements Callable<AnalysisResult> {
1819
public static final String COMMIT_TIME_FILE_EXTENSION = ".committimes.txt";
20+
public static final String PATCH_STATISTICS_EXTENSION = ".patchStatistics.csv";
1921

2022
public record Options(
2123
Repository repository,
@@ -65,4 +67,15 @@ public static void exportCommitTimes(final List<CommitProcessTime> commitTimes,
6567
System.exit(0);
6668
}
6769
}
70+
71+
public static void exportPatchStatistics(final List<PatchStatistics> commitTimes, final Path pathToOutputFile) {
72+
final String csv = CSV.toCSV(commitTimes);
73+
74+
try {
75+
IO.write(pathToOutputFile, csv);
76+
} catch (IOException e) {
77+
Logger.error(e);
78+
System.exit(0);
79+
}
80+
}
6881
}
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
package org.variantsync.diffdetective.analysis;
2+
3+
import org.variantsync.diffdetective.pattern.elementary.ElementaryPattern;
4+
import org.variantsync.diffdetective.pattern.elementary.ElementaryPatternCatalogue;
5+
6+
import java.util.HashMap;
7+
import java.util.Map;
8+
import java.util.stream.Collectors;
9+
10+
public class ElementaryPatternCount {
11+
private final ElementaryPatternCatalogue catalogue;
12+
private final Map<ElementaryPattern, Integer> patterncounts;
13+
14+
public ElementaryPatternCount(final ElementaryPatternCatalogue catalogue) {
15+
this.catalogue = catalogue;
16+
this.patterncounts = new HashMap<>();
17+
catalogue.all().forEach(e -> patterncounts.put(e, 0));
18+
}
19+
20+
public void increment(final ElementaryPattern pattern) {
21+
patterncounts.computeIfPresent(pattern, (p, i) -> i + 1);
22+
}
23+
24+
public String toCSV() {
25+
return toCSV(";");
26+
}
27+
28+
public String toCSV(final String delimiter) {
29+
return catalogue.all().stream()
30+
.map(patterncounts::get)
31+
.map(Object::toString)
32+
.collect(Collectors.joining(delimiter));
33+
}
34+
}
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
package org.variantsync.diffdetective.analysis;
2+
3+
import org.variantsync.diffdetective.diff.PatchDiff;
4+
import org.variantsync.diffdetective.pattern.elementary.ElementaryPatternCatalogue;
5+
import org.variantsync.diffdetective.util.CSV;
6+
7+
public record PatchStatistics(
8+
PatchDiff patchDiff,
9+
ElementaryPatternCount elementaryPatternCount) implements CSV {
10+
public PatchStatistics(final PatchDiff patch, final ElementaryPatternCatalogue catalogue) {
11+
this(patch, new ElementaryPatternCount(catalogue));
12+
}
13+
14+
public String toCSV(final String delimiter) {
15+
return patchDiff.getCommitHash() + delimiter + patchDiff.getFileName() + delimiter + elementaryPatternCount.toCSV(delimiter);
16+
}
17+
}

src/main/java/org/variantsync/diffdetective/mining/MiningTask.java

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,7 @@
22

33
import org.eclipse.jgit.revwalk.RevCommit;
44
import org.tinylog.Logger;
5-
import org.variantsync.diffdetective.analysis.AnalysisResult;
6-
import org.variantsync.diffdetective.analysis.CommitHistoryAnalysisTask;
7-
import org.variantsync.diffdetective.analysis.CommitProcessTime;
8-
import org.variantsync.diffdetective.analysis.HistoryAnalysis;
5+
import org.variantsync.diffdetective.analysis.*;
96
import org.variantsync.diffdetective.diff.CommitDiff;
107
import org.variantsync.diffdetective.diff.PatchDiff;
118
import org.variantsync.diffdetective.diff.difftree.DiffTree;
@@ -14,6 +11,7 @@
1411
import org.variantsync.diffdetective.diff.difftree.transform.DiffTreeTransformer;
1512
import org.variantsync.diffdetective.diff.result.CommitDiffResult;
1613
import org.variantsync.diffdetective.metadata.ExplainedFilterSummary;
14+
import org.variantsync.diffdetective.pattern.elementary.ElementaryPattern;
1715
import org.variantsync.diffdetective.pattern.elementary.proposed.ProposedElementaryPatterns;
1816
import org.variantsync.diffdetective.util.Clock;
1917
import org.variantsync.diffdetective.util.FileUtils;
@@ -34,6 +32,7 @@ public AnalysisResult call() throws Exception {
3432
final Clock totalTime = new Clock();
3533

3634
final List<CommitProcessTime> commitTimes = new ArrayList<>(HistoryAnalysis.COMMITS_TO_PROCESS_PER_THREAD_DEFAULT);
35+
final List<PatchStatistics> patchStatistics = new ArrayList<>(HistoryAnalysis.COMMITS_TO_PROCESS_PER_THREAD_DEFAULT);
3736
final Clock commitProcessTimer = new Clock();
3837

3938
totalTime.start();
@@ -61,6 +60,8 @@ public AnalysisResult call() throws Exception {
6160
// Count elementary patterns
6261
int numDiffTrees = 0;
6362
for (final PatchDiff patch : commitDiff.getPatchDiffs()) {
63+
final PatchStatistics thisPatchesStatistics = new PatchStatistics(patch, ProposedElementaryPatterns.Instance);
64+
6465
if (patch.isValid()) {
6566
final DiffTree t = patch.getDiffTree();
6667
DiffTreeTransformer.apply(exportOptions.treePreProcessing(), t);
@@ -72,15 +73,19 @@ public AnalysisResult call() throws Exception {
7273

7374
t.forAll(node -> {
7475
if (node.isCode()) {
76+
final ElementaryPattern nodePattern = ProposedElementaryPatterns.Instance.match(node);
7577
miningResult.elementaryPatternCounts.reportOccurrenceFor(
76-
ProposedElementaryPatterns.Instance.match(node),
78+
nodePattern,
7779
commitDiff
7880
);
81+
thisPatchesStatistics.elementaryPatternCount().increment(nodePattern);
7982
}
8083
});
8184

8285
++numDiffTrees;
8386
}
87+
88+
patchStatistics.add(thisPatchesStatistics);
8489
}
8590
miningResult.exportedTrees += numDiffTrees;
8691
miningResult.filterHits.append(new ExplainedFilterSummary(exportOptions.treeFilter()));
@@ -106,6 +111,7 @@ public AnalysisResult call() throws Exception {
106111
miningResult.runtimeInSeconds = totalTime.getPassedSeconds();
107112
miningResult.exportTo(FileUtils.addExtension(options.outputPath(), AnalysisResult.EXTENSION));
108113
exportCommitTimes(commitTimes, FileUtils.addExtension(options.outputPath(), COMMIT_TIME_FILE_EXTENSION));
114+
exportPatchStatistics(patchStatistics, FileUtils.addExtension(options.outputPath(), PATCH_STATISTICS_EXTENSION));
109115
return miningResult;
110116
}
111117
}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
package org.variantsync.diffdetective.util;
2+
3+
import java.util.Collection;
4+
import java.util.stream.Collectors;
5+
import java.util.stream.Stream;
6+
7+
public interface CSV {
8+
String DEFAULT_CSV_DELIMITER = ";";
9+
10+
default String toCSV() {
11+
return toCSV(DEFAULT_CSV_DELIMITER);
12+
}
13+
14+
String toCSV(final String delimiter);
15+
16+
static String toCSV(final Collection<? extends CSV> collection) {
17+
return toCSV(collection.stream());
18+
}
19+
20+
static String toCSV(final Stream<? extends CSV> collection) {
21+
return collection
22+
.map(CSV::toCSV)
23+
.collect(Collectors.joining(StringUtils.LINEBREAK));
24+
}
25+
}

0 commit comments

Comments
 (0)