Skip to content

Commit e818cf1

Browse files
committed
major refactoring to separate validation from mining and make analysis extensible
1 parent a19afcd commit e818cf1

54 files changed

Lines changed: 608 additions & 418 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.run/GenAutomationResults.run.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
<component name="ProjectRunConfigurationManager">
22
<configuration default="false" name="GenAutomationResults" type="Application" factoryName="Application">
3-
<option name="MAIN_CLASS_NAME" value="mining.FindMedianCommitTime" />
3+
<option name="MAIN_CLASS_NAME" value="main.FindMedianCommitTime" />
44
<module name="DiffDetective" />
55
<option name="PROGRAM_PARAMETERS" value="results/difftrees" />
66
<extension name="coverage">

.run/GenUltimateResults.run.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
<component name="ProjectRunConfigurationManager">
22
<configuration default="false" name="GenUltimateResults" type="Application" factoryName="Application">
3-
<option name="MAIN_CLASS_NAME" value="mining.tablegen.MiningResultAccumulator" />
3+
<option name="MAIN_CLASS_NAME" value="tablegen.MiningResultAccumulator" />
44
<module name="DiffDetective" />
55
<option name="PROGRAM_PARAMETERS" value="results/difftrees results/difftrees/" />
66
<extension name="coverage">

.run/Validation.run.xml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
<component name="ProjectRunConfigurationManager">
2+
<configuration default="false" name="Validation" type="Application" factoryName="Application" nameIsGenerated="true">
3+
<option name="MAIN_CLASS_NAME" value="validation.Validation" />
4+
<module name="DiffDetective" />
5+
<method v="2">
6+
<option name="Make" enabled="true" />
7+
</method>
8+
</configuration>
9+
</component>

pom.xml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -57,16 +57,16 @@
5757
<groupId>org.sat4j</groupId>
5858
<artifactId>core</artifactId>
5959
<version>2.3.5</version>
60-
<!-- <scope>system</scope>-->
61-
<!-- <systemPath>${project.basedir}/lib/org.sat4j.core.jar</systemPath>-->
60+
<scope>system</scope>
61+
<systemPath>${project.basedir}/lib/org.sat4j.core.jar</systemPath>
6262
</dependency>
6363

6464
<dependency>
6565
<groupId>de.ovgu</groupId>
6666
<artifactId>featureide.lib.fm</artifactId>
6767
<version>3.8.1</version>
68-
<!-- <scope>system</scope>-->
69-
<!-- <systemPath>${project.basedir}/lib/de.ovgu.featureide.lib.fm-v3.8.1.jar</systemPath>-->
68+
<scope>system</scope>
69+
<systemPath>${project.basedir}/lib/de.ovgu.featureide.lib.fm-v3.8.1.jar</systemPath>
7070
</dependency>
7171

7272
<dependency>

src/main/java/mining/DiffTreeMiningResult.java renamed to src/main/java/analysis/AnalysisResult.java

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
1-
package mining;
1+
package analysis;
22

33
import diff.difftree.serialize.DiffTreeSerializeDebugData;
44
import diff.result.DiffError;
55
import metadata.AtomicPatternCount;
66
import metadata.ExplainedFilterSummary;
77
import metadata.Metadata;
8-
import mining.strategies.CommitProcessTime;
98
import org.variantsync.functjonal.Functjonal;
109
import org.variantsync.functjonal.category.InplaceMonoid;
1110
import org.variantsync.functjonal.category.InplaceSemigroup;
@@ -19,17 +18,17 @@
1918
import java.util.*;
2019
import java.util.function.BiConsumer;
2120

22-
public class DiffTreeMiningResult implements Metadata<DiffTreeMiningResult> {
21+
public class AnalysisResult implements Metadata<AnalysisResult> {
2322
public final static String NO_REPO = "<NONE>";
2423
public final static String EXTENSION = ".metadata.txt";
2524
public final static String ERROR_BEGIN = "#Error[";
2625
public final static String ERROR_END = "]";
2726

28-
public static Map.Entry<String, BiConsumer<DiffTreeMiningResult, String>> storeAsCustomInfo(String key) {
27+
public static Map.Entry<String, BiConsumer<AnalysisResult, String>> storeAsCustomInfo(String key) {
2928
return Map.entry(key, (r, val) -> r.putCustomInfo(key, val));
3029
}
3130

32-
public final static InplaceSemigroup<DiffTreeMiningResult> ISEMIGROUP = (a, b) -> {
31+
public final static InplaceSemigroup<AnalysisResult> ISEMIGROUP = (a, b) -> {
3332
a.totalCommits += b.totalCommits;
3433
a.exportedCommits += b.exportedCommits;
3534
a.emptyCommits += b.emptyCommits;
@@ -46,8 +45,8 @@ public static Map.Entry<String, BiConsumer<DiffTreeMiningResult, String>> storeA
4645
a.diffErrors.append(b.diffErrors);
4746
};
4847

49-
public static InplaceMonoid<DiffTreeMiningResult> IMONOID= InplaceMonoid.From(
50-
DiffTreeMiningResult::new,
48+
public static InplaceMonoid<AnalysisResult> IMONOID= InplaceMonoid.From(
49+
AnalysisResult::new,
5150
ISEMIGROUP
5251
);
5352

@@ -79,15 +78,15 @@ public static Map.Entry<String, BiConsumer<DiffTreeMiningResult, String>> storeA
7978
private final LinkedHashMap<String, String> customInfo = new LinkedHashMap<>();
8079
private final MergeMap<DiffError, Integer> diffErrors = new MergeMap<>(new HashMap<>(), Integer::sum);
8180

82-
public DiffTreeMiningResult() {
81+
public AnalysisResult() {
8382
this(NO_REPO);
8483
}
8584

86-
public DiffTreeMiningResult(final String repoName) {
85+
public AnalysisResult(final String repoName) {
8786
this(repoName, 0, 0, 0, 0, 0, 0, 0, CommitProcessTime.Unknown(repoName, Long.MAX_VALUE), CommitProcessTime.Unknown(repoName, Long.MIN_VALUE), new DiffTreeSerializeDebugData(), new ExplainedFilterSummary());
8887
}
8988

90-
public DiffTreeMiningResult(
89+
public AnalysisResult(
9190
final String repoName,
9291
int totalCommits,
9392
int exportedCommits,
@@ -127,14 +126,14 @@ public void reportDiffErrors(final List<DiffError> errors) {
127126
}
128127

129128
/**
130-
* Imports a metadata file, which is an output of a {@link DiffTreeMiningResult}, and saves back to {@link DiffTreeMiningResult}.
129+
* Imports a metadata file, which is an output of a {@link AnalysisResult}, and saves back to {@link AnalysisResult}.
131130
*
132131
* @param p {@link Path} to the metadata file
133-
* @return The reconstructed {@link DiffTreeMiningResult}
132+
* @return The reconstructed {@link AnalysisResult}
134133
* @throws IOException
135134
*/
136-
public static DiffTreeMiningResult importFrom(final Path p, final Map<String, BiConsumer<DiffTreeMiningResult, String>> customParsers) throws IOException {
137-
DiffTreeMiningResult result = new DiffTreeMiningResult();
135+
public static AnalysisResult importFrom(final Path p, final Map<String, BiConsumer<AnalysisResult, String>> customParsers) throws IOException {
136+
AnalysisResult result = new AnalysisResult();
138137

139138
final List<String> filterHitsLines = new ArrayList<>();
140139
final List<String> atomicPatternCountsLines = new ArrayList<>();
@@ -196,7 +195,7 @@ public static DiffTreeMiningResult importFrom(final Path p, final Map<String, Bi
196195
// add DiffError
197196
result.diffErrors.put(e, Integer.parseInt(value));
198197
} else {
199-
final BiConsumer<DiffTreeMiningResult, String> customParser = customParsers.get(key);
198+
final BiConsumer<AnalysisResult, String> customParser = customParsers.get(key);
200199
if (customParser == null) {
201200
final String errorMessage = "Unknown entry \"" + line + "\"!";
202201
throw new IOException(errorMessage);
@@ -237,7 +236,7 @@ public LinkedHashMap<String, Object> snapshot() {
237236
}
238237

239238
@Override
240-
public InplaceSemigroup<DiffTreeMiningResult> semigroup() {
239+
public InplaceSemigroup<AnalysisResult> semigroup() {
241240
return ISEMIGROUP;
242241
}
243242
}

src/main/java/mining/AutomationResult.java renamed to src/main/java/analysis/AutomationResult.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
1-
package mining;
1+
package analysis;
22

33
import metadata.Metadata;
4-
import mining.strategies.CommitProcessTime;
54
import org.variantsync.functjonal.category.InplaceSemigroup;
65

76
import java.util.LinkedHashMap;

src/main/java/mining/strategies/CommitHistoryAnalysisTask.java renamed to src/main/java/analysis/CommitHistoryAnalysisTask.java

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
1-
package mining.strategies;
1+
package analysis;
22

3+
import analysis.strategies.AnalysisStrategy;
34
import datasets.Repository;
45
import diff.GitDiffer;
56
import diff.difftree.serialize.DiffTreeLineGraphExportOptions;
6-
import mining.DiffTreeMiningResult;
7-
import mining.MetadataKeys;
87
import org.eclipse.jgit.revwalk.RevCommit;
98
import org.tinylog.Logger;
109
import util.IO;
@@ -15,15 +14,15 @@
1514
import java.util.List;
1615
import java.util.concurrent.Callable;
1716

18-
public abstract class CommitHistoryAnalysisTask implements Callable<DiffTreeMiningResult> {
17+
public abstract class CommitHistoryAnalysisTask implements Callable<AnalysisResult> {
1918
public static final String COMMIT_TIME_FILE_EXTENSION = ".committimes.txt";
2019

2120
public record Options(
2221
Repository repository,
2322
GitDiffer differ,
2423
Path outputPath,
2524
DiffTreeLineGraphExportOptions exportOptions,
26-
DiffTreeMiningStrategy miningStrategy,
25+
AnalysisStrategy miningStrategy,
2726
Iterable<RevCommit> commits
2827
) {}
2928

@@ -38,10 +37,10 @@ public CommitHistoryAnalysisTask.Options getOptions() {
3837
}
3938

4039
@Override
41-
public DiffTreeMiningResult call() throws Exception {
40+
public AnalysisResult call() throws Exception {
4241
options.miningStrategy().start(options.repository(), options.outputPath(), options.exportOptions());
4342

44-
final DiffTreeMiningResult miningResult = new DiffTreeMiningResult(options.repository.getRepositoryName());
43+
final AnalysisResult miningResult = new AnalysisResult(options.repository.getRepositoryName());
4544
final DiffTreeLineGraphExportOptions exportOptions = options.exportOptions();
4645

4746
miningResult.putCustomInfo(MetadataKeys.TREEFORMAT, exportOptions.treeFormat().getName());

src/main/java/mining/strategies/CommitHistoryAnalysisTaskFactory.java renamed to src/main/java/analysis/CommitHistoryAnalysisTaskFactory.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
package mining.strategies;
1+
package analysis;
22

33
import datasets.Repository;
44
import diff.GitDiffer;

src/main/java/mining/strategies/CommitProcessTime.java renamed to src/main/java/analysis/CommitProcessTime.java

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
1-
package mining.strategies;
1+
package analysis;
22

3-
import mining.DiffTreeMiningResult;
43
import util.Assert;
54

65
public class CommitProcessTime {
@@ -54,7 +53,7 @@ public static CommitProcessTime max(final CommitProcessTime a, final CommitProce
5453
}
5554

5655
public static CommitProcessTime Unknown(final String repoName, long milliseconds) {
57-
return new CommitProcessTime(DiffTreeMiningResult.NO_REPO, repoName, milliseconds);
56+
return new CommitProcessTime(AnalysisResult.NO_REPO, repoName, milliseconds);
5857
}
5958

6059
public static CommitProcessTime fromString(String text) {
src/main/java/analysis/HistoryAnalysis.java

Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
package analysis;
2+
3+
import analysis.monitoring.TaskCompletionMonitor;
4+
import analysis.strategies.AnalysisStrategy;
5+
import datasets.Repository;
6+
import diff.GitDiffer;
7+
import diff.difftree.serialize.DiffTreeLineGraphExportOptions;
8+
import metadata.Metadata;
9+
import mining.MiningTask;
10+
import org.eclipse.jgit.revwalk.RevCommit;
11+
import org.tinylog.Logger;
12+
import org.variantsync.functjonal.iteration.ClusteredIterator;
13+
import org.variantsync.functjonal.iteration.MappedIterator;
14+
import parallel.ScheduledTasksIterator;
15+
import util.Clock;
16+
import util.Diagnostics;
17+
import util.InvocationCounter;
18+
19+
import java.nio.file.Files;
20+
import java.nio.file.Path;
21+
import java.util.Iterator;
22+
import java.util.List;
23+
import java.util.function.Consumer;
24+
25+
public record HistoryAnalysis(
26+
List<Repository> repositoriesToAnalyze,
27+
Path outputDir,
28+
int commitsToProcessPerThread,
29+
CommitHistoryAnalysisTaskFactory whatToDo,
30+
Consumer<Path> postProcessingOnRepositoryOutputDir
31+
) {
32+
public static final String TOTAL_RESULTS_FILE_NAME = "totalresult" + AnalysisResult.EXTENSION;
33+
public static final int COMMITS_TO_PROCESS_PER_THREAD_DEFAULT = 1000;
34+
35+
@Deprecated
36+
public static void mine(
37+
final Repository repo,
38+
final Path outputDir,
39+
final DiffTreeLineGraphExportOptions exportOptions,
40+
final AnalysisStrategy strategy)
41+
{
42+
AnalysisResult totalResult;
43+
final GitDiffer differ = new GitDiffer(repo);
44+
final Clock clock = new Clock();
45+
46+
// prepare tasks
47+
Logger.info(">>> Scheduling synchronous mining");
48+
clock.start();
49+
List<RevCommit> commitsToProcess = differ.yieldRevCommits().toList();
50+
final CommitHistoryAnalysisTask task = new MiningTask(new CommitHistoryAnalysisTask.Options(
51+
repo,
52+
differ,
53+
outputDir.resolve(repo.getRepositoryName() + ".lg"),
54+
exportOptions,
55+
strategy,
56+
commitsToProcess
57+
));
58+
Logger.info("Scheduled " + commitsToProcess.size() + " commits.");
59+
commitsToProcess = null; // free reference to enable garbage collection
60+
Logger.info("<<< done after " + clock.printPassedSeconds());
61+
62+
Logger.info(">>> Run mining");
63+
clock.start();
64+
try {
65+
totalResult = task.call();
66+
} catch (Exception e) {
67+
Logger.error(e);
68+
Logger.info("<<< aborted after " + clock.printPassedSeconds());
69+
return;
70+
}
71+
Logger.info("<<< done after " + clock.printPassedSeconds());
72+
73+
exportMetadata(outputDir, totalResult);
74+
}
75+
76+
public static void mineAsync(
77+
final Repository repo,
78+
final Path outputDir,
79+
final CommitHistoryAnalysisTaskFactory taskFactory,
80+
int commitsToProcessPerThread)
81+
{
82+
final AnalysisResult totalResult = new AnalysisResult(repo.getRepositoryName());
83+
final GitDiffer differ = new GitDiffer(repo);
84+
final Clock clock = new Clock();
85+
86+
// prepare tasks
87+
final int nThreads = Diagnostics.INSTANCE.run().getNumberOfAvailableProcessors();
88+
Logger.info(">>> Scheduling asynchronous mining on " + nThreads + " threads.");
89+
clock.start();
90+
final InvocationCounter<RevCommit, RevCommit> numberOfTotalCommits = InvocationCounter.justCount();
91+
final Iterator<CommitHistoryAnalysisTask> tasks = new MappedIterator<>(
92+
/// 1.) Retrieve COMMITS_TO_PROCESS_PER_THREAD commits from the differ and cluster them into one list.
93+
new ClusteredIterator<>(differ.yieldRevCommitsAfter(numberOfTotalCommits), commitsToProcessPerThread),
94+
/// 2.) Create a MiningTask for the list of commits. This task will then be processed by one
95+
/// particular thread.
96+
commitList -> taskFactory.create(
97+
repo,
98+
differ,
99+
outputDir.resolve(commitList.get(0).getId().getName() + ".lg"),
100+
commitList)
101+
);
102+
Logger.info("<<< done in " + clock.printPassedSeconds());
103+
104+
final TaskCompletionMonitor commitSpeedMonitor = new TaskCompletionMonitor(0, TaskCompletionMonitor.LogProgress("commits"));
105+
Logger.info(">>> Run mining");
106+
clock.start();
107+
commitSpeedMonitor.start();
108+
try (final ScheduledTasksIterator<AnalysisResult> threads = new ScheduledTasksIterator<>(tasks, nThreads)) {
109+
while (threads.hasNext()) {
110+
final AnalysisResult threadsResult = threads.next();
111+
totalResult.append(threadsResult);
112+
commitSpeedMonitor.addFinishedTasks(threadsResult.exportedCommits);
113+
}
114+
} catch (Exception e) {
115+
Logger.error("Failed to run all mining task!");
116+
Logger.error(e);
117+
System.exit(0);
118+
}
119+
120+
final double runtime = clock.getPassedSeconds();
121+
Logger.info("<<< done in " + Clock.printPassedSeconds(runtime));
122+
123+
totalResult.runtimeWithMultithreadingInSeconds = runtime;
124+
totalResult.totalCommits = numberOfTotalCommits.invocationCount().get();
125+
126+
exportMetadata(outputDir, totalResult);
127+
}
128+
129+
public static <T> void exportMetadata(final Path outputDir, final Metadata<T> totalResult) {
130+
exportMetadataToFile(outputDir.resolve(TOTAL_RESULTS_FILE_NAME), totalResult);
131+
}
132+
133+
public static <T> void exportMetadataToFile(final Path outputFile, final Metadata<T> totalResult) {
134+
final String prettyMetadata = totalResult.exportTo(outputFile);
135+
Logger.info("Metadata:\n" + prettyMetadata);
136+
}
137+
138+
public void runAsync() {
139+
for (final Repository repo : repositoriesToAnalyze) {
140+
Logger.info(" === Begin Processing " + repo.getRepositoryName() + " ===");
141+
final Clock clock = new Clock();
142+
clock.start();
143+
144+
final Path repoOutputDir = outputDir.resolve(repo.getRepositoryName());
145+
/// Don't repeat work we already did:
146+
if (!Files.exists(repoOutputDir.resolve(TOTAL_RESULTS_FILE_NAME))) {
147+
mineAsync(repo, repoOutputDir, whatToDo, commitsToProcessPerThread);
148+
// mine(repo, repoOutputDir, ExportOptions(repo), MiningStrategy());
149+
150+
postProcessingOnRepositoryOutputDir.accept(repoOutputDir);
151+
} else {
152+
Logger.info(" Skipping repository " + repo.getRepositoryName() + " because it has already been processed.");
153+
}
154+
155+
Logger.info(" === End Processing " + repo.getRepositoryName() + " after " + clock.printPassedSeconds() + " ===");
156+
}
157+
}
158+
}

0 commit comments

Comments
 (0)