Skip to content

Commit 2963f9e

Browse files
authored
Merge pull request #123 from VariantSync/develop
Update Main with Logo and Demo Fixes
2 parents 9289044 + 570e443 commit 2963f9e

20 files changed

Lines changed: 414 additions & 350 deletions

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@
66

77
# DiffDetective - Variability-Aware Source Code Differencing
88

9+
10+
<img padding="30" align="right" src="docs/logo.png" alt="DiffDetective Logo" width="350"/>
11+
912
DiffDetective is an open-source Java library for variability-aware source code differencing and the **analysis of version histories of software product lines**. This means that DiffDetective can **turn a generic differencer into a variability-aware differencer** by means of pre- or post-processing. DiffDetective is centered around **formally verified** data structures for variability (variation trees) and variability-aware diffs (variation diffs). These data structures are **generic**, and DiffDetective currently implements **C preprocessor support** to parse the respective annotations when they are used to implement variability. The picture below depicts the process of variability-aware differencing.
1013

1114
<img alt="Variability-Aware Differencing Overview" src="docs/teaser.png" height="500" />

docs/logo.png

393 KB
Loading

src/main/java/org/variantsync/diffdetective/AnalysisRunner.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,23 @@ public record Options(
5757
*/
5858
boolean pullRepositoriesBeforeAnalysis
5959
) {
60+
/**
61+
* Creates options with the given parameters and uses default
62+
* values for all other parameters.
63+
* @see Options#Options(Path, Path, Path, Function, Function, boolean, boolean)
64+
* @see Options#DEFAULT(String[])
65+
*/
66+
public Options(Path repositoriesDirectory,
67+
Path outputDirectory,
68+
Path datasetsFile) {
69+
this(
70+
repositoriesDirectory, outputDirectory, datasetsFile,
71+
Repository::getParseOptions,
72+
Repository::getDiffFilter,
73+
true,
74+
false);
75+
}
76+
6077
public static Options DEFAULT(final String[] args) {
6178
final Path datasetsFile;
6279
if (args.length < 1) {

src/main/java/org/variantsync/diffdetective/analysis/Analysis.java

Lines changed: 75 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,9 @@
2020
import org.variantsync.diffdetective.diff.git.GitDiffer;
2121
import org.variantsync.diffdetective.diff.git.PatchDiff;
2222
import org.variantsync.diffdetective.diff.result.CommitDiffResult;
23-
import org.variantsync.diffdetective.diff.result.DiffError;
2423
import org.variantsync.diffdetective.metadata.Metadata;
2524
import org.variantsync.diffdetective.parallel.ScheduledTasksIterator;
26-
import org.variantsync.diffdetective.util.Assert;
27-
import org.variantsync.diffdetective.util.Clock;
28-
import org.variantsync.diffdetective.util.Diagnostics;
29-
import org.variantsync.diffdetective.util.InvocationCounter;
25+
import org.variantsync.diffdetective.util.*;
3026
import org.variantsync.diffdetective.variation.DiffLinesLabel;
3127
import org.variantsync.diffdetective.variation.diff.Time;
3228
import org.variantsync.diffdetective.variation.diff.VariationDiff;
@@ -42,8 +38,8 @@
4238
* provides access to the current state of the analysis in one thread. Depending on the current
4339
* {@link Hooks phase} only a subset of the state accessible via getters may be valid.
4440
*
45-
* @see forEachRepository
46-
* @see forEachCommit
41+
* @see #forEachRepository
42+
* @see #forEachCommit
4743
* @author Paul Bittner, Benjamin Moosherr
4844
*/
4945
public class Analysis {
@@ -57,7 +53,7 @@ public class Analysis {
5753
public static final String TOTAL_RESULTS_FILE_NAME = "totalresult" + EXTENSION;
5854
/**
5955
* Default value for <code>commitsToProcessPerThread</code>
60-
* @see forEachCommit(Supplier, int, int)
56+
* @see #forEachCommit(Supplier, int, int)
6157
*/
6258
public static final int COMMITS_TO_PROCESS_PER_THREAD_DEFAULT = 1000;
6359

@@ -74,6 +70,38 @@ public class Analysis {
7470
protected Path outputFile;
7571
protected final AnalysisResult result;
7672

73+
/**
74+
* The total number of commits in the observed history of the given repository.
75+
*/
76+
public final static class TotalNumberOfCommitsResult extends SimpleMetadata<Integer, TotalNumberOfCommitsResult> {
77+
public final static ResultKey<TotalNumberOfCommitsResult> KEY = new ResultKey<>(TotalNumberOfCommitsResult.class.getName());
78+
79+
public TotalNumberOfCommitsResult() {
80+
super(
81+
0,
82+
MetadataKeys.TOTAL_COMMITS,
83+
Integer::sum,
84+
Integer::parseInt
85+
);
86+
}
87+
}
88+
89+
/**
90+
* The effective runtime in seconds that we have when using multithreading.
91+
*/
92+
public final static class RuntimeWithMultithreadingResult extends SimpleMetadata<Double, RuntimeWithMultithreadingResult> {
93+
public final static ResultKey<RuntimeWithMultithreadingResult> KEY = new ResultKey<>(RuntimeWithMultithreadingResult.class.getName());
94+
95+
public RuntimeWithMultithreadingResult() {
96+
super(
97+
0.0,
98+
MetadataKeys.RUNTIME_WITH_MULTITHREADING,
99+
Double::sum,
100+
Double::parseDouble
101+
);
102+
}
103+
}
104+
77105
/**
78106
* The repository this analysis is run on.
79107
* Always valid.
@@ -133,23 +161,23 @@ public Path getOutputFile() {
133161

134162
/**
135163
* The results of the analysis. This may be modified by any hook and should be initialized in
136-
* {@link Hooks#initializeResults} (e.g. by using {@link append}).
164+
* {@link Hooks#initializeResults} (e.g. by using {@link #append}).
137165
* Always valid.
138166
*/
139167
public AnalysisResult getResult() {
140168
return result;
141169
}
142170

143171
/**
144-
* Convenience getter for {@link AnalysisResult#get} on {@link getResult}.
172+
* Convenience getter for {@link AnalysisResult#get} on {@link #getResult}.
145173
* Always valid.
146174
*/
147175
public <T extends Metadata<T>> T get(ResultKey<T> resultKey) {
148176
return result.get(resultKey);
149177
}
150178

151179
/**
152-
* Convenience function for {@link AnalysisResult#append} on {@link getResult}.
180+
* Convenience function for {@link AnalysisResult#append} on {@link #getResult}.
153181
* Always valid.
154182
*/
155183
public <T extends Metadata<T>> void append(ResultKey<T> resultKey, T value) {
@@ -179,13 +207,13 @@ public <T extends Metadata<T>> void append(ResultKey<T> resultKey, T value) {
179207
* end hooks).
180208
*
181209
* <p>An analysis implementing {@code Hooks} can perform various actions during each hook. This
182-
* includes the {@link append creation} and {@link get modification} of {@link getResult
210+
* includes the {@link #append creation} and {@link #get modification} of {@link #getResult
183211
* analysis results}, modifying their internal state, performing IO operations and throwing
184212
* exceptions. In contrast, the only analysis state hooks are allowed to modify is the {@link
185-
* getResult result} of an {@link Analysis}. All other state (e.g. {@link getCurrentCommit})
213+
* #getResult result} of an {@link Analysis}. All other state (e.g. {@link #getCurrentCommit})
186214
* must not be modified. Care must be taken to avoid the reliance of the internal state on a
187-
* specific commit batch being processed as only the {@link getResult results} of each commit
188-
* batch are merged and returned by {@link forEachCommit}.
215+
* specific commit batch being processed as only the {@link #getResult results} of each commit
216+
* batch are merged and returned by {@link #forEachCommit}.
189217
*
190218
* <p>Hooks that return a {@code boolean} are called filter hooks and can, in addition to the
191219
* above, skip any further processing in the current phase (including following inner phases) by
@@ -198,8 +226,8 @@ public <T extends Metadata<T>> void append(ResultKey<T> resultKey, T value) {
198226
*/
199227
public interface Hooks {
200228
/**
201-
* Initialization hook for {@link getResult}. All result types should be appended with a
202-
* neutral value using {@link append}. No other side effects should be performed during this
229+
* Initialization hook for {@link #getResult}. All result types should be appended with a
230+
* neutral value using {@link #append}. No other side effects should be performed during this
203231
* method as it might be called an arbitrary number of times.
204232
*/
205233
default void initializeResults(Analysis analysis) {}
@@ -208,7 +236,7 @@ default void beginBatch(Analysis analysis) throws Exception {}
208236
/**
209237
* Signals a parsing failure of all patches in the current commit.
210238
* Called at most once during the commit phase. If this hook is called, {@link
211-
* onParsedCommit} and the following patch phase invocations are skipped.
239+
* #onParsedCommit} and the following patch phase invocations are skipped.
212240
*/
213241
default void onFailedCommit(Analysis analysis) throws Exception {}
214242
/**
@@ -235,9 +263,9 @@ default void endBatch(Analysis analysis) throws Exception {}
235263
/**
236264
* Runs {@code analyzeRepository} on each repository, skipping repositories where an analysis
237265
* was already run. This skipping mechanism doesn't distinguish between different analyses as it
238-
* only checks for the existence of {@link TOTAL_RESULTS_FILE_NAME}. Delete this file to rerun
266+
* only checks for the existence of {@link #TOTAL_RESULTS_FILE_NAME}. Delete this file to rerun
239267
* the analysis.
240-
*
268+
* <p>
241269
* For each repository a directory in {@code outputDir} is passed to {@code analyzeRepository}
242270
* where the results of the given repository should be written.
243271
*
@@ -289,17 +317,17 @@ public static AnalysisResult forSingleCommit(final String commitHash, final Anal
289317
AnalysisResult result = null;
290318
try {
291319
final RevCommit commit = analysis.differ.getCommit(commitHash);
292-
result = analysis.processCommits(List.of(commit), analysis.differ);
320+
analysis.processCommitBatch(List.of(commit));
321+
result = analysis.getResult();
293322
} catch (Exception e) {
294323
Logger.error("Failed to analyze {}. Exiting.", commitHash);
295324
System.exit(1);
296325
}
297326

298327
final double runtime = clock.getPassedSeconds();
299328
Logger.info("<<< done in {}", Clock.printPassedSeconds(runtime));
300-
301-
result.runtimeWithMultithreadingInSeconds = -1;
302-
result.totalCommits = 1;
329+
330+
result.get(TotalNumberOfCommitsResult.KEY).value++;
303331

304332
exportMetadata(analysis.getOutputDir(), result);
305333
return result;
@@ -339,8 +367,8 @@ public boolean beginPatch(Analysis analysis) {
339367
}
340368

341369
/**
342-
* Same as {@link forEachCommit(Supplier<Analysis>, int, int)}.
343-
* Defaults to {@link COMMITS_TO_PROCESS_PER_THREAD_DEFAULT} and a machine dependent number of
370+
* Same as {@link #forEachCommit(Supplier, int, int)}.
371+
* Defaults to {@link #COMMITS_TO_PROCESS_PER_THREAD_DEFAULT} and a machine dependent number of
344372
* {@link Diagnostics#getNumberOfAvailableProcessors}.
345373
*/
346374
public static AnalysisResult forEachCommit(Supplier<Analysis> analysis) {
@@ -370,6 +398,7 @@ public static AnalysisResult forEachCommit(
370398
) {
371399
var analysis = analysisFactory.get();
372400
analysis.differ = new GitDiffer(analysis.getRepository());
401+
analysis.result.append(RuntimeWithMultithreadingResult.KEY, new RuntimeWithMultithreadingResult());
373402

374403
final Clock clock = new Clock();
375404

@@ -385,7 +414,12 @@ public static AnalysisResult forEachCommit(
385414
),
386415
/// 2.) Create a MiningTask for the list of commits. This task will then be processed by one
387416
/// particular thread.
388-
commitList -> () -> analysisFactory.get().processCommits(commitList, analysis.differ)
417+
commitList -> () -> {
418+
Analysis thisThreadsAnalysis = analysisFactory.get();
419+
thisThreadsAnalysis.differ = analysis.differ;
420+
thisThreadsAnalysis.processCommitBatch(commitList);
421+
return thisThreadsAnalysis.getResult();
422+
}
389423
);
390424
Logger.info("<<< done in {}", clock.printPassedSeconds());
391425

@@ -411,8 +445,8 @@ public static AnalysisResult forEachCommit(
411445
final double runtime = clock.getPassedSeconds();
412446
Logger.info("<<< done in {}", Clock.printPassedSeconds(runtime));
413447

414-
analysis.getResult().runtimeWithMultithreadingInSeconds = runtime;
415-
analysis.getResult().totalCommits = numberOfTotalCommits.invocationCount().get();
448+
analysis.getResult().get(RuntimeWithMultithreadingResult.KEY).value = runtime;
449+
// analysis.getResult().get(TotalNumberOfCommitsResult.KEY).value = numberOfTotalCommits.invocationCount().get();
416450

417451
exportMetadata(analysis.getOutputDir(), analysis.getResult());
418452
return analysis.getResult();
@@ -435,39 +469,22 @@ public Analysis(
435469
this.hooks = hooks;
436470
this.repository = repository;
437471
this.outputDir = outputDir;
438-
this.result = new AnalysisResult();
439-
440-
this.result.repoName = repository.getRepositoryName();
472+
473+
this.result = new AnalysisResult(repository.getRepositoryName());
441474
this.result.taskName = taskName;
475+
this.result.append(TotalNumberOfCommitsResult.KEY, new TotalNumberOfCommitsResult());
476+
442477
for (var hook : hooks) {
443478
hook.initializeResults(this);
444479
}
445480
}
446481

447482
/**
448-
* Entry point into a sequential analysis of {@code commits} as one batch.
449-
* Same as {@link processCommits(List<RevCommit>, GitDiffer)} with a default {@link GitDiffer}.
483+
* Sequential analysis of all {@code commits} as one batch.
450484
*
451485
* @param commits the commit batch to be processed
452-
* @see forEachCommit
486+
* @see #forEachCommit
453487
*/
454-
public AnalysisResult processCommits(List<RevCommit> commits) throws Exception {
455-
return processCommits(commits, new GitDiffer(getRepository()));
456-
}
457-
458-
/**
459-
* Entry point into a sequential analysis of {@code commits} as one batch.
460-
*
461-
* @param commits the commit batch to be processed
462-
* @param differ the differ to use
463-
* @see forEachCommit
464-
*/
465-
public AnalysisResult processCommits(List<RevCommit> commits, GitDiffer differ) throws Exception {
466-
this.differ = differ;
467-
processCommitBatch(commits);
468-
return getResult();
469-
}
470-
471488
protected void processCommitBatch(List<RevCommit> commits) throws Exception {
472489
outputFile = outputDir.resolve(commits.get(0).getId().getName());
473490

@@ -495,6 +512,9 @@ protected void processCommitBatch(List<RevCommit> commits) throws Exception {
495512
}
496513
} finally {
497514
runReverseHook(batchHook, Hooks::endBatch);
515+
516+
// export the thread's result
517+
getResult().exportTo(FileUtils.addExtension(outputFile, Analysis.EXTENSION));
498518
}
499519
}
500520

@@ -540,6 +560,8 @@ protected void processCommit() throws Exception {
540560
runReverseHook(patchHook, Hooks::endPatch);
541561
}
542562
}
563+
564+
getResult().get(TotalNumberOfCommitsResult.KEY).value++;
543565
}
544566

545567
protected void processPatch() throws Exception {
@@ -590,7 +612,7 @@ protected <Hook> void runReverseHook(ListIterator<Hook> hook, FailableBiConsumer
590612

591613
/**
592614
* Exports the given metadata object to a file named according to
593-
* {@link TOTAL_RESULTS_FILE_NAME} in the given directory.
615+
* {@link #TOTAL_RESULTS_FILE_NAME} in the given directory.
594616
* @param outputDir The directory into which the metadata object file should be written.
595617
* @param metadata The metadata to serialize
596618
* @param <T> Type of the metadata.

src/main/java/org/variantsync/diffdetective/analysis/AnalysisResult.java

Lines changed: 2 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -38,14 +38,6 @@ public final class AnalysisResult implements Metadata<AnalysisResult> {
3838
*/
3939
public String repoName = NO_REPO;
4040
public String taskName;
41-
/**
42-
* The effective runtime in seconds that we have when using multithreading.
43-
*/
44-
public double runtimeWithMultithreadingInSeconds = 0;
45-
/**
46-
* The total number of commits in the observed history of the given repository.
47-
*/
48-
public int totalCommits = 0;
4941
public final MergeMap<DiffError, Integer> diffErrors = new MergeMap<>(new HashMap<>(), Integer::sum);
5042

5143
private final Map<String, Metadata<?>> results = new HashMap<>();
@@ -106,8 +98,6 @@ public <T extends Metadata<T>> void append(ResultKey<T> resultKey, T value) {
10698
return ar + "; " + br;
10799
});
108100
a.taskName = Metadata.mergeEqual(a.taskName, b.taskName);
109-
a.runtimeWithMultithreadingInSeconds += b.runtimeWithMultithreadingInSeconds;
110-
a.totalCommits += b.totalCommits;
111101
a.diffErrors.append(b.diffErrors);
112102
b.results.forEach((key, value) -> a.unsafeAppend(key, value));
113103
};
@@ -147,12 +137,11 @@ public LinkedHashMap<String, Object> snapshot() {
147137
LinkedHashMap<String, Object> snap = new LinkedHashMap<>();
148138
snap.put(MetadataKeys.TASKNAME, taskName);
149139
snap.put(MetadataKeys.REPONAME, repoName);
150-
snap.put(MetadataKeys.RUNTIME_WITH_MULTITHREADING, runtimeWithMultithreadingInSeconds);
151-
snap.put(MetadataKeys.TOTAL_COMMITS, totalCommits);
152140

153141
var statistics = get(StatisticsAnalysis.RESULT);
142+
var globals = get(Analysis.TotalNumberOfCommitsResult.KEY);
154143
if (statistics != null) {
155-
snap.put(MetadataKeys.FILTERED_COMMITS, totalCommits - statistics.processedCommits - statistics.emptyCommits - statistics.failedCommits);
144+
snap.put(MetadataKeys.FILTERED_COMMITS, globals.value - statistics.processedCommits - statistics.emptyCommits - statistics.failedCommits);
156145
}
157146

158147
for (var result : results.values()) {
@@ -168,14 +157,6 @@ public void setFromSnapshot(LinkedHashMap<String, String> snap) {
168157
repoName = snap.get(MetadataKeys.REPONAME);
169158
taskName = snap.get(MetadataKeys.TASKNAME);
170159

171-
String runtime = snap.get(MetadataKeys.RUNTIME_WITH_MULTITHREADING);
172-
if (runtime.endsWith("s")) {
173-
runtime = runtime.substring(0, runtime.length() - 1);
174-
}
175-
runtimeWithMultithreadingInSeconds = Double.parseDouble(runtime);
176-
177-
totalCommits = Integer.parseInt(snap.get(MetadataKeys.TOTAL_COMMITS));
178-
179160
for (var entry : snap.entrySet()) {
180161
String key = entry.getKey();
181162
if (entry.getKey().startsWith(ERROR_BEGIN)) {

0 commit comments

Comments
 (0)