2020import org .variantsync .diffdetective .diff .git .GitDiffer ;
2121import org .variantsync .diffdetective .diff .git .PatchDiff ;
2222import org .variantsync .diffdetective .diff .result .CommitDiffResult ;
23- import org .variantsync .diffdetective .diff .result .DiffError ;
2423import org .variantsync .diffdetective .metadata .Metadata ;
2524import org .variantsync .diffdetective .parallel .ScheduledTasksIterator ;
26- import org .variantsync .diffdetective .util .Assert ;
27- import org .variantsync .diffdetective .util .Clock ;
28- import org .variantsync .diffdetective .util .Diagnostics ;
29- import org .variantsync .diffdetective .util .InvocationCounter ;
25+ import org .variantsync .diffdetective .util .*;
3026import org .variantsync .diffdetective .variation .DiffLinesLabel ;
3127import org .variantsync .diffdetective .variation .diff .Time ;
3228import org .variantsync .diffdetective .variation .diff .VariationDiff ;
4238 * provides access to the current state of the analysis in one thread. Depending on the current
4339 * {@link Hooks phase} only a subset of the state accessible via getters may be valid.
4440 *
45- * @see forEachRepository
46- * @see forEachCommit
41+ * @see # forEachRepository
42+ * @see # forEachCommit
4743 * @author Paul Bittner, Benjamin Moosherr
4844 */
4945public class Analysis {
@@ -57,7 +53,7 @@ public class Analysis {
5753 public static final String TOTAL_RESULTS_FILE_NAME = "totalresult" + EXTENSION ;
5854 /**
5955 * Default value for <code>commitsToProcessPerThread</code>
60- * @see forEachCommit(Supplier, int, int)
56+ * @see # forEachCommit(Supplier, int, int)
6157 */
6258 public static final int COMMITS_TO_PROCESS_PER_THREAD_DEFAULT = 1000 ;
6359
@@ -73,6 +69,38 @@ public class Analysis {
7369 protected final Path outputDir ;
7470 protected Path outputFile ;
7571 protected final AnalysisResult result ;
72+
73+ /**
74+ * The total number of commits that were processed during the analysis.
75+ */
76+ public final static class TotalNumberOfCommitsResult extends SimpleMetadata <Integer , TotalNumberOfCommitsResult > {
77+ public final static ResultKey <TotalNumberOfCommitsResult > KEY = new ResultKey <>(TotalNumberOfCommitsResult .class .getName ());
78+
79+ public TotalNumberOfCommitsResult () {
80+ super (
81+ 0 ,
82+ MetadataKeys .TOTAL_COMMITS ,
83+ Integer ::sum ,
84+ Integer ::parseInt
85+ );
86+ }
87+ }
88+
89+ /**
90+ * The effective runtime in seconds that we have when using multithreading.
91+ */
92+ public final static class RuntimeWithMultithreadingResult extends SimpleMetadata <Double , RuntimeWithMultithreadingResult > {
93+ public final static ResultKey <RuntimeWithMultithreadingResult > KEY = new ResultKey <>(RuntimeWithMultithreadingResult .class .getName ());
94+
95+ public RuntimeWithMultithreadingResult () {
96+ super (
97+ 0.0 ,
98+ MetadataKeys .RUNTIME_WITH_MULTITHREADING ,
99+ Double ::sum ,
100+ Double ::parseDouble
101+ );
102+ }
103+ }
76104
77105 /**
78106 * The repository this analysis is run on.
@@ -133,23 +161,23 @@ public Path getOutputFile() {
133161
134162 /**
135163 * The results of the analysis. This may be modified by any hook and should be initialized in
136- * {@link Hooks#initializeResults} (e.g. by using {@link append}).
164+ * {@link Hooks#initializeResults} (e.g. by using {@link # append}).
137165 * Always valid.
138166 */
139167 public AnalysisResult getResult () {
140168 return result ;
141169 }
142170
143171 /**
144- * Convenience getter for {@link AnalysisResult#get} on {@link getResult}.
172+ * Convenience getter for {@link AnalysisResult#get} on {@link # getResult}.
145173 * Always valid.
146174 */
147175 public <T extends Metadata <T >> T get (ResultKey <T > resultKey ) {
148176 return result .get (resultKey );
149177 }
150178
151179 /**
152- * Convenience function for {@link AnalysisResult#append} on {@link getResult}.
180+ * Convenience function for {@link AnalysisResult#append} on {@link # getResult}.
153181 * Always valid.
154182 */
155183 public <T extends Metadata <T >> void append (ResultKey <T > resultKey , T value ) {
@@ -179,13 +207,13 @@ public <T extends Metadata<T>> void append(ResultKey<T> resultKey, T value) {
179207 * end hooks).
180208 *
181209 * <p>An analysis implementing {@code Hooks} can perform various actions during each hook. This
182- * includes the {@link append creation} and {@link get modification} of {@link getResult
210+ * includes the {@link # append creation} and {@link # get modification} of {@link # getResult
183211 * analysis results}, modifying their internal state, performing IO operations and throwing
184212 * exceptions. In contrast, the only analysis state hooks are allowed to modify is the {@link
185- * getResult result} of an {@link Analysis}. All other state (e.g. {@link getCurrentCommit})
213+ * # getResult result} of an {@link Analysis}. All other state (e.g. {@link # getCurrentCommit})
186214 * must not be modified. Care must be taken to avoid the reliance of the internal state on a
187- * specific commit batch being processed as only the {@link getResult results} of each commit
188- * batch are merged and returned by {@link forEachCommit}.
215+ * specific commit batch being processed as only the {@link # getResult results} of each commit
216+ * batch are merged and returned by {@link # forEachCommit}.
189217 *
190218 * <p>Hooks that return a {@code boolean} are called filter hooks and can, in addition to the
191219 * above, skip any further processing in the current phase (including following inner phases) by
@@ -198,8 +226,8 @@ public <T extends Metadata<T>> void append(ResultKey<T> resultKey, T value) {
198226 */
199227 public interface Hooks {
200228 /**
201- * Initialization hook for {@link getResult}. All result types should be appended with a
202- * neutral value using {@link append}. No other side effects should be performed during this
229+ * Initialization hook for {@link # getResult}. All result types should be appended with a
230+ * neutral value using {@link # append}. No other side effects should be performed during this
203231 * method as it might be called an arbitrary number of times.
204232 */
205233 default void initializeResults (Analysis analysis ) {}
@@ -208,7 +236,7 @@ default void beginBatch(Analysis analysis) throws Exception {}
208236 /**
209237 * Signals a parsing failure of all patches in the current commit.
210238 * Called at most once during the commit phase. If this hook is called {@link
211- * onParsedCommit} and the following patch phase invocations are skipped.
239+ * # onParsedCommit} and the following patch phase invocations are skipped.
212240 */
213241 default void onFailedCommit (Analysis analysis ) throws Exception {}
214242 /**
@@ -235,9 +263,9 @@ default void endBatch(Analysis analysis) throws Exception {}
235263 /**
236264 * Runs {@code analyzeRepository} on each repository, skipping repositories where an analysis
237265 * was already run. This skipping mechanism doesn't distinguish between different analyses as it
238- * only checks for the existence of {@link TOTAL_RESULTS_FILE_NAME}. Delete this file to rerun
266+ * only checks for the existence of {@link # TOTAL_RESULTS_FILE_NAME}. Delete this file to rerun
239267 * the analysis.
240- *
268+ * <p>
241269 * For each repository a directory in {@code outputDir} is passed to {@code analyzeRepository}
242270 * where the results of the given repository should be written.
243271 *
@@ -289,17 +317,17 @@ public static AnalysisResult forSingleCommit(final String commitHash, final Anal
289317 AnalysisResult result = null ;
290318 try {
291319 final RevCommit commit = analysis .differ .getCommit (commitHash );
292- result = analysis .processCommits (List .of (commit ), analysis .differ );
320+ analysis .processCommitBatch (List .of (commit ));
321+ result = analysis .getResult ();
293322 } catch (Exception e ) {
294323 Logger .error ("Failed to analyze {}. Exiting." , commitHash );
295324 System .exit (1 );
296325 }
297326
298327 final double runtime = clock .getPassedSeconds ();
299328 Logger .info ("<<< done in {}" , Clock .printPassedSeconds (runtime ));
300-
301- result .runtimeWithMultithreadingInSeconds = -1 ;
302- result .totalCommits = 1 ;
329+
330+ result .get (TotalNumberOfCommitsResult .KEY ).value ++;
303331
304332 exportMetadata (analysis .getOutputDir (), result );
305333 return result ;
@@ -339,8 +367,8 @@ public boolean beginPatch(Analysis analysis) {
339367 }
340368
341369 /**
342- * Same as {@link forEachCommit(Supplier<Analysis> , int, int)}.
343- * Defaults to {@link COMMITS_TO_PROCESS_PER_THREAD_DEFAULT} and a machine dependent number of
370+ * Same as {@link # forEachCommit(Supplier, int, int)}.
371+ * Defaults to {@link # COMMITS_TO_PROCESS_PER_THREAD_DEFAULT} and a machine dependent number of
344372 * {@link Diagnostics#getNumberOfAvailableProcessors}.
345373 */
346374 public static AnalysisResult forEachCommit (Supplier <Analysis > analysis ) {
@@ -370,6 +398,7 @@ public static AnalysisResult forEachCommit(
370398 ) {
371399 var analysis = analysisFactory .get ();
372400 analysis .differ = new GitDiffer (analysis .getRepository ());
401+ analysis .result .append (RuntimeWithMultithreadingResult .KEY , new RuntimeWithMultithreadingResult ());
373402
374403 final Clock clock = new Clock ();
375404
@@ -385,7 +414,12 @@ public static AnalysisResult forEachCommit(
385414 ),
386415 /// 2.) Create a MiningTask for the list of commits. This task will then be processed by one
387416 /// particular thread.
388- commitList -> () -> analysisFactory .get ().processCommits (commitList , analysis .differ )
417+ commitList -> () -> {
418+ Analysis thisThreadsAnalysis = analysisFactory .get ();
419+ thisThreadsAnalysis .differ = analysis .differ ;
420+ thisThreadsAnalysis .processCommitBatch (commitList );
421+ return thisThreadsAnalysis .getResult ();
422+ }
389423 );
390424 Logger .info ("<<< done in {}" , clock .printPassedSeconds ());
391425
@@ -411,8 +445,8 @@ public static AnalysisResult forEachCommit(
411445 final double runtime = clock .getPassedSeconds ();
412446 Logger .info ("<<< done in {}" , Clock .printPassedSeconds (runtime ));
413447
414- analysis .getResult ().runtimeWithMultithreadingInSeconds = runtime ;
415- analysis .getResult ().totalCommits = numberOfTotalCommits .invocationCount ().get ();
448+ analysis .getResult ().get ( RuntimeWithMultithreadingResult . KEY ). value = runtime ;
449+ // analysis.getResult().get(TotalNumberOfCommitsResult.KEY).value = numberOfTotalCommits.invocationCount().get();
416450
417451 exportMetadata (analysis .getOutputDir (), analysis .getResult ());
418452 return analysis .getResult ();
@@ -435,39 +469,22 @@ public Analysis(
435469 this .hooks = hooks ;
436470 this .repository = repository ;
437471 this .outputDir = outputDir ;
438- this .result = new AnalysisResult ();
439-
440- this .result .repoName = repository .getRepositoryName ();
472+
473+ this .result = new AnalysisResult (repository .getRepositoryName ());
441474 this .result .taskName = taskName ;
475+ this .result .append (TotalNumberOfCommitsResult .KEY , new TotalNumberOfCommitsResult ());
476+
442477 for (var hook : hooks ) {
443478 hook .initializeResults (this );
444479 }
445480 }
446481
447482 /**
448- * Entry point into a sequential analysis of {@code commits} as one batch.
449- * Same as {@link processCommits(List<RevCommit>, GitDiffer)} with a default {@link GitDiffer}.
483+ * Sequential analysis of all {@code commits} as one batch.
450484 *
451485 * @param commits the commit batch to be processed
452- * @see forEachCommit
486+ * @see # forEachCommit
453487 */
454- public AnalysisResult processCommits (List <RevCommit > commits ) throws Exception {
455- return processCommits (commits , new GitDiffer (getRepository ()));
456- }
457-
458- /**
459- * Entry point into a sequential analysis of {@code commits} as one batch.
460- *
461- * @param commits the commit batch to be processed
462- * @param differ the differ to use
463- * @see forEachCommit
464- */
465- public AnalysisResult processCommits (List <RevCommit > commits , GitDiffer differ ) throws Exception {
466- this .differ = differ ;
467- processCommitBatch (commits );
468- return getResult ();
469- }
470-
471488 protected void processCommitBatch (List <RevCommit > commits ) throws Exception {
472489 outputFile = outputDir .resolve (commits .get (0 ).getId ().getName ());
473490
@@ -495,6 +512,9 @@ protected void processCommitBatch(List<RevCommit> commits) throws Exception {
495512 }
496513 } finally {
497514 runReverseHook (batchHook , Hooks ::endBatch );
515+
516+ // export the thread's result
517+ getResult ().exportTo (FileUtils .addExtension (outputFile , Analysis .EXTENSION ));
498518 }
499519 }
500520
@@ -540,6 +560,8 @@ protected void processCommit() throws Exception {
540560 runReverseHook (patchHook , Hooks ::endPatch );
541561 }
542562 }
563+
564+ getResult ().get (TotalNumberOfCommitsResult .KEY ).value ++;
543565 }
544566
545567 protected void processPatch () throws Exception {
@@ -590,7 +612,7 @@ protected <Hook> void runReverseHook(ListIterator<Hook> hook, FailableBiConsumer
590612
591613 /**
592614 * Exports the given metadata object to a file named according to
593- * {@link TOTAL_RESULTS_FILE_NAME} in the given directory.
615+ * {@link # TOTAL_RESULTS_FILE_NAME} in the given directory.
594616 * @param outputDir The directory into which the metadata object file should be written.
595617 * @param metadata The metadata to serialize
596618 * @param <T> Type of the metadata.
0 commit comments