Skip to content

Commit 7678170

Browse files
authored
Merge pull request #163 from VariantSync/GitDiffer-refactorings
Abstract `git log` calls
2 parents 5bf9c77 + 58afdb5 commit 7678170

12 files changed

Lines changed: 210 additions & 199 deletions

File tree

src/main/java/org/variantsync/diffdetective/AnalysisRunner.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -132,14 +132,14 @@ public static void run(Options options, BiConsumer<Repository, Path> validation)
132132
if (options.preloadReposBeforeAnalysis()) {
133133
Logger.info("Preloading repositories:");
134134
for (final Repository repo : repos) {
135-
repo.getGitRepo().run();
135+
repo.preload();
136136
}
137137

138138
if (options.pullRepositoriesBeforeAnalysis()) {
139139
Logger.info("Updating repositories:");
140140
for (final Repository repo : repos) {
141141
try {
142-
Assert.assertTrue(repo.getGitRepo().run().pull().call().isSuccessful());
142+
Assert.assertTrue(repo.getGitRepo().pull().call().isSuccessful());
143143
} catch (GitAPIException e) {
144144
Logger.error(e, "Failed to pull repository '{}'", repo.getRepositoryName());
145145
}

src/main/java/org/variantsync/diffdetective/analysis/Analysis.java

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
import org.apache.commons.lang3.function.FailableBiConsumer;
1313
import org.apache.commons.lang3.function.FailableBiFunction;
14+
import org.eclipse.jgit.api.Git;
1415
import org.eclipse.jgit.revwalk.RevCommit;
1516
import org.tinylog.Logger;
1617
import org.variantsync.diffdetective.analysis.AnalysisResult.ResultKey;
@@ -60,7 +61,7 @@ public class Analysis {
6061
protected final List<Hooks> hooks;
6162
protected final Repository repository;
6263

63-
protected GitDiffer differ;
64+
protected Git git;
6465
protected RevCommit currentCommit;
6566
protected CommitDiff currentCommitDiff;
6667
protected PatchDiff currentPatch;
@@ -307,16 +308,14 @@ public static void forEachRepository(
307308
* @param analysis the analysis to run
308309
*/
309310
public static AnalysisResult forSingleCommit(final String commitHash, final Analysis analysis) {
310-
analysis.differ = new GitDiffer(analysis.getRepository());
311-
312311
final Clock clock = new Clock();
313312
// prepare tasks
314313
Logger.info(">>> Running Analysis on single commit {} in {}", commitHash, analysis.getRepository().getRepositoryName());
315314
clock.start();
316315

317316
AnalysisResult result = null;
318317
try {
319-
final RevCommit commit = analysis.differ.getCommit(commitHash);
318+
final RevCommit commit = analysis.getRepository().getCommit(commitHash);
320319
analysis.processCommitBatch(List.of(commit));
321320
result = analysis.getResult();
322321
} catch (Exception e) {
@@ -397,7 +396,6 @@ public static AnalysisResult forEachCommit(
397396
final int nThreads
398397
) {
399398
var analysis = analysisFactory.get();
400-
analysis.differ = new GitDiffer(analysis.getRepository());
401399
analysis.result.append(RuntimeWithMultithreadingResult.KEY, new RuntimeWithMultithreadingResult());
402400

403401
final Clock clock = new Clock();
@@ -409,14 +407,18 @@ public static AnalysisResult forEachCommit(
409407
final Iterator<Callable<AnalysisResult>> tasks = new MappedIterator<>(
410408
/// 1.) Retrieve COMMITS_TO_PROCESS_PER_THREAD commits from the differ and cluster them into one list.
411409
new ClusteredIterator<>(
412-
analysis.differ.yieldRevCommitsAfter(numberOfTotalCommits),
410+
analysis.getRepository().getDiffFilter().filter(
411+
new MappedIterator<>(
412+
analysis.getRepository().getCommits(),
413+
numberOfTotalCommits
414+
)
415+
),
413416
commitsToProcessPerThread
414417
),
415418
/// 2.) Create a MiningTask for the list of commits. This task will then be processed by one
416419
/// particular thread.
417420
commitList -> () -> {
418421
Analysis thisThreadsAnalysis = analysisFactory.get();
419-
thisThreadsAnalysis.differ = analysis.differ;
420422
thisThreadsAnalysis.processCommitBatch(commitList);
421423
return thisThreadsAnalysis.getResult();
422424
}
@@ -519,7 +521,7 @@ protected void processCommitBatch(List<RevCommit> commits) throws Exception {
519521

520522
protected void processCommit() throws Exception {
521523
// parse the commit
522-
final CommitDiffResult commitDiffResult = differ.createCommitDiff(currentCommit);
524+
final CommitDiffResult commitDiffResult = GitDiffer.createCommitDiffFromFirstParent(repository, currentCommit);
523525

524526
// report any errors that occurred and exit in case no VariationDiff could be parsed.
525527
getResult().reportDiffErrors(commitDiffResult.errors());
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
package org.variantsync.diffdetective.datasets;
2+
3+
import java.io.IOException;
4+
import java.util.Iterator;
5+
6+
import org.eclipse.jgit.api.errors.GitAPIException;
7+
import org.eclipse.jgit.revwalk.RevCommit;
8+
import org.tinylog.Logger;
9+
10+
/**
11+
* A functional interface for listing the commits in a {@link Repository}.
12+
* Apart from listing a fixed set of commits, this is mainly useful to configure a {@link org.eclipse.jgit.api.Git#log() Git log} call.
13+
*<p>
14+
* This is mainly intended to be used as an argument for the {@code listCommits} argument of
15+
* {@link Repository#Repository(RepositoryLocationType, Path, URI, String, Function<Repository, Iterator<RevCommit>>, PatchDiffParseOptions, DiffFilter)}.
16+
*/
17+
@FunctionalInterface
18+
public interface CommitLister {
19+
Iterator<RevCommit> listCommits(Repository repository);
20+
21+
/**
22+
* List all commits reachable by the current {@code HEAD} of the repository.
23+
*/
24+
public static final CommitLister TraverseHEAD =
25+
(Repository repository) -> {
26+
try {
27+
return repository.getGitRepo().log().call().iterator();
28+
} catch (GitAPIException e) {
29+
Logger.warn("Could not get log for git repository {}", repository.getRepositoryName());
30+
throw new RuntimeException(e);
31+
}
32+
};
33+
34+
/**
35+
* List all commits reachable from all branches of the repository.
36+
*/
37+
public static final CommitLister AllCommits =
38+
(Repository repository) -> {
39+
try {
40+
return repository.getGitRepo().log().all().call().iterator();
41+
} catch (GitAPIException | IOException e) {
42+
Logger.warn("Could not get log for git repository {}", repository.getRepositoryName());
43+
throw new RuntimeException(e);
44+
}
45+
};
46+
}

src/main/java/org/variantsync/diffdetective/datasets/DatasetFactory.java

Lines changed: 3 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -33,23 +33,6 @@ public class DatasetFactory {
3333
*/
3434
public static final String PHP = "PHP";
3535

36-
/**
37-
* Default value for diff filters.
38-
* It disallows merge commits, only considers patches that modified files,
39-
* and only allows source files of C/C++ projects ("h", "hpp", "c", "cpp").
40-
*/
41-
public static final DiffFilter DEFAULT_DIFF_FILTER =
42-
new DiffFilter.Builder()
43-
.allowMerge(false)
44-
.allowCommitsWithoutParents(false)
45-
.allowedChangeTypes(DiffEntry.ChangeType.MODIFY)
46-
.allowedFileExtensions("h", "hpp", "c", "cpp")
47-
.build();
48-
// public static final DiffFilter PHP_DIFF_FILTER =
49-
// new DiffFilter.Builder(DEFAULT_DIFF_FILTER)
50-
//// .blockedPaths("ext/fileinfo/data_file.c")
51-
// .build();
52-
5336
private final Path cloneDirectory;
5437

5538
/**
@@ -69,10 +52,7 @@ public static DiffFilter getDefaultDiffFilterFor(final String repositoryName) {
6952
if (repositoryName.equalsIgnoreCase(MARLIN)) {
7053
return StanciulescuMarlin.DIFF_FILTER;
7154
}
72-
// if (repositoryName.equalsIgnoreCase(PHP)) {
73-
// return PHP_DIFF_FILTER;
74-
// }
75-
return DEFAULT_DIFF_FILTER;
55+
return DiffFilter.DEFAULT_DIFF_FILTER;
7656
}
7757

7858
/**
@@ -124,15 +104,15 @@ public List<Repository> createAll(final Collection<DatasetDescription> datasets,
124104
if (preload) {
125105
Logger.info("Preloading repositories:");
126106
for (final Repository repo : repos) {
127-
repo.getGitRepo().run();
107+
repo.preload();
128108
}
129109
}
130110

131111
if (pull) {
132112
Logger.info("Pulling repositories:");
133113
for (final Repository repo : repos) {
134114
try {
135-
Assert.assertTrue(repo.getGitRepo().run().pull().call().isSuccessful());
115+
Assert.assertTrue(repo.getGitRepo().pull().call().isSuccessful());
136116
} catch (GitAPIException e) {
137117
Logger.error(e, "Failed to pull repository '{}'", repo.getRepositoryName());
138118
}

src/main/java/org/variantsync/diffdetective/datasets/Repository.java

Lines changed: 48 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,18 @@
11
package org.variantsync.diffdetective.datasets;
22

33
import org.eclipse.jgit.api.Git;
4+
import org.eclipse.jgit.lib.ObjectId;
5+
import org.eclipse.jgit.revwalk.RevCommit;
46
import org.tinylog.Logger;
57
import org.variantsync.diffdetective.diff.git.DiffFilter;
68
import org.variantsync.diffdetective.load.GitLoader;
79
import org.variantsync.diffdetective.util.IO;
810
import org.variantsync.functjonal.Lazy;
911

12+
import java.io.IOException;
1013
import java.net.URI;
1114
import java.nio.file.Path;
15+
import java.util.Iterator;
1216
import java.util.Optional;
1317

1418
/**
@@ -39,6 +43,11 @@ public class Repository {
3943
*/
4044
private final String repositoryName;
4145

46+
/**
47+
* A function that extracts the list of commits that are represented by this repository instance.
48+
*/
49+
private CommitLister commitLister;
50+
4251
/**
4352
* Filter determining which files and commits to consider for diffs.
4453
*/
@@ -50,14 +59,15 @@ public class Repository {
5059
private PatchDiffParseOptions parseOptions;
5160

5261
private final Lazy<Git> git = Lazy.of(this::load);
53-
62+
5463
/**
5564
* Creates a repository.
5665
*
5766
* @param repoLocation {@link RepositoryLocationType} From which location the repository is read from
5867
* @param localPath The local path where the repository can be found or should be cloned to.
5968
* @param remote The remote url of the repository. May be <code>null</code> if local.
6069
* @param repositoryName Name of the cloned repository (<code>null</code> if local)
70+
* @param commitLister extracts the commits from {@link #getGitRepo()} that should be represented.
6171
* @param parseOptions Omit some debug data to save RAM.
6272
* @param diffFilter Filter determining which files and commits to consider for diffs.
6373
*/
@@ -66,27 +76,38 @@ public Repository(
6676
final Path localPath,
6777
final URI remote,
6878
final String repositoryName,
79+
final CommitLister commitLister,
6980
final PatchDiffParseOptions parseOptions,
7081
final DiffFilter diffFilter) {
7182
this.repoLocation = repoLocation;
7283
this.localPath = localPath;
7384
this.remote = remote;
7485
this.repositoryName = repositoryName;
86+
this.commitLister = commitLister;
7587
this.parseOptions = parseOptions;
7688
this.diffFilter = diffFilter;
7789
}
7890

7991
/**
8092
* Creates repository of the given source and with all other settings set to default values.
8193
* @see Repository
94+
* <p>
95+
* Defaults to {@link CommitLister#TraverseHEAD}, {@link PatchDiffParseOptions#Default} and {@link DiffFilter#ALLOW_ALL}.
8296
*/
8397
public Repository(
8498
final RepositoryLocationType repoLocation,
8599
final Path localPath,
86100
final URI remote,
87101
final String repositoryName) {
88-
this(repoLocation, localPath, remote, repositoryName,
89-
PatchDiffParseOptions.Default, DiffFilter.ALLOW_ALL);
102+
this(
103+
repoLocation,
104+
localPath,
105+
remote,
106+
repositoryName,
107+
CommitLister.TraverseHEAD,
108+
PatchDiffParseOptions.Default,
109+
DiffFilter.ALLOW_ALL
110+
);
90111
}
91112

92113
/**
@@ -223,8 +244,30 @@ public PatchDiffParseOptions getParseOptions() {
223244
/**
224245
* Returns the internal jgit representation of this repository that allows to inspect the repositories history and content.
225246
*/
226-
public Lazy<Git> getGitRepo() {
227-
return git;
247+
public Git getGitRepo() {
248+
return git.run();
249+
}
250+
251+
/**
252+
* Prepares the Git repository (e.g., clones it if necessary).
253+
*/
254+
public void preload() {
255+
getGitRepo();
256+
}
257+
258+
/**
259+
* Returns a single commit from the repository.
260+
* Note that this commit may not be part of {@link #getCommits}.
261+
*/
262+
public RevCommit getCommit(String commitHash) throws IOException {
263+
return getGitRepo().getRepository().parseCommit(ObjectId.fromString(commitHash));
264+
}
265+
266+
/**
267+
* Returns all commits in the repository's history.
268+
*/
269+
public Iterator<RevCommit> getCommits() {
270+
return commitLister.listCommits(this);
228271
}
229272

230273
/**

src/main/java/org/variantsync/diffdetective/diff/git/DiffFilter.java

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,11 @@
44
import org.eclipse.jgit.diff.DiffEntry;
55
import org.eclipse.jgit.revwalk.RevCommit;
66
import org.variantsync.diffdetective.variation.diff.Time;
7+
import org.variantsync.functjonal.iteration.Yield;
78

89
import java.util.ArrayList;
910
import java.util.Arrays;
11+
import java.util.Iterator;
1012
import java.util.List;
1113

1214
/**
@@ -27,6 +29,19 @@ public class DiffFilter {
2729
.allowAllFileExtensions()
2830
.build();
2931

32+
/**
33+
* Default value for diff filters.
34+
* It disallows merge commits, only considers patches that modified files,
35+
* and only allows source files of C/C++ projects ("h", "hpp", "c", "cpp").
36+
*/
37+
public static final DiffFilter DEFAULT_DIFF_FILTER =
38+
new DiffFilter.Builder()
39+
.allowMerge(false)
40+
.allowCommitsWithoutParents(false)
41+
.allowedChangeTypes(DiffEntry.ChangeType.MODIFY)
42+
.allowedFileExtensions("h", "hpp", "c", "cpp")
43+
.build();
44+
3045
/**
3146
* A list of allowed file extensions for patches.
3247
* When this list is not empty all file extension that it does not contain will be filtered.
@@ -317,6 +332,31 @@ public boolean filter(RevCommit commit) {
317332
;
318333
}
319334

335+
/**
336+
* Filters all undesired commits from the given list of commits.
337+
* @param commitsIterator Commits to filter.
338+
* @return All commits from the given set that should not be filtered out.
339+
*/
340+
public Iterator<RevCommit> filter(final Iterator<RevCommit> commitsIterator) {
341+
return new Yield<>(
342+
() -> {
343+
while (commitsIterator.hasNext()) {
344+
final RevCommit c = commitsIterator.next();
345+
// If this commit is filtered, go to the next one.
346+
// filter returns true if we want to include the commit
347+
// so if we do not want to filter it, we do not want to have it. Thus skip.
348+
if (!filter(c)) {
349+
continue;
350+
}
351+
352+
return c;
353+
}
354+
355+
return null;
356+
}
357+
);
358+
}
359+
320360
private boolean isAllowedPath(String filename) {
321361
return allowedPaths.stream().anyMatch(filename::matches);
322362
}

0 commit comments

Comments
 (0)