Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -132,14 +132,14 @@ public static void run(Options options, BiConsumer<Repository, Path> validation)
if (options.preloadReposBeforeAnalysis()) {
Logger.info("Preloading repositories:");
for (final Repository repo : repos) {
repo.getGitRepo().run();
repo.preload();
}

if (options.pullRepositoriesBeforeAnalysis()) {
Logger.info("Updating repositories:");
for (final Repository repo : repos) {
try {
Assert.assertTrue(repo.getGitRepo().run().pull().call().isSuccessful());
Assert.assertTrue(repo.getGitRepo().pull().call().isSuccessful());
} catch (GitAPIException e) {
Logger.error(e, "Failed to pull repository '{}'", repo.getRepositoryName());
}
Expand Down
18 changes: 10 additions & 8 deletions src/main/java/org/variantsync/diffdetective/analysis/Analysis.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

import org.apache.commons.lang3.function.FailableBiConsumer;
import org.apache.commons.lang3.function.FailableBiFunction;
import org.eclipse.jgit.api.Git;
import org.eclipse.jgit.revwalk.RevCommit;
import org.tinylog.Logger;
import org.variantsync.diffdetective.analysis.AnalysisResult.ResultKey;
Expand Down Expand Up @@ -60,7 +61,7 @@ public class Analysis {
protected final List<Hooks> hooks;
protected final Repository repository;

protected GitDiffer differ;
protected Git git;
protected RevCommit currentCommit;
protected CommitDiff currentCommitDiff;
protected PatchDiff currentPatch;
Expand Down Expand Up @@ -307,16 +308,14 @@ public static void forEachRepository(
* @param analysis the analysis to run
*/
public static AnalysisResult forSingleCommit(final String commitHash, final Analysis analysis) {
analysis.differ = new GitDiffer(analysis.getRepository());

final Clock clock = new Clock();
// prepare tasks
Logger.info(">>> Running Analysis on single commit {} in {}", commitHash, analysis.getRepository().getRepositoryName());
clock.start();

AnalysisResult result = null;
try {
final RevCommit commit = analysis.differ.getCommit(commitHash);
final RevCommit commit = analysis.getRepository().getCommit(commitHash);
analysis.processCommitBatch(List.of(commit));
result = analysis.getResult();
} catch (Exception e) {
Expand Down Expand Up @@ -397,7 +396,6 @@ public static AnalysisResult forEachCommit(
final int nThreads
) {
var analysis = analysisFactory.get();
analysis.differ = new GitDiffer(analysis.getRepository());
analysis.result.append(RuntimeWithMultithreadingResult.KEY, new RuntimeWithMultithreadingResult());

final Clock clock = new Clock();
Expand All @@ -409,14 +407,18 @@ public static AnalysisResult forEachCommit(
final Iterator<Callable<AnalysisResult>> tasks = new MappedIterator<>(
/// 1.) Retrieve commitsToProcessPerThread commits from the repository's filtered commits and cluster them into one list.
new ClusteredIterator<>(
analysis.differ.yieldRevCommitsAfter(numberOfTotalCommits),
analysis.getRepository().getDiffFilter().filter(
new MappedIterator<>(
analysis.getRepository().getCommits(),
numberOfTotalCommits
)
),
commitsToProcessPerThread
),
/// 2.) Create a MiningTask for the list of commits. This task will then be processed by one
/// particular thread.
commitList -> () -> {
Analysis thisThreadsAnalysis = analysisFactory.get();
thisThreadsAnalysis.differ = analysis.differ;
thisThreadsAnalysis.processCommitBatch(commitList);
return thisThreadsAnalysis.getResult();
}
Expand Down Expand Up @@ -519,7 +521,7 @@ protected void processCommitBatch(List<RevCommit> commits) throws Exception {

protected void processCommit() throws Exception {
// parse the commit
final CommitDiffResult commitDiffResult = differ.createCommitDiff(currentCommit);
final CommitDiffResult commitDiffResult = GitDiffer.createCommitDiffFromFirstParent(repository, currentCommit);

// report any errors that occurred and exit in case no VariationDiff could be parsed.
getResult().reportDiffErrors(commitDiffResult.errors());
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
package org.variantsync.diffdetective.datasets;

import java.io.IOException;
import java.util.Iterator;

import org.eclipse.jgit.api.errors.GitAPIException;
import org.eclipse.jgit.revwalk.RevCommit;
import org.tinylog.Logger;

/**
 * A functional interface for listing the commits in a {@link Repository}.
 * Apart from listing a fixed set of commits, this is mainly useful to configure a
 * {@link org.eclipse.jgit.api.Git#log() Git log} call.
 * <p>
 * This is mainly intended to be used as the {@code commitLister} argument of the
 * {@link Repository} constructor.
 */
@FunctionalInterface
public interface CommitLister {
    /**
     * Lists the commits of the given repository that this lister selects.
     *
     * @param repository the repository whose commits are listed
     * @return an iterator over the selected commits
     */
    Iterator<RevCommit> listCommits(Repository repository);

    /**
     * List all commits reachable by the current {@code HEAD} of the repository.
     * If the Git log call fails, a warning is logged and the {@link GitAPIException}
     * is rethrown wrapped in a {@link RuntimeException}.
     */
    CommitLister TraverseHEAD =
        repository -> {
            try {
                return repository.getGitRepo().log().call().iterator();
            } catch (GitAPIException e) {
                Logger.warn("Could not get log for git repository {}", repository.getRepositoryName());
                throw new RuntimeException(e);
            }
        };

    /**
     * List all commits reachable from any ref of the repository (for example, from all branches),
     * as selected by {@link org.eclipse.jgit.api.LogCommand#all()}.
     * If the Git log call fails, a warning is logged and the causing exception
     * is rethrown wrapped in a {@link RuntimeException}.
     */
    CommitLister AllCommits =
        repository -> {
            try {
                return repository.getGitRepo().log().all().call().iterator();
            } catch (GitAPIException | IOException e) {
                Logger.warn("Could not get log for git repository {}", repository.getRepositoryName());
                throw new RuntimeException(e);
            }
        };
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,23 +33,6 @@ public class DatasetFactory {
*/
public static final String PHP = "PHP";

/**
* Default value for diff filters.
* It disallows merge commits, only considers patches that modified files,
* and only allows source files of C/C++ projects ("h", "hpp", "c", "cpp").
*/
public static final DiffFilter DEFAULT_DIFF_FILTER =
new DiffFilter.Builder()
.allowMerge(false)
.allowCommitsWithoutParents(false)
.allowedChangeTypes(DiffEntry.ChangeType.MODIFY)
.allowedFileExtensions("h", "hpp", "c", "cpp")
.build();
// public static final DiffFilter PHP_DIFF_FILTER =
// new DiffFilter.Builder(DEFAULT_DIFF_FILTER)
//// .blockedPaths("ext/fileinfo/data_file.c")
// .build();

private final Path cloneDirectory;

/**
Expand All @@ -69,10 +52,7 @@ public static DiffFilter getDefaultDiffFilterFor(final String repositoryName) {
if (repositoryName.equalsIgnoreCase(MARLIN)) {
return StanciulescuMarlin.DIFF_FILTER;
}
// if (repositoryName.equalsIgnoreCase(PHP)) {
// return PHP_DIFF_FILTER;
// }
return DEFAULT_DIFF_FILTER;
return DiffFilter.DEFAULT_DIFF_FILTER;
}

/**
Expand Down Expand Up @@ -124,15 +104,15 @@ public List<Repository> createAll(final Collection<DatasetDescription> datasets,
if (preload) {
Logger.info("Preloading repositories:");
for (final Repository repo : repos) {
repo.getGitRepo().run();
repo.preload();
}
}

if (pull) {
Logger.info("Pulling repositories:");
for (final Repository repo : repos) {
try {
Assert.assertTrue(repo.getGitRepo().run().pull().call().isSuccessful());
Assert.assertTrue(repo.getGitRepo().pull().call().isSuccessful());
} catch (GitAPIException e) {
Logger.error(e, "Failed to pull repository '{}'", repo.getRepositoryName());
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,18 @@
package org.variantsync.diffdetective.datasets;

import org.eclipse.jgit.api.Git;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.revwalk.RevCommit;
import org.tinylog.Logger;
import org.variantsync.diffdetective.diff.git.DiffFilter;
import org.variantsync.diffdetective.load.GitLoader;
import org.variantsync.diffdetective.util.IO;
import org.variantsync.functjonal.Lazy;

import java.io.IOException;
import java.net.URI;
import java.nio.file.Path;
import java.util.Iterator;
import java.util.Optional;

/**
Expand Down Expand Up @@ -39,6 +43,11 @@ public class Repository {
*/
private final String repositoryName;

/**
* A function that extracts the list of commits that are represented by this repository instance.
*/
private CommitLister commitLister;

/**
* Filter determining which files and commits to consider for diffs.
*/
Expand All @@ -50,14 +59,15 @@ public class Repository {
private PatchDiffParseOptions parseOptions;

private final Lazy<Git> git = Lazy.of(this::load);

/**
* Creates a repository.
*
* @param repoLocation {@link RepositoryLocationType} From which location the repository is read from
* @param localPath The local path where the repository can be found or should be cloned to.
* @param remote The remote url of the repository. May be <code>null</code> if local.
* @param repositoryName Name of the cloned repository (<code>null</code> if local)
* @param commitLister extracts the commits from {@link #getGitRepo()} that should be represented.
* @param parseOptions Omit some debug data to save RAM.
* @param diffFilter Filter determining which files and commits to consider for diffs.
*/
Expand All @@ -66,27 +76,38 @@ public Repository(
final Path localPath,
final URI remote,
final String repositoryName,
final CommitLister commitLister,
final PatchDiffParseOptions parseOptions,
final DiffFilter diffFilter) {
this.repoLocation = repoLocation;
this.localPath = localPath;
this.remote = remote;
this.repositoryName = repositoryName;
this.commitLister = commitLister;
this.parseOptions = parseOptions;
this.diffFilter = diffFilter;
}

/**
* Creates repository of the given source and with all other settings set to default values.
* @see Repository
* <p>
* Defaults to {@link CommitLister#TraverseHEAD}, {@link PatchDiffParseOptions#Default} and {@link DiffFilter#ALLOW_ALL}.
*/
public Repository(
final RepositoryLocationType repoLocation,
final Path localPath,
final URI remote,
final String repositoryName) {
this(repoLocation, localPath, remote, repositoryName,
PatchDiffParseOptions.Default, DiffFilter.ALLOW_ALL);
this(
repoLocation,
localPath,
remote,
repositoryName,
CommitLister.TraverseHEAD,
PatchDiffParseOptions.Default,
DiffFilter.ALLOW_ALL
);
}

/**
Expand Down Expand Up @@ -223,8 +244,30 @@ public PatchDiffParseOptions getParseOptions() {
/**
* Returns the internal jgit representation of this repository that allows to inspect the repositories history and content.
*/
public Lazy<Git> getGitRepo() {
return git;
public Git getGitRepo() {
return git.run();
}

/**
 * Prepares the Git repository (e.g., clones it if necessary).
 * <p>
 * This only calls {@link #getGitRepo()} and discards the result, which forces the
 * lazily created Git handle to be loaded ahead of its first real use.
 */
public void preload() {
getGitRepo();
}

/**
 * Looks up a single commit of this repository by its hash.
 * Note that this commit may not be part of {@link #getCommits}.
 *
 * @param commitHash the hash of the commit; it is parsed with {@link ObjectId#fromString(String)},
 *                   which expects a complete hexadecimal object id
 * @return the parsed commit
 * @throws IOException if the commit cannot be read from the underlying Git repository
 */
public RevCommit getCommit(String commitHash) throws IOException {
    // Access the Git handle first (as before), then parse the hash.
    final org.eclipse.jgit.lib.Repository jgitRepository = getGitRepo().getRepository();
    final ObjectId commitId = ObjectId.fromString(commitHash);
    return jgitRepository.parseCommit(commitId);
}

/**
 * Returns the commits of this repository as selected by the configured {@link CommitLister}.
 * Which part of the history is covered therefore depends on that lister.
 * The repository's diff filter is not applied here.
 *
 * @return an iterator over the listed commits
 */
public Iterator<RevCommit> getCommits() {
return commitLister.listCommits(this);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@
import org.eclipse.jgit.diff.DiffEntry;
import org.eclipse.jgit.revwalk.RevCommit;
import org.variantsync.diffdetective.variation.diff.Time;
import org.variantsync.functjonal.iteration.Yield;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;

/**
Expand All @@ -27,6 +29,19 @@ public class DiffFilter {
.allowAllFileExtensions()
.build();

/**
 * Default value for diff filters.
 * It disallows merge commits and commits without parents,
 * only considers patches that modified files,
 * and only allows source files of C/C++ projects ("h", "hpp", "c", "cpp").
 */
public static final DiffFilter DEFAULT_DIFF_FILTER =
new DiffFilter.Builder()
.allowMerge(false)
.allowCommitsWithoutParents(false)
.allowedChangeTypes(DiffEntry.ChangeType.MODIFY)
.allowedFileExtensions("h", "hpp", "c", "cpp")
.build();

/**
* A list of allowed file extensions for patches.
* When this list is not empty all file extension that it does not contain will be filtered.
Expand Down Expand Up @@ -317,6 +332,31 @@ public boolean filter(RevCommit commit) {
;
}

/**
 * Removes all commits rejected by this filter from the given commits.
 * The source iterator is advanced only when the supplier handed to the returned
 * {@link Yield} is invoked; a commit is passed on exactly when
 * {@link #filter(RevCommit)} returns {@code true} for it.
 *
 * @param commitsIterator Commits to filter. It is consumed by the returned iterator.
 * @return The commits accepted by this filter, in the order of the given iterator.
 */
public Iterator<RevCommit> filter(final Iterator<RevCommit> commitsIterator) {
    return new Yield<>(() -> {
        while (commitsIterator.hasNext()) {
            final RevCommit candidate = commitsIterator.next();
            // filter(RevCommit) answers true for commits that should be kept.
            if (filter(candidate)) {
                return candidate;
            }
        }

        // The source iterator has no accepted commit left.
        return null;
    });
}

private boolean isAllowedPath(String filename) {
return allowedPaths.stream().anyMatch(filename::matches);
}
Expand Down
Loading