Skip to content

Commit f67b0b1

Browse files
committed
Use streams to aggregate commit times
Streams make the code easier to understand and maintain. By using `Files.lines` all the platform dependent line endings are also handled automatically. Because streams do not handle `IOException`s it is necessary to wrap them in `UncheckedIOException`s and unpack them outside the stream. The Apache version `org.apache.commons.lang3.stream.Streams.FailableStream` would have been a great substitute, but it doesn't provide a `flatMap` method. The order of `hasExtension` and `isRegularFile` changed to limit (comparably) slow IO actions by performing pure filters first.
1 parent b27d445 commit f67b0b1

1 file changed

Lines changed: 57 additions & 45 deletions

File tree

Lines changed: 57 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,23 @@
11
package org.variantsync.diffdetective.main;
22

3+
import org.apache.commons.lang3.tuple.ImmutablePair;
34
import org.tinylog.Logger;
45
import org.variantsync.diffdetective.analysis.AutomationResult;
56
import org.variantsync.diffdetective.analysis.CommitHistoryAnalysisTask;
67
import org.variantsync.diffdetective.analysis.CommitProcessTime;
78
import org.variantsync.diffdetective.util.FileUtils;
8-
import org.variantsync.diffdetective.util.IO;
99
import org.variantsync.diffdetective.util.StringUtils;
1010

1111
import java.io.IOException;
12+
import java.io.UncheckedIOException;
1213
import java.nio.file.Files;
1314
import java.nio.file.Path;
1415
import java.util.ArrayList;
1516
import java.util.Arrays;
1617
import java.util.Comparator;
1718
import java.util.List;
19+
import java.util.stream.Collectors;
20+
import java.util.stream.Stream;
1821

1922
public class FindMedianCommitTime {
2023
public static final int NUM_EXPECTED_COMMITS = 1_708_172;
@@ -44,68 +47,77 @@ public static AutomationResult getResultOfDirectory(final Path directory, int nu
4447
throw new IllegalArgumentException("Expected path to directory but the given path is not a directory!");
4548
}
4649

47-
final List<Path> paths = Files.walk(directory)
48-
.filter(Files::isRegularFile)
50+
// This stream needs {@code O(n log(n))} time (because of the sort) and {@code O(n)} space
51+
// (because the list has to be captured). This can be improved to {@code O(n)} time and
52+
// {@code O(1)} space by using the Median of medians algorithm and computing the minimum and
53+
// maximum like {@code LongSummaryStatistics} does.
54+
ImmutablePair<Long, List<CommitProcessTime>> result;
55+
try (Stream<Path> paths = Files.walk(directory)) {
56+
result = paths
57+
.parallel()
4958
.filter(p -> FileUtils.hasExtension(p, CommitHistoryAnalysisTask.COMMIT_TIME_FILE_EXTENSION))
59+
.filter(Files::isRegularFile)
5060
// .peek(path -> Logger.info("Processing file {}", path))
51-
.toList();
61+
.flatMap(FindMedianCommitTime::parse)
62+
.sorted(Comparator.comparingDouble(CommitProcessTime::milliseconds))
63+
.collect(Collectors.teeing(
64+
Collectors.summingLong(CommitProcessTime::milliseconds),
65+
Collectors.toList(),
66+
ImmutablePair::new)
67+
);
68+
} catch (UncheckedIOException e) {
69+
throw e.getCause();
70+
}
5271

53-
final ArrayList<CommitProcessTime> alltimes = new ArrayList<>(numExpectedCommits);
72+
long totalTimeMS = result.getLeft();
73+
List<CommitProcessTime> alltimes = result.getRight();
5474

55-
long totalTimeMS = 0;
56-
for (final Path p : paths) {
57-
totalTimeMS += parse(p, alltimes);
75+
if (alltimes.size() != numExpectedCommits) {
76+
Logger.error("Expected {} commits but got {}! {} commits are missing!",
77+
numExpectedCommits,
78+
alltimes.size(),
79+
numExpectedCommits - alltimes.size());
5880
}
5981

60-
final CommitProcessTime[] alltimesArray = alltimes.toArray(CommitProcessTime[]::new);
61-
Arrays.parallelSort(alltimesArray, Comparator.comparingDouble(CommitProcessTime::milliseconds));
62-
final int numTotalCommits = alltimesArray.length;
63-
64-
final AutomationResult automationResult;
65-
if (numTotalCommits == 0) {
82+
if (alltimes.size() == 0) {
6683
final String repoName = directory.getFileName().toString();
67-
automationResult = new AutomationResult(
68-
numTotalCommits,
84+
return new AutomationResult(
85+
alltimes.size(),
6986
totalTimeMS,
7087
CommitProcessTime.Invalid(repoName),
7188
CommitProcessTime.Invalid(repoName),
7289
CommitProcessTime.Invalid(repoName)
7390
);
7491
} else {
75-
automationResult = new AutomationResult(
76-
numTotalCommits,
92+
return new AutomationResult(
93+
alltimes.size(),
7794
totalTimeMS,
78-
alltimesArray[0],
79-
alltimesArray[alltimesArray.length - 1],
80-
alltimesArray[alltimesArray.length / 2]
95+
alltimes.get(0),
96+
alltimes.get(alltimes.size() - 1),
97+
alltimes.get(alltimes.size() / 2)
8198
);
8299
}
83-
84-
if (automationResult.numMeasuredCommits() != numExpectedCommits) {
85-
Logger.error("Expected {} commits but got {}! {} commits are missing!",
86-
numExpectedCommits,
87-
automationResult.numMeasuredCommits(),
88-
(numExpectedCommits - automationResult.numMeasuredCommits()));
89-
}
90-
91-
return automationResult;
92100
}
93101

94-
private static long parse(final Path file, final List<CommitProcessTime> times) throws IOException {
95-
final String fileInput = IO.readAsString(file);
96-
final String[] lines = fileInput.split(StringUtils.LINEBREAK_REGEX);
97-
long sumSeconds = 0;
98-
99-
for (final String line : lines) {
100-
if (!line.isBlank()) {
101-
final CommitProcessTime lineTime = CommitProcessTime.fromString(line);
102-
sumSeconds += lineTime.milliseconds();
103-
times.add(lineTime);
104-
} else {
105-
Logger.warn("Found blank line '{}' in {}", line, file);
106-
}
102+
private static Stream<CommitProcessTime> parse(final Path file) {
103+
try {
104+
// This stream has to be closed by the caller of {@code parse}, because the returned
105+
// stream is not consumed inside of this method.
106+
return Files.lines(file)
107+
.filter(
108+
line -> {
109+
if (line.isBlank()) {
110+
Logger.warn("Found blank line in {}", file);
111+
return false;
112+
} else {
113+
return true;
114+
}
115+
}
116+
).map(CommitProcessTime::fromString);
117+
} catch (IOException e) {
118+
// Checked exceptions can't be propagated because the caller of {@code parse} requires
119+
// a method wich doesn't throw any checked exception.
120+
throw new UncheckedIOException(e);
107121
}
108-
109-
return sumSeconds;
110122
}
111123
}

0 commit comments

Comments
 (0)