|
1 | 1 | package org.variantsync.diffdetective.main; |
2 | 2 |
|
| 3 | +import org.apache.commons.lang3.tuple.ImmutablePair; |
3 | 4 | import org.tinylog.Logger; |
4 | 5 | import org.variantsync.diffdetective.analysis.AutomationResult; |
5 | 6 | import org.variantsync.diffdetective.analysis.CommitHistoryAnalysisTask; |
6 | 7 | import org.variantsync.diffdetective.analysis.CommitProcessTime; |
7 | 8 | import org.variantsync.diffdetective.util.FileUtils; |
8 | | -import org.variantsync.diffdetective.util.IO; |
9 | 9 | import org.variantsync.diffdetective.util.StringUtils; |
10 | 10 |
|
11 | 11 | import java.io.IOException; |
| 12 | +import java.io.UncheckedIOException; |
12 | 13 | import java.nio.file.Files; |
13 | 14 | import java.nio.file.Path; |
14 | 15 | import java.util.ArrayList; |
15 | 16 | import java.util.Arrays; |
16 | 17 | import java.util.Comparator; |
17 | 18 | import java.util.List; |
| 19 | +import java.util.stream.Collectors; |
| 20 | +import java.util.stream.Stream; |
18 | 21 |
|
19 | 22 | public class FindMedianCommitTime { |
20 | 23 | public static final int NUM_EXPECTED_COMMITS = 1_708_172; |
@@ -44,68 +47,77 @@ public static AutomationResult getResultOfDirectory(final Path directory, int nu |
44 | 47 | throw new IllegalArgumentException("Expected path to directory but the given path is not a directory!"); |
45 | 48 | } |
46 | 49 |
|
47 | | - final List<Path> paths = Files.walk(directory) |
48 | | - .filter(Files::isRegularFile) |
| 50 | + // This stream needs {@code O(n log(n))} time (because of the sort) and {@code O(n)} space |
| 51 | + // (because the list has to be captured). This can be improved to {@code O(n)} time and |
| 52 | + // {@code O(1)} space by using the Median of medians algorithm and computing the minimum and |
| 53 | + // maximum like {@code LongSummaryStatistics} does. |
| 54 | + ImmutablePair<Long, List<CommitProcessTime>> result; |
| 55 | + try (Stream<Path> paths = Files.walk(directory)) { |
| 56 | + result = paths |
| 57 | + .parallel() |
49 | 58 | .filter(p -> FileUtils.hasExtension(p, CommitHistoryAnalysisTask.COMMIT_TIME_FILE_EXTENSION)) |
| 59 | + .filter(Files::isRegularFile) |
50 | 60 | // .peek(path -> Logger.info("Processing file {}", path)) |
51 | | - .toList(); |
| 61 | + .flatMap(FindMedianCommitTime::parse) |
| 62 | + .sorted(Comparator.comparingDouble(CommitProcessTime::milliseconds)) |
| 63 | + .collect(Collectors.teeing( |
| 64 | + Collectors.summingLong(CommitProcessTime::milliseconds), |
| 65 | + Collectors.toList(), |
| 66 | + ImmutablePair::new) |
| 67 | + ); |
| 68 | + } catch (UncheckedIOException e) { |
| 69 | + throw e.getCause(); |
| 70 | + } |
52 | 71 |
|
53 | | - final ArrayList<CommitProcessTime> alltimes = new ArrayList<>(numExpectedCommits); |
| 72 | + long totalTimeMS = result.getLeft(); |
| 73 | + List<CommitProcessTime> alltimes = result.getRight(); |
54 | 74 |
|
55 | | - long totalTimeMS = 0; |
56 | | - for (final Path p : paths) { |
57 | | - totalTimeMS += parse(p, alltimes); |
| 75 | + if (alltimes.size() != numExpectedCommits) { |
| 76 | + Logger.error("Expected {} commits but got {}! {} commits are missing!", |
| 77 | + numExpectedCommits, |
| 78 | + alltimes.size(), |
| 79 | + numExpectedCommits - alltimes.size()); |
58 | 80 | } |
59 | 81 |
|
60 | | - final CommitProcessTime[] alltimesArray = alltimes.toArray(CommitProcessTime[]::new); |
61 | | - Arrays.parallelSort(alltimesArray, Comparator.comparingDouble(CommitProcessTime::milliseconds)); |
62 | | - final int numTotalCommits = alltimesArray.length; |
63 | | - |
64 | | - final AutomationResult automationResult; |
65 | | - if (numTotalCommits == 0) { |
| 82 | + if (alltimes.size() == 0) { |
66 | 83 | final String repoName = directory.getFileName().toString(); |
67 | | - automationResult = new AutomationResult( |
68 | | - numTotalCommits, |
| 84 | + return new AutomationResult( |
| 85 | + alltimes.size(), |
69 | 86 | totalTimeMS, |
70 | 87 | CommitProcessTime.Invalid(repoName), |
71 | 88 | CommitProcessTime.Invalid(repoName), |
72 | 89 | CommitProcessTime.Invalid(repoName) |
73 | 90 | ); |
74 | 91 | } else { |
75 | | - automationResult = new AutomationResult( |
76 | | - numTotalCommits, |
| 92 | + return new AutomationResult( |
| 93 | + alltimes.size(), |
77 | 94 | totalTimeMS, |
78 | | - alltimesArray[0], |
79 | | - alltimesArray[alltimesArray.length - 1], |
80 | | - alltimesArray[alltimesArray.length / 2] |
| 95 | + alltimes.get(0), |
| 96 | + alltimes.get(alltimes.size() - 1), |
| 97 | + alltimes.get(alltimes.size() / 2) |
81 | 98 | ); |
82 | 99 | } |
83 | | - |
84 | | - if (automationResult.numMeasuredCommits() != numExpectedCommits) { |
85 | | - Logger.error("Expected {} commits but got {}! {} commits are missing!", |
86 | | - numExpectedCommits, |
87 | | - automationResult.numMeasuredCommits(), |
88 | | - (numExpectedCommits - automationResult.numMeasuredCommits())); |
89 | | - } |
90 | | - |
91 | | - return automationResult; |
92 | 100 | } |
93 | 101 |
|
94 | | - private static long parse(final Path file, final List<CommitProcessTime> times) throws IOException { |
95 | | - final String fileInput = IO.readAsString(file); |
96 | | - final String[] lines = fileInput.split(StringUtils.LINEBREAK_REGEX); |
97 | | - long sumSeconds = 0; |
98 | | - |
99 | | - for (final String line : lines) { |
100 | | - if (!line.isBlank()) { |
101 | | - final CommitProcessTime lineTime = CommitProcessTime.fromString(line); |
102 | | - sumSeconds += lineTime.milliseconds(); |
103 | | - times.add(lineTime); |
104 | | - } else { |
105 | | - Logger.warn("Found blank line '{}' in {}", line, file); |
106 | | - } |
| 102 | + private static Stream<CommitProcessTime> parse(final Path file) { |
| 103 | + try { |
| 104 | + // This stream has to be closed by the caller of {@code parse}, because the returned |
| 105 | + // stream is not consumed inside of this method. |
| 106 | + return Files.lines(file) |
| 107 | + .filter( |
| 108 | + line -> { |
| 109 | + if (line.isBlank()) { |
| 110 | + Logger.warn("Found blank line in {}", file); |
| 111 | + return false; |
| 112 | + } else { |
| 113 | + return true; |
| 114 | + } |
| 115 | + } |
| 116 | + ).map(CommitProcessTime::fromString); |
| 117 | + } catch (IOException e) { |
| 118 | + // Checked exceptions can't be propagated because the caller of {@code parse} requires |
| 119 | + // a method which doesn't throw any checked exception. |
| 120 | + throw new UncheckedIOException(e); |
107 | 121 | } |
108 | | - |
109 | | - return sumSeconds; |
110 | 122 | } |
111 | 123 | } |
0 commit comments