Skip to content

Commit b27d445

Browse files
committed
Use streams to parse the dataset description
Streams make the code easier to understand and maintain. At the same time, the code uses less memory because the file is read lazily rather than being loaded in full first. By using `Files.lines`, all platform-dependent line endings are also handled automatically.
1 parent fc0c087 commit b27d445

1 file changed

Lines changed: 16 additions & 23 deletions

File tree

src/main/java/org/variantsync/diffdetective/datasets/DatasetDescription.java

Lines changed: 16 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
package org.variantsync.diffdetective.datasets;
22

3-
import org.variantsync.diffdetective.util.FileUtils;
43
import org.variantsync.diffdetective.util.LaTeX;
54
import org.variantsync.diffdetective.util.StringUtils;
65

76
import java.io.IOException;
7+
import java.nio.file.Files;
88
import java.nio.file.Path;
9-
import java.util.ArrayList;
109
import java.util.List;
10+
import java.util.stream.Collectors;
11+
import java.util.stream.Stream;
1112

1213
public record DatasetDescription(
1314
String name,
@@ -16,28 +17,20 @@ public record DatasetDescription(
1617
String commits
1718
) {
1819
public static List<DatasetDescription> fromMarkdown(final Path markdownFile) throws IOException {
19-
final String markdown = FileUtils.readUTF8(markdownFile);
20-
final String[] lines = markdown.split(StringUtils.LINEBREAK_REGEX);
21-
22-
final List<DatasetDescription> datasets = new ArrayList<>(lines.length - 2);
23-
// Start at 2 to skip header and separator line of table
24-
for (int i = 2; i < lines.length; ++i) {
25-
final String[] cells = lines[i].split("\\|");
26-
27-
final String hasCode = cells[2];
28-
final String isGitRepo = cells[3];
29-
30-
if (isYes(hasCode) && isYes(isGitRepo)) {
31-
datasets.add(new DatasetDescription(
32-
cells[0].trim(), // name
33-
cells[5].trim(), // clone URL,
34-
cells[1].trim(), // domain
35-
cells[6].trim() // #commits
36-
));
37-
}
20+
try (Stream<String> lines = Files.lines(markdownFile)) {
21+
return lines
22+
.skip(2) // Skip header
23+
.map(line -> line.split("\\|"))
24+
.filter(cells ->
25+
isYes(cells[2]) && // hasCode
26+
isYes(cells[3]) // isGitRepo
27+
).map(cells -> new DatasetDescription(
28+
cells[0].trim(), // name
29+
cells[5].trim(), // clone URL
30+
cells[1].trim(), // domain
31+
cells[6].trim()) // #commits
32+
).collect(Collectors.toList());
3833
}
39-
40-
return datasets;
4134
}
4235

4336
public static String asLaTeXTable(final List<DatasetDescription> datasets) {

0 commit comments

Comments
 (0)