11package org .variantsync .diffdetective .datasets ;
22
3- import org .variantsync .diffdetective .util .FileUtils ;
43import org .variantsync .diffdetective .util .LaTeX ;
54import org .variantsync .diffdetective .util .StringUtils ;
65
76import java .io .IOException ;
7+ import java .nio .file .Files ;
88import java .nio .file .Path ;
9- import java .util .ArrayList ;
109import java .util .List ;
10+ import java .util .stream .Collectors ;
11+ import java .util .stream .Stream ;
1112
1213public record DatasetDescription (
1314 String name ,
@@ -16,28 +17,20 @@ public record DatasetDescription(
1617 String commits
1718) {
1819 public static List <DatasetDescription > fromMarkdown (final Path markdownFile ) throws IOException {
19- final String markdown = FileUtils .readUTF8 (markdownFile );
20- final String [] lines = markdown .split (StringUtils .LINEBREAK_REGEX );
21-
22- final List <DatasetDescription > datasets = new ArrayList <>(lines .length - 2 );
23- // Start at 2 to skip header and separator line of table
24- for (int i = 2 ; i < lines .length ; ++i ) {
25- final String [] cells = lines [i ].split ("\\ |" );
26-
27- final String hasCode = cells [2 ];
28- final String isGitRepo = cells [3 ];
29-
30- if (isYes (hasCode ) && isYes (isGitRepo )) {
31- datasets .add (new DatasetDescription (
32- cells [0 ].trim (), // name
33- cells [5 ].trim (), // clone URL,
34- cells [1 ].trim (), // domain
35- cells [6 ].trim () // #commits
36- ));
37- }
20+ try (Stream <String > lines = Files .lines (markdownFile )) {
21+ return lines
22+ .skip (2 ) // Skip header
23+ .map (line -> line .split ("\\ |" ))
24+ .filter (cells ->
25+ isYes (cells [2 ]) && // hasCode
26+ isYes (cells [3 ]) // isGitRepo
27+ ).map (cells -> new DatasetDescription (
28+ cells [0 ].trim (), // name
29+ cells [5 ].trim (), // clone URL
30+ cells [1 ].trim (), // domain
31+ cells [6 ].trim ()) // #commits
32+ ).collect (Collectors .toList ());
3833 }
39-
40- return datasets ;
4134 }
4235
4336 public static String asLaTeXTable (final List <DatasetDescription > datasets ) {
0 commit comments