Skip to content

Commit 3cb5ec8

Browse files
committed
Use a BufferedReader to parse full diffs
Using `BufferedReader`s means that all platform-dependent line endings are handled automatically. It also reduces memory usage when the input is read from a file, because the lines are no longer all duplicated in memory at the same time. Call sites which generate a `String` and therefore have to wrap it in a `StringReader` are subject to future refactoring.
1 parent 5e956b7 commit 3cb5ec8

1 file changed

Lines changed: 47 additions & 41 deletions

File tree

src/main/java/org/variantsync/diffdetective/diff/GitDiffer.java

Lines changed: 47 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,19 @@
2323
import org.variantsync.diffdetective.diff.result.CommitDiffResult;
2424
import org.variantsync.diffdetective.diff.result.DiffError;
2525
import org.variantsync.diffdetective.diff.result.DiffResult;
26+
import org.variantsync.diffdetective.util.StringUtils;
2627
import org.variantsync.functjonal.Result;
2728
import org.variantsync.functjonal.iteration.MappedIterator;
2829
import org.variantsync.functjonal.iteration.SideEffectIterator;
2930
import org.variantsync.functjonal.iteration.Yield;
3031

32+
import java.io.BufferedReader;
3133
import java.io.ByteArrayOutputStream;
3234
import java.io.IOException;
35+
import java.io.InputStreamReader;
36+
import java.io.LineNumberReader;
37+
import java.io.StringReader;
38+
import java.io.UncheckedIOException;
3339
import java.nio.charset.StandardCharsets;
3440
import java.util.ArrayList;
3541
import java.util.Iterator;
@@ -39,8 +45,6 @@
3945
import java.util.regex.Matcher;
4046
import java.util.regex.Pattern;
4147

42-
import static org.variantsync.diffdetective.util.StringUtils.LINEBREAK_REGEX;
43-
4448
/**
4549
* This class creates a GitDiff-object from a git repository (Git-object).
4650
* <p>
@@ -336,8 +340,8 @@ private static CommitDiffResult getPatchDiffs(
336340
private static DiffResult<PatchDiff> createPatchDiff(
337341
CommitDiff commitDiff,
338342
DiffEntry diffEntry,
339-
final String gitDiff,
340-
String beforeFullFile,
343+
String gitDiff,
344+
BufferedReader beforeFullFile,
341345
final ParseOptions parseOptions) {
342346
final Matcher matcher = DIFF_HEADER_PATTERN.matcher(gitDiff);
343347
final String strippedDiff;
@@ -347,7 +351,7 @@ private static DiffResult<PatchDiff> createPatchDiff(
347351
strippedDiff = gitDiff;
348352
}
349353

350-
final String fullDiff = getFullDiff(beforeFullFile, strippedDiff);
354+
final String fullDiff = getFullDiff(beforeFullFile, new BufferedReader(new StringReader(strippedDiff)));
351355
final DiffResult<DiffTree> diffTree = DiffTreeParser.createDiffTree(fullDiff, true, true, parseOptions.annotationParser());
352356

353357
// if (diffTree.isFailure()) {
@@ -376,47 +380,51 @@ private static DiffResult<PatchDiff> createPatchDiff(
376380
* @param gitDiff The git diff containing only the changed lines
377381
* @return A full git diff containing the complete file and all changes
378382
*/
379-
public static String getFullDiff(String beforeFile, String gitDiff) {
380-
String[] beforeLines = LINEBREAK_REGEX.split(beforeFile, -1);
381-
String[] diffLines = LINEBREAK_REGEX.split(gitDiff);
382-
383-
int beforeIndex = 0;
383+
public static String getFullDiff(BufferedReader beforeFile, BufferedReader gitDiff) {
384+
try {
385+
LineNumberReader before = new LineNumberReader(beforeFile);
384386

385-
List<String> fullDiffLines = new ArrayList<>();
387+
List<String> fullDiffLines = new ArrayList<>();
386388

387-
for (String diffLine : diffLines) {
388-
Matcher matcher = DIFF_HUNK_PATTERN.matcher(diffLine);
389+
String diffLine;
390+
while ((diffLine = gitDiff.readLine()) != null) {
391+
Matcher matcher = DIFF_HUNK_PATTERN.matcher(diffLine);
389392

390-
if (matcher.find()) {
391-
// found diffHunkRegex
393+
if (matcher.find()) {
394+
// found diffHunkRegex
392395

393-
// subtract 1 because line numbers start at 1
394-
int beforeDiffIndex = Integer.parseInt(matcher.group(1)) - 1;
396+
// subtract 1 because line numbers start at 1
397+
int beforeDiffIndex = Integer.parseInt(matcher.group(1)) - 1;
395398

396-
while (beforeIndex < beforeDiffIndex) {
397-
fullDiffLines.add(" " + beforeLines[beforeIndex]);
398-
beforeIndex++;
399-
}
400-
} else if (diffLine.equals(NO_NEW_LINE)) {
401-
fullDiffLines.add("\n");
402-
} else {
403-
fullDiffLines.add(diffLine);
404-
if (!diffLine.startsWith("+")) {
405-
beforeIndex++;
399+
while (before.getLineNumber() < beforeDiffIndex) {
400+
fullDiffLines.add(" " + before.readLine());
401+
}
402+
} else if (diffLine.equals(NO_NEW_LINE)) {
403+
fullDiffLines.add(StringUtils.LINEBREAK);
404+
} else {
405+
fullDiffLines.add(diffLine);
406+
if (!diffLine.startsWith("+")) {
407+
before.readLine();
408+
}
406409
}
407410
}
408-
}
409-
while (beforeIndex < beforeLines.length) {
410-
fullDiffLines.add(" " + beforeLines[beforeIndex]);
411-
beforeIndex++;
412-
}
413-
String fullDiff = String.join("\n", fullDiffLines);
414411

415-
// JGit seems to put BOMs in weird locations somewhere in the files
416-
// We need to remove those or the regex matching for the lines fails
417-
fullDiff = BOM_PATTERN.matcher(fullDiff).replaceAll("");
412+
String beforeLine;
413+
while ((beforeLine = before.readLine()) != null) {
414+
fullDiffLines.add(" " + beforeLine);
415+
}
416+
String fullDiff = String.join(StringUtils.LINEBREAK, fullDiffLines);
417+
418+
// JGit seems to put BOMs in weird locations somewhere in the files
419+
// We need to remove those or the regex matching for the lines fails
420+
fullDiff = BOM_PATTERN.matcher(fullDiff).replaceAll("");
418421

419-
return fullDiff;
422+
return fullDiff;
423+
} catch (IOException e) {
424+
// Going up the call chain, it can be seen that all callers need functions which do
425+
// not throw any checked exception, so just rethrow an unchecked one.
426+
throw new UncheckedIOException(e);
427+
}
420428
}
421429

422430
/**
@@ -426,7 +434,7 @@ public static String getFullDiff(String beforeFile, String gitDiff) {
426434
* @param filename The name of the file
427435
* @return The full content of the file before the commit
428436
*/
429-
public static DiffResult<String> getBeforeFullFile(Git git, RevCommit commit, String filename) {
437+
public static DiffResult<BufferedReader> getBeforeFullFile(Git git, RevCommit commit, String filename) {
430438
RevTree tree = commit.getTree();
431439

432440
try (TreeWalk treeWalk = new TreeWalk(git.getRepository())) {
@@ -441,9 +449,7 @@ public static DiffResult<String> getBeforeFullFile(Git git, RevCommit commit, St
441449

442450
ObjectId objectId = treeWalk.getObjectId(0);
443451
ObjectLoader loader = git.getRepository().open(objectId);
444-
ByteArrayOutputStream stream = new ByteArrayOutputStream();
445-
loader.copyTo(stream);
446-
return DiffResult.Success(stream.toString(StandardCharsets.UTF_8));
452+
return DiffResult.Success(new BufferedReader(new InputStreamReader(loader.openStream())));
447453
} catch (IOException e) {
448454
return DiffResult.Failure(DiffError.COULD_NOT_OBTAIN_FULLDIFF, "Could not obtain full diff of file " + filename + " before commit " + commit + "!");
449455
}

0 commit comments

Comments
 (0)