Skip to content

Commit a9009d3

Browse files
committed
Document the most important parts of my thesis code
1 parent 29a9064 commit a9009d3

2 files changed

Lines changed: 91 additions & 0 deletions

File tree

src/main/java/org/variantsync/diffdetective/validation/ConstructionValidation.java

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,10 +51,24 @@
5151
import static org.variantsync.diffdetective.variation.diff.Time.BEFORE;
5252

5353
/**
54+
* Validates, evaluates and benchmarks the construction of {@link DiffTree}s using Gumtree.
55+
*
56+
* This experiment computes the variation diff from
57+
* <ol>
58+
* <li>a line matching ({@link DiffTreeParser#createDiffTree Viegener's algorithm})
59+
* <li>a tree matching computed by Gumtree ({@link DiffTree#compareUsingMatching})
60+
* <li>a hybrid matching ({@link DiffTree#improveMatching})
61+
* </ol>
62+
* compares them using some quality metrics and stores timing statistics.
63+
*
5464
* @author Benjamin Moosherr
65+
* @see "Constructing Variation Diffs Using Tree Diffing Algorithms"
5566
*/
5667
public class ConstructionValidation implements Analysis.Hooks {
5768
public static final ResultKey<Result> RESULT = new ResultKey<>("ConstructionValidation");
69+
/**
70+
* Aggregate of the results of the three comparisons.
71+
*/
5872
public static final class Result implements Metadata<Result> {
5973
public ComparisonResult[] comparisons = new ComparisonResult[] {
6074
new ComparisonResult("old vs new"),
@@ -88,11 +102,19 @@ public void setFromSnapshot(LinkedHashMap<String, String> snap) {
88102
}
89103
}
90104

105+
/**
106+
* Timing of a variation diff construction with a specific matching algorithm and quality results compared to another variation diff.
107+
*/
91108
public static final class ComparisonResult implements Metadata<ComparisonResult> {
92109
public String name;
110+
/** Duration of the matching computation. */
93111
public long comparisonDuration = 0;
112+
/** How many variation diffs are equal to the compared variation diff. */
94113
public int equal = 0;
114+
/** How many variation diffs are different to the compared variation diff. */
95115
public int different = 0;
116+
/** Counts of edit class flows (edit class pair of a projection of the compared
117+
* variation diffs) */
96118
public MergeMap<EditClass, MergeMap<EditClass, Integer>> editClassMovements =
97119
new MergeMap<>(new HashMap<>(), (a, b) -> {
98120
a.putAll(b);
@@ -352,6 +374,11 @@ private void counts(DiffTree tree, DiffTreeStatistics statistics) {
352374
private DiffTree parseVariationTree(Analysis analysis, RevCommit commit) throws IOException, DiffParseException {
353375
try (BufferedReader afterFile =
354376
new BufferedReader(
377+
/*
378+
* JGit may insert a BOM (byte order mark, a Unicode feature) at unfortunate places
379+
* (e.g. at the start of a diff, right before a {@code +}, {@code -} or space).
380+
* Hence, BOMs need to be removed. A similar heuristic is implemented in {@link GitDiffer#getFullDiff()}.
381+
*/
355382
new CharacterFilterReader(
356383
GitDiffer.getBeforeFullFile(
357384
analysis.getRepository().getGitRepo().run(),

src/main/java/org/variantsync/diffdetective/variation/diff/DiffTree.java

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -501,11 +501,14 @@ public static <A extends VariationNode<A>, B extends VariationNode<B>> DiffNode
501501
* {@code matcher}. The result of this function is {@code before} which is modified in-place. In
502502
* contrast, {@code after} is kept intact.
503503
*
504+
* Warning: Modifications to {@code before} shouldn't concurrently modify {@code after}.
505+
*
504506
* Note: There are currently no guarantees about the line numbers. But it is guaranteed that
505507
* {@link DiffNode#getID} is unique.
506508
*
507509
* @param before the variation tree before an edit
508510
* @param after the variation tree after an edit
511+
* @see "Constructing Variation Diffs Using Tree Diffing Algorithms"
509512
*/
510513
public static <B extends VariationNode<B>> DiffNode compareUsingMatching(
511514
DiffNode before,
@@ -533,6 +536,13 @@ public static <B extends VariationNode<B>> DiffNode compareUsingMatching(
533536
return before;
534537
}
535538

539+
/**
540+
* Remove all nodes from the {@code BEFORE} projection which aren't part of a mapping.
541+
*
542+
* @param mappings the matching between the {@code BEFORE} projection of {@code root} and some
543+
* variation tree
544+
* @param root the variation diff whose before projection is modified
545+
*/
536546
private static void removeUnmapped(MappingStore mappings, DiffTreeAdapter root) {
537547
for (var node : root.preOrder()) {
538548
Tree dst = mappings.getDstForSrc(node);
@@ -544,6 +554,18 @@ private static void removeUnmapped(MappingStore mappings, DiffTreeAdapter root)
544554
}
545555
}
546556

557+
/**
558+
* Recursively adds {@code afterNode} to {@code parent} reusing matched nodes.
559+
*
560+
* The variation diff {@code parent} is modified in-place such that its {@code AFTER}
561+
* projection contains a child equivalent to {@code afterNode} which shares matched nodes with
562+
* the {@code BEFORE} projection of {@code parent}.
563+
*
564+
* @param mappings the matching between the {@code BEFORE} projection of {@code root} and a
565+
* variation tree containing {@code afterNode}
566+
* @param parent the variation diff whose {@code AFTER} projection is modified
567+
* @param afterNode a desired child of {@code parent}'s {@code AFTER} projection
568+
*/
547569
private static void addUnmapped(MappingStore mappings, DiffNode parent, VariationTreeAdapter afterNode) {
548570
VariationNode<?> variationNode = afterNode.getVariationNode();
549571
DiffNode diffNode;
@@ -564,6 +586,7 @@ private static void addUnmapped(MappingStore mappings, DiffNode parent, Variatio
564586
} else {
565587
diffNode = ((DiffTreeAdapter)src).getDiffNode();
566588
if (diffNode.getParent(AFTER) != null) {
589+
// Always drop and reinsert it because it could have moved.
567590
diffNode.drop(AFTER);
568591
}
569592
}
@@ -578,6 +601,8 @@ private static void addUnmapped(MappingStore mappings, DiffNode parent, Variatio
578601
/**
579602
* Run {@code matcher} on the implicit matching of this variation diff and update this variation
580603
* tree in-place to reflect the new matching.
604+
*
605+
* @see #improveMatching(DiffNode tree, Matcher matcher)
581606
*/
582607
public void improveMatching(Matcher matcher) {
583608
improveMatching(getRoot(), matcher);
@@ -586,6 +611,13 @@ public void improveMatching(Matcher matcher) {
586611
/**
587612
* Run {@code matcher} on the matching extracted from {@code tree} and modify {@code tree}
588613
* in-place to reflect the new matching.
614+
*
615+
* This is equivalent to {@code compareUsingMatching} except that the existing implicit matching
616+
* is {@link #extractMatching extracted} and used as basis for the new matching. Hence, this
617+
* method is mostly an optimisation to avoid a copy of the {@code AFTER} projection of {@code
618+
* tree}.
619+
*
620+
* @see "Constructing Variation Diffs Using Tree Diffing Algorithms"
589621
*/
590622
public static DiffNode improveMatching(DiffNode tree, Matcher matcher) {
591623
var src = new DiffTreeAdapter(tree, BEFORE);
@@ -627,6 +659,18 @@ public static DiffNode improveMatching(DiffNode tree, Matcher matcher) {
627659
return tree;
628660
}
629661

662+
/**
663+
* Removes the implicit matching between the {@code BEFORE} and {@code AFTER} projection of
664+
* {@code beforeNode}. This is achieved by copying {@code beforeNode} and reconnecting all
665+
* necessary edges such that the new node exists only after and {@code beforeNode} only exists
666+
* before the edit.
667+
*
668+
* This method doesn't change the {@code BEFORE} and {@code AFTER} projection of {@code
669+
* beforeNode}.
670+
*
671+
* @param beforeNode the node to be split
672+
* @return a copy of {@code beforeNode} existing only after the edit.
673+
*/
630674
private static DiffNode splitNode(DiffNode beforeNode) {
631675
Assert.assertTrue(beforeNode.isNon());
632676

@@ -646,6 +690,17 @@ private static DiffNode splitNode(DiffNode beforeNode) {
646690
return afterNode;
647691
}
648692

693+
/**
694+
* Merges {@code afterNode} into {@code beforeNode} such that {@code beforeNode.isNon() ==
695+
* true}. Essentially, an implicit matching is inserted between {@code beforeNode} and {@code
696+
* afterNode}.
697+
*
698+
* This method doesn't change the {@code BEFORE} and {@code AFTER} projection of {@code
699+
* beforeNode}.
700+
*
701+
* @param beforeNode the node which will exist {@code BEFORE} and {@code AFTER} the edit
702+
* @param afterNode the node which is discarded
703+
*/
649704
private static void joinNode(DiffNode beforeNode, DiffNode afterNode) {
650705
Assert.assertTrue(beforeNode.isRem());
651706
Assert.assertTrue(afterNode.isAdd());
@@ -659,6 +714,15 @@ private static void joinNode(DiffNode beforeNode, DiffNode afterNode) {
659714
afterNode.drop(AFTER);
660715
}
661716

717+
/**
718+
* Makes the implicit matching of a {@code DiffTree} explicit.
719+
*
720+
* @param src the source nodes of the matching, must be of the same {@link DiffTree} as {@code
721+
* dst}
722+
* @param dst the destination nodes of the matching, must be of the same {@link DiffTree} as
723+
* {@code src}
724+
* @param result the destination where the matching between {@code src} and {@code dst} is added
725+
*/
662726
private static void extractMatching(
663727
DiffTreeAdapter src,
664728
DiffTreeAdapter dst,

0 commit comments

Comments
 (0)