Skip to content

Commit f200c7a

Browse files
refactor: pull up interface of CPP diff line extraction and abstraction in preparation for JPP parsing
1 parent def9aa7 commit f200c7a

5 files changed

Lines changed: 195 additions & 75 deletions

File tree

src/main/java/org/variantsync/diffdetective/datasets/predefined/MarlinCPPDiffLineFormulaExtractor.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ public class MarlinCPPDiffLineFormulaExtractor extends CPPDiffLineFormulaExtract
1616
private static final Pattern DISABLED_PATTERN = Pattern.compile("DISABLED\\s*\\(([^)]*)\\)");
1717

1818
@Override
19-
protected String resolveFeatureMacroFunctions(String formula) {
19+
public String resolveFeatureMacroFunctions(String formula) {
2020
return
2121
replaceAll(ENABLED_PATTERN, "$1",
2222
replaceAll(DISABLED_PATTERN, "!($1)",
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
package org.variantsync.diffdetective.feature;
2+
3+
import org.tinylog.Logger;
4+
import org.variantsync.diffdetective.error.UncheckedUnParseableFormulaException;
5+
import org.variantsync.diffdetective.error.UnparseableFormulaException;
6+
import org.variantsync.diffdetective.feature.cpp.AbstractingCExpressionVisitor;
7+
import org.variantsync.diffdetective.feature.cpp.ControllingCExpressionVisitor;
8+
9+
import java.util.function.Supplier;
10+
import java.util.regex.Matcher;
11+
import java.util.regex.Pattern;
12+
13+
/**
14+
* AbstractingFormulaExtractor is an abstract class that extracts a formula from text containing a conditional annotation,
15+
* and then abstracts the formula using the custom {@link #abstractFormula(String)} implementation of its subclass.
16+
* The extraction of a formula is controlled by a {@link Pattern} with which an AbstractingFormulaExtractor is initialized.
17+
* The given text might also be a line in a diff (i.e., preceded by a '-' or '+').
18+
*
19+
* <p>
20+
* For example, given the annotation "#if defined(A) || B()", the extractor should extract the formula
21+
* "defined(A) || B". It would then hand this formula to the {@link #abstractFormula(String)} method for abstraction
22+
* (e.g., to substitute the 'defined(A)' macro call with 'DEFINED_A').
23+
* </p>
24+
* @author Paul Bittner, Sören Viegener, Benjamin Moosherr, Alexander Schultheiß
25+
*/
26+
public abstract class AbstractingFormulaExtractor implements DiffLineFormulaExtractor {
27+
private final Pattern annotationPattern;
28+
29+
/**
30+
* Initialize a new AbstractingFormulaExtractor object that uses the given Pattern to identify formulas in annotations.
31+
* See {@link org.variantsync.diffdetective.feature.cpp.CPPDiffLineFormulaExtractor} for an example of what such a pattern
32+
* could look like.
33+
* @param annotationPattern The pattern used for formula extraction
34+
*/
35+
public AbstractingFormulaExtractor(Pattern annotationPattern) {
36+
this.annotationPattern = annotationPattern;
37+
}
38+
39+
/**
40+
* Extracts the feature formula as a string from a piece of text (possibly within a diff) and abstracts it.
41+
*
42+
* @param text The text of which to extract the formula
43+
* @return The extracted and abstracted formula
44+
*/
45+
@Override
46+
public String extractFormula(final String text) throws UnparseableFormulaException {
47+
final Matcher matcher = annotationPattern.matcher(text);
48+
final Supplier<UnparseableFormulaException> couldNotExtractFormula = () ->
49+
new UnparseableFormulaException("Could not extract formula from line \"" + text + "\".");
50+
51+
// Retrieve the formula from the macro line
52+
String fm;
53+
if (matcher.find()) {
54+
if (matcher.group(3) != null) {
55+
fm = matcher.group(3);
56+
} else {
57+
fm = matcher.group(4);
58+
}
59+
} else {
60+
throw couldNotExtractFormula.get();
61+
}
62+
63+
// abstract complex formulas (e.g., if they contain arithmetic or macro calls)
64+
try {
65+
fm = abstractFormula(fm);
66+
} catch (UncheckedUnParseableFormulaException e) {
67+
throw e.inner();
68+
} catch (Exception e) {
69+
Logger.warn(e);
70+
throw new UnparseableFormulaException(e);
71+
}
72+
73+
if (fm.isEmpty()) {
74+
throw couldNotExtractFormula.get();
75+
}
76+
77+
return fm;
78+
}
79+
80+
/**
81+
* Abstract the given formula (e.g., by substituting parts of the formula with predefined String literals).
82+
* See {@link org.variantsync.diffdetective.feature.cpp.CPPDiffLineFormulaExtractor} for an example of how this could
83+
* be done.
84+
*
85+
* @param formula that is to be abstracted
86+
* @return the abstracted formula
87+
*/
88+
protected abstract String abstractFormula(String formula);
89+
}
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
package org.variantsync.diffdetective.feature;
2+
3+
import org.variantsync.diffdetective.error.UnparseableFormulaException;
4+
5+
/**
6+
* Extracts the expression from a C preprocessor statement.
7+
* For example, given the annotation "#if defined(A) || B()", the extractor would extract
8+
* "A || B". The extractor detects if, ifdef, ifndef and elif annotations.
9+
* (Other annotations do not have expressions.)
10+
* The given pre-processor statement might also be a line in a diff (i.e., preceded by a - or +).
11+
*
12+
* @author Paul Bittner, Sören Viegener, Benjamin Moosherr, Alexander Schultheiß
13+
*/
14+
public interface DiffLineFormulaExtractor {
15+
/**
16+
* Extracts the feature formula as a string from a macro line (possibly within a diff).
17+
*
18+
* @param line The line of which to get the feature mapping
19+
* @return The feature mapping as a String of the given line
20+
*/
21+
String extractFormula(final String line) throws UnparseableFormulaException;
22+
23+
/**
24+
* Resolves any macros in the given formula that are relevant for feature annotations.
25+
* For example, in {@link org.variantsync.diffdetective.datasets.predefined.MarlinCPPDiffLineFormulaExtractor Marlin},
26+
* feature annotations are given by the custom <code>ENABLED</code> and <code>DISABLED</code> macros,
27+
* which have to be unwrapped.
28+
*
29+
* @param formula The formula whose feature macros to resolve.
30+
* @return The parseable formula as string. The default implementation returns the input string.
31+
*/
32+
default String resolveFeatureMacroFunctions(String formula) {
33+
return formula;
34+
}
35+
}
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
package org.variantsync.diffdetective.feature;
2+
3+
import org.antlr.v4.runtime.ANTLRErrorListener;
4+
import org.antlr.v4.runtime.Parser;
5+
import org.antlr.v4.runtime.RecognitionException;
6+
import org.antlr.v4.runtime.Recognizer;
7+
import org.antlr.v4.runtime.atn.ATNConfigSet;
8+
import org.antlr.v4.runtime.dfa.DFA;
9+
import org.tinylog.Logger;
10+
import org.variantsync.diffdetective.error.UncheckedUnParseableFormulaException;
11+
12+
import java.util.BitSet;
13+
14+
/**
15+
* A ParseErrorListener listens to syntactical errors discovered by an ANTLR parser while parsing a text. Encountered
16+
* errors are logged as warnings so that they can later be analyzed.
17+
* <p>
18+
* Logged warnings might indicate that the ANTLR grammar used for parsing is imprecise or incomplete. However, it might
19+
* also simply be the case that the input text is indeed syntactically invalid.
20+
* </p>
21+
* @author Alexander Schultheiß
22+
*/
23+
public class ParseErrorListener implements ANTLRErrorListener {
24+
private final String formula;
25+
26+
public ParseErrorListener(String formula) {
27+
this.formula = formula;
28+
}
29+
30+
@Override
31+
public void syntaxError(Recognizer<?, ?> recognizer, Object o, int i, int i1, String s, RecognitionException e) {
32+
Logger.warn("syntax error: {} ; {}", s, e);
33+
Logger.warn("formula: {}", formula);
34+
throw new UncheckedUnParseableFormulaException(s, e);
35+
}
36+
37+
@Override
38+
public void reportAmbiguity(Parser parser, DFA dfa, int i, int i1, boolean b, BitSet bitSet, ATNConfigSet atnConfigSet) {
39+
// Do nothing
40+
}
41+
42+
@Override
43+
public void reportAttemptingFullContext(Parser parser, DFA dfa, int i, int i1, BitSet bitSet, ATNConfigSet atnConfigSet) {
44+
// Do nothing
45+
}
46+
47+
@Override
48+
public void reportContextSensitivity(Parser parser, DFA dfa, int i, int i1, int i2, ATNConfigSet atnConfigSet) {
49+
// Do nothing
50+
}
51+
}
Lines changed: 19 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,13 @@
11
package org.variantsync.diffdetective.feature.cpp;
22

3-
import org.antlr.v4.runtime.*;
4-
import org.antlr.v4.runtime.atn.ATNConfigSet;
5-
import org.antlr.v4.runtime.dfa.DFA;
6-
import org.antlr.v4.runtime.tree.ParseTree;
7-
import org.tinylog.Logger;
8-
import org.variantsync.diffdetective.error.UncheckedUnParseableFormulaException;
3+
import org.antlr.v4.runtime.CharStreams;
4+
import org.antlr.v4.runtime.CommonTokenStream;
95
import org.variantsync.diffdetective.error.UnparseableFormulaException;
6+
import org.variantsync.diffdetective.feature.AbstractingFormulaExtractor;
7+
import org.variantsync.diffdetective.feature.ParseErrorListener;
108
import org.variantsync.diffdetective.feature.antlr.CExpressionLexer;
119
import org.variantsync.diffdetective.feature.antlr.CExpressionParser;
1210

13-
import java.util.BitSet;
14-
import java.util.function.Supplier;
1511
import java.util.regex.Matcher;
1612
import java.util.regex.Pattern;
1713

@@ -22,24 +18,15 @@
2218
* (Other annotations do not have expressions.)
2319
* The given pre-processor statement might also be a line in a diff (i.e., preceded by a - or +).
2420
*
25-
* @author Paul Bittner, Sören Viegener, Benjamin Moosherr
21+
* @author Paul Bittner, Sören Viegener, Benjamin Moosherr, Alexander Schultheiß
2622
*/
27-
public class CPPDiffLineFormulaExtractor {
23+
public class CPPDiffLineFormulaExtractor extends AbstractingFormulaExtractor {
2824
// ^[+-]?\s*#\s*(if|ifdef|ifndef|elif)(\s+(.*)|(\(.*\)))$
2925
private static final String CPP_ANNOTATION_REGEX = "^[+-]?\\s*#\\s*(if|ifdef|ifndef|elif)(\\s+(.*)|(\\(.*\\)))$";
30-
private static final Pattern CPP_ANNOTATION_REGEX_PATTERN = Pattern.compile(CPP_ANNOTATION_REGEX);
26+
private static final Pattern CPP_ANNOTATION_PATTERN = Pattern.compile(CPP_ANNOTATION_REGEX);
3127

32-
/**
33-
* Resolves any macros in the given formula that are relevant for feature annotations.
34-
* For example, in {@link org.variantsync.diffdetective.datasets.predefined.MarlinCPPDiffLineFormulaExtractor Marlin},
35-
* feature annotations are given by the custom <code>ENABLED</code> and <code>DISABLED</code> macros,
36-
* which have to be unwrapped.
37-
*
38-
* @param formula The formula whose feature macros to resolve.
39-
* @return The parseable formula as string. The default implementation returns the input string.
40-
*/
41-
protected String resolveFeatureMacroFunctions(String formula) {
42-
return formula;
28+
public CPPDiffLineFormulaExtractor() {
29+
super(CPP_ANNOTATION_PATTERN);
4330
}
4431

4532
/**
@@ -48,39 +35,14 @@ protected String resolveFeatureMacroFunctions(String formula) {
4835
* @param line The line of which to get the feature mapping
4936
* @return The feature mapping as a String of the given line
5037
*/
38+
@Override
5139
public String extractFormula(final String line) throws UnparseableFormulaException {
52-
final Matcher matcher = CPP_ANNOTATION_REGEX_PATTERN.matcher(line);
53-
final Supplier<UnparseableFormulaException> couldNotExtractFormula = () ->
54-
new UnparseableFormulaException("Could not extract formula from line \"" + line + "\".");
55-
56-
// Retrieve the formula from the macro line
57-
String fm;
58-
if (matcher.find()) {
59-
if (matcher.group(3) != null) {
60-
fm = matcher.group(3);
61-
} else {
62-
fm = matcher.group(4);
63-
}
64-
} else {
65-
throw couldNotExtractFormula.get();
66-
}
67-
68-
// abstract complex formulas (e.g., if they contain arithmetics or macro calls)
69-
try {
70-
fm = abstractFormula(fm);
71-
} catch (UncheckedUnParseableFormulaException e) {
72-
throw e.inner();
73-
} catch (Exception e) {
74-
Logger.warn(e);
75-
throw new UnparseableFormulaException(e);
76-
}
77-
78-
if (fm.isEmpty()) {
79-
throw couldNotExtractFormula.get();
80-
}
40+
// Delegate the formula extraction to AbstractingFormulaExtractor
41+
String fm = super.extractFormula(line);
8142

8243
// negate for ifndef
83-
if ("ifndef".equals(matcher.group(1))) {
44+
final Matcher matcher = CPP_ANNOTATION_PATTERN.matcher(line);
45+
if (matcher.find() && "ifndef".equals(matcher.group(1))) {
8446
fm = "!(" + fm + ")";
8547
}
8648

@@ -99,31 +61,14 @@ public String extractFormula(final String line) throws UnparseableFormulaExcepti
9961
* @param formula that is to be abstracted
10062
* @return the abstracted formula
10163
*/
102-
private String abstractFormula(String formula) {
64+
@Override
65+
protected String abstractFormula(String formula) {
10366
CExpressionLexer lexer = new CExpressionLexer(CharStreams.fromString(formula));
10467
CommonTokenStream tokens = new CommonTokenStream(lexer);
105-
CExpressionParser parser = new CExpressionParser(tokens);
106-
parser.addErrorListener(new ANTLRErrorListener() {
107-
@Override
108-
public void syntaxError(Recognizer<?, ?> recognizer, Object o, int i, int i1, String s, RecognitionException e) {
109-
Logger.warn("syntax error: {} ; {}", s, e);
110-
Logger.warn("formula: {}", formula);
111-
throw new UncheckedUnParseableFormulaException(s, e);
112-
}
113-
114-
@Override
115-
public void reportAmbiguity(Parser parser, DFA dfa, int i, int i1, boolean b, BitSet bitSet, ATNConfigSet atnConfigSet) {
116-
}
11768

118-
@Override
119-
public void reportAttemptingFullContext(Parser parser, DFA dfa, int i, int i1, BitSet bitSet, ATNConfigSet atnConfigSet) {
120-
}
69+
CExpressionParser parser = new CExpressionParser(tokens);
70+
parser.addErrorListener(new ParseErrorListener(formula));
12171

122-
@Override
123-
public void reportContextSensitivity(Parser parser, DFA dfa, int i, int i1, int i2, ATNConfigSet atnConfigSet) {
124-
}
125-
});
126-
ParseTree tree = parser.expression();
127-
return tree.accept(new ControllingCExpressionVisitor()).toString();
72+
return parser.expression().accept(new ControllingCExpressionVisitor()).toString();
12873
}
12974
}

0 commit comments

Comments
 (0)