Skip to content

Commit 32c9dea

Browse files
committed
Compile regular expressions only once
For performance reasons regexes should be compiled only once, because it takes considerable time to compile them. The simplest solution is to compile these regexes in a static context, so that they are only compiled when the class is loaded. A further optimisation might be to compile them lazily (only compile regexes when they are actually needed) to prevent slow startup times. Because regexes are mostly implementation details they are made private unless they clearly belong to a public API (e.g. `LINEBREAK_REGEX`).
1 parent b1950c2 commit 32c9dea

6 files changed

Lines changed: 32 additions & 23 deletions

File tree

src/main/java/org/variantsync/diffdetective/datasets/predefined/MarlinCPPDiffLineFormulaExtractor.java

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,26 @@
22

33
import org.variantsync.diffdetective.feature.CPPDiffLineFormulaExtractor;
44

5+
import java.util.regex.Pattern;
6+
57
/**
68
* Extracts formulas from preprocessor annotations in the marlin firmware.
79
* In particular, it resolves the 'ENABLED' and 'DISABLED' macros that are used in Marlin
810
* to check for features being (de-)selected.
911
*/
1012
public class MarlinCPPDiffLineFormulaExtractor extends CPPDiffLineFormulaExtractor {
13+
/** Matches {@code ENABLED(<arg>)} (optional whitespace before the parenthesis); group 1 captures the argument up to the first {@code ')'}. Compiled once and never reassigned. */
private static final Pattern ENABLED_PATTERN = Pattern.compile("ENABLED\\s*\\(([^)]*)\\)");
14+
/** Matches {@code DISABLED(<arg>)} (optional whitespace before the parenthesis); group 1 captures the argument up to the first {@code ')'}. Compiled once and never reassigned. */
private static final Pattern DISABLED_PATTERN = Pattern.compile("DISABLED\\s*\\(([^)]*)\\)");
15+
1116
@Override
1217
protected String resolveFeatureMacroFunctions(String formula) {
13-
return super.resolveFeatureMacroFunctions(formula)
14-
.replaceAll("ENABLED\\s*\\(([^)]*)\\)", "$1")
15-
.replaceAll("DISABLED\\s*\\(([^)]*)\\)", "!($1)");
18+
return
19+
replaceAll(ENABLED_PATTERN, "$1",
20+
replaceAll(DISABLED_PATTERN, "!($1)",
21+
super.resolveFeatureMacroFunctions(formula)));
22+
}
23+
24+
private String replaceAll(Pattern pattern, String replacement, String string) {
25+
return pattern.matcher(string).replaceAll(replacement);
1626
}
1727
}

src/main/java/org/variantsync/diffdetective/diff/GitDiffer.java

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -53,10 +53,10 @@
5353
* @author Soeren Viegener, Paul Maximilian Bittner
5454
*/
5555
public class GitDiffer {
56-
public static final String BOM_REGEX = "\\x{FEFF}";
57-
public static final String DIFF_HUNK_REGEX = "^@@\\s-(\\d+).*\\+(\\d+).*@@$";
58-
public static final String DIFF_HEADER_REGEX = "^\\+\\+\\+.*$";
59-
public static final String NO_NEW_LINE = "\\ No newline at end of file";
56+
private static final Pattern BOM_PATTERN = Pattern.compile("\\x{FEFF}");
57+
private static final Pattern DIFF_HUNK_PATTERN = Pattern.compile( "^@@\\s-(\\d+).*\\+(\\d+).*@@$");
58+
private static final Pattern DIFF_HEADER_PATTERN = Pattern.compile( "^\\+\\+\\+.*$", Pattern.MULTILINE);
59+
private static final String NO_NEW_LINE = "\\ No newline at end of file";
6060

6161
private final Git git;
6262
private final DiffFilter diffFilter;
@@ -339,8 +339,7 @@ private static DiffResult<PatchDiff> createPatchDiff(
339339
final String gitDiff,
340340
String beforeFullFile,
341341
final ParseOptions parseOptions) {
342-
final Pattern headerPattern = Pattern.compile(DIFF_HEADER_REGEX, Pattern.MULTILINE);
343-
final Matcher matcher = headerPattern.matcher(gitDiff);
342+
final Matcher matcher = DIFF_HEADER_PATTERN.matcher(gitDiff);
344343
final String strippedDiff;
345344
if (matcher.find()) {
346345
strippedDiff = gitDiff.substring(matcher.end() + 1);
@@ -378,16 +377,15 @@ private static DiffResult<PatchDiff> createPatchDiff(
378377
* @return A full git diff containing the complete file and all changes
379378
*/
380379
public static String getFullDiff(String beforeFile, String gitDiff) {
381-
String[] beforeLines = beforeFile.split(LINEBREAK_REGEX, -1);
382-
String[] diffLines = gitDiff.split(LINEBREAK_REGEX);
380+
String[] beforeLines = LINEBREAK_REGEX.split(beforeFile, -1);
381+
String[] diffLines = LINEBREAK_REGEX.split(gitDiff);
383382

384383
int beforeIndex = 0;
385384

386385
List<String> fullDiffLines = new ArrayList<>();
387386

388387
for (String diffLine : diffLines) {
389-
Pattern diffHunkPattern = Pattern.compile(DIFF_HUNK_REGEX);
390-
Matcher matcher = diffHunkPattern.matcher(diffLine);
388+
Matcher matcher = DIFF_HUNK_PATTERN.matcher(diffLine);
391389

392390
if (matcher.find()) {
393391
// found diffHunkRegex
@@ -416,7 +414,7 @@ public static String getFullDiff(String beforeFile, String gitDiff) {
416414

417415
// JGit seems to put BOMs in weird locations somewhere in the files
418416
// We need to remove those or the regex matching for the lines fails
419-
fullDiff = fullDiff.replaceAll(BOM_REGEX, "");
417+
fullDiff = BOM_PATTERN.matcher(fullDiff).replaceAll("");
420418

421419
return fullDiff;
422420
}

src/main/java/org/variantsync/diffdetective/diff/difftree/DiffNode.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -738,7 +738,7 @@ public void assertSemanticConsistency() {
738738
}
739739

740740
public static String toTextDiffLine(final DiffType diffType, final String text) {
741-
return diffType.symbol + text.replaceAll(StringUtils.LINEBREAK_REGEX, StringUtils.LINEBREAK + diffType.symbol);
741+
return diffType.symbol + StringUtils.LINEBREAK_REGEX.matcher(text).replaceAll(StringUtils.LINEBREAK + diffType.symbol);
742742
}
743743

744744
public String toTextDiffLine() {

src/main/java/org/variantsync/diffdetective/diff/difftree/parse/DiffTreeParser.java

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,6 @@
2626
import java.util.concurrent.atomic.AtomicReference;
2727
import java.util.function.BiConsumer;
2828

29-
import static org.variantsync.diffdetective.util.StringUtils.LINEBREAK_REGEX;
30-
3129
public class DiffTreeParser {
3230
/**
3331
* Implementation of the diff tree algorithm.

src/main/java/org/variantsync/diffdetective/feature/CPPDiffLineFormulaExtractor.java

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,10 @@
1414
*/
1515
public class CPPDiffLineFormulaExtractor {
1616
// ^[+-]?\s*#\s*(if|ifdef|ifndef|elif)(\s+(.*)|\((.*)\))$
17-
public static final String CPP_ANNOTATION_REGEX = "^[+-]?\\s*#\\s*(if|ifdef|ifndef|elif)(\\s+(.*)|\\((.*)\\))$";
18-
public static final Pattern CPP_ANNOTATION_REGEX_PATTERN = Pattern.compile(CPP_ANNOTATION_REGEX);
17+
private static final String CPP_ANNOTATION_REGEX = "^[+-]?\\s*#\\s*(if|ifdef|ifndef|elif)(\\s+(.*)|\\((.*)\\))$";
18+
private static final Pattern CPP_ANNOTATION_REGEX_PATTERN = Pattern.compile(CPP_ANNOTATION_REGEX);
19+
private static final Pattern COMMENT_PATTERN = Pattern.compile("/\\*.*\\*/");
20+
private static final Pattern DEFINED_PATTERN = Pattern.compile("defined\\(([^)]*)\\)");
1921

2022
protected String resolveFeatureMacroFunctions(String formula) {
2123
return formula;
@@ -27,7 +29,7 @@ protected String resolveFeatureMacroFunctions(String formula) {
2729
* @return The feature mapping as a String of the given line
2830
*/
2931
public String extractFormula(final String line) throws IllFormedAnnotationException {
30-
// TODO: There are so many regexes here in replaceAll that could be optimized by precompiling the regexes once.
32+
// TODO: There are still regexes here in replaceAll that could be optimized by precompiling the regexes once.
3133
final Matcher matcher = CPP_ANNOTATION_REGEX_PATTERN.matcher(line);
3234

3335
String fm;
@@ -43,14 +45,14 @@ public String extractFormula(final String line) throws IllFormedAnnotationExcept
4345

4446
// remove comments
4547
fm = fm.split("//")[0];
46-
fm = fm.replaceAll("/\\*.*\\*/", "");
48+
fm = COMMENT_PATTERN.matcher(fm).replaceAll("");
4749

4850
// remove whitespace
4951
fm = fm.trim();
5052
fm = fm.replaceAll("\\s", "");
5153

5254
// remove defined()
53-
fm = fm.replaceAll("defined\\(([^)]*)\\)", "$1");
55+
fm = DEFINED_PATTERN.matcher(fm).replaceAll("$1");
5456
fm = fm.replaceAll("defined ", " ");
5557
fm = resolveFeatureMacroFunctions(fm);
5658

src/main/java/org/variantsync/diffdetective/util/StringUtils.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
package org.variantsync.diffdetective.util;
22

33
import java.util.Collection;
4+
import java.util.regex.Pattern;
45

56
public class StringUtils {
67
public final static String LINEBREAK = "\r\n";
7-
public final static String LINEBREAK_REGEX = "\\r\\n|\\r|\\n";
8+
public final static Pattern LINEBREAK_REGEX = Pattern.compile("\\r\\n|\\r|\\n");
89

910

1011
public static void clear(final StringBuilder builder) {

0 commit comments

Comments
 (0)