Skip to content

Commit d3b8320

Browse files
balajirrao authored and gbrail committed
Introduce fast path for RegExp.prototype[Symbol.replace]
1 parent 6dc41fa commit d3b8320

1 file changed

Lines changed: 203 additions & 37 deletions

File tree

rhino/src/main/java/org/mozilla/javascript/regexp/NativeRegExp.java

Lines changed: 203 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
import java.io.Serializable;
1010
import java.util.ArrayList;
1111
import java.util.HashMap;
12+
import java.util.LinkedHashMap;
1213
import java.util.List;
1314
import java.util.Map;
1415
import org.mozilla.javascript.AbstractEcmaObjectOperations;
@@ -3505,10 +3506,70 @@ && upcase(matchCh) == upcase((char) anchorCodePoint))) {
35053506
return false;
35063507
}
35073508

3509+
private static class ExecResult {
3510+
final String match;
3511+
final ArrayList<String> captures = new ArrayList<>();
3512+
final LinkedHashMap<String, String> groups = new LinkedHashMap<>();
3513+
final int index;
3514+
final String input;
3515+
3516+
ExecResult(int index, String input) {
3517+
this.match = null;
3518+
this.index = index;
3519+
this.input = input;
3520+
}
3521+
3522+
ExecResult(int index, String input, String match) {
3523+
this.match = match;
3524+
this.index = index;
3525+
this.input = input;
3526+
}
3527+
}
3528+
3529+
Object executeRegExp(
3530+
Context cx, Scriptable scope, RegExpImpl res, String str, int[] indexp, int matchType) {
3531+
var result = executeRegExpInternal(cx, scope, res, str, indexp, matchType);
3532+
3533+
if (result == null) {
3534+
if (matchType != PREFIX) return null;
3535+
return Undefined.instance;
3536+
} else if (matchType == TEST) {
3537+
/*
3538+
* Testing for a match and updating cx.regExpImpl: don't allocate
3539+
* an array object, do return true.
3540+
*/
3541+
return Boolean.TRUE;
3542+
} else {
3543+
Object[] captures = result.captures.toArray();
3544+
Scriptable obj = cx.newArray(scope, captures.length + 1);
3545+
3546+
obj.put(0, obj, result.match);
3547+
for (int i = 0; i < captures.length; i++) {
3548+
obj.put(i + 1, obj, (captures[i] == null) ? Undefined.instance : captures[i]);
3549+
}
3550+
3551+
obj.put("index", obj, Integer.valueOf(result.index));
3552+
obj.put("input", obj, str);
3553+
if (!result.groups.isEmpty()) {
3554+
var groups = new NativeObject();
3555+
for (var g : result.groups.entrySet()) {
3556+
groups.put(
3557+
g.getKey(),
3558+
groups,
3559+
g.getValue() == null ? Undefined.instance : g.getValue());
3560+
}
3561+
obj.put("groups", obj, groups);
3562+
} else {
3563+
obj.put("groups", obj, Undefined.instance);
3564+
}
3565+
return obj;
3566+
}
3567+
}
3568+
35083569
/*
35093570
* indexp is assumed to be an array of length 1
35103571
*/
3511-
Object executeRegExp(
3572+
ExecResult executeRegExpInternal(
35123573
Context cx, Scriptable scope, RegExpImpl res, String str, int[] indexp, int matchType) {
35133574
REGlobalData gData = new REGlobalData();
35143575

@@ -3519,37 +3580,26 @@ Object executeRegExp(
35193580
// Call the recursive matcher to do the real work.
35203581
//
35213582
boolean matches = matchRegExp(cx, gData, re, str, start, end, res.multiline);
3522-
if (!matches) {
3523-
if (matchType != PREFIX) return null;
3524-
return Undefined.instance;
3525-
}
3583+
if (!matches) return null;
3584+
35263585
int index = gData.cp;
35273586
int ep = indexp[0] = index;
35283587
int matchlen = ep - (start + gData.skipped);
35293588
index -= matchlen;
3530-
Object result;
3531-
Scriptable obj;
3532-
Scriptable groups = Undefined.SCRIPTABLE_UNDEFINED;
3589+
ExecResult result;
35333590

35343591
if (matchType == TEST) {
3535-
/*
3536-
* Testing for a match and updating cx.regExpImpl: don't allocate
3537-
* an array object, do return true.
3538-
*/
3539-
result = Boolean.TRUE;
3540-
obj = null;
3592+
result = new ExecResult(index, str);
35413593
} else {
35423594
/*
35433595
* The array returned on match has element 0 bound to the matched
35443596
* string, elements 1 through re.parenCount bound to the paren
35453597
* matches, an index property telling the length of the left context,
35463598
* and an input property referring to the input string.
35473599
*/
3548-
result = cx.newArray(scope, 0);
3549-
obj = (Scriptable) result;
35503600

35513601
String matchstr = str.substring(index, index + matchlen);
3552-
obj.put(0, obj, matchstr);
3602+
result = new ExecResult(index, str, matchstr);
35533603
}
35543604

35553605
if (re.parenCount == 0) {
@@ -3563,11 +3613,6 @@ Object executeRegExp(
35633613
if (matchType != TEST) {
35643614
namedCaptureGroups = new String[re.parenCount];
35653615

3566-
if (!re.namedCaptureGroups.isEmpty()) {
3567-
// We do a new NativeObject() and not cx.newObject(scope)
3568-
// since we want the groups to have null as prototype
3569-
groups = new NativeObject();
3570-
}
35713616
for (Map.Entry<String, List<Integer>> entry : re.namedCaptureGroups.entrySet()) {
35723617
String key = entry.getKey();
35733618
List<Integer> indices = entry.getValue();
@@ -3585,34 +3630,24 @@ Object executeRegExp(
35853630
parsub = new SubString(str, cap_index, cap_length);
35863631
res.parens[num] = parsub;
35873632
if (matchType != TEST) {
3588-
obj.put(num + 1, obj, parsub.toString());
3633+
result.captures.add(parsub.toString());
35893634
if (namedCaptureGroups[num] != null) {
3590-
groups.put(namedCaptureGroups[num], groups, parsub.toString());
3635+
result.groups.put(namedCaptureGroups[num], parsub.toString());
35913636
}
35923637
}
35933638
} else {
3639+
result.captures.add(null);
35943640
if (matchType != TEST) {
3595-
obj.put(num + 1, obj, Undefined.instance);
35963641
if (namedCaptureGroups[num] != null
3597-
&& !groups.has(namedCaptureGroups[num], groups)) {
3598-
groups.put(namedCaptureGroups[num], groups, Undefined.instance);
3642+
&& !result.groups.containsKey(namedCaptureGroups[num])) {
3643+
result.groups.put(namedCaptureGroups[num], null);
35993644
}
36003645
}
36013646
}
36023647
}
36033648
res.lastParen = parsub;
36043649
}
36053650

3606-
if (!(matchType == TEST)) {
3607-
/*
3608-
* Define the index and input properties last for better for/in loop
3609-
* order (so they come after the elements).
3610-
*/
3611-
obj.put("index", obj, Integer.valueOf(start + gData.skipped));
3612-
obj.put("input", obj, str);
3613-
obj.put("groups", obj, groups);
3614-
}
3615-
36163651
if (res.lastMatch == null) {
36173652
res.lastMatch = new SubString();
36183653
res.leftContext = new SubString();
@@ -4062,6 +4097,137 @@ private Object js_SymbolMatchAll(
40624097

40634098
private Object js_SymbolReplace(
40644099
Context cx, Scriptable scope, Scriptable thisObj, Object[] args) {
4100+
if (thisObj instanceof NativeRegExp) {
4101+
var regexp = (NativeRegExp) thisObj;
4102+
var exec = ScriptableObject.getProperty(regexp, "exec");
4103+
if ((regexp.lastIndexAttr & READONLY) == 0
4104+
&& exec instanceof IdFunctionObject
4105+
&& ((IdFunctionObject) exec).methodId() == Id_exec
4106+
&& ((IdFunctionObject) exec).getTag() == REGEXP_TAG)
4107+
return regexp.js_SymbolReplaceFast(cx, scope, (NativeRegExp) thisObj, args);
4108+
}
4109+
return js_SymbolReplaceSlow(cx, scope, thisObj, args);
4110+
}
4111+
4112+
private Object js_SymbolReplaceFast(
4113+
Context cx, Scriptable scope, NativeRegExp thisObj, Object[] args) {
4114+
String s = ScriptRuntime.toString(args.length > 0 ? args[0] : Undefined.instance);
4115+
int lengthS = s.length();
4116+
Object replaceValue = args.length > 1 ? args[1] : Undefined.instance;
4117+
boolean functionalReplace = replaceValue instanceof Callable;
4118+
List<ReplacementOperation> replaceOps;
4119+
Callable replaceFn;
4120+
if (!functionalReplace) {
4121+
replaceFn = null;
4122+
replaceOps =
4123+
AbstractEcmaStringOperations.buildReplacementList(
4124+
ScriptRuntime.toString(replaceValue));
4125+
} else {
4126+
replaceFn = (Callable) replaceValue;
4127+
replaceOps = List.of();
4128+
}
4129+
String flags = ScriptRuntime.toString(ScriptRuntime.getObjectProp(thisObj, "flags", cx));
4130+
boolean fullUnicode = flags.indexOf('u') != -1 || flags.indexOf('v') != -1;
4131+
4132+
List<ExecResult> results = new ArrayList<>();
4133+
boolean done = false;
4134+
4135+
RegExpImpl reImpl = getImpl(cx);
4136+
boolean sticky = (re.flags & JSREG_STICKY) != 0;
4137+
boolean global = (re.flags & JSREG_GLOB) != 0;
4138+
4139+
int[] indexp = {0};
4140+
if (sticky) {
4141+
indexp[0] = (int) getLastIndex(cx, thisObj);
4142+
}
4143+
while (!done) {
4144+
ExecResult result;
4145+
if (indexp[0] < 0 || indexp[0] > s.length()) {
4146+
result = null;
4147+
} else {
4148+
result = executeRegExpInternal(cx, scope, reImpl, s, indexp, MATCH);
4149+
}
4150+
if (result == null) {
4151+
if (global || sticky) {
4152+
indexp[0] = 0;
4153+
}
4154+
done = true;
4155+
} else {
4156+
results.add(result);
4157+
if (!global) {
4158+
done = true;
4159+
} else {
4160+
String matchStr = result.match;
4161+
if (matchStr.isEmpty()) {
4162+
indexp[0] =
4163+
(int) ScriptRuntime.advanceStringIndex(s, indexp[0], fullUnicode);
4164+
}
4165+
}
4166+
}
4167+
}
4168+
setLastIndex(thisObj, indexp[0]);
4169+
4170+
StringBuilder accumulatedResult = new StringBuilder();
4171+
int nextSourcePosition = 0;
4172+
for (ExecResult result : results) {
4173+
String matched = result.match;
4174+
int matchLength = matched.length();
4175+
double positionDbl = result.index;
4176+
int position = ScriptRuntime.clamp((int) positionDbl, 0, lengthS);
4177+
4178+
List<String> captures = result.captures;
4179+
Object namedCaptures;
4180+
if (!result.groups.isEmpty()) {
4181+
var groups = new NativeObject();
4182+
for (var g : result.groups.entrySet()) {
4183+
groups.put(
4184+
g.getKey(),
4185+
groups,
4186+
g.getValue() == null ? Undefined.instance : g.getValue());
4187+
}
4188+
namedCaptures = groups;
4189+
} else {
4190+
namedCaptures = Undefined.instance;
4191+
}
4192+
4193+
String replacementString =
4194+
functionalReplace
4195+
? makeComplexReplacement(
4196+
cx,
4197+
scope,
4198+
matched,
4199+
captures,
4200+
position,
4201+
s,
4202+
namedCaptures,
4203+
replaceFn)
4204+
: makeSimpleReplacement(
4205+
cx,
4206+
scope,
4207+
matched,
4208+
captures,
4209+
position,
4210+
s,
4211+
namedCaptures,
4212+
replaceOps);
4213+
4214+
if (position >= nextSourcePosition) {
4215+
accumulatedResult.append(s, nextSourcePosition, position);
4216+
accumulatedResult.append(replacementString);
4217+
nextSourcePosition = position + matchLength;
4218+
}
4219+
}
4220+
4221+
if (nextSourcePosition >= lengthS) {
4222+
return accumulatedResult.toString();
4223+
} else {
4224+
accumulatedResult.append(s.substring(nextSourcePosition));
4225+
return accumulatedResult.toString();
4226+
}
4227+
}
4228+
4229+
private Object js_SymbolReplaceSlow(
4230+
Context cx, Scriptable scope, Scriptable thisObj, Object[] args) {
40654231
// See ECMAScript spec 22.2.6.11
40664232
if (!ScriptRuntime.isObject(thisObj)) {
40674233
throw ScriptRuntime.typeErrorById("msg.arg.not.object", ScriptRuntime.typeof(thisObj));

0 commit comments

Comments
 (0)