Skip to content

Commit 6b93231

Browse files
committed
perf: SQL optimizations
1 parent 6b9b6be commit 6b93231

7 files changed

Lines changed: 178 additions & 11 deletions

File tree

engine/src/main/java/com/arcadedb/query/sql/executor/AbstractTraverseStep.java

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,22 +31,29 @@
3131
*/
3232
public abstract class AbstractTraverseStep extends AbstractExecutionStep {
3333
protected final WhereClause whileClause;
34+
// Optional emit filter pushed down from an outer SELECT's WHERE. Non-matching vertices still drive expansion, they just don't appear in the results. Null
35+
// means "emit everything traversed", preserving the historical behavior.
36+
protected final WhereClause postFilter;
3437
protected final List<TraverseProjectionItem> projections;
3538
protected final PInteger maxDepth;
3639

37-
protected List<Result> entryPoints = null;
38-
protected final List<Result> results = new ArrayList<>();
40+
// ArrayDeque, not ArrayList: entryPoints is used as a stack (addFirst/removeFirst in the hot loop). ArrayList would make both O(n), turning the traversal into
41+
// O(n^2) in the queue depth; that was the main cause of slow TRAVERSE on deep/wide subgraphs.
42+
protected Deque<Result> entryPoints = null;
43+
protected final List<Result> results = new ArrayList<>();
3944

4045
// Visited set for traversal dedup. Graph traversal is inherently sparse (RIDs scatter across buckets with random offsets), so RidHashSet's primitive-packed
4146
// open-addressing hash wins over RidSet's bitmap on both memory and build time. See performance.RidDedupSetBenchmark.
4247
final RidHashSet traversed;
4348

4449
public AbstractTraverseStep(final List<TraverseProjectionItem> projections, final WhereClause whileClause,
50+
final WhereClause postFilter,
4551
final PInteger maxDepth,
4652
final CommandContext context) {
4753
super(context);
4854
this.traversed = new RidHashSet();
4955
this.whileClause = whileClause;
56+
this.postFilter = postFilter;
5057
this.maxDepth = maxDepth;
5158
this.projections = projections.stream().map(TraverseProjectionItem::copy).collect(Collectors.toList());
5259
}
@@ -93,7 +100,7 @@ public Result next() {
93100

94101
private void fetchNextBlock(final CommandContext context, final int nRecords) {
95102
if (this.entryPoints == null)
96-
this.entryPoints = new ArrayList<>();
103+
this.entryPoints = new ArrayDeque<>();
97104

98105
while (this.results.isEmpty()) {
99106
if (this.entryPoints.isEmpty())

engine/src/main/java/com/arcadedb/query/sql/executor/BreadthFirstTraverseStep.java

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,12 @@ public class BreadthFirstTraverseStep extends AbstractTraverseStep {
3535

3636
public BreadthFirstTraverseStep(final List<TraverseProjectionItem> projections, final WhereClause whileClause, final PInteger maxDepth,
3737
final CommandContext context) {
38-
super(projections, whileClause, maxDepth, context);
38+
this(projections, whileClause, null, maxDepth, context);
39+
}
40+
41+
public BreadthFirstTraverseStep(final List<TraverseProjectionItem> projections, final WhereClause whileClause, final WhereClause postFilter,
42+
final PInteger maxDepth, final CommandContext context) {
43+
super(projections, whileClause, postFilter, maxDepth, context);
3944
}
4045

4146
@Override
@@ -100,7 +105,8 @@ private Result toTraverseResult(final Result item) {
100105
protected void fetchNextResults(final CommandContext context, final int nRecords) {
101106
if (!this.entryPoints.isEmpty()) {
102107
final TraverseResult item = (TraverseResult) this.entryPoints.removeFirst();
103-
this.results.add(item);
108+
if (postFilter == null || postFilter.matchesFilters(item, context))
109+
this.results.add(item);
104110
for (final TraverseProjectionItem proj : projections) {
105111
final Object nextStep = proj.execute(item, context);
106112
final Integer depth = item.depth != null ? item.depth : (Integer) item.getMetadata("$depth");

engine/src/main/java/com/arcadedb/query/sql/executor/DepthFirstTraverseStep.java

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,12 @@ public class DepthFirstTraverseStep extends AbstractTraverseStep {
3535

3636
public DepthFirstTraverseStep(final List<TraverseProjectionItem> projections, final WhereClause whileClause, final PInteger maxDepth,
3737
final CommandContext context) {
38-
super(projections, whileClause, maxDepth, context);
38+
this(projections, whileClause, null, maxDepth, context);
39+
}
40+
41+
public DepthFirstTraverseStep(final List<TraverseProjectionItem> projections, final WhereClause whileClause, final WhereClause postFilter,
42+
final PInteger maxDepth, final CommandContext context) {
43+
super(projections, whileClause, postFilter, maxDepth, context);
3944
}
4045

4146
@Override
@@ -100,7 +105,8 @@ else if (item.isElement() && item.getElement().get().getIdentity() != null) {
100105
protected void fetchNextResults(final CommandContext context, final int nRecords) {
101106
if (!this.entryPoints.isEmpty()) {
102107
final TraverseResult item = (TraverseResult) this.entryPoints.removeFirst();
103-
this.results.add(item);
108+
if (postFilter == null || postFilter.matchesFilters(item, context))
109+
this.results.add(item);
104110
for (final TraverseProjectionItem proj : projections) {
105111
final Object nextStep = proj.execute(item, context);
106112
final Integer depth = item.depth != null ? item.depth : (Integer) item.getMetadata("$depth");

engine/src/main/java/com/arcadedb/query/sql/executor/SelectExecutionPlanner.java

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@
6868
import com.arcadedb.query.sql.parser.SelectStatement;
6969
import com.arcadedb.query.sql.parser.SuffixIdentifier;
7070
import com.arcadedb.query.sql.parser.Statement;
71+
import com.arcadedb.query.sql.parser.TraverseStatement;
7172
import com.arcadedb.query.sql.parser.SubQueryCollector;
7273
import com.arcadedb.query.sql.parser.Timeout;
7374
import com.arcadedb.query.sql.parser.WhereClause;
@@ -1250,7 +1251,7 @@ else if (variableValue instanceof String typeName) {
12501251
}
12511252
handleBucketsAsTarget(info.fetchExecutionPlan, info, buckets, context);
12521253
} else if (target.getStatement() != null) {
1253-
handleSubqueryAsTarget(info.fetchExecutionPlan, target.getStatement(), context);
1254+
handleSubqueryAsTarget(info.fetchExecutionPlan, maybePushWhereIntoTraverse(target.getStatement(), info), context);
12541255
} else if (target.getFunctionCall() != null) {
12551256
// handleFunctionCallAsTarget(result, target.getFunctionCall(), context);//TODO
12561257
throw new CommandExecutionException("function call as target is not supported yet");
@@ -3458,6 +3459,33 @@ private void handleSubqueryAsTarget(final SelectExecutionPlan plan, final Statem
34583459
plan.chain(new SubQueryStep(subExecutionPlan, context, subCtx));
34593460
}
34603461

3462+
/**
3463+
* When the outer SELECT has shape `SELECT ... FROM (TRAVERSE ...) WHERE <row-local cond>`, push the WHERE into the TRAVERSE step as a post-emit filter.
3464+
* Non-matching vertices still drive expansion (the sub-graph is fully walked), they are just not emitted. This avoids materializing every intermediate vertex
3465+
* as a Result and running each through a separate FilterStep, which is the dominant cost on the common pattern `... WHERE @type = 'X'` that filters a minority
3466+
* class out of a multi-type traversal.
3467+
* <p>
3468+
* The original TraverseStatement node is not mutated: a copy is returned and the outer WHERE is cleared when consumed.
3469+
*/
3470+
private Statement maybePushWhereIntoTraverse(final Statement subQuery, final QueryPlanningInfo info) {
3471+
if (!(subQuery instanceof TraverseStatement traverse))
3472+
return subQuery;
3473+
if (info.whereClause == null || info.whereClause.getBaseExpression() == null)
3474+
return subQuery;
3475+
// Conservative: skip if WHERE references parent scope, LET variables, or $current/$parent (anything starting with '$').
3476+
if (info.whereClause.refersToParent())
3477+
return subQuery;
3478+
if (info.whereClause.toString().contains("$"))
3479+
return subQuery;
3480+
3481+
final TraverseStatement copy = (TraverseStatement) traverse.copy();
3482+
copy.setPostFilter(info.whereClause.copy());
3483+
// WHERE has been consumed by the inner TRAVERSE, so the outer plan must not add another FilterStep for it.
3484+
info.whereClause = null;
3485+
info.flattenedWhereClause = null;
3486+
return copy;
3487+
}
3488+
34613489
private boolean isOrderByRidDesc(final QueryPlanningInfo info) {
34623490
if (!hasTargetWithSortedRids(info))
34633491
return false;

engine/src/main/java/com/arcadedb/query/sql/executor/TraverseExecutionPlanner.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ public class TraverseExecutionPlanner {
3737
private final List<TraverseProjectionItem> projections;
3838
private final FromClause target;
3939
private final WhereClause whileClause;
40+
private final WhereClause postFilter;
4041
private final TraverseStatement.Strategy strategy;
4142
private final PInteger maxDepth;
4243
private final Skip skip;
@@ -50,6 +51,7 @@ public TraverseExecutionPlanner(final TraverseStatement statement) {
5051

5152
this.target = statement.getTarget();
5253
this.whileClause = statement.getWhileClause() == null ? null : statement.getWhileClause().copy();
54+
this.postFilter = statement.getPostFilter() == null ? null : statement.getPostFilter().copy();
5355

5456
this.strategy = statement.getStrategy() == null ? TraverseStatement.Strategy.DEPTH_FIRST : statement.getStrategy();
5557
this.maxDepth = statement.getMaxDepth() == null ? null : statement.getMaxDepth().copy();
@@ -78,10 +80,10 @@ public InternalExecutionPlan createExecutionPlan(final CommandContext context) {
7880
private void handleTraversal(final SelectExecutionPlan result, final CommandContext context) {
7981
switch (strategy) {
8082
case BREADTH_FIRST:
81-
result.chain(new BreadthFirstTraverseStep(this.projections, this.whileClause, maxDepth, context));
83+
result.chain(new BreadthFirstTraverseStep(this.projections, this.whileClause, this.postFilter, maxDepth, context));
8284
break;
8385
case DEPTH_FIRST:
84-
result.chain(new DepthFirstTraverseStep(this.projections, this.whileClause, maxDepth, context));
86+
result.chain(new DepthFirstTraverseStep(this.projections, this.whileClause, this.postFilter, maxDepth, context));
8587
break;
8688
}
8789
//TODO

engine/src/main/java/com/arcadedb/query/sql/parser/TraverseStatement.java

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,9 @@ public enum Strategy {
4242
protected Skip skip;
4343
protected Strategy strategy;
4444
protected PInteger maxDepth;
45+
// Optional emit filter set by the outer SELECT planner when `SELECT ... FROM (TRAVERSE ...) WHERE <row-local cond>` is detected. Evaluated per visited vertex
46+
// before emission only: non-matching vertices still drive expansion, they are just not returned as results. Not exposed in grammar.
47+
protected WhereClause postFilter;
4548

4649
public TraverseStatement(final int id) {
4750
super(id);
@@ -159,6 +162,7 @@ public Statement copy() {
159162
result.limit = limit == null ? null : limit.copy();
160163
result.strategy = strategy;
161164
result.maxDepth = maxDepth == null ? null : maxDepth.copy();
165+
result.postFilter = postFilter == null ? null : postFilter.copy();
162166
return result;
163167
}
164168

@@ -181,7 +185,9 @@ public boolean equals(final Object o) {
181185
return false;
182186
if (strategy != that.strategy)
183187
return false;
184-
return Objects.equals(maxDepth, that.maxDepth);
188+
if (!Objects.equals(maxDepth, that.maxDepth))
189+
return false;
190+
return Objects.equals(postFilter, that.postFilter);
185191
}
186192

187193
@Override
@@ -192,6 +198,7 @@ public int hashCode() {
192198
result = 31 * result + (limit != null ? limit.hashCode() : 0);
193199
result = 31 * result + (strategy != null ? strategy.hashCode() : 0);
194200
result = 31 * result + (maxDepth != null ? maxDepth.hashCode() : 0);
201+
result = 31 * result + (postFilter != null ? postFilter.hashCode() : 0);
195202
return result;
196203
}
197204

@@ -247,5 +254,13 @@ public Skip getSkip() {
247254
public void setSkip(final Skip skip) {
248255
this.skip = skip;
249256
}
257+
258+
public WhereClause getPostFilter() {
259+
return postFilter;
260+
}
261+
262+
public void setPostFilter(final WhereClause postFilter) {
263+
this.postFilter = postFilter;
264+
}
250265
}
251266
/* JavaCC - OriginalChecksum=47399a3a3d5a423768bbdc70ee957464 (do not edit this line) */

engine/src/test/java/com/arcadedb/query/sql/executor/TraverseStatementExecutionTest.java

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,109 @@ void traverseInBatchTx() {
201201
});
202202
}
203203

204+
@Test
205+
void branchingTreeTraverse() {
206+
// Regression test for the ArrayList->ArrayDeque swap on entryPoints. A shallow but wide tree stresses the deque depth more than any linear chain covered by
207+
// the other tests; with the old ArrayList the addFirst/removeFirst hot path was O(n) and the full traversal was O(n^2), which made non-trivial subgraphs
208+
// visibly slow even though the visited count was small. Correctness (total visited, per-depth count) must be unchanged.
209+
database.transaction(() -> {
210+
final String v = "BranchingTreeV";
211+
final String e = "BranchingTreeE";
212+
database.getSchema().createVertexType(v);
213+
database.getSchema().createEdgeType(e);
214+
215+
final int fanout = 20;
216+
final RID root = database.command("sql", "create vertex " + v + " set name = 'root'").next().getIdentity().get();
217+
final Map<String, Object> params = new HashMap<>();
218+
params.put("from", root);
219+
220+
int expectedTotal = 1;
221+
for (int i = 0; i < fanout; i++) {
222+
final RID child = database.command("sql", "create vertex " + v + " set name = 'c" + i + "'").next().getIdentity().get();
223+
params.put("to", child);
224+
database.command("sql", "create edge " + e + " from :from to :to", params).close();
225+
expectedTotal++;
226+
for (int j = 0; j < fanout; j++) {
227+
final RID grand = database.command("sql", "create vertex " + v + " set name = 'g" + i + "_" + j + "'").next().getIdentity().get();
228+
params.put("from", child);
229+
params.put("to", grand);
230+
database.command("sql", "create edge " + e + " from :from to :to", params).close();
231+
expectedTotal++;
232+
}
233+
params.put("from", root);
234+
}
235+
236+
params.clear();
237+
params.put("root", root);
238+
try (final ResultSet rs = database.query("sql", "select count(*) as c from (traverse out('" + e + "') from :root)", params)) {
239+
assertThat(rs.hasNext()).isTrue();
240+
assertThat(((Number) rs.next().getProperty("c")).intValue()).isEqualTo(expectedTotal);
241+
}
242+
});
243+
}
244+
245+
@Test
246+
void postFilterPushdownByType() {
247+
// Outer `SELECT ... FROM (TRAVERSE ...) WHERE @type = 'X'` should evaluate the @type filter inside the traverse step, so non-matching vertices are visited
248+
// (to keep expansion correct) but not emitted. Correctness is what this test guards; the perf win is that the outer SubQueryStep no longer shuttles every
249+
// intermediate vertex through a FilterStep.
250+
database.transaction(() -> {
251+
final String folder = "PushdownFolderV";
252+
final String leaf = "PushdownLeafV";
253+
final String edge = "PushdownE";
254+
database.getSchema().createVertexType(folder);
255+
database.getSchema().createVertexType(leaf);
256+
database.getSchema().createEdgeType(edge);
257+
258+
final RID root = database.command("sql", "create vertex " + folder + " set name = 'root'").next().getIdentity().get();
259+
final Map<String, Object> params = new HashMap<>();
260+
int expectedLeaves = 0;
261+
for (int i = 0; i < 5; i++) {
262+
final RID sub = database.command("sql", "create vertex " + folder + " set name = 'sub" + i + "'").next().getIdentity().get();
263+
params.clear();
264+
params.put("from", root);
265+
params.put("to", sub);
266+
database.command("sql", "create edge " + edge + " from :from to :to", params).close();
267+
for (int j = 0; j < 4; j++) {
268+
final RID l = database.command("sql", "create vertex " + leaf + " set name = 'leaf" + i + "_" + j + "'").next().getIdentity().get();
269+
params.clear();
270+
params.put("from", sub);
271+
params.put("to", l);
272+
database.command("sql", "create edge " + edge + " from :from to :to", params).close();
273+
expectedLeaves++;
274+
}
275+
}
276+
277+
params.clear();
278+
params.put("root", root);
279+
try (final ResultSet rs = database.query("sql",
280+
"select @rid as rid, @type as t from (traverse out('" + edge + "') from :root) where @type = '" + leaf + "'", params)) {
281+
int count = 0;
282+
while (rs.hasNext()) {
283+
final Result r = rs.next();
284+
assertThat((String) r.getProperty("t")).isEqualTo(leaf);
285+
count++;
286+
}
287+
assertThat(count).isEqualTo(expectedLeaves);
288+
}
289+
290+
// Count form - the pushdown should not change count-aggregation semantics.
291+
try (final ResultSet rs = database.query("sql",
292+
"select count(*) as c from (traverse out('" + edge + "') from :root) where @type = '" + leaf + "'", params)) {
293+
assertThat(rs.hasNext()).isTrue();
294+
assertThat(((Number) rs.next().getProperty("c")).intValue()).isEqualTo(expectedLeaves);
295+
}
296+
297+
// Safety: when outer WHERE references a LET variable, pushdown must NOT fire (because the LET lives in the outer scope) and behavior must still be
298+
// correct.
299+
try (final ResultSet rs = database.query("sql",
300+
"select count(*) as c from (traverse out('" + edge + "') from :root) let $t = '" + leaf + "' where @type = $t", params)) {
301+
assertThat(rs.hasNext()).isTrue();
302+
assertThat(((Number) rs.next().getProperty("c")).intValue()).isEqualTo(expectedLeaves);
303+
}
304+
});
305+
}
306+
204307
@Test
205308
void traverseFromRID() {
206309
database.command("sql", "CREATE VERTEX TYPE TVtx IF NOT EXISTS");

0 commit comments

Comments
 (0)