diff --git a/src/main/java/com/teragrep/pth10/ast/DPLParserConfig.java b/src/main/java/com/teragrep/pth10/ast/DPLParserConfig.java index 0d77751cc..60150adcb 100644 --- a/src/main/java/com/teragrep/pth10/ast/DPLParserConfig.java +++ b/src/main/java/com/teragrep/pth10/ast/DPLParserConfig.java @@ -45,6 +45,7 @@ */ package com.teragrep.pth10.ast; +import com.teragrep.pth10.ast.commands.transformstatement.timechart.span.TimeRange; import com.teragrep.pth10.ast.time.RelativeTimeParser; import com.teragrep.pth10.ast.time.RelativeTimestamp; import org.slf4j.Logger; @@ -66,8 +67,6 @@ public class DPLParserConfig { private Map config = new LinkedHashMap<>(); - TimeRange timeRange = TimeRange.ONE_MONTH; - /** * Get named value from config map * @@ -162,7 +161,7 @@ public void setLatest(String latest) { * @return enum range values 10s,...,1M */ public TimeRange getTimeRange() { - TimeRange rv = TimeRange.ONE_DAY; + TimeRange timeRange = new TimeRange("1 days"); long r = 0; // Earliest set, latest not if (config.get("earliest") != null && config.get("latest") == null) { @@ -181,21 +180,21 @@ else if (config.get("earliest") != null && config.get("latest") != null) { r = abs(r); LOGGER.info("Calculated range=<{}>", r); if (r <= 15 * 60) { - rv = TimeRange.TEN_SECONDS; + timeRange = new TimeRange("10 seconds"); } else if (r <= 60 * 60) { - rv = TimeRange.ONE_MINUTE; + timeRange = new TimeRange("1 minutes"); } else if (r <= 4 * 60 * 60) { - rv = TimeRange.FIVE_MINUTES; + timeRange = new TimeRange("5 minutes"); } else if (r <= 24 * 60 * 60) { - rv = TimeRange.THIRTY_MINUTES; + timeRange = new TimeRange("30 minutes"); } else if (r > 30 * 24 * 60 * 60) { // Default max value is 1 day - rv = TimeRange.ONE_DAY; + timeRange = new TimeRange("1 days"); } - return rv; + return timeRange; } } diff --git a/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/TimechartTransformation.java b/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/TimechartTransformation.java index 2ce11d7a1..ef2cd75bc 100644 --- a/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/TimechartTransformation.java +++ b/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/TimechartTransformation.java @@ -47,54 +47,36 @@ import com.teragrep.pth10.ast.*; import com.teragrep.pth10.ast.bo.*; -import com.teragrep.pth10.ast.bo.Token.Type; import com.teragrep.pth10.ast.commands.aggregate.AggregateFunction; +import com.teragrep.pth10.ast.commands.transformstatement.timechart.DivByInstContextValue; +import com.teragrep.pth10.ast.commands.transformstatement.timechart.SpanContextValue; import com.teragrep.pth10.steps.timechart.TimechartStep; import com.teragrep.pth_03.antlr.DPLParser; import com.teragrep.pth_03.antlr.DPLParserBaseVisitor; -import com.teragrep.pth_03.shaded.org.antlr.v4.runtime.tree.ParseTree; import org.apache.spark.sql.Column; -import org.apache.spark.sql.functions; -import org.apache.spark.unsafe.types.CalendarInterval; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.w3c.dom.Document; -import org.w3c.dom.Element; import java.util.*; -import java.util.regex.Matcher; -import java.util.regex.Pattern; /** * Class that contains the visitor methods for the timechart command
- * Provides a pivoted dataset, making it easier to form time-field graphs in the UI - *
Dataset.groupBy("_time").pivot(aggregateField).sum(fieldname)
*/ -public class TimechartTransformation extends DPLParserBaseVisitor { +public final class TimechartTransformation extends DPLParserBaseVisitor { private static final Logger LOGGER = LoggerFactory.getLogger(TimechartTransformation.class); - private DPLParserCatalystContext catCtx = null; - private DPLParserCatalystVisitor catVisitor; - private Document doc; + private final DPLParserCatalystContext catCtx; - EvalTransformation evalTransformation; - AggregateFunction aggregateFunction; - private String aggregateField = null; + private final AggregateFunction aggregateFunction; - public TimechartStep timechartStep = null; + // fields set in visit functions + private final ArrayList aggFunCols = new ArrayList<>(); - public TimechartTransformation(DPLParserCatalystContext catCtx, DPLParserCatalystVisitor catVisitor) { - this.doc = null; + public TimechartTransformation(final DPLParserCatalystContext catCtx) { this.catCtx = catCtx; - this.catVisitor = catVisitor; - this.evalTransformation = new EvalTransformation(catCtx); this.aggregateFunction = new AggregateFunction(catCtx); } - public String getAggregateField() { - return this.aggregateField; - } - /** * timechartTransformation : COMMAND_MODE_TIMECHART (t_timechart_sepParameter)? (t_timechart_formatParameter)? * (t_timechart_fixedrangeParameter)? (t_timechart_partialParameter)? (t_timechart_contParameter)? @@ -108,299 +90,36 @@ public Node visitTimechartTransformation(DPLParser.TimechartTransformationContex } private Node timechartTransformationEmitCatalyst(DPLParser.TimechartTransformationContext ctx) { - this.timechartStep = new TimechartStep(); + final Column span = new SpanContextValue(ctx.t_timechart_binOptParameter(), catCtx).value(); + final List divByInsts = new DivByInstContextValue(ctx.t_timechart_divideByInstruction()).value(); - Column span = null; + visitChildren(ctx); // visit all the parameters - if (ctx.t_timechart_binOptParameter() != null && !ctx.t_timechart_binOptParameter().isEmpty()) { - LOGGER.info("Timechart Optional parameters: <[{}]>", ctx.t_timechart_binOptParameter().get(0).getText()); - - ColumnNode spanNode = (ColumnNode) visit(ctx.t_timechart_binOptParameter().get(0)); - if (spanNode != null) { - span = spanNode.getColumn(); - } - } + TimechartStep timechartStep = new TimechartStep(aggFunCols, divByInsts, span); - Column funCol = null; - List listOfAggFunCols = new ArrayList<>(); - List listOfDivideByInst = new ArrayList<>(); - for (int i = 0; i < ctx.getChildCount(); i++) { - ParseTree child = ctx.getChild(i); - - if (child instanceof DPLParser.AggregateFunctionContext) { - // go through each agg. function - DPLParser.AggregateFunctionContext aggFunCtx = (DPLParser.AggregateFunctionContext) child; - Node funNode = visit(aggFunCtx); - if (funNode != null) { - if (funCol != null) { - listOfAggFunCols.add(funCol); - } - funCol = ((ColumnNode) funNode).getColumn(); - } - } - else if (child instanceof DPLParser.T_timechart_divideByInstructionContext) { - String divByInst = ((StringNode) visitT_timechart_divideByInstruction( - (DPLParser.T_timechart_divideByInstructionContext) child - )).toString(); - listOfDivideByInst.add(divByInst); - } - else if (child instanceof DPLParser.T_timechart_fieldRenameInstructionContext) { - if (funCol != null) { - funCol = funCol.as(visit(child).toString()); - } - } - } - listOfAggFunCols.add(funCol); // need to add last one; for loop above only adds if there's a new one coming - - if (span == null) { - span = createDefaultSpan(); - } - - timechartStep.setHdfsPath(this.catVisitor.getHdfsPath()); - timechartStep.setCatCtx(catCtx); - timechartStep.setSpan(span); - timechartStep.setAggCols(listOfAggFunCols); - timechartStep.setDivByInsts(listOfDivideByInst); - - // span - this.catCtx.setTimeChartSpanSeconds(getSpanSeconds(span)); - - LOGGER.debug("span= <[{}]>", timechartStep.getSpan().toString()); - LOGGER.debug("aggcols= <[{}]>", Arrays.toString(timechartStep.getAggCols().toArray())); - LOGGER.debug("divby= <[{}]>", Arrays.toString(timechartStep.getDivByInsts().toArray())); + LOGGER.debug("span= <[{}]>", span); + LOGGER.debug("aggcols= <[{}]>", aggFunCols); + LOGGER.debug("divby= <[{}]>", divByInsts); return new StepNode(timechartStep); - - //throw new RuntimeException("Chart transformation operation not supported yet"); - } - - /** - * Convert span of type Column to the span length in seconds - * - * @param span span of type column - * @return span length in seconds - */ - private long getSpanSeconds(Column span) { - // span column is of type 'timewindow(_time, 60000000, 60000000, 0)' - // get second parameter and convert from microseconds to seconds - // yes, this is terrible but it works so is it really? - char[] spanChars = span.toString().toCharArray(); - boolean isWithinNumber = false; - StringBuilder num = new StringBuilder(); - for (char spanChar : spanChars) { - if (spanChar == ',') { - isWithinNumber = !isWithinNumber; - if (!isWithinNumber) { - break; - } - } - else if (isWithinNumber && spanChar != ' ') { - num.append(spanChar); - } - } - - try { - return Long.parseLong(num.toString()) / 1_000_000L; - } - catch (NumberFormatException nfe) { - throw new RuntimeException("Error converting span column into seconds"); - } } @Override public Node visitAggregateFunction(DPLParser.AggregateFunctionContext ctx) { - Node rv = aggregateFunction.visitAggregateFunction(ctx); - if (aggregateField == null) - aggregateField = aggregateFunction.getAggregateField(); - return aggregateFunction.visitAggregateFunction(ctx); - } - - @Override - public Node visitT_timechart_divideByInstruction(DPLParser.T_timechart_divideByInstructionContext ctx) { - // LOGGER.info(ctx.getChildCount()+"--visitT_chart_divideByInstruction incoming{}", ctx.getText()); - if (ctx.getChildCount() == 0) { - return null; - } - String target = ctx.getChild(1).getChild(0).toString(); - - if (doc != null) { - Element el = doc.createElement("divideBy"); - el.setAttribute("field", target); - return new ElementNode(el); - } - else { - return new StringNode(new Token(Type.STRING, target)); - } + ColumnNode aggCol = (ColumnNode) aggregateFunction.visitAggregateFunction(ctx); + aggFunCols.add(aggCol.getColumn()); + return new NullNode(); } @Override public Node visitT_timechart_fieldRenameInstruction(DPLParser.T_timechart_fieldRenameInstructionContext ctx) { - String field = ctx.getChild(1).getText(); - if (doc != null) { - Element el = doc.createElement("fieldRename"); - el.setAttribute("field", field); - return new ElementNode(el); - } - else { - return new StringNode(new Token(Type.STRING, field)); + String rename = ctx.getChild(1).getText(); + if (!aggFunCols.isEmpty()) { + Column latestAgg = aggFunCols.remove(aggFunCols.size() - 1); + aggFunCols.add(latestAgg.as(rename)); // rename the newest visited aggregation column } - } - @Override - public Node visitT_timechart_binOptParameter(DPLParser.T_timechart_binOptParameterContext ctx) { - LOGGER.info("visitT_timechart_binOptParameter:<{}>", ctx.getText()); - return visitChildren(ctx); - } - - @Override - public Node visitT_timechart_binSpanParameter(DPLParser.T_timechart_binSpanParameterContext ctx) { - LOGGER.info("visitT_timechart_binSpanParameter:<{}>", ctx.getText()); - CalendarInterval ival = getSpanLength(ctx.getChild(1).getText()); - Column col = new Column("_time"); - Column span = functions.window(col, String.valueOf(ival)); - - return new ColumnNode(span); - } - - /** - * Creates a column with default span of one hour - * - * @return - */ - private Column createDefaultSpan() { - long sec = 0; - TimeRange tr = TimeRange.ONE_HOUR; - String duration = "1 days"; // Default duration - // LOGGER.info("createDefaultSpan="+catCtx.getTimeRange()); - DPLParserConfig pConf = catCtx.getParserConfig(); - if (pConf != null) { - tr = pConf.getTimeRange(); - } - switch (tr) { - case TEN_SECONDS: { - sec = 10; - duration = "10 seconds"; - break; - } - case ONE_MINUTE: { - sec = 60; - duration = "1 minutes"; - break; - } - case FIVE_MINUTES: { - sec = 5 * 60; - duration = "5 minutes"; - break; - } - case THIRTY_MINUTES: { - sec = 30 * 60; - duration = "30 minutes"; - break; - } - case ONE_HOUR: { - sec = 3600; - duration = "1 hours"; - break; - } - case ONE_DAY: { - sec = 24 * 3600; - duration = "1 days"; - break; - } - case ONE_MONTH: { - sec = 30 * 24 * 3600; - duration = "30 days"; - break; - } - default: { - throw new RuntimeException("timechart span duration greater that month is not supported"); - } - } - CalendarInterval ival = new CalendarInterval(0, 0, sec * 1000 * 1000); - return functions.window(new Column("_time"), String.valueOf(ival), duration, "0 minutes"); - } - - /** - * Gets the CalendarInterval of string form span - * - * @param value span as string - * @return CalendarInterval - */ - private CalendarInterval getSpanLength(String value) { - // incoming span-length consist of - // [] - // default timescale is sec - String timescale = "sec"; - int numericalValue; - int month = 0; - long sec = 0; - Pattern p = Pattern.compile("\\d+"); - Matcher m = p.matcher(value); - if (m.lookingAt()) { - numericalValue = Integer.parseInt(m.group()); - String[] parts = value.split(m.group()); - if (parts.length > 1) - timescale = parts[1].trim(); - } - else { - LOGGER.error("Span length error: missing numerical value:<{}>", value); - throw new RuntimeException("getSpanLength, missing numerical value:" + value); - } - // Calculate value - switch (timescale) { - case "s": - case "sec": - case "secs": - case "second": - case "seconds": - case "S": { - sec = numericalValue; - break; - } - case "m": - case "min": - case "mins": - case "minute": - case "minutes": - case "M": { - sec = numericalValue * 60L; - break; - } - case "h": - case "hr": - case "hrs": - case "hour": - case "hours": - case "H": { - sec = numericalValue * 3600L; - break; - } - case "d": - case "day": - case "days": - case "D": { - sec = numericalValue * 3600L * 24; - break; - } - case "w": - case "week": - case "weeks": - case "W": { - sec = numericalValue * 3600L * 24 * 7; - break; - } - case "mon": - case "month": - case "months": - case "MON": { - //month = numericalValue; - // month is not supported as such, it needs to be changed seconds - // use 30 as default month length - sec = (long) numericalValue * 30 * 24 * 3600; - break; - } - } - return new CalendarInterval(month, 0, sec * 1000 * 1000L); + return new NullNode(); } @Override @@ -483,33 +202,8 @@ public Node visitT_timechart_evaledField(DPLParser.T_timechart_evaledFieldContex return visitChildren(ctx); } - /*@Override public Node visitT_timechart_singleAggregation(DPLParser.T_timechart_singleAggregationContext ctx) { - String oper = ctx.getText(); - String defaultField="*"; - Node rv = null; - Column col = null; - if(oper.equalsIgnoreCase("count") || oper.equalsIgnoreCase("c")) { - aggregateField = "count"; // use default name - col = org.apache.spark.sql.functions.count(defaultField); - // LOGGER.info("T_timechart_singleAggregation (Catalyst):{}", col.expr().sql()+" default field="+defaultField); - traceBuffer.add("Visit AggregateMethodCount(Catalyst):{}", col.expr().sql()); - rv = new ColumnNode(col); - }else { - rv = this.aggregateFunction.visitAggregateFunction(ctx.aggregateFunction()); - this.aggregateField = aggregateFunction.getAggregateField(); - } - // Check whether field needs to be renamed - if(ctx.t_timechart_fieldRenameInstruction() != null){ - Node renameCmd = visitT_timechart_fieldRenameInstruction(ctx.t_timechart_fieldRenameInstruction()); - aggregateField = renameCmd.toString(); - // rv = new ColumnNode(((ColumnNode) rv).getColumn().as(renameCmd.toString())); - } - return rv; - }*/ - @Override public Node visitSpanType(DPLParser.SpanTypeContext ctx) { - // LOGGER.info("visitSpanType:"+ctx.getText()); return visitChildren(ctx); } diff --git a/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/TransformStatement.java b/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/TransformStatement.java index 813878588..1db84020f 100644 --- a/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/TransformStatement.java +++ b/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/TransformStatement.java @@ -176,7 +176,7 @@ public Node visitChartTransformation(DPLParser.ChartTransformationContext ctx) { @Override public Node visitTimechartTransformation(DPLParser.TimechartTransformationContext ctx) { // timechart command - return new TimechartTransformation(catCtx, catVisitor).visitTimechartTransformation(ctx); + return new TimechartTransformation(catCtx).visitTimechartTransformation(ctx); } @Override diff --git a/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/DivByInstContextValue.java b/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/DivByInstContextValue.java new file mode 100644 index 000000000..70f46ce6d --- /dev/null +++ b/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/DivByInstContextValue.java @@ -0,0 +1,74 @@ +/* + * Teragrep Data Processing Language (DPL) translator for Apache Spark (pth_10) + * Copyright (C) 2019-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. + * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified + * versions must be marked as "Modified version of" The Program. + * + * Names of the licensors and authors may not be used for publicity purposes. + * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. + */ +package com.teragrep.pth10.ast.commands.transformstatement.timechart; + +import com.teragrep.pth10.ast.ContextValue; +import com.teragrep.pth_03.antlr.DPLParser; + +import java.util.ArrayList; +import java.util.List; + +/** + * Parses the divideByInstruction in "| timechart". + */ +public final class DivByInstContextValue implements ContextValue> { + + private final List ctx; + + public DivByInstContextValue(List ctx) { + this.ctx = ctx; + } + + @Override + public List value() { + final List divByFields = new ArrayList<>(); + for (DPLParser.T_timechart_divideByInstructionContext divByInstruction : ctx) { + divByFields.add(divByInstruction.fieldType().getChild(0).toString()); + } + + return divByFields; + } +} diff --git a/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/SpanContextValue.java b/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/SpanContextValue.java new file mode 100644 index 000000000..d70228b73 --- /dev/null +++ b/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/SpanContextValue.java @@ -0,0 +1,114 @@ +/* + * Teragrep Data Processing Language (DPL) translator for Apache Spark (pth_10) + * Copyright (C) 2019-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. + * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified + * versions must be marked as "Modified version of" The Program. + * + * Names of the licensors and authors may not be used for publicity purposes. + * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. + */ +package com.teragrep.pth10.ast.commands.transformstatement.timechart; + +import com.teragrep.pth10.ast.ContextValue; +import com.teragrep.pth10.ast.DPLParserCatalystContext; + +import com.teragrep.pth10.ast.commands.transformstatement.timechart.span.Span; +import com.teragrep.pth10.ast.commands.transformstatement.timechart.span.SpanParameter; +import com.teragrep.pth10.ast.commands.transformstatement.timechart.span.TimeRange; +import com.teragrep.pth_03.antlr.DPLParser; +import org.apache.spark.sql.Column; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.List; + +/** + * Parses a Column from the "| timechart" command's span parameter + */ +public final class SpanContextValue implements ContextValue { + + private static final Logger LOGGER = LoggerFactory.getLogger(SpanContextValue.class); + + private final List ctxList; + private final DPLParserCatalystContext catCtx; + private final TimeRange defaultTimeRange; + + public SpanContextValue( + List ctxList, + DPLParserCatalystContext catCtx + ) { + this(ctxList, catCtx, catCtx.getParserConfig().getTimeRange()); + } + + public SpanContextValue( + List ctxList, + DPLParserCatalystContext catCtx, + TimeRange defaultTimeRange + ) { + this.ctxList = ctxList; + this.catCtx = catCtx; + this.defaultTimeRange = defaultTimeRange; + } + + @Override + public Column value() { + // use default time range from config if span= parameter is not used + Span span = new SpanParameter(defaultTimeRange); + TimeRange actualTimeRange = defaultTimeRange; + + boolean spanFound = false; + for (DPLParser.T_timechart_binOptParameterContext binOpt : ctxList) { + DPLParser.T_timechart_binSpanParameterContext spanCtx = binOpt + .t_timechart_spanOptParameter() + .t_timechart_binSpanParameter(); + if (spanCtx != null) { + if (spanFound) { + throw new IllegalArgumentException("| timechart does not allow multiple 'span=' parameters"); + } + LOGGER.info("visiting t_timechart_binSpanParameter:<{}>", spanCtx.getText()); + actualTimeRange = new TimeRange(spanCtx.getChild(1).getText()); + span = new SpanParameter(actualTimeRange); + spanFound = true; + } + } + + this.catCtx.setTimeChartSpanSeconds(actualTimeRange.asSeconds()); + return span.asColumn(); + } +} diff --git a/src/main/java/com/teragrep/pth10/ast/TimeRange.java b/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/Span.java similarity index 92% rename from src/main/java/com/teragrep/pth10/ast/TimeRange.java rename to src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/Span.java index 8a919ee82..8df9482a5 100644 --- a/src/main/java/com/teragrep/pth10/ast/TimeRange.java +++ b/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/Span.java @@ -43,8 +43,11 @@ * Teragrep, the applicable Commercial License may apply to this file if you as * a licensee so wish it. */ -package com.teragrep.pth10.ast; +package com.teragrep.pth10.ast.commands.transformstatement.timechart.span; -public enum TimeRange { - TEN_SECONDS, ONE_MINUTE, FIVE_MINUTES, THIRTY_MINUTES, ONE_HOUR, ONE_DAY, ONE_MONTH +import org.apache.spark.sql.Column; + +public interface Span { + + Column asColumn(); } diff --git a/src/main/java/com/teragrep/pth10/steps/timechart/AbstractTimechartStep.java b/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/SpanParameter.java similarity index 58% rename from src/main/java/com/teragrep/pth10/steps/timechart/AbstractTimechartStep.java rename to src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/SpanParameter.java index 28bf1c8aa..86709405a 100644 --- a/src/main/java/com/teragrep/pth10/steps/timechart/AbstractTimechartStep.java +++ b/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/SpanParameter.java @@ -43,72 +43,51 @@ * Teragrep, the applicable Commercial License may apply to this file if you as * a licensee so wish it. */ -package com.teragrep.pth10.steps.timechart; +package com.teragrep.pth10.ast.commands.transformstatement.timechart.span; -import com.teragrep.pth10.ast.DPLParserCatalystContext; -import com.teragrep.pth10.steps.AbstractStep; import org.apache.spark.sql.Column; +import org.apache.spark.sql.functions; +import org.apache.spark.unsafe.types.CalendarInterval; -import java.util.List; +import java.util.Objects; -public abstract class AbstractTimechartStep extends AbstractStep { - - protected List aggCols = null; - protected List divByInsts = null; - protected Column span = null; - protected String aggregateField = null; - protected DPLParserCatalystContext catCtx = null; - protected String hdfsPath = null; - - public AbstractTimechartStep() { - super(); - } - - public void setAggCols(List aggCols) { - this.aggCols = aggCols; - } - - public List getAggCols() { - return aggCols; - } - - public void setDivByInsts(List divByInsts) { - this.divByInsts = divByInsts; - } - - public List getDivByInsts() { - return divByInsts; - } - - public void setAggregateField(String field) { - this.aggregateField = field; - } +/** + * | timechart commands 'span=' parameter + */ +public final class SpanParameter implements Span { - public String getAggregateField() { - return this.aggregateField; - } + private final Column column; + private final CalendarInterval ival; - public void setSpan(Column span) { - this.span = span; + public SpanParameter(TimeRange timeRange) { + this(timeRange.asSeconds()); } - public Column getSpan() { - return this.span; + public SpanParameter(long seconds) { + this(new Column("_time"), new CalendarInterval(0, 0, seconds * 1000 * 1000L)); } - public void setCatCtx(DPLParserCatalystContext catCtx) { - this.catCtx = catCtx; + public SpanParameter(Column column, CalendarInterval ival) { + this.column = column; + this.ival = ival; } - public DPLParserCatalystContext getCatCtx() { - return catCtx; + @Override + public Column asColumn() { + return functions.window(column, String.valueOf(ival)); } - public void setHdfsPath(String hdfsPath) { - this.hdfsPath = hdfsPath; + @Override + public boolean equals(Object o) { + if (o == null || getClass() != o.getClass()) { + return false; + } + SpanParameter that = (SpanParameter) o; + return Objects.equals(column, that.column) && Objects.equals(ival, that.ival); } - public String getHdfsPath() { - return hdfsPath; + @Override + public int hashCode() { + return Objects.hash(column, ival); } } diff --git a/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/TimeRange.java b/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/TimeRange.java new file mode 100644 index 000000000..251d48877 --- /dev/null +++ b/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/TimeRange.java @@ -0,0 +1,163 @@ +/* + * Teragrep Data Processing Language (DPL) translator for Apache Spark (pth_10) + * Copyright (C) 2019-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. + * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified + * versions must be marked as "Modified version of" The Program. + * + * Names of the licensors and authors may not be used for publicity purposes. + * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. + */ +package com.teragrep.pth10.ast.commands.transformstatement.timechart.span; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Objects; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * A time range for | timechart command's 'span=' parameter. Gets the span= parameter from the query and parses it. + */ +public final class TimeRange { + + private static final Logger LOGGER = LoggerFactory.getLogger(TimeRange.class); + + private final String duration; + + public TimeRange(String duration) { + this.duration = duration; + } + + public long asSeconds() { + // incoming span-length consist of [] + // default timescale is sec + String timescale = "sec"; + final int numericalValue; + final Pattern p = Pattern.compile("\\d+"); + final Matcher m = p.matcher(duration); + if (m.lookingAt()) { + numericalValue = Integer.parseInt(m.group()); + final String[] parts = duration.split(m.group()); + if (parts.length > 1) { + timescale = parts[1].trim(); + } + } + else { + LOGGER.error("Span length error: missing numerical value:<{}>", duration); + throw new RuntimeException("| timechart 'span' parameter is missing a numerical value:" + duration); + } + + long sec; + switch (timescale) { + case "s": + case "sec": + case "secs": + case "second": + case "seconds": + case "S": { + sec = numericalValue; + break; + } + case "m": + case "min": + case "mins": + case "minute": + case "minutes": + case "M": { + sec = numericalValue * 60L; + break; + } + case "h": + case "hr": + case "hrs": + case "hour": + case "hours": + case "H": { + sec = numericalValue * 3600L; + break; + } + case "d": + case "day": + case "days": + case "D": { + sec = numericalValue * 3600L * 24; + break; + } + case "w": + case "week": + case "weeks": + case "W": { + sec = numericalValue * 3600L * 24 * 7; + break; + } + case "mon": + case "month": + case "months": + case "MON": { + // month is not supported as such, it needs to be changed to seconds + // use 30 as default month length + sec = (long) numericalValue * 30 * 24 * 3600; + break; + } + default: { + throw new RuntimeException( + "| timechart 'span' parameter only accepts seconds, minutes, hours, days, weeks and months. Got '" + + timescale + "' instead." + ); + } + } + + return sec; + } + + @Override + public boolean equals(Object o) { + if (o == null || getClass() != o.getClass()) { + return false; + } + TimeRange timeRange = (TimeRange) o; + return Objects.equals(duration, timeRange.duration); + } + + @Override + public int hashCode() { + return Objects.hashCode(duration); + } +} diff --git a/src/main/java/com/teragrep/pth10/steps/timechart/TimechartStep.java b/src/main/java/com/teragrep/pth10/steps/timechart/TimechartStep.java index 9849094da..06bbdd1f5 100644 --- a/src/main/java/com/teragrep/pth10/steps/timechart/TimechartStep.java +++ b/src/main/java/com/teragrep/pth10/steps/timechart/TimechartStep.java @@ -45,18 +45,27 @@ */ package com.teragrep.pth10.steps.timechart; +import com.teragrep.pth10.steps.AbstractStep; import org.apache.spark.sql.*; import scala.collection.JavaConversions; import scala.collection.Seq; import java.util.ArrayList; import java.util.List; +import java.util.Objects; import java.util.stream.Collectors; -public final class TimechartStep extends AbstractTimechartStep { +public final class TimechartStep extends AbstractStep { - public TimechartStep() { + private final List aggCols; + private final List divByInsts; + private final Column span; + + public TimechartStep(final List aggCols, final List divByInsts, final Column span) { super(); + this.aggCols = aggCols; + this.divByInsts = divByInsts; + this.span = span; this.properties.add(CommandProperty.AGGREGATE); } @@ -66,18 +75,17 @@ public Dataset get(Dataset dataset) { return null; } - if (this.getAggCols() == null || this.getAggCols().isEmpty()) { + if (aggCols.isEmpty()) { throw new RuntimeException("Aggregate columns not present in TimechartStep, cannot proceed"); } // .agg has funky arguments; just giving a Seq of columns is no good, first arg needs to be a column - Column firstAggCol = this.aggCols.get(0); - Seq seqOfAggColsExceptFirst = JavaConversions - .asScalaBuffer(this.aggCols.subList(1, this.aggCols.size())); + Column firstAggCol = aggCols.get(0); + Seq seqOfAggColsExceptFirst = JavaConversions.asScalaBuffer(aggCols.subList(1, aggCols.size())); List allGroupBys = new ArrayList<>(); - allGroupBys.add(this.span); - allGroupBys.addAll(this.divByInsts.stream().map(functions::col).collect(Collectors.toList())); + allGroupBys.add(span); + allGroupBys.addAll(divByInsts.stream().map(functions::col).collect(Collectors.toList())); Seq seqOfAllGroupBys = JavaConversions.asScalaBuffer(allGroupBys); @@ -90,4 +98,20 @@ public Dataset get(Dataset dataset) { .drop("window") .orderBy("_time"); } + + @Override + public boolean equals(final Object o) { + if (o == null || getClass() != o.getClass()) { + return false; + } + final TimechartStep that = (TimechartStep) o; + // Column.equals() doesn't work, using String representation instead to see that the same operations are done on both + return Objects.equals(aggCols.toString(), that.aggCols.toString()) + && Objects.equals(divByInsts, that.divByInsts) && Objects.equals(span.toString(), that.span.toString()); + } + + @Override + public int hashCode() { + return Objects.hash(aggCols, divByInsts, span); + } } diff --git a/src/test/java/com/teragrep/pth10/TimechartStreamingTest.java b/src/test/java/com/teragrep/pth10/TimechartStreamingTest.java index 82f41e942..5a20f8bc5 100644 --- a/src/test/java/com/teragrep/pth10/TimechartStreamingTest.java +++ b/src/test/java/com/teragrep/pth10/TimechartStreamingTest.java @@ -222,4 +222,19 @@ public void testTimechartBasicCount() { Assertions.assertEquals("10", listOfCount.get(0)); }); } + + @Test + @DisabledIfSystemProperty( + named = "skipSparkTest", + matches = "true" + ) + public void testTimechartMultipleSpans() { + IllegalArgumentException e = streamingTestUtil + .performThrowingDPLTest( + IllegalArgumentException.class, "index=index_A | timechart span=1min span=2min count", testFile, + ds -> { + } + ); + Assertions.assertEquals("| timechart does not allow multiple 'span=' parameters", e.getMessage()); + } } diff --git a/src/test/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/SpanParameterTest.java b/src/test/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/SpanParameterTest.java new file mode 100644 index 000000000..781decc30 --- /dev/null +++ b/src/test/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/SpanParameterTest.java @@ -0,0 +1,79 @@ +/* + * Teragrep Data Processing Language (DPL) translator for Apache Spark (pth_10) + * Copyright (C) 2019-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. + * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified + * versions must be marked as "Modified version of" The Program. + * + * Names of the licensors and authors may not be used for publicity purposes. + * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. + */ +package com.teragrep.pth10.ast.commands.transformstatement.timechart.span; + +import nl.jqno.equalsverifier.EqualsVerifier; +import org.apache.spark.sql.Column; +import org.apache.spark.sql.functions; +import org.apache.spark.unsafe.types.CalendarInterval; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class SpanParameterTest { + + @Test + public void testEqualsVerifier() { + // equalsVerifier flags Spark's Column as a recursive data structure, have to use prefabs + EqualsVerifier + .forClass(SpanParameter.class) + .withNonnullFields("column", "ival") + .withPrefabValues(Column.class, new Column("_time"), functions.lit("")) + .verify(); + } + + @Test + public void testAsColumn() { + TimeRange timeRange = new TimeRange("5 minutes"); + SpanParameter spanParameter = new SpanParameter(timeRange); + + long microSeconds = timeRange.asSeconds() * 1000 * 1000; + Column expectedColumn = functions + .window(new Column("_time"), String.valueOf(new CalendarInterval(0, 0, microSeconds))); + + // Spark's Columns seem to be equal only with same reference. Using String representation instead. + Assertions.assertEquals(expectedColumn.toString(), spanParameter.asColumn().toString()); + } +} diff --git a/src/test/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/TimeRangeTest.java b/src/test/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/TimeRangeTest.java new file mode 100644 index 000000000..9df4c80c9 --- /dev/null +++ b/src/test/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/TimeRangeTest.java @@ -0,0 +1,113 @@ +/* + * Teragrep Data Processing Language (DPL) translator for Apache Spark (pth_10) + * Copyright (C) 2019-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. + * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified + * versions must be marked as "Modified version of" The Program. + * + * Names of the licensors and authors may not be used for publicity purposes. + * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. + */ +package com.teragrep.pth10.ast.commands.transformstatement.timechart.span; + +import nl.jqno.equalsverifier.EqualsVerifier; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class TimeRangeTest { + + @Test + public void testEqualsVerifier() { + EqualsVerifier.forClass(TimeRange.class).withNonnullFields("duration").verify(); + } + + @Test + public void testSeconds() { + String duration = "5 seconds"; + TimeRange timeRange = new TimeRange(duration); + + long expectedSeconds = 5; + Assertions.assertEquals(expectedSeconds, timeRange.asSeconds()); + } + + @Test + public void testMinutes() { + String duration = "5 minutes"; + TimeRange timeRange = new TimeRange(duration); + + long expectedSeconds = 5 * 60; + Assertions.assertEquals(expectedSeconds, timeRange.asSeconds()); + } + + @Test + public void testHours() { + String duration = "5 hours"; + TimeRange timeRange = new TimeRange(duration); + + long expectedSeconds = 5 * 60 * 60; + Assertions.assertEquals(expectedSeconds, timeRange.asSeconds()); + } + + @Test + public void testDays() { + String duration = "5 days"; + TimeRange timeRange = new TimeRange(duration); + + long expectedSeconds = 5 * 60 * 60 * 24; + Assertions.assertEquals(expectedSeconds, timeRange.asSeconds()); + } + + @Test + public void testMonths() { + String duration = "5 months"; + TimeRange timeRange = new TimeRange(duration); + + long expectedSeconds = 5 * 60 * 60 * 24 * 30; + Assertions.assertEquals(expectedSeconds, timeRange.asSeconds()); + } + + @Test + public void testYears() { + String duration = "5 years"; + TimeRange timeRange = new TimeRange(duration); + + String expectedErrorMessage = "| timechart 'span' parameter only accepts seconds, minutes, hours, days, weeks and months. Got 'years' instead."; + RuntimeException e = Assertions.assertThrows(RuntimeException.class, timeRange::asSeconds); + Assertions.assertEquals(expectedErrorMessage, e.getMessage()); + } +} diff --git a/src/test/java/com/teragrep/pth10/translationTests/TimechartTest.java b/src/test/java/com/teragrep/pth10/translationTests/TimechartTest.java index 4a3942555..c9c1abd46 100644 --- a/src/test/java/com/teragrep/pth10/translationTests/TimechartTest.java +++ b/src/test/java/com/teragrep/pth10/translationTests/TimechartTest.java @@ -46,8 +46,11 @@ package com.teragrep.pth10.translationTests; import com.teragrep.pth10.ast.DPLParserCatalystContext; -import com.teragrep.pth10.ast.DPLParserCatalystVisitor; +import com.teragrep.pth10.ast.bo.ColumnNode; +import com.teragrep.pth10.ast.bo.StepNode; +import com.teragrep.pth10.ast.commands.aggregate.AggregateFunction; import com.teragrep.pth10.ast.commands.transformstatement.TimechartTransformation; +import com.teragrep.pth10.steps.AbstractStep; import com.teragrep.pth10.steps.timechart.TimechartStep; import com.teragrep.pth_03.antlr.DPLLexer; import com.teragrep.pth_03.antlr.DPLParser; @@ -55,95 +58,147 @@ import com.teragrep.pth_03.shaded.org.antlr.v4.runtime.CharStreams; import com.teragrep.pth_03.shaded.org.antlr.v4.runtime.CommonTokenStream; import com.teragrep.pth_03.shaded.org.antlr.v4.runtime.tree.ParseTree; +import org.apache.spark.sql.Column; +import org.apache.spark.sql.functions; +import org.apache.spark.unsafe.types.CalendarInterval; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInstance; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.List; @TestInstance(TestInstance.Lifecycle.PER_CLASS) public class TimechartTest { - private static final Logger LOGGER = LoggerFactory.getLogger(TimechartTest.class); - @Test void testTimeChartTranslation() { - String query = "| timechart span=5min sum(sales) as sales by product"; + String rename = "sales"; + String byField = "product"; + String query = "| timechart span=5min sum(sales) as " + rename + " by " + byField; + + DPLParserCatalystContext ctx = new DPLParserCatalystContext(null); + + // create parse tree with PTH-03 CharStream inputStream = CharStreams.fromString(query); DPLLexer lexer = new DPLLexer(inputStream); DPLParser parser = new DPLParser(new CommonTokenStream(lexer)); ParseTree tree = parser.root(); - LOGGER.debug(tree.toStringTree(parser)); + DPLParser.AggregateMethodSumContext aggContext = (DPLParser.AggregateMethodSumContext) tree + .getChild(1) + .getChild(0) + .getChild(2) + .getChild(0); + DPLParser.TimechartTransformationContext timechartContext = (DPLParser.TimechartTransformationContext) tree + .getChild(1) + .getChild(0); - DPLParserCatalystContext ctx = new DPLParserCatalystContext(null); - // Use this file for dataset initialization - String testFile = "src/test/resources/timechartTestData.jsonl"; ctx.setEarliest("-1w"); - DPLParserCatalystVisitor visitor = new DPLParserCatalystVisitor(ctx); + // traverse the tree in PTH-10 and create TimechartStep + TimechartTransformation tct = new TimechartTransformation(ctx); + StepNode timechartNode = (StepNode) tct.visitTimechartTransformation(timechartContext); + AbstractStep tcs = timechartNode.get(); - TimechartTransformation tct = new TimechartTransformation(ctx, visitor); - tct.visitTimechartTransformation((DPLParser.TimechartTransformationContext) tree.getChild(1).getChild(0)); - TimechartStep tcs = tct.timechartStep; + // expected BY clause + List divByInsts = new ArrayList<>(); + divByInsts.add(byField); - Assertions.assertEquals("window(_time, 300000000, 300000000, 0) AS window", tcs.getSpan().toString()); - Assertions - .assertEquals( - "sumaggregator(encodeusingserializer(input[0, java.lang.Object, true], false) AS value, decodeusingserializer(input[0, binary, true], com.teragrep.pth10.ast.commands.aggregate.UDAFs.BufferClasses.SumBuffer, false), staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false, true)) AS `sum(sales)` AS sales", - tcs.getAggCols().get(0).toString() - ); - Assertions.assertEquals("product", tcs.getDivByInsts().get(0)); + // expected aggregations + AggregateFunction aggregateFunction = new AggregateFunction(ctx); + ColumnNode aggColNode = (ColumnNode) aggregateFunction.visitAggregateMethodSum(aggContext); // "sum(sales)" aggregation + Column aggCol = aggColNode.getColumn().as(rename); // "as sales" + List aggCols = new ArrayList<>(); + aggCols.add(aggCol); + + // expected span + CalendarInterval ival = new CalendarInterval(0, 0, 5 * 60 * 1000 * 1000); + Column span = functions.window(new Column("_time"), String.valueOf(ival), "5 minutes", "0 minutes"); + + TimechartStep expected = new TimechartStep(aggCols, divByInsts, span); + + Assertions.assertEquals(expected, tcs); } @Test void testTimeChartTranslation_NoByClause() { - String query = "| timechart span=5min sum(sales) as sales"; + String rename = "sales"; + String query = "| timechart span=5min sum(sales) as " + rename; + + // create parse tree with PTH-03 CharStream inputStream = CharStreams.fromString(query); DPLLexer lexer = new DPLLexer(inputStream); DPLParser parser = new DPLParser(new CommonTokenStream(lexer)); ParseTree tree = parser.root(); - LOGGER.debug(tree.toStringTree(parser)); + + DPLParser.AggregateMethodSumContext aggContext = (DPLParser.AggregateMethodSumContext) tree + .getChild(1) + .getChild(0) + .getChild(2) + .getChild(0); + DPLParser.TimechartTransformationContext timechartContext = (DPLParser.TimechartTransformationContext) tree + .getChild(1) + .getChild(0); DPLParserCatalystContext ctx = new DPLParserCatalystContext(null); ctx.setEarliest("-1w"); - DPLParserCatalystVisitor visitor = new DPLParserCatalystVisitor(ctx); - - TimechartTransformation tct = new TimechartTransformation(ctx, visitor); - tct.visitTimechartTransformation((DPLParser.TimechartTransformationContext) tree.getChild(1).getChild(0)); - TimechartStep tcs = tct.timechartStep; - - Assertions.assertEquals("window(_time, 300000000, 300000000, 0) AS window", tcs.getSpan().toString()); - Assertions - .assertEquals( - "sumaggregator(encodeusingserializer(input[0, java.lang.Object, true], false) AS value, decodeusingserializer(input[0, binary, true], com.teragrep.pth10.ast.commands.aggregate.UDAFs.BufferClasses.SumBuffer, false), staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false, true)) AS `sum(sales)` AS sales", - tcs.getAggCols().get(0).toString() - ); - Assertions.assertEquals(0, tcs.getDivByInsts().size()); + + // traverse the tree in PTH-10 and create TimechartStep + TimechartTransformation tct = new TimechartTransformation(ctx); + StepNode timechartNode = (StepNode) tct.visitTimechartTransformation(timechartContext); + AbstractStep tcs = timechartNode.get(); + + // expected aggregations + AggregateFunction aggregateFunction = new AggregateFunction(ctx); + ColumnNode aggColNode = (ColumnNode) aggregateFunction.visitAggregateMethodSum(aggContext); // "sum(sales)" aggregation + Column aggCol = aggColNode.getColumn().as(rename); // "as sales" + List aggCols = new ArrayList<>(); + aggCols.add(aggCol); + + // expected span + CalendarInterval ival = new CalendarInterval(0, 0, 5 * 60 * 1000 * 1000); + Column span = functions.window(new Column("_time"), String.valueOf(ival), "5 minutes", "0 minutes"); + + TimechartStep expected = new TimechartStep(aggCols, new ArrayList<>(), span); + + Assertions.assertEquals(expected, tcs); } @Test void testTimeChartTranslationBasic() { String query = "| timechart count"; + + // create parse tree with PTH-03 CharStream inputStream = CharStreams.fromString(query); DPLLexer lexer = new DPLLexer(inputStream); DPLParser parser = new DPLParser(new CommonTokenStream(lexer)); ParseTree tree = parser.root(); - LOGGER.debug(tree.toStringTree(parser)); + DPLParser.AggregateFunctionContext aggContext = (DPLParser.AggregateFunctionContext) tree + .getChild(1) + .getChild(0) + .getChild(1); DPLParserCatalystContext ctx = new DPLParserCatalystContext(null); ctx.setEarliest("-1w"); - DPLParserCatalystVisitor visitor = new DPLParserCatalystVisitor(ctx); - - TimechartTransformation tct = new TimechartTransformation(ctx, visitor); - tct.visitTimechartTransformation((DPLParser.TimechartTransformationContext) tree.getChild(1).getChild(0)); - TimechartStep tcs = tct.timechartStep; - - Assertions.assertEquals("window(_time, 86400000000, 86400000000, 0) AS window", tcs.getSpan().toString()); - Assertions - .assertEquals( - "countaggregator(input[0, java.lang.Long, true].longValue AS value, staticinvoke(class java.lang.Long, ObjectType(class java.lang.Long), valueOf, input[0, bigint, true], true, false, true), input[0, java.lang.Long, true].longValue) AS count", - tcs.getAggCols().get(0).toString() - ); - Assertions.assertEquals(0, tcs.getDivByInsts().size()); + + // traverse the tree in PTH-10 and create TimechartStep + TimechartTransformation tct = new TimechartTransformation(ctx); + StepNode timechartNode = (StepNode) tct + .visitTimechartTransformation((DPLParser.TimechartTransformationContext) tree.getChild(1).getChild(0)); + AbstractStep tcs = timechartNode.get(); + + // expected aggregations + AggregateFunction aggregateFunction = new AggregateFunction(ctx); + ColumnNode aggColNode = (ColumnNode) aggregateFunction.visitAggregateFunction(aggContext); // "count" aggregation + List aggCols = new ArrayList<>(); + aggCols.add(aggColNode.getColumn()); + + // expected default span of 1 day when "span=" parameter is not specified + CalendarInterval ival = new CalendarInterval(0, 1, 0); + Column span = functions.window(new Column("_time"), String.valueOf(ival), "1 day", "0 minutes"); + + TimechartStep expected = new TimechartStep(aggCols, new ArrayList<>(), span); + + Assertions.assertEquals(expected, tcs); } }