From 1e7634f6585c98305eea1ff59fa22b332966a993 Mon Sep 17 00:00:00 2001 From: Rasmus Jokinen <146736881+51-code@users.noreply.github.com> Date: Wed, 29 Jan 2025 09:25:08 +0200 Subject: [PATCH 1/5] Refactor TimechartStep --- .../TimechartTransformation.java | 44 ++---- .../timechart/AbstractTimechartStep.java | 114 -------------- .../pth10/steps/timechart/TimechartStep.java | 40 ++++- .../pth10/translationTests/TimechartTest.java | 144 +++++++++++++----- 4 files changed, 145 insertions(+), 197 deletions(-) delete mode 100644 src/main/java/com/teragrep/pth10/steps/timechart/AbstractTimechartStep.java diff --git a/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/TimechartTransformation.java b/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/TimechartTransformation.java index 2ce11d7a1..67cb3aef7 100644 --- a/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/TimechartTransformation.java +++ b/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/TimechartTransformation.java @@ -70,31 +70,21 @@ * Provides a pivoted dataset, making it easier to form time-field graphs in the UI *
Dataset.groupBy("_time").pivot(aggregateField).sum(fieldname)
*/ -public class TimechartTransformation extends DPLParserBaseVisitor { +public final class TimechartTransformation extends DPLParserBaseVisitor { private static final Logger LOGGER = LoggerFactory.getLogger(TimechartTransformation.class); - private DPLParserCatalystContext catCtx = null; - private DPLParserCatalystVisitor catVisitor; - private Document doc; + private final DPLParserCatalystContext catCtx; + private final Document doc; - EvalTransformation evalTransformation; - AggregateFunction aggregateFunction; - private String aggregateField = null; + private final AggregateFunction aggregateFunction; + private String aggregateField; - public TimechartStep timechartStep = null; - - public TimechartTransformation(DPLParserCatalystContext catCtx, DPLParserCatalystVisitor catVisitor) { + public TimechartTransformation(final DPLParserCatalystContext catCtx, final DPLParserCatalystVisitor catVisitor) { this.doc = null; this.catCtx = catCtx; - this.catVisitor = catVisitor; - this.evalTransformation = new EvalTransformation(catCtx); this.aggregateFunction = new AggregateFunction(catCtx); } - public String getAggregateField() { - return this.aggregateField; - } - /** * timechartTransformation : COMMAND_MODE_TIMECHART (t_timechart_sepParameter)? (t_timechart_formatParameter)? * (t_timechart_fixedrangeParameter)? (t_timechart_partialParameter)? (t_timechart_contParameter)? @@ -108,9 +98,7 @@ public Node visitTimechartTransformation(DPLParser.TimechartTransformationContex } private Node timechartTransformationEmitCatalyst(DPLParser.TimechartTransformationContext ctx) { - this.timechartStep = new TimechartStep(); - - Column span = null; + Column span = createDefaultSpan(); if (ctx.t_timechart_binOptParameter() != null && !ctx.t_timechart_binOptParameter().isEmpty()) { LOGGER.info("Timechart Optional parameters: <[{}]>", ctx.t_timechart_binOptParameter().get(0).getText()); @@ -152,26 +140,16 @@ else if (child instanceof DPLParser.T_timechart_fieldRenameInstructionContext) { } listOfAggFunCols.add(funCol); // need to add last one; for loop above only adds if there's a new one coming - if (span == null) { - span = createDefaultSpan(); - } - - timechartStep.setHdfsPath(this.catVisitor.getHdfsPath()); - timechartStep.setCatCtx(catCtx); - timechartStep.setSpan(span); - timechartStep.setAggCols(listOfAggFunCols); - timechartStep.setDivByInsts(listOfDivideByInst); + TimechartStep timechartStep = new TimechartStep(listOfAggFunCols, listOfDivideByInst, span); // span this.catCtx.setTimeChartSpanSeconds(getSpanSeconds(span)); - LOGGER.debug("span= <[{}]>", timechartStep.getSpan().toString()); - LOGGER.debug("aggcols= <[{}]>", Arrays.toString(timechartStep.getAggCols().toArray())); - LOGGER.debug("divby= <[{}]>", Arrays.toString(timechartStep.getDivByInsts().toArray())); + LOGGER.debug("span= <[{}]>", span); + LOGGER.debug("aggcols= <[{}]>", listOfAggFunCols); + LOGGER.debug("divby= <[{}]>", listOfDivideByInst); return new StepNode(timechartStep); - - //throw new RuntimeException("Chart transformation operation not supported yet"); } /** diff --git a/src/main/java/com/teragrep/pth10/steps/timechart/AbstractTimechartStep.java b/src/main/java/com/teragrep/pth10/steps/timechart/AbstractTimechartStep.java deleted file mode 100644 index 28bf1c8aa..000000000 --- a/src/main/java/com/teragrep/pth10/steps/timechart/AbstractTimechartStep.java +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Teragrep Data Processing Language (DPL) translator for Apache Spark (pth_10) - * Copyright (C) 2019-2024 Suomen Kanuuna Oy - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - * - * - * Additional permission under GNU Affero General Public License version 3 - * section 7 - * - * If you modify this Program, or any covered work, by linking or combining it - * with other code, such other code is not for that reason alone subject to any - * of the requirements of the GNU Affero GPL version 3 as long as this Program - * is the same Program as licensed from Suomen Kanuuna Oy without any additional - * modifications. - * - * Supplemented terms under GNU Affero General Public License version 3 - * section 7 - * - * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified - * versions must be marked as "Modified version of" The Program. - * - * Names of the licensors and authors may not be used for publicity purposes. - * - * No rights are granted for use of trade names, trademarks, or service marks - * which are in The Program if any. - * - * Licensee must indemnify licensors and authors for any liability that these - * contractual assumptions impose on licensors and authors. - * - * To the extent this program is licensed as part of the Commercial versions of - * Teragrep, the applicable Commercial License may apply to this file if you as - * a licensee so wish it. - */ -package com.teragrep.pth10.steps.timechart; - -import com.teragrep.pth10.ast.DPLParserCatalystContext; -import com.teragrep.pth10.steps.AbstractStep; -import org.apache.spark.sql.Column; - -import java.util.List; - -public abstract class AbstractTimechartStep extends AbstractStep { - - protected List aggCols = null; - protected List divByInsts = null; - protected Column span = null; - protected String aggregateField = null; - protected DPLParserCatalystContext catCtx = null; - protected String hdfsPath = null; - - public AbstractTimechartStep() { - super(); - } - - public void setAggCols(List aggCols) { - this.aggCols = aggCols; - } - - public List getAggCols() { - return aggCols; - } - - public void setDivByInsts(List divByInsts) { - this.divByInsts = divByInsts; - } - - public List getDivByInsts() { - return divByInsts; - } - - public void setAggregateField(String field) { - this.aggregateField = field; - } - - public String getAggregateField() { - return this.aggregateField; - } - - public void setSpan(Column span) { - this.span = span; - } - - public Column getSpan() { - return this.span; - } - - public void setCatCtx(DPLParserCatalystContext catCtx) { - this.catCtx = catCtx; - } - - public DPLParserCatalystContext getCatCtx() { - return catCtx; - } - - public void setHdfsPath(String hdfsPath) { - this.hdfsPath = hdfsPath; - } - - public String getHdfsPath() { - return hdfsPath; - } -} diff --git a/src/main/java/com/teragrep/pth10/steps/timechart/TimechartStep.java b/src/main/java/com/teragrep/pth10/steps/timechart/TimechartStep.java index 9849094da..06bbdd1f5 100644 --- a/src/main/java/com/teragrep/pth10/steps/timechart/TimechartStep.java +++ b/src/main/java/com/teragrep/pth10/steps/timechart/TimechartStep.java @@ -45,18 +45,27 @@ */ package com.teragrep.pth10.steps.timechart; +import com.teragrep.pth10.steps.AbstractStep; import org.apache.spark.sql.*; import scala.collection.JavaConversions; import scala.collection.Seq; import java.util.ArrayList; import java.util.List; +import java.util.Objects; import java.util.stream.Collectors; -public final class TimechartStep extends AbstractTimechartStep { +public final class TimechartStep extends AbstractStep { - public TimechartStep() { + private final List aggCols; + private final List divByInsts; + private final Column span; + + public TimechartStep(final List aggCols, final List divByInsts, final Column span) { super(); + this.aggCols = aggCols; + this.divByInsts = divByInsts; + this.span = span; this.properties.add(CommandProperty.AGGREGATE); } @@ -66,18 +75,17 @@ public Dataset get(Dataset dataset) { return null; } - if (this.getAggCols() == null || this.getAggCols().isEmpty()) { + if (aggCols.isEmpty()) { throw new RuntimeException("Aggregate columns not present in TimechartStep, cannot proceed"); } // .agg has funky arguments; just giving a Seq of columns is no good, first arg needs to be a column - Column firstAggCol = this.aggCols.get(0); - Seq seqOfAggColsExceptFirst = JavaConversions - .asScalaBuffer(this.aggCols.subList(1, this.aggCols.size())); + Column firstAggCol = aggCols.get(0); + Seq seqOfAggColsExceptFirst = JavaConversions.asScalaBuffer(aggCols.subList(1, aggCols.size())); List allGroupBys = new ArrayList<>(); - allGroupBys.add(this.span); - allGroupBys.addAll(this.divByInsts.stream().map(functions::col).collect(Collectors.toList())); + allGroupBys.add(span); + allGroupBys.addAll(divByInsts.stream().map(functions::col).collect(Collectors.toList())); Seq seqOfAllGroupBys = JavaConversions.asScalaBuffer(allGroupBys); @@ -90,4 +98,20 @@ public Dataset get(Dataset dataset) { .drop("window") .orderBy("_time"); } + + @Override + public boolean equals(final Object o) { + if (o == null || getClass() != o.getClass()) { + return false; + } + final TimechartStep that = (TimechartStep) o; + // Column.equals() doesn't work, using String representation instead to see that the same operations are done on both + return Objects.equals(aggCols.toString(), that.aggCols.toString()) + && Objects.equals(divByInsts, that.divByInsts) && Objects.equals(span.toString(), that.span.toString()); + } + + @Override + public int hashCode() { + return Objects.hash(aggCols, divByInsts, span); + } } diff --git a/src/test/java/com/teragrep/pth10/translationTests/TimechartTest.java b/src/test/java/com/teragrep/pth10/translationTests/TimechartTest.java index 4a3942555..045b9aea4 100644 --- a/src/test/java/com/teragrep/pth10/translationTests/TimechartTest.java +++ b/src/test/java/com/teragrep/pth10/translationTests/TimechartTest.java @@ -47,7 +47,11 @@ import com.teragrep.pth10.ast.DPLParserCatalystContext; import com.teragrep.pth10.ast.DPLParserCatalystVisitor; +import com.teragrep.pth10.ast.bo.ColumnNode; +import com.teragrep.pth10.ast.bo.StepNode; +import com.teragrep.pth10.ast.commands.aggregate.AggregateFunction; import com.teragrep.pth10.ast.commands.transformstatement.TimechartTransformation; +import com.teragrep.pth10.steps.AbstractStep; import com.teragrep.pth10.steps.timechart.TimechartStep; import com.teragrep.pth_03.antlr.DPLLexer; import com.teragrep.pth_03.antlr.DPLParser; @@ -55,95 +59,151 @@ import com.teragrep.pth_03.shaded.org.antlr.v4.runtime.CharStreams; import com.teragrep.pth_03.shaded.org.antlr.v4.runtime.CommonTokenStream; import com.teragrep.pth_03.shaded.org.antlr.v4.runtime.tree.ParseTree; +import org.apache.spark.sql.Column; +import org.apache.spark.sql.functions; +import org.apache.spark.unsafe.types.CalendarInterval; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInstance; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.List; @TestInstance(TestInstance.Lifecycle.PER_CLASS) public class TimechartTest { - private static final Logger LOGGER = LoggerFactory.getLogger(TimechartTest.class); - @Test void testTimeChartTranslation() { - String query = "| timechart span=5min sum(sales) as sales by product"; + String rename = "sales"; + String byField = "product"; + String query = "| timechart span=5min sum(sales) as " + rename + " by " + byField; + + DPLParserCatalystContext ctx = new DPLParserCatalystContext(null); + + // create parse tree with PTH-03 CharStream inputStream = CharStreams.fromString(query); DPLLexer lexer = new DPLLexer(inputStream); DPLParser parser = new DPLParser(new CommonTokenStream(lexer)); ParseTree tree = parser.root(); - LOGGER.debug(tree.toStringTree(parser)); + DPLParser.AggregateMethodSumContext aggContext = (DPLParser.AggregateMethodSumContext) tree + .getChild(1) + .getChild(0) + .getChild(2) + .getChild(0); + DPLParser.TimechartTransformationContext timechartContext = (DPLParser.TimechartTransformationContext) tree + .getChild(1) + .getChild(0); - DPLParserCatalystContext ctx = new DPLParserCatalystContext(null); - // Use this file for dataset initialization - String testFile = "src/test/resources/timechartTestData.jsonl"; ctx.setEarliest("-1w"); DPLParserCatalystVisitor visitor = new DPLParserCatalystVisitor(ctx); + // traverse the tree in PTH-10 and create TimechartStep TimechartTransformation tct = new TimechartTransformation(ctx, visitor); - tct.visitTimechartTransformation((DPLParser.TimechartTransformationContext) tree.getChild(1).getChild(0)); - TimechartStep tcs = tct.timechartStep; - - Assertions.assertEquals("window(_time, 300000000, 300000000, 0) AS window", tcs.getSpan().toString()); - Assertions - .assertEquals( - "sumaggregator(encodeusingserializer(input[0, java.lang.Object, true], false) AS value, decodeusingserializer(input[0, binary, true], com.teragrep.pth10.ast.commands.aggregate.UDAFs.BufferClasses.SumBuffer, false), staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false, true)) AS `sum(sales)` AS sales", - tcs.getAggCols().get(0).toString() - ); - Assertions.assertEquals("product", tcs.getDivByInsts().get(0)); + StepNode timechartNode = (StepNode) tct.visitTimechartTransformation(timechartContext); + AbstractStep tcs = timechartNode.get(); + + // expected BY clause + List divByInsts = new ArrayList<>(); + divByInsts.add(byField); + + // expected aggregations + AggregateFunction aggregateFunction = new AggregateFunction(ctx); + ColumnNode aggColNode = (ColumnNode) aggregateFunction.visitAggregateMethodSum(aggContext); // "sum(sales)" aggregation + Column aggCol = aggColNode.getColumn().as(rename); // "as sales" + List aggCols = new ArrayList<>(); + aggCols.add(aggCol); + + // expected span + CalendarInterval ival = new CalendarInterval(0, 0, 5 * 60 * 1000 * 1000); + Column span = functions.window(new Column("_time"), String.valueOf(ival), "5 minutes", "0 minutes"); + + TimechartStep expected = new TimechartStep(aggCols, divByInsts, span); + + Assertions.assertEquals(expected, tcs); } @Test void testTimeChartTranslation_NoByClause() { - String query = "| timechart span=5min sum(sales) as sales"; + String rename = "sales"; + String query = "| timechart span=5min sum(sales) as " + rename; + + // create parse tree with PTH-03 CharStream inputStream = CharStreams.fromString(query); DPLLexer lexer = new DPLLexer(inputStream); DPLParser parser = new DPLParser(new CommonTokenStream(lexer)); ParseTree tree = parser.root(); - LOGGER.debug(tree.toStringTree(parser)); + + DPLParser.AggregateMethodSumContext aggContext = (DPLParser.AggregateMethodSumContext) tree + .getChild(1) + .getChild(0) + .getChild(2) + .getChild(0); + DPLParser.TimechartTransformationContext timechartContext = (DPLParser.TimechartTransformationContext) tree + .getChild(1) + .getChild(0); DPLParserCatalystContext ctx = new DPLParserCatalystContext(null); ctx.setEarliest("-1w"); DPLParserCatalystVisitor visitor = new DPLParserCatalystVisitor(ctx); + // traverse the tree in PTH-10 and create TimechartStep TimechartTransformation tct = new TimechartTransformation(ctx, visitor); - tct.visitTimechartTransformation((DPLParser.TimechartTransformationContext) tree.getChild(1).getChild(0)); - TimechartStep tcs = tct.timechartStep; - - Assertions.assertEquals("window(_time, 300000000, 300000000, 0) AS window", tcs.getSpan().toString()); - Assertions - .assertEquals( - "sumaggregator(encodeusingserializer(input[0, java.lang.Object, true], false) AS value, decodeusingserializer(input[0, binary, true], com.teragrep.pth10.ast.commands.aggregate.UDAFs.BufferClasses.SumBuffer, false), staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false, true)) AS `sum(sales)` AS sales", - tcs.getAggCols().get(0).toString() - ); - Assertions.assertEquals(0, tcs.getDivByInsts().size()); + StepNode timechartNode = (StepNode) tct.visitTimechartTransformation(timechartContext); + AbstractStep tcs = timechartNode.get(); + + // expected aggregations + AggregateFunction aggregateFunction = new AggregateFunction(ctx); + ColumnNode aggColNode = (ColumnNode) aggregateFunction.visitAggregateMethodSum(aggContext); // "sum(sales)" aggregation + Column aggCol = aggColNode.getColumn().as(rename); // "as sales" + List aggCols = new ArrayList<>(); + aggCols.add(aggCol); + + // expected span + CalendarInterval ival = new CalendarInterval(0, 0, 5 * 60 * 1000 * 1000); + Column span = functions.window(new Column("_time"), String.valueOf(ival), "5 minutes", "0 minutes"); + + TimechartStep expected = new TimechartStep(aggCols, new ArrayList<>(), span); + + Assertions.assertEquals(expected, tcs); } @Test void testTimeChartTranslationBasic() { String query = "| timechart count"; + + // create parse tree with PTH-03 CharStream inputStream = CharStreams.fromString(query); DPLLexer lexer = new DPLLexer(inputStream); DPLParser parser = new DPLParser(new CommonTokenStream(lexer)); ParseTree tree = parser.root(); - LOGGER.debug(tree.toStringTree(parser)); + DPLParser.AggregateFunctionContext aggContext = (DPLParser.AggregateFunctionContext) tree + .getChild(1) + .getChild(0) + .getChild(1); DPLParserCatalystContext ctx = new DPLParserCatalystContext(null); ctx.setEarliest("-1w"); DPLParserCatalystVisitor visitor = new DPLParserCatalystVisitor(ctx); + // traverse the tree in PTH-10 and create TimechartStep TimechartTransformation tct = new TimechartTransformation(ctx, visitor); - tct.visitTimechartTransformation((DPLParser.TimechartTransformationContext) tree.getChild(1).getChild(0)); - TimechartStep tcs = tct.timechartStep; - - Assertions.assertEquals("window(_time, 86400000000, 86400000000, 0) AS window", tcs.getSpan().toString()); - Assertions - .assertEquals( - "countaggregator(input[0, java.lang.Long, true].longValue AS value, staticinvoke(class java.lang.Long, ObjectType(class java.lang.Long), valueOf, input[0, bigint, true], true, false, true), input[0, java.lang.Long, true].longValue) AS count", - tcs.getAggCols().get(0).toString() - ); - Assertions.assertEquals(0, tcs.getDivByInsts().size()); + StepNode timechartNode = (StepNode) tct + .visitTimechartTransformation((DPLParser.TimechartTransformationContext) tree.getChild(1).getChild(0)); + AbstractStep tcs = timechartNode.get(); + + // expected aggregations + AggregateFunction aggregateFunction = new AggregateFunction(ctx); + ColumnNode aggColNode = (ColumnNode) aggregateFunction.visitAggregateFunction(aggContext); // "count" aggregation + List aggCols = new ArrayList<>(); + aggCols.add(aggColNode.getColumn()); + + // expected default span of 1 day when "span=" parameter is not specified + CalendarInterval ival = new CalendarInterval(0, 1, 0); + Column span = functions.window(new Column("_time"), String.valueOf(ival), "1 day", "0 minutes"); + + TimechartStep expected = new TimechartStep(aggCols, new ArrayList<>(), span); + + Assertions.assertEquals(expected, tcs); } } From 99461c536c7dcefd31189c8ce7b461872d17f340 Mon Sep 17 00:00:00 2001 From: Rasmus Jokinen <146736881+51-code@users.noreply.github.com> Date: Wed, 29 Jan 2025 12:52:40 +0200 Subject: [PATCH 2/5] Refactor TimechartTransformation --- .../TimechartTransformation.java | 137 ++++-------------- .../TransformStatement.java | 2 +- .../pth10/translationTests/TimechartTest.java | 11 +- 3 files changed, 32 insertions(+), 118 deletions(-) diff --git a/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/TimechartTransformation.java b/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/TimechartTransformation.java index 67cb3aef7..7333cfb28 100644 --- a/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/TimechartTransformation.java +++ b/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/TimechartTransformation.java @@ -47,19 +47,15 @@ import com.teragrep.pth10.ast.*; import com.teragrep.pth10.ast.bo.*; -import com.teragrep.pth10.ast.bo.Token.Type; import com.teragrep.pth10.ast.commands.aggregate.AggregateFunction; import com.teragrep.pth10.steps.timechart.TimechartStep; import com.teragrep.pth_03.antlr.DPLParser; import com.teragrep.pth_03.antlr.DPLParserBaseVisitor; -import com.teragrep.pth_03.shaded.org.antlr.v4.runtime.tree.ParseTree; import org.apache.spark.sql.Column; import org.apache.spark.sql.functions; import org.apache.spark.unsafe.types.CalendarInterval; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.w3c.dom.Document; -import org.w3c.dom.Element; import java.util.*; import java.util.regex.Matcher; @@ -67,20 +63,20 @@ /** * Class that contains the visitor methods for the timechart command
- * Provides a pivoted dataset, making it easier to form time-field graphs in the UI - *
Dataset.groupBy("_time").pivot(aggregateField).sum(fieldname)
*/ public final class TimechartTransformation extends DPLParserBaseVisitor { private static final Logger LOGGER = LoggerFactory.getLogger(TimechartTransformation.class); private final DPLParserCatalystContext catCtx; - private final Document doc; private final AggregateFunction aggregateFunction; - private String aggregateField; - public TimechartTransformation(final DPLParserCatalystContext catCtx, final DPLParserCatalystVisitor catVisitor) { - this.doc = null; + // fields set in visit functions + private Column span; + private final ArrayList aggFunCols = new ArrayList<>(); + private final ArrayList divByInsts = new ArrayList<>(); + + public TimechartTransformation(final DPLParserCatalystContext catCtx) { this.catCtx = catCtx; this.aggregateFunction = new AggregateFunction(catCtx); } @@ -98,56 +94,18 @@ public Node visitTimechartTransformation(DPLParser.TimechartTransformationContex } private Node timechartTransformationEmitCatalyst(DPLParser.TimechartTransformationContext ctx) { - Column span = createDefaultSpan(); - - if (ctx.t_timechart_binOptParameter() != null && !ctx.t_timechart_binOptParameter().isEmpty()) { - LOGGER.info("Timechart Optional parameters: <[{}]>", ctx.t_timechart_binOptParameter().get(0).getText()); - - ColumnNode spanNode = (ColumnNode) visit(ctx.t_timechart_binOptParameter().get(0)); - if (spanNode != null) { - span = spanNode.getColumn(); - } - } + span = createDefaultSpan(); - Column funCol = null; - List listOfAggFunCols = new ArrayList<>(); - List listOfDivideByInst = new ArrayList<>(); - for (int i = 0; i < ctx.getChildCount(); i++) { - ParseTree child = ctx.getChild(i); - - if (child instanceof DPLParser.AggregateFunctionContext) { - // go through each agg. function - DPLParser.AggregateFunctionContext aggFunCtx = (DPLParser.AggregateFunctionContext) child; - Node funNode = visit(aggFunCtx); - if (funNode != null) { - if (funCol != null) { - listOfAggFunCols.add(funCol); - } - funCol = ((ColumnNode) funNode).getColumn(); - } - } - else if (child instanceof DPLParser.T_timechart_divideByInstructionContext) { - String divByInst = ((StringNode) visitT_timechart_divideByInstruction( - (DPLParser.T_timechart_divideByInstructionContext) child - )).toString(); - listOfDivideByInst.add(divByInst); - } - else if (child instanceof DPLParser.T_timechart_fieldRenameInstructionContext) { - if (funCol != null) { - funCol = funCol.as(visit(child).toString()); - } - } - } - listOfAggFunCols.add(funCol); // need to add last one; for loop above only adds if there's a new one coming + visitChildren(ctx); // visit all the parameters - TimechartStep timechartStep = new TimechartStep(listOfAggFunCols, listOfDivideByInst, span); + TimechartStep timechartStep = new TimechartStep(aggFunCols, divByInsts, span); // span this.catCtx.setTimeChartSpanSeconds(getSpanSeconds(span)); LOGGER.debug("span= <[{}]>", span); - LOGGER.debug("aggcols= <[{}]>", listOfAggFunCols); - LOGGER.debug("divby= <[{}]>", listOfDivideByInst); + LOGGER.debug("aggcols= <[{}]>", aggFunCols); + LOGGER.debug("divby= <[{}]>", divByInsts); return new StepNode(timechartStep); } @@ -187,47 +145,35 @@ else if (isWithinNumber && spanChar != ' ') { @Override public Node visitAggregateFunction(DPLParser.AggregateFunctionContext ctx) { - Node rv = aggregateFunction.visitAggregateFunction(ctx); - if (aggregateField == null) - aggregateField = aggregateFunction.getAggregateField(); - return aggregateFunction.visitAggregateFunction(ctx); + ColumnNode aggCol = (ColumnNode) aggregateFunction.visitAggregateFunction(ctx); + aggFunCols.add(aggCol.getColumn()); + return new NullNode(); } @Override public Node visitT_timechart_divideByInstruction(DPLParser.T_timechart_divideByInstructionContext ctx) { - // LOGGER.info(ctx.getChildCount()+"--visitT_chart_divideByInstruction incoming{}", ctx.getText()); - if (ctx.getChildCount() == 0) { - return null; - } - String target = ctx.getChild(1).getChild(0).toString(); + String field = ctx.fieldType().getChild(0).toString(); + divByInsts.add(field); - if (doc != null) { - Element el = doc.createElement("divideBy"); - el.setAttribute("field", target); - return new ElementNode(el); - } - else { - return new StringNode(new Token(Type.STRING, target)); - } + return new NullNode(); } @Override public Node visitT_timechart_fieldRenameInstruction(DPLParser.T_timechart_fieldRenameInstructionContext ctx) { - String field = ctx.getChild(1).getText(); - if (doc != null) { - Element el = doc.createElement("fieldRename"); - el.setAttribute("field", field); - return new ElementNode(el); - } - else { - return new StringNode(new Token(Type.STRING, field)); + String rename = ctx.getChild(1).getText(); + if (!aggFunCols.isEmpty()) { + Column latestAgg = aggFunCols.remove(aggFunCols.size() - 1); + aggFunCols.add(latestAgg.as(rename)); // rename the newest visited aggregation column } + + return new NullNode(); } @Override public Node visitT_timechart_binOptParameter(DPLParser.T_timechart_binOptParameterContext ctx) { LOGGER.info("visitT_timechart_binOptParameter:<{}>", ctx.getText()); - return visitChildren(ctx); + span = ((ColumnNode) visitChildren(ctx)).getColumn(); + return new NullNode(); } @Override @@ -246,10 +192,9 @@ public Node visitT_timechart_binSpanParameter(DPLParser.T_timechart_binSpanParam * @return */ private Column createDefaultSpan() { - long sec = 0; + final long sec; + final String duration; TimeRange tr = TimeRange.ONE_HOUR; - String duration = "1 days"; // Default duration - // LOGGER.info("createDefaultSpan="+catCtx.getTimeRange()); DPLParserConfig pConf = catCtx.getParserConfig(); if (pConf != null) { tr = pConf.getTimeRange(); @@ -310,7 +255,6 @@ private CalendarInterval getSpanLength(String value) { // default timescale is sec String timescale = "sec"; int numericalValue; - int month = 0; long sec = 0; Pattern p = Pattern.compile("\\d+"); Matcher m = p.matcher(value); @@ -378,7 +322,7 @@ private CalendarInterval getSpanLength(String value) { break; } } - return new CalendarInterval(month, 0, sec * 1000 * 1000L); + return new CalendarInterval(0, 0, sec * 1000 * 1000L); } @Override @@ -461,33 +405,8 @@ public Node visitT_timechart_evaledField(DPLParser.T_timechart_evaledFieldContex return visitChildren(ctx); } - /*@Override public Node visitT_timechart_singleAggregation(DPLParser.T_timechart_singleAggregationContext ctx) { - String oper = ctx.getText(); - String defaultField="*"; - Node rv = null; - Column col = null; - if(oper.equalsIgnoreCase("count") || oper.equalsIgnoreCase("c")) { - aggregateField = "count"; // use default name - col = org.apache.spark.sql.functions.count(defaultField); - // LOGGER.info("T_timechart_singleAggregation (Catalyst):{}", col.expr().sql()+" default field="+defaultField); - traceBuffer.add("Visit AggregateMethodCount(Catalyst):{}", col.expr().sql()); - rv = new ColumnNode(col); - }else { - rv = this.aggregateFunction.visitAggregateFunction(ctx.aggregateFunction()); - this.aggregateField = aggregateFunction.getAggregateField(); - } - // Check whether field needs to be renamed - if(ctx.t_timechart_fieldRenameInstruction() != null){ - Node renameCmd = visitT_timechart_fieldRenameInstruction(ctx.t_timechart_fieldRenameInstruction()); - aggregateField = renameCmd.toString(); - // rv = new ColumnNode(((ColumnNode) rv).getColumn().as(renameCmd.toString())); - } - return rv; - }*/ - @Override public Node visitSpanType(DPLParser.SpanTypeContext ctx) { - // LOGGER.info("visitSpanType:"+ctx.getText()); return visitChildren(ctx); } diff --git a/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/TransformStatement.java b/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/TransformStatement.java index 813878588..1db84020f 100644 --- a/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/TransformStatement.java +++ b/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/TransformStatement.java @@ -176,7 +176,7 @@ public Node visitChartTransformation(DPLParser.ChartTransformationContext ctx) { @Override public Node visitTimechartTransformation(DPLParser.TimechartTransformationContext ctx) { // timechart command - return new TimechartTransformation(catCtx, catVisitor).visitTimechartTransformation(ctx); + return new TimechartTransformation(catCtx).visitTimechartTransformation(ctx); } @Override diff --git a/src/test/java/com/teragrep/pth10/translationTests/TimechartTest.java b/src/test/java/com/teragrep/pth10/translationTests/TimechartTest.java index 045b9aea4..c9c1abd46 100644 --- a/src/test/java/com/teragrep/pth10/translationTests/TimechartTest.java +++ b/src/test/java/com/teragrep/pth10/translationTests/TimechartTest.java @@ -46,7 +46,6 @@ package com.teragrep.pth10.translationTests; import com.teragrep.pth10.ast.DPLParserCatalystContext; -import com.teragrep.pth10.ast.DPLParserCatalystVisitor; import com.teragrep.pth10.ast.bo.ColumnNode; import com.teragrep.pth10.ast.bo.StepNode; import com.teragrep.pth10.ast.commands.aggregate.AggregateFunction; @@ -96,10 +95,8 @@ void testTimeChartTranslation() { ctx.setEarliest("-1w"); - DPLParserCatalystVisitor visitor = new DPLParserCatalystVisitor(ctx); - // traverse the tree in PTH-10 and create TimechartStep - TimechartTransformation tct = new TimechartTransformation(ctx, visitor); + TimechartTransformation tct = new TimechartTransformation(ctx); StepNode timechartNode = (StepNode) tct.visitTimechartTransformation(timechartContext); AbstractStep tcs = timechartNode.get(); @@ -145,10 +142,9 @@ void testTimeChartTranslation_NoByClause() { DPLParserCatalystContext ctx = new DPLParserCatalystContext(null); ctx.setEarliest("-1w"); - DPLParserCatalystVisitor visitor = new DPLParserCatalystVisitor(ctx); // traverse the tree in PTH-10 and create TimechartStep - TimechartTransformation tct = new TimechartTransformation(ctx, visitor); + TimechartTransformation tct = new TimechartTransformation(ctx); StepNode timechartNode = (StepNode) tct.visitTimechartTransformation(timechartContext); AbstractStep tcs = timechartNode.get(); @@ -184,10 +180,9 @@ void testTimeChartTranslationBasic() { DPLParserCatalystContext ctx = new DPLParserCatalystContext(null); ctx.setEarliest("-1w"); - DPLParserCatalystVisitor visitor = new DPLParserCatalystVisitor(ctx); // traverse the tree in PTH-10 and create TimechartStep - TimechartTransformation tct = new TimechartTransformation(ctx, visitor); + TimechartTransformation tct = new TimechartTransformation(ctx); StepNode timechartNode = (StepNode) tct .visitTimechartTransformation((DPLParser.TimechartTransformationContext) tree.getChild(1).getChild(0)); AbstractStep tcs = timechartNode.get(); From 3204bf91dcda2e55a1122c1754cc7696f188fcf4 Mon Sep 17 00:00:00 2001 From: Rasmus Jokinen <146736881+51-code@users.noreply.github.com> Date: Mon, 24 Feb 2025 12:15:37 +0200 Subject: [PATCH 3/5] Refactor timechartTransformation to be immutable with span and divByInst, refactored Span parameter parsing --- .../teragrep/pth10/ast/DPLParserConfig.java | 17 +- .../TimechartTransformation.java | 211 +----------------- .../timechart/DivByInstContextValue.java | 74 ++++++ .../timechart/SpanContextValue.java | 114 ++++++++++ .../timechart/span/DefaultSpan.java | 79 +++++++ .../timechart/span/Span.java} | 9 +- .../timechart/span/SpanParameter.java | 77 +++++++ .../timechart/span/TimeRange.java | 152 +++++++++++++ .../pth10/TimechartStreamingTest.java | 15 ++ 9 files changed, 529 insertions(+), 219 deletions(-) create mode 100644 src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/DivByInstContextValue.java create mode 100644 src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/SpanContextValue.java create mode 100644 src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/DefaultSpan.java rename src/main/java/com/teragrep/pth10/ast/{TimeRange.java => commands/transformstatement/timechart/span/Span.java} (92%) create mode 100644 src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/SpanParameter.java create mode 100644 src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/TimeRange.java diff --git a/src/main/java/com/teragrep/pth10/ast/DPLParserConfig.java b/src/main/java/com/teragrep/pth10/ast/DPLParserConfig.java index 0d77751cc..60150adcb 100644 --- a/src/main/java/com/teragrep/pth10/ast/DPLParserConfig.java +++ b/src/main/java/com/teragrep/pth10/ast/DPLParserConfig.java @@ -45,6 +45,7 @@ */ package com.teragrep.pth10.ast; +import com.teragrep.pth10.ast.commands.transformstatement.timechart.span.TimeRange; import com.teragrep.pth10.ast.time.RelativeTimeParser; import com.teragrep.pth10.ast.time.RelativeTimestamp; import org.slf4j.Logger; @@ -66,8 +67,6 @@ public class DPLParserConfig { private Map config = new LinkedHashMap<>(); - TimeRange timeRange = TimeRange.ONE_MONTH; - /** * Get named value from config map * @@ -162,7 +161,7 @@ public void setLatest(String latest) { * @return enum range values 10s,...,1M */ public TimeRange getTimeRange() { - TimeRange rv = TimeRange.ONE_DAY; + TimeRange timeRange = new TimeRange("1 days"); long r = 0; // Earliest set, latest not if (config.get("earliest") != null && config.get("latest") == null) { @@ -181,21 +180,21 @@ else if (config.get("earliest") != null && config.get("latest") != null) { r = abs(r); LOGGER.info("Calculated range=<{}>", r); if (r <= 15 * 60) { - rv = TimeRange.TEN_SECONDS; + timeRange = new TimeRange("10 seconds"); } else if (r <= 60 * 60) { - rv = TimeRange.ONE_MINUTE; + timeRange = new TimeRange("1 minutes"); } else if (r <= 4 * 60 * 60) { - rv = TimeRange.FIVE_MINUTES; + timeRange = new TimeRange("5 minutes"); } else if (r <= 24 * 60 * 60) { - rv = TimeRange.THIRTY_MINUTES; + timeRange = new TimeRange("30 minutes"); } else if (r > 30 * 24 * 60 * 60) { // Default max value is 1 day - rv = TimeRange.ONE_DAY; + timeRange = new TimeRange("1 days"); } - return rv; + return timeRange; } } diff --git a/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/TimechartTransformation.java b/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/TimechartTransformation.java index 7333cfb28..ef2cd75bc 100644 --- a/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/TimechartTransformation.java +++ b/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/TimechartTransformation.java @@ -48,18 +48,16 @@ import com.teragrep.pth10.ast.*; import com.teragrep.pth10.ast.bo.*; import com.teragrep.pth10.ast.commands.aggregate.AggregateFunction; +import com.teragrep.pth10.ast.commands.transformstatement.timechart.DivByInstContextValue; +import com.teragrep.pth10.ast.commands.transformstatement.timechart.SpanContextValue; import com.teragrep.pth10.steps.timechart.TimechartStep; import com.teragrep.pth_03.antlr.DPLParser; import com.teragrep.pth_03.antlr.DPLParserBaseVisitor; import org.apache.spark.sql.Column; -import org.apache.spark.sql.functions; -import org.apache.spark.unsafe.types.CalendarInterval; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.util.*; -import java.util.regex.Matcher; -import java.util.regex.Pattern; /** * Class that contains the visitor methods for the timechart command
@@ -72,9 +70,7 @@ public final class TimechartTransformation extends DPLParserBaseVisitor { private final AggregateFunction aggregateFunction; // fields set in visit functions - private Column span; private final ArrayList aggFunCols = new ArrayList<>(); - private final ArrayList divByInsts = new ArrayList<>(); public TimechartTransformation(final DPLParserCatalystContext catCtx) { this.catCtx = catCtx; @@ -94,15 +90,13 @@ public Node visitTimechartTransformation(DPLParser.TimechartTransformationContex } private Node timechartTransformationEmitCatalyst(DPLParser.TimechartTransformationContext ctx) { - span = createDefaultSpan(); + final Column span = new SpanContextValue(ctx.t_timechart_binOptParameter(), catCtx).value(); + final List divByInsts = new DivByInstContextValue(ctx.t_timechart_divideByInstruction()).value(); visitChildren(ctx); // visit all the parameters TimechartStep timechartStep = new TimechartStep(aggFunCols, divByInsts, span); - // span - this.catCtx.setTimeChartSpanSeconds(getSpanSeconds(span)); - LOGGER.debug("span= <[{}]>", span); LOGGER.debug("aggcols= <[{}]>", aggFunCols); LOGGER.debug("divby= <[{}]>", divByInsts); @@ -110,39 +104,6 @@ private Node timechartTransformationEmitCatalyst(DPLParser.TimechartTransformati return new StepNode(timechartStep); } - /** - * Convert span of type Column to the span length in seconds - * - * @param span span of type column - * @return span length in seconds - */ - private long getSpanSeconds(Column span) { - // span column is of type 'timewindow(_time, 60000000, 60000000, 0)' - // get second parameter and convert from microseconds to seconds - // yes, this is terrible but it works so is it really? - char[] spanChars = span.toString().toCharArray(); - boolean isWithinNumber = false; - StringBuilder num = new StringBuilder(); - for (char spanChar : spanChars) { - if (spanChar == ',') { - isWithinNumber = !isWithinNumber; - if (!isWithinNumber) { - break; - } - } - else if (isWithinNumber && spanChar != ' ') { - num.append(spanChar); - } - } - - try { - return Long.parseLong(num.toString()) / 1_000_000L; - } - catch (NumberFormatException nfe) { - throw new RuntimeException("Error converting span column into seconds"); - } - } - @Override public Node visitAggregateFunction(DPLParser.AggregateFunctionContext ctx) { ColumnNode aggCol = (ColumnNode) aggregateFunction.visitAggregateFunction(ctx); @@ -150,14 +111,6 @@ public Node visitAggregateFunction(DPLParser.AggregateFunctionContext ctx) { return new NullNode(); } - @Override - public Node visitT_timechart_divideByInstruction(DPLParser.T_timechart_divideByInstructionContext ctx) { - String field = ctx.fieldType().getChild(0).toString(); - divByInsts.add(field); - - return new NullNode(); - } - @Override public Node visitT_timechart_fieldRenameInstruction(DPLParser.T_timechart_fieldRenameInstructionContext ctx) { String rename = ctx.getChild(1).getText(); @@ -169,162 +122,6 @@ public Node visitT_timechart_fieldRenameInstruction(DPLParser.T_timechart_fieldR return new NullNode(); } - @Override - public Node visitT_timechart_binOptParameter(DPLParser.T_timechart_binOptParameterContext ctx) { - LOGGER.info("visitT_timechart_binOptParameter:<{}>", ctx.getText()); - span = ((ColumnNode) visitChildren(ctx)).getColumn(); - return new NullNode(); - } - - @Override - public Node visitT_timechart_binSpanParameter(DPLParser.T_timechart_binSpanParameterContext ctx) { - LOGGER.info("visitT_timechart_binSpanParameter:<{}>", ctx.getText()); - CalendarInterval ival = getSpanLength(ctx.getChild(1).getText()); - Column col = new Column("_time"); - Column span = functions.window(col, String.valueOf(ival)); - - return new ColumnNode(span); - } - - /** - * Creates a column with default span of one hour - * - * @return - */ - private Column createDefaultSpan() { - final long sec; - final String duration; - TimeRange tr = TimeRange.ONE_HOUR; - DPLParserConfig pConf = catCtx.getParserConfig(); - if (pConf != null) { - tr = pConf.getTimeRange(); - } - switch (tr) { - case TEN_SECONDS: { - sec = 10; - duration = "10 seconds"; - break; - } - case ONE_MINUTE: { - sec = 60; - duration = "1 minutes"; - break; - } - case FIVE_MINUTES: { - sec = 5 * 60; - duration = "5 minutes"; - break; - } - case THIRTY_MINUTES: { - sec = 30 * 60; - duration = "30 minutes"; - break; - } - case ONE_HOUR: { - sec = 3600; - duration = "1 hours"; - break; - } - case ONE_DAY: { - sec = 24 * 3600; - duration = "1 days"; - break; - } - case ONE_MONTH: { - sec = 30 * 24 * 3600; - duration = "30 days"; - break; - } - default: { - throw new RuntimeException("timechart span duration greater that month is not supported"); - } - } - CalendarInterval ival = new CalendarInterval(0, 0, sec * 1000 * 1000); - return functions.window(new Column("_time"), String.valueOf(ival), duration, "0 minutes"); - } - - /** - * Gets the CalendarInterval of string form span - * - * @param value span as string - * @return CalendarInterval - */ - private CalendarInterval getSpanLength(String value) { - // incoming span-length consist of - // [] - // default timescale is sec - String timescale = "sec"; - int numericalValue; - long sec = 0; - Pattern p = Pattern.compile("\\d+"); - Matcher m = p.matcher(value); - if (m.lookingAt()) { - numericalValue = Integer.parseInt(m.group()); - String[] parts = value.split(m.group()); - if (parts.length > 1) - timescale = parts[1].trim(); - } - else { - LOGGER.error("Span length error: missing numerical value:<{}>", value); - throw new RuntimeException("getSpanLength, missing numerical value:" + value); - } - // Calculate value - switch (timescale) { - case "s": - case "sec": - case "secs": - case "second": - case "seconds": - case "S": { - sec = numericalValue; - break; - } - case "m": - case "min": - case "mins": - case "minute": - case "minutes": - case "M": { - sec = numericalValue * 60L; - break; - } - case "h": - case "hr": - case "hrs": - case "hour": - case "hours": - case "H": { - sec = numericalValue * 3600L; - break; - } - case "d": - case "day": - case "days": - case "D": { - sec = numericalValue * 3600L * 24; - break; - } - case "w": - case "week": - case "weeks": - case "W": { - sec = numericalValue * 3600L * 24 * 7; - break; - } - case "mon": - case "month": - case "months": - case "MON": { - //month = numericalValue; - // month is not supported as such, it needs to be changed seconds - // use 30 as default month length - sec = (long) numericalValue * 30 * 24 * 3600; - break; - } - } - return new CalendarInterval(0, 0, sec * 1000 * 1000L); - } - @Override public Node visitT_timechart_binsParameter(DPLParser.T_timechart_binsParameterContext ctx) { return visitChildren(ctx); diff --git a/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/DivByInstContextValue.java b/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/DivByInstContextValue.java new file mode 100644 index 000000000..70f46ce6d --- /dev/null +++ b/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/DivByInstContextValue.java @@ -0,0 +1,74 @@ +/* + * Teragrep Data Processing Language (DPL) translator for Apache Spark (pth_10) + * Copyright (C) 2019-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. + * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified + * versions must be marked as "Modified version of" The Program. + * + * Names of the licensors and authors may not be used for publicity purposes. + * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. + */ +package com.teragrep.pth10.ast.commands.transformstatement.timechart; + +import com.teragrep.pth10.ast.ContextValue; +import com.teragrep.pth_03.antlr.DPLParser; + +import java.util.ArrayList; +import java.util.List; + +/** + * Parses the divideByInstruction in "| timechart". + */ +public final class DivByInstContextValue implements ContextValue> { + + private final List ctx; + + public DivByInstContextValue(List ctx) { + this.ctx = ctx; + } + + @Override + public List value() { + final List divByFields = new ArrayList<>(); + for (DPLParser.T_timechart_divideByInstructionContext divByInstruction : ctx) { + divByFields.add(divByInstruction.fieldType().getChild(0).toString()); + } + + return divByFields; + } +} diff --git a/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/SpanContextValue.java b/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/SpanContextValue.java new file mode 100644 index 000000000..c666f56bf --- /dev/null +++ b/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/SpanContextValue.java @@ -0,0 +1,114 @@ +/* + * Teragrep Data Processing Language (DPL) translator for Apache Spark (pth_10) + * Copyright (C) 2019-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. + * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified + * versions must be marked as "Modified version of" The Program. + * + * Names of the licensors and authors may not be used for publicity purposes. + * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. + */ +package com.teragrep.pth10.ast.commands.transformstatement.timechart; + +import com.teragrep.pth10.ast.ContextValue; +import com.teragrep.pth10.ast.DPLParserCatalystContext; + +import com.teragrep.pth10.ast.commands.transformstatement.timechart.span.DefaultSpan; +import com.teragrep.pth10.ast.commands.transformstatement.timechart.span.Span; +import com.teragrep.pth10.ast.commands.transformstatement.timechart.span.SpanParameter; +import com.teragrep.pth10.ast.commands.transformstatement.timechart.span.TimeRange; +import com.teragrep.pth_03.antlr.DPLParser; +import org.apache.spark.sql.Column; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.List; + +/** + * Parses a Column from the "| timechart" command's span parameter + */ +public final class SpanContextValue implements ContextValue { + + private static final Logger LOGGER = LoggerFactory.getLogger(SpanContextValue.class); + + private final List ctxList; + private final DPLParserCatalystContext catCtx; + private final TimeRange defaultTimeRange; + + public SpanContextValue( + List ctxList, + DPLParserCatalystContext catCtx + ) { + this(ctxList, catCtx, catCtx.getParserConfig().getTimeRange()); + } + + public SpanContextValue( + List ctxList, + DPLParserCatalystContext catCtx, + TimeRange timeRange + ) { + this.ctxList = ctxList; + this.catCtx = catCtx; + this.defaultTimeRange = timeRange; + } + + @Override + public Column value() { + Span span = new DefaultSpan(defaultTimeRange); // uses default if no span parameter is found + TimeRange actualTimeRange = defaultTimeRange; + + boolean spanFound = false; + for (DPLParser.T_timechart_binOptParameterContext binOpt : ctxList) { + DPLParser.T_timechart_binSpanParameterContext spanCtx = binOpt + .t_timechart_spanOptParameter() + .t_timechart_binSpanParameter(); + if (spanCtx != null) { + if (spanFound) { + throw new IllegalArgumentException("| timechart does not allow multiple 'span=' parameters"); + } + LOGGER.info("visiting t_timechart_binSpanParameter:<{}>", spanCtx.getText()); + actualTimeRange = new TimeRange(spanCtx.getChild(1).getText()); + span = new SpanParameter(actualTimeRange); + spanFound = true; + } + } + + this.catCtx.setTimeChartSpanSeconds(actualTimeRange.asSeconds()); + return span.asColumn(); + } +} diff --git a/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/DefaultSpan.java b/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/DefaultSpan.java new file mode 100644 index 000000000..91725bbd4 --- /dev/null +++ b/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/DefaultSpan.java @@ -0,0 +1,79 @@ +/* + * Teragrep Data Processing Language (DPL) translator for Apache Spark (pth_10) + * Copyright (C) 2019-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. + * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified + * versions must be marked as "Modified version of" The Program. + * + * Names of the licensors and authors may not be used for publicity purposes. + * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. + */ +package com.teragrep.pth10.ast.commands.transformstatement.timechart.span; + +import org.apache.spark.sql.Column; +import org.apache.spark.sql.functions; +import org.apache.spark.unsafe.types.CalendarInterval; + +/** + * Default 'span' for | timechart command + */ +public final class DefaultSpan implements Span { + + private final CalendarInterval ival; + private final Column column; + private final String duration; + + public DefaultSpan(TimeRange timeRange) { + this(timeRange.asSeconds(), timeRange.asString()); + } + + public DefaultSpan(long seconds, String duration) { + this(new CalendarInterval(0, 0, seconds * 1000 * 1000), new Column("_time"), duration); + } + + public DefaultSpan(CalendarInterval ival, Column column, String duration) { + this.ival = ival; + this.column = column; + this.duration = duration; + } + + @Override + public Column asColumn() { + return functions.window(column, String.valueOf(ival), duration, "0 minutes"); + } +} diff --git a/src/main/java/com/teragrep/pth10/ast/TimeRange.java b/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/Span.java similarity index 92% rename from src/main/java/com/teragrep/pth10/ast/TimeRange.java rename to src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/Span.java index 8a919ee82..8df9482a5 100644 --- a/src/main/java/com/teragrep/pth10/ast/TimeRange.java +++ b/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/Span.java @@ -43,8 +43,11 @@ * Teragrep, the applicable Commercial License may apply to this file if you as * a licensee so wish it. */ -package com.teragrep.pth10.ast; +package com.teragrep.pth10.ast.commands.transformstatement.timechart.span; -public enum TimeRange { - TEN_SECONDS, ONE_MINUTE, FIVE_MINUTES, THIRTY_MINUTES, ONE_HOUR, ONE_DAY, ONE_MONTH +import org.apache.spark.sql.Column; + +public interface Span { + + Column asColumn(); } diff --git a/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/SpanParameter.java b/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/SpanParameter.java new file mode 100644 index 000000000..4e3b43272 --- /dev/null +++ b/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/SpanParameter.java @@ -0,0 +1,77 @@ +/* + * Teragrep Data Processing Language (DPL) translator for Apache Spark (pth_10) + * Copyright (C) 2019-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. + * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified + * versions must be marked as "Modified version of" The Program. + * + * Names of the licensors and authors may not be used for publicity purposes. + * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. + */ +package com.teragrep.pth10.ast.commands.transformstatement.timechart.span; + +import org.apache.spark.sql.Column; +import org.apache.spark.sql.functions; +import org.apache.spark.unsafe.types.CalendarInterval; + +/** + * | timechart commands 'span=' parameter + */ +public final class SpanParameter implements Span { + + private final Column column; + private final CalendarInterval ival; + + public SpanParameter(TimeRange timeRange) { + this(timeRange.asSeconds()); + } + + public SpanParameter(long seconds) { + this(new Column("_time"), new CalendarInterval(0, 0, seconds * 1000 * 1000L)); + } + + public SpanParameter(Column column, CalendarInterval ival) { + this.column = column; + this.ival = ival; + } + + @Override + public Column asColumn() { + return functions.window(column, String.valueOf(ival)); + } +} diff --git a/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/TimeRange.java b/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/TimeRange.java new file mode 100644 index 000000000..2eafdb07e --- /dev/null +++ b/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/TimeRange.java @@ -0,0 +1,152 @@ +/* + * Teragrep Data Processing Language (DPL) translator for Apache Spark (pth_10) + * Copyright (C) 2019-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. + * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified + * versions must be marked as "Modified version of" The Program. + * + * Names of the licensors and authors may not be used for publicity purposes. + * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. + */ +package com.teragrep.pth10.ast.commands.transformstatement.timechart.span; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * A time range for | timechart command's 'span=' parameter. Gets the span= parameter from the query and parses it. + */ +public final class TimeRange { + + private static final Logger LOGGER = LoggerFactory.getLogger(TimeRange.class); + + private final String duration; + + public TimeRange(String duration) { + this.duration = duration; + } + + public String asString() { + return duration; + } + + public long asSeconds() { + // incoming span-length consist of [] + // default timescale is sec + String timescale = "sec"; + final int numericalValue; + final Pattern p = Pattern.compile("\\d+"); + final Matcher m = p.matcher(duration); + if (m.lookingAt()) { + numericalValue = Integer.parseInt(m.group()); + final String[] parts = duration.split(m.group()); + if (parts.length > 1) { + timescale = parts[1].trim(); + } + } + else { + LOGGER.error("Span length error: missing numerical value:<{}>", duration); + throw new RuntimeException("| timechart 'span' parameter is missing a numerical value:" + duration); + } + + long sec; + switch (timescale) { + case "s": + case "sec": + case "secs": + case "second": + case "seconds": + case "S": { + sec = numericalValue; + break; + } + case "m": + case "min": + case "mins": + case "minute": + case "minutes": + case "M": { + sec = numericalValue * 60L; + break; + } + case "h": + case "hr": + case "hrs": + case "hour": + case "hours": + case "H": { + sec = numericalValue * 3600L; + break; + } + case "d": + case "day": + case "days": + case "D": { + sec = numericalValue * 3600L * 24; + break; + } + case "w": + case "week": + case "weeks": + case "W": { + sec = numericalValue * 3600L * 24 * 7; + break; + } + case "mon": + case "month": + case "months": + case "MON": { + // month is not supported as such, it needs to be changed to seconds + // use 30 as default month length + sec = (long) numericalValue * 30 * 24 * 3600; + break; + } + default: { + throw new RuntimeException( + "| timechart 'span' parameter only accepts seconds, minutes, hours, days, weeks and months. Got '" + + timescale + "' instead." + ); + } + } + + return sec; + } +} diff --git a/src/test/java/com/teragrep/pth10/TimechartStreamingTest.java b/src/test/java/com/teragrep/pth10/TimechartStreamingTest.java index 82f41e942..5a20f8bc5 100644 --- a/src/test/java/com/teragrep/pth10/TimechartStreamingTest.java +++ b/src/test/java/com/teragrep/pth10/TimechartStreamingTest.java @@ -222,4 +222,19 @@ public void testTimechartBasicCount() { Assertions.assertEquals("10", listOfCount.get(0)); }); } + + @Test + @DisabledIfSystemProperty( + named = "skipSparkTest", + matches = "true" + ) + public void testTimechartMultipleSpans() { + IllegalArgumentException e = streamingTestUtil + .performThrowingDPLTest( + IllegalArgumentException.class, "index=index_A | timechart span=1min span=2min count", testFile, + ds -> { + } + ); + Assertions.assertEquals("| timechart does not allow multiple 'span=' parameters", e.getMessage()); + } } From 88d27ac29b640b7ce7a053153022be801892257a Mon Sep 17 00:00:00 2001 From: Rasmus Jokinen <146736881+51-code@users.noreply.github.com> Date: Mon, 24 Feb 2025 13:16:03 +0200 Subject: [PATCH 4/5] Remove DefaultSpan, SpanParameter can be initialised with default values --- .../timechart/SpanContextValue.java | 8 +- .../timechart/span/DefaultSpan.java | 79 ------------------- .../timechart/span/TimeRange.java | 4 - 3 files changed, 4 insertions(+), 87 deletions(-) delete mode 100644 src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/DefaultSpan.java diff --git a/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/SpanContextValue.java b/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/SpanContextValue.java index c666f56bf..d70228b73 100644 --- a/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/SpanContextValue.java +++ b/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/SpanContextValue.java @@ -48,7 +48,6 @@ import com.teragrep.pth10.ast.ContextValue; import com.teragrep.pth10.ast.DPLParserCatalystContext; -import com.teragrep.pth10.ast.commands.transformstatement.timechart.span.DefaultSpan; import com.teragrep.pth10.ast.commands.transformstatement.timechart.span.Span; import com.teragrep.pth10.ast.commands.transformstatement.timechart.span.SpanParameter; import com.teragrep.pth10.ast.commands.transformstatement.timechart.span.TimeRange; @@ -80,16 +79,17 @@ public SpanContextValue( public SpanContextValue( List ctxList, DPLParserCatalystContext catCtx, - TimeRange timeRange + TimeRange defaultTimeRange ) { this.ctxList = ctxList; this.catCtx = catCtx; - this.defaultTimeRange = timeRange; + this.defaultTimeRange = defaultTimeRange; } @Override public Column value() { - Span span = new DefaultSpan(defaultTimeRange); // uses default if no span parameter is found + // use default time range from config if span= parameter is not used + Span span = new SpanParameter(defaultTimeRange); TimeRange actualTimeRange = defaultTimeRange; boolean spanFound = false; diff --git a/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/DefaultSpan.java b/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/DefaultSpan.java deleted file mode 100644 index 91725bbd4..000000000 --- a/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/DefaultSpan.java +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Teragrep Data Processing Language (DPL) translator for Apache Spark (pth_10) - * Copyright (C) 2019-2024 Suomen Kanuuna Oy - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - * - * - * Additional permission under GNU Affero General Public License version 3 - * section 7 - * - * If you modify this Program, or any covered work, by linking or combining it - * with other code, such other code is not for that reason alone subject to any - * of the requirements of the GNU Affero GPL version 3 as long as this Program - * is the same Program as licensed from Suomen Kanuuna Oy without any additional - * modifications. - * - * Supplemented terms under GNU Affero General Public License version 3 - * section 7 - * - * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified - * versions must be marked as "Modified version of" The Program. - * - * Names of the licensors and authors may not be used for publicity purposes. - * - * No rights are granted for use of trade names, trademarks, or service marks - * which are in The Program if any. - * - * Licensee must indemnify licensors and authors for any liability that these - * contractual assumptions impose on licensors and authors. - * - * To the extent this program is licensed as part of the Commercial versions of - * Teragrep, the applicable Commercial License may apply to this file if you as - * a licensee so wish it. - */ -package com.teragrep.pth10.ast.commands.transformstatement.timechart.span; - -import org.apache.spark.sql.Column; -import org.apache.spark.sql.functions; -import org.apache.spark.unsafe.types.CalendarInterval; - -/** - * Default 'span' for | timechart command - */ -public final class DefaultSpan implements Span { - - private final CalendarInterval ival; - private final Column column; - private final String duration; - - public DefaultSpan(TimeRange timeRange) { - this(timeRange.asSeconds(), timeRange.asString()); - } - - public DefaultSpan(long seconds, String duration) { - this(new CalendarInterval(0, 0, seconds * 1000 * 1000), new Column("_time"), duration); - } - - public DefaultSpan(CalendarInterval ival, Column column, String duration) { - this.ival = ival; - this.column = column; - this.duration = duration; - } - - @Override - public Column asColumn() { - return functions.window(column, String.valueOf(ival), duration, "0 minutes"); - } -} diff --git a/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/TimeRange.java b/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/TimeRange.java index 2eafdb07e..8a20c6ff7 100644 --- a/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/TimeRange.java +++ b/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/TimeRange.java @@ -64,10 +64,6 @@ public TimeRange(String duration) { this.duration = duration; } - public String asString() { - return duration; - } - public long asSeconds() { // incoming span-length consist of [] // default timescale is sec From 521461230f511cf7a73f8de2c5e3d33f1b476392 Mon Sep 17 00:00:00 2001 From: Rasmus Jokinen <146736881+51-code@users.noreply.github.com> Date: Tue, 25 Feb 2025 13:13:00 +0200 Subject: [PATCH 5/5] Unit tests for SpanParameter and TimeRange --- .../timechart/span/SpanParameter.java | 16 +++ .../timechart/span/TimeRange.java | 15 +++ .../timechart/span/SpanParameterTest.java | 79 ++++++++++++ .../timechart/span/TimeRangeTest.java | 113 ++++++++++++++++++ 4 files changed, 223 insertions(+) create mode 100644 src/test/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/SpanParameterTest.java create mode 100644 src/test/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/TimeRangeTest.java diff --git a/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/SpanParameter.java b/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/SpanParameter.java index 4e3b43272..86709405a 100644 --- a/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/SpanParameter.java +++ b/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/SpanParameter.java @@ -49,6 +49,8 @@ import org.apache.spark.sql.functions; import org.apache.spark.unsafe.types.CalendarInterval; +import java.util.Objects; + /** * | timechart commands 'span=' parameter */ @@ -74,4 +76,18 @@ public SpanParameter(Column column, CalendarInterval ival) { public Column asColumn() { return functions.window(column, String.valueOf(ival)); } + + @Override + public boolean equals(Object o) { + if (o == null || getClass() != o.getClass()) { + return false; + } + SpanParameter that = (SpanParameter) o; + return Objects.equals(column, that.column) && Objects.equals(ival, that.ival); + } + + @Override + public int hashCode() { + return Objects.hash(column, ival); + } } diff --git a/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/TimeRange.java b/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/TimeRange.java index 8a20c6ff7..251d48877 100644 --- a/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/TimeRange.java +++ b/src/main/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/TimeRange.java @@ -48,6 +48,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.util.Objects; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -145,4 +146,18 @@ public long asSeconds() { return sec; } + + @Override + public boolean equals(Object o) { + if (o == null || getClass() != o.getClass()) { + return false; + } + TimeRange timeRange = (TimeRange) o; + return Objects.equals(duration, timeRange.duration); + } + + @Override + public int hashCode() { + return Objects.hashCode(duration); + } } diff --git a/src/test/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/SpanParameterTest.java b/src/test/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/SpanParameterTest.java new file mode 100644 index 000000000..781decc30 --- /dev/null +++ b/src/test/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/SpanParameterTest.java @@ -0,0 +1,79 @@ +/* + * Teragrep Data Processing Language (DPL) translator for Apache Spark (pth_10) + * Copyright (C) 2019-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. + * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified + * versions must be marked as "Modified version of" The Program. + * + * Names of the licensors and authors may not be used for publicity purposes. + * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. + */ +package com.teragrep.pth10.ast.commands.transformstatement.timechart.span; + +import nl.jqno.equalsverifier.EqualsVerifier; +import org.apache.spark.sql.Column; +import org.apache.spark.sql.functions; +import org.apache.spark.unsafe.types.CalendarInterval; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class SpanParameterTest { + + @Test + public void testEqualsVerifier() { + // equalsVerifier flags Spark's Column as a recursive data structure, have to use prefabs + EqualsVerifier + .forClass(SpanParameter.class) + .withNonnullFields("column", "ival") + .withPrefabValues(Column.class, new Column("_time"), functions.lit("")) + .verify(); + } + + @Test + public void testAsColumn() { + TimeRange timeRange = new TimeRange("5 minutes"); + SpanParameter spanParameter = new SpanParameter(timeRange); + + long microSeconds = timeRange.asSeconds() * 1000 * 1000; + Column expectedColumn = functions + .window(new Column("_time"), String.valueOf(new CalendarInterval(0, 0, microSeconds))); + + // Spark's Columns seem to be equal only with same reference. Using String representation instead. + Assertions.assertEquals(expectedColumn.toString(), spanParameter.asColumn().toString()); + } +} diff --git a/src/test/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/TimeRangeTest.java b/src/test/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/TimeRangeTest.java new file mode 100644 index 000000000..9df4c80c9 --- /dev/null +++ b/src/test/java/com/teragrep/pth10/ast/commands/transformstatement/timechart/span/TimeRangeTest.java @@ -0,0 +1,113 @@ +/* + * Teragrep Data Processing Language (DPL) translator for Apache Spark (pth_10) + * Copyright (C) 2019-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. + * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified + * versions must be marked as "Modified version of" The Program. + * + * Names of the licensors and authors may not be used for publicity purposes. + * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. + */ +package com.teragrep.pth10.ast.commands.transformstatement.timechart.span; + +import nl.jqno.equalsverifier.EqualsVerifier; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class TimeRangeTest { + + @Test + public void testEqualsVerifier() { + EqualsVerifier.forClass(TimeRange.class).withNonnullFields("duration").verify(); + } + + @Test + public void testSeconds() { + String duration = "5 seconds"; + TimeRange timeRange = new TimeRange(duration); + + long expectedSeconds = 5; + Assertions.assertEquals(expectedSeconds, timeRange.asSeconds()); + } + + @Test + public void testMinutes() { + String duration = "5 minutes"; + TimeRange timeRange = new TimeRange(duration); + + long expectedSeconds = 5 * 60; + Assertions.assertEquals(expectedSeconds, timeRange.asSeconds()); + } + + @Test + public void testHours() { + String duration = "5 hours"; + TimeRange timeRange = new TimeRange(duration); + + long expectedSeconds = 5 * 60 * 60; + Assertions.assertEquals(expectedSeconds, timeRange.asSeconds()); + } + + @Test + public void testDays() { + String duration = "5 days"; + TimeRange timeRange = new TimeRange(duration); + + long expectedSeconds = 5 * 60 * 60 * 24; + Assertions.assertEquals(expectedSeconds, timeRange.asSeconds()); + } + + @Test + public void testMonths() { + String duration = "5 months"; + TimeRange timeRange = new TimeRange(duration); + + long expectedSeconds = 5 * 60 * 60 * 24 * 30; + Assertions.assertEquals(expectedSeconds, timeRange.asSeconds()); + } + + @Test + public void testYears() { + String duration = "5 years"; + TimeRange timeRange = new TimeRange(duration); + + String expectedErrorMessage = "| timechart 'span' parameter only accepts seconds, minutes, hours, days, weeks and months. Got 'years' instead."; + RuntimeException e = Assertions.assertThrows(RuntimeException.class, timeRange::asSeconds); + Assertions.assertEquals(expectedErrorMessage, e.getMessage()); + } +}