Skip to content

Commit 840f155

Browse files
authored
Add teragrep regexextract command (#82)
1 parent 1269004 commit 840f155

5 files changed

Lines changed: 142 additions & 4 deletions

File tree

src/main/antlr4/imports/COMMAND_TERAGREP_MODE.g4

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ COMMAND_TERAGREP_MODE_HOST: 'host' -> pushMode(COMMAND_TERAGREP_IP_MODE);
7979
COMMAND_TERAGREP_MODE_PORT: 'port' -> pushMode(COMMAND_TERAGREP_IP_MODE);
8080
COMMAND_TERAGREP_MODE_DOT: '.';
8181
COMMAND_TERAGREP_MODE_TOKENIZER: 'tokenizer';
82+
COMMAND_TERAGREP_MODE_REGEXEXTRACT: 'regexextract';
8283
COMMAND_TERAGREP_MODE_SYSLOG: 'syslog';
8384
COMMAND_TERAGREP_MODE_STREAM: 'stream';
8485
COMMAND_TERAGREP_MODE_LOAD: 'load';
@@ -96,6 +97,7 @@ COMMAND_TERAGREP_MODE_DEFAULT_FORMAT: ('default'|'DEFAULT'|'avro'|'AVRO');
9697
COMMAND_TERAGREP_MODE_FORMAT: 'format' -> pushMode(GET_STRING);
9798
COMMAND_TERAGREP_MODE_INPUT: 'input' -> pushMode(GET_FIELD);
9899
COMMAND_TERAGREP_MODE_OUTPUT: 'output' -> pushMode(GET_FIELD);
100+
COMMAND_TERAGREP_MODE_REGEX: 'regex' -> pushMode(GET_STRING);
99101
COMMAND_TERAGREP_MODE_ESTIMATES: 'estimates' -> pushMode(GET_FIELD);
100102
COMMAND_TERAGREP_MODE_HEADER: 'header=' -> pushMode(GET_BOOLEAN);
101103
COMMAND_TERAGREP_MODE_SCHEMA: 'schema=' -> pushMode(GET_STRING);

src/main/antlr4/imports/DPLParserTransform_teragrep.g4

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ t_execParameter
5959
| t_kafkaSaveModeParameter
6060
| t_bloomModeParameter
6161
| t_tokenizerParameter
62+
| t_regexextractParameter
6263
| t_dynatraceParameter)
6364
;
6465

@@ -67,14 +68,22 @@ t_dynatraceParameter
6768
;
6869

6970
t_tokenizerParameter
70-
: COMMAND_TERAGREP_MODE_TOKENIZER t_formatParameter? t_inputParamater? t_outputParameter?
71+
: COMMAND_TERAGREP_MODE_TOKENIZER t_formatParameter? t_inputParameter? t_outputParameter?
72+
;
73+
74+
t_regexextractParameter
75+
: COMMAND_TERAGREP_MODE_REGEXEXTRACT t_regexParameter? t_inputParameter? t_outputParameter?
7176
;
7277

7378
t_formatParameter
7479
: COMMAND_TERAGREP_MODE_FORMAT stringType
7580
;
7681

77-
t_inputParamater
82+
t_regexParameter
83+
: COMMAND_TERAGREP_MODE_REGEX stringType
84+
;
85+
86+
t_inputParameter
7887
: COMMAND_TERAGREP_MODE_INPUT fieldType
7988
;
8089

@@ -131,7 +140,7 @@ t_getArchiveSummaryParameter
131140
;
132141

133142
t_bloomOptionParameter
134-
: COMMAND_TERAGREP_MODE_UPDATE t_estimatesParameter? t_inputParamater? | COMMAND_TERAGREP_MODE_CREATE t_estimatesParameter? t_inputParamater? | COMMAND_TERAGREP_MODE_ESTIMATE t_inputParamater? t_outputParameter?
143+
: COMMAND_TERAGREP_MODE_UPDATE t_estimatesParameter? t_inputParameter? | COMMAND_TERAGREP_MODE_CREATE t_estimatesParameter? t_inputParameter? | COMMAND_TERAGREP_MODE_ESTIMATE t_inputParameter? t_outputParameter?
135144
;
136145

137146
t_hostParameter

src/test/java/com/teragrep/pth_03/tests/TeragrepSyntaxTests.java

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,6 @@
4848
import com.teragrep.pth_03.ParserStructureTestingUtility;
4949
import com.teragrep.pth_03.ParserSyntaxTestingUtility;
5050
import org.junit.jupiter.api.Assertions;
51-
import org.junit.jupiter.api.Test;
5251
import org.junit.jupiter.params.ParameterizedTest;
5352
import org.junit.jupiter.params.provider.ValueSource;
5453
import org.w3c.dom.NodeList;
@@ -358,4 +357,40 @@ void testHdfsSaveAllParameters(String arg) {
358357
assertEquals(1, headerNodes.getLength());
359358
assertEquals(1, pathNodes.getLength());
360359
}
360+
361+
/**
 * Checks that the bare regexextract fixture yields exactly one
 * t_regexextractParameter node in the parse tree.
 *
 * The fixture is resolved as
 * src/test/resources/antlr4/commands/teragrep/{arg}.txt and queried with an
 * XPath expression through ParserStructureTestingUtility.xpathQueryFile.
 *
 * @param arg fixture file name without the ".txt" extension
 */
@ParameterizedTest
362+
@ValueSource(strings = {
363+
"teragrep_regexextract",
364+
})
365+
void testRegexExtract(String arg) {
366+
ParserStructureTestingUtility pstu = new ParserStructureTestingUtility();
367+
String fileName = "src/test/resources/antlr4/commands/teragrep/" + arg + ".txt";
368+
String regexextractPath = "/root/transformStatement/teragrepTransformation/t_execParameter/t_regexextractParameter";
369+
NodeList regexextractNodes = Assertions.assertDoesNotThrow(() -> (NodeList) pstu.xpathQueryFile(fileName, regexextractPath, false));
370+
// Exactly one t_regexextractParameter node must match the XPath query.
371+
assertEquals(1, regexextractNodes.getLength());
372+
}
373+
374+
/**
 * Checks that the regexextract fixture with parameters yields exactly one
 * t_regexextractParameter node and, beneath it, exactly one each of
 * t_regexParameter, t_inputParameter and t_outputParameter.
 *
 * The fixture is resolved as
 * src/test/resources/antlr4/commands/teragrep/{arg}.txt and each XPath
 * expression is evaluated through ParserStructureTestingUtility.xpathQueryFile.
 *
 * @param arg fixture file name without the ".txt" extension
 */
@ParameterizedTest
375+
@ValueSource(strings = {
376+
"teragrep_regexextract_params",
377+
})
378+
void testRegexExtractWithParams(String arg) {
379+
ParserStructureTestingUtility pstu = new ParserStructureTestingUtility();
380+
String fileName = "src/test/resources/antlr4/commands/teragrep/" + arg + ".txt";
381+
String regexextractPath = "/root/transformStatement/teragrepTransformation/t_execParameter/t_regexextractParameter";
382+
String regexPath = "/root/transformStatement/teragrepTransformation/t_execParameter/t_regexextractParameter/t_regexParameter";
383+
String inputPath = "/root/transformStatement/teragrepTransformation/t_execParameter/t_regexextractParameter/t_inputParameter";
384+
String outputPath = "/root/transformStatement/teragrepTransformation/t_execParameter/t_regexextractParameter/t_outputParameter";
385+
// NOTE(review): this is the only call that passes true as the third argument; the
// flag's meaning is not visible here (possibly a debug/print switch) -- confirm it is intended.
NodeList regexextractNodes = Assertions.assertDoesNotThrow(() -> (NodeList) pstu.xpathQueryFile(fileName, regexextractPath, true));
386+
NodeList regexNodes = Assertions.assertDoesNotThrow(() -> (NodeList) pstu.xpathQueryFile(fileName, regexPath, false));
387+
NodeList inputNodes = Assertions.assertDoesNotThrow(() -> (NodeList) pstu.xpathQueryFile(fileName, inputPath, false));
388+
NodeList outputNodes = Assertions.assertDoesNotThrow(() -> (NodeList) pstu.xpathQueryFile(fileName, outputPath, false));
389+
390+
// Each XPath query must match exactly one node: the command itself and
// one node for each of its regex, input and output parameters.
391+
assertEquals(1, regexextractNodes.getLength());
392+
assertEquals(1, regexNodes.getLength());
393+
assertEquals(1, inputNodes.getLength());
394+
assertEquals(1, outputNodes.getLength());
395+
}
361396
}
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
<!-- /*
2+
* Teragrep Data Processing Language Parser Library PTH-03
3+
* Copyright (C) 2019, 2020, 2021, 2022, 2023 Suomen Kanuuna Oy
4+
*
5+
* This program is free software: you can redistribute it and/or modify
6+
* it under the terms of the GNU Affero General Public License as published by
7+
* the Free Software Foundation, either version 3 of the License, or
8+
* (at your option) any later version.
9+
*
10+
* This program is distributed in the hope that it will be useful,
11+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
12+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13+
* GNU Affero General Public License for more details.
14+
*
15+
* You should have received a copy of the GNU Affero General Public License
16+
* along with this program. If not, see <https://github.com/teragrep/teragrep/blob/main/LICENSE>.
17+
*
18+
*
19+
* Additional permission under GNU Affero General Public License version 3
20+
* section 7
21+
*
22+
* If you modify this Program, or any covered work, by linking or combining it
23+
* with other code, such other code is not for that reason alone subject to any
24+
* of the requirements of the GNU Affero GPL version 3 as long as this Program
25+
* is the same Program as licensed from Suomen Kanuuna Oy without any additional
26+
* modifications.
27+
*
28+
* Supplemented terms under GNU Affero General Public License version 3
29+
* section 7
30+
*
31+
* Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified
32+
* versions must be marked as "Modified version of" The Program.
33+
*
34+
* Names of the licensors and authors may not be used for publicity purposes.
35+
*
36+
* No rights are granted for use of trade names, trademarks, or service marks
37+
* which are in The Program if any.
38+
*
39+
* Licensee must indemnify licensors and authors for any liability that these
40+
* contractual assumptions impose on licensors and authors.
41+
*
42+
* To the extent this program is licensed as part of the Commercial versions of
43+
* Teragrep, the applicable Commercial License may apply to this file if you as
44+
* a licensee so wish it.
45+
*/ -->
46+
| teragrep exec regexextract
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
<!-- /*
2+
* Teragrep Data Processing Language Parser Library PTH-03
3+
* Copyright (C) 2019, 2020, 2021, 2022, 2023 Suomen Kanuuna Oy
4+
*
5+
* This program is free software: you can redistribute it and/or modify
6+
* it under the terms of the GNU Affero General Public License as published by
7+
* the Free Software Foundation, either version 3 of the License, or
8+
* (at your option) any later version.
9+
*
10+
* This program is distributed in the hope that it will be useful,
11+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
12+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13+
* GNU Affero General Public License for more details.
14+
*
15+
* You should have received a copy of the GNU Affero General Public License
16+
* along with this program. If not, see <https://github.com/teragrep/teragrep/blob/main/LICENSE>.
17+
*
18+
*
19+
* Additional permission under GNU Affero General Public License version 3
20+
* section 7
21+
*
22+
* If you modify this Program, or any covered work, by linking or combining it
23+
* with other code, such other code is not for that reason alone subject to any
24+
* of the requirements of the GNU Affero GPL version 3 as long as this Program
25+
* is the same Program as licensed from Suomen Kanuuna Oy without any additional
26+
* modifications.
27+
*
28+
* Supplemented terms under GNU Affero General Public License version 3
29+
* section 7
30+
*
31+
* Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified
32+
* versions must be marked as "Modified version of" The Program.
33+
*
34+
* Names of the licensors and authors may not be used for publicity purposes.
35+
*
36+
* No rights are granted for use of trade names, trademarks, or service marks
37+
* which are in The Program if any.
38+
*
39+
* Licensee must indemnify licensors and authors for any liability that these
40+
* contractual assumptions impose on licensors and authors.
41+
*
42+
* To the extent this program is licensed as part of the Commercial versions of
43+
* Teragrep, the applicable Commercial License may apply to this file if you as
44+
* a licensee so wish it.
45+
*/ -->
46+
| teragrep exec regexextract regex=regex input=input output=output

0 commit comments

Comments
 (0)