Skip to content

Commit 9e192ef

Browse files
authored
Checks if the subject string contains null values (#628)
* checks that the subject string is not null, added tests for null subject strings * changed to the right class name * changed to debug level print
1 parent e4242e4 commit 9e192ef

3 files changed

Lines changed: 92 additions & 3 deletions

File tree

src/main/java/com/teragrep/pth_10/ast/commands/evalstatement/UDFs/RegexMatch.java

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@
4949
import com.teragrep.pth_10.ast.TextString;
5050
import com.teragrep.pth_10.ast.UnquotedText;
5151
import org.apache.spark.sql.api.java.UDF2;
52+
import org.slf4j.Logger;
53+
import org.slf4j.LoggerFactory;
5254
import scala.collection.Iterator;
5355
import scala.collection.mutable.WrappedArray;
5456

@@ -70,6 +72,7 @@ public class RegexMatch implements UDF2<Object, String, Object>, Serializable {
7072
private static final long serialVersionUID = 1L;
7173
private final boolean isMultiValue;
7274
private final NullValue nullValue;
75+
private static final Logger LOGGER = LoggerFactory.getLogger(RegexMatch.class);
7376

7477
public RegexMatch(NullValue nullValue) {
7578
super();
@@ -86,7 +89,7 @@ public RegexMatch(boolean isMultiValue, NullValue nullValue) {
8689
@Override
8790
public Object call(Object subject, String regexString) throws Exception {
8891

89-
String subjectStr = null;
92+
String subjectStr = this.nullValue.toString();
9093

9194
if (subject instanceof Long) {
9295
subjectStr = ((Long) subject).toString();
@@ -98,7 +101,7 @@ else if (subject instanceof Double) {
98101
subjectStr = ((Double) subject).toString();
99102
}
100103
else if (subject instanceof Float) {
101-
subjectStr = ((Double) subject).toString();
104+
subjectStr = ((Float) subject).toString();
102105
}
103106
else if (subject instanceof String) {
104107
subjectStr = ((String) subject);
@@ -124,6 +127,10 @@ else if (subject instanceof java.sql.Timestamp) {
124127
private Boolean performForNormalField(String subjectStr, String regexString) {
125128
regexString = new UnquotedText(new TextString(regexString)).read();
126129
boolean isMatch = false;
130+
if (subjectStr == null) {
131+
LOGGER.debug("Subject string contains null values");
132+
return isMatch;
133+
}
127134

128135
try {
129136
Pattern p = Pattern.compile(regexString);

src/test/java/com/teragrep/pth_10/evalTest.java

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4696,4 +4696,86 @@ public void evalFieldWithSingleQuotes() {
46964696
Assertions.assertEquals("string", r.getAs(0));
46974697
});
46984698
}
4699+
4700+
@Test
4701+
@DisabledIfSystemProperty(
4702+
named = "skipSparkTest",
4703+
matches = "true"
4704+
)
4705+
public void testEvalMatchNullsOnlySubject() {
4706+
String q = "index=index_A | eval a=null() | eval b=if(match(a,\"3\"),1,0)";
4707+
String testFile = "src/test/resources/eval_test_ips*.jsonl"; // * to make the path into a directory path
4708+
4709+
streamingTestUtil.performDPLTest(q, testFile, res -> {
4710+
final StructType expectedSchema = new StructType(new StructField[] {
4711+
new StructField("_raw", DataTypes.StringType, true, new MetadataBuilder().build()),
4712+
new StructField("_time", DataTypes.TimestampType, true, new MetadataBuilder().build()),
4713+
new StructField("host", DataTypes.StringType, true, new MetadataBuilder().build()),
4714+
new StructField("index", DataTypes.StringType, true, new MetadataBuilder().build()),
4715+
new StructField("ip", DataTypes.StringType, true, new MetadataBuilder().build()),
4716+
new StructField("offset", DataTypes.LongType, true, new MetadataBuilder().build()),
4717+
new StructField("partition", DataTypes.StringType, true, new MetadataBuilder().build()),
4718+
new StructField("source", DataTypes.StringType, true, new MetadataBuilder().build()),
4719+
new StructField("sourcetype", DataTypes.StringType, true, new MetadataBuilder().build()),
4720+
new StructField("a", DataTypes.StringType, true, new MetadataBuilder().build()),
4721+
new StructField("b", DataTypes.createArrayType(DataTypes.StringType, true), true, new MetadataBuilder().build())
4722+
});
4723+
Assertions.assertEquals(expectedSchema, res.schema()); //check schema
4724+
// Get column 'b'
4725+
Dataset<Row> resA = res.select("b").orderBy("offset");
4726+
List<String> lst = resA
4727+
.collectAsList()
4728+
.stream()
4729+
.map(r -> r.getList(0).get(0).toString())
4730+
.collect(Collectors.toList());
4731+
4732+
// we should get the same amount of values back as we put in
4733+
Assertions.assertEquals(3, lst.size());
4734+
// Compare values to expected
4735+
List<String> expectedLst = Arrays.asList("0", "0", "0");
4736+
4737+
Assertions.assertEquals(expectedLst, lst);
4738+
});
4739+
}
4740+
4741+
@Test
4742+
@DisabledIfSystemProperty(
4743+
named = "skipSparkTest",
4744+
matches = "true"
4745+
)
4746+
public void testEvalMatchSubjectWithSomeNUlls() {
4747+
String q = "index=index_A | eval a=if(match(sourcetype,\"X\"),1,0)";
4748+
String testFile = "src/test/resources/eval_test_ips*.jsonl"; // * to make the path into a directory path
4749+
4750+
streamingTestUtil.performDPLTest(q, testFile, res -> {
4751+
final StructType expectedSchema = new StructType(new StructField[] {
4752+
new StructField("_raw", DataTypes.StringType, true, new MetadataBuilder().build()),
4753+
new StructField("_time", DataTypes.TimestampType, true, new MetadataBuilder().build()),
4754+
new StructField("host", DataTypes.StringType, true, new MetadataBuilder().build()),
4755+
new StructField("index", DataTypes.StringType, true, new MetadataBuilder().build()),
4756+
new StructField("ip", DataTypes.StringType, true, new MetadataBuilder().build()),
4757+
new StructField("offset", DataTypes.LongType, true, new MetadataBuilder().build()),
4758+
new StructField("partition", DataTypes.StringType, true, new MetadataBuilder().build()),
4759+
new StructField("source", DataTypes.StringType, true, new MetadataBuilder().build()),
4760+
new StructField("sourcetype", DataTypes.StringType, true, new MetadataBuilder().build()),
4761+
new StructField("a", DataTypes.createArrayType(DataTypes.StringType, true), true, new MetadataBuilder().build())
4762+
});
4763+
Assertions.assertEquals(expectedSchema, res.schema()); //check schema
4764+
// Get column 'a'
4765+
Dataset<Row> resA = res.select("a").orderBy("offset");
4766+
List<String> lst = resA
4767+
.collectAsList()
4768+
.stream()
4769+
.map(r -> r.getList(0).get(0).toString())
4770+
.collect(Collectors.toList());
4771+
4772+
// we should get the same amount of values back as we put in
4773+
Assertions.assertEquals(3, lst.size());
4774+
// Compare values to expected
4775+
List<String> expectedLst = Arrays.asList("1", "1", "0");
4776+
4777+
Assertions.assertEquals(expectedLst, lst);
4778+
});
4779+
}
4780+
46994781
}
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
{"_time": "2001-01-01T01:01:01.010+03:00", "ip":"192.168.2.1" ,"_raw": "127.0.0.123:4567 [26/Nov/2021:07:02:44.809] https-in~ https-in/<NOSRV> 0/-1/-1/-1/0 302 104 - - LR-- 1/1/0/0/0 0/0 \"GET /Monster_boy_normal_(entity) HTTP/1.1\" A:X:0 computer01.example.com", "index": "index_A", "sourcetype": "A:X:0", "source": "imfile:computer01.example.com:01.log", "host": "computer01.example.com", "partition": "hundred-year/2001/01-01/computer01.example.com/01/01.logGLOB-2001010101.log.gz", "offset": 1}
22
{"_time": "2001-01-01T01:01:01.011+03:00", "ip":"192.168.3.5", "_raw": "127.0.0.123:4567 [26/Nov/2021:07:02:44.809] https-in~ https-in/<NOSRV> 0/-1/-1/-1/0 302 104 - - LR-- 1/1/0/0/0 0/0 \"GET /Monster_boy_normal_(entity) HTTP/1.1\" A:X:0 computer01.example.com cOmPuter02.example.com", "index": "index_A", "sourcetype": "A:X:0", "source": "imfile:computer01.example.com:01.log", "host": "computer02.example.com", "partition": "hundred-year/2001/01-01/computer01.example.com/01/01.logGLOB-2001010101.log.gz", "offset": 2}
3-
{"_time": "2001-01-01T01:01:01.012+03:00", "ip":"192.168.2.1", "_raw": "127.0.0.123:4567 [26/Nov/2021:07:02:44.809] https-in~ https-in/<NOSRV> 0/-1/-1/-1/0 302 104 - - LR-- 1/1/0/0/0 0/0 \"GET /Monster_boy_normal_(entity) HTTP/1.1\" A:X:0 computer01.example.com", "index": "index_A", "sourcetype": "A:X:0", "source": "imfile:computer01.example.com:01.log", "host": "computer01.example.com", "partition": "hundred-year/2001/01-01/computer01.example.com/01/01.logGLOB-2001010101.log.gz", "offset": 3}
3+
{"_time": "2001-01-01T01:01:01.012+03:00", "ip":"192.168.2.1", "_raw": "127.0.0.123:4567 [26/Nov/2021:07:02:44.809] https-in~ https-in/<NOSRV> 0/-1/-1/-1/0 302 104 - - LR-- 1/1/0/0/0 0/0 \"GET /Monster_boy_normal_(entity) HTTP/1.1\" A:X:0 computer01.example.com", "index": "index_A", "sourcetype": null, "source": "imfile:computer01.example.com:01.log", "host": "computer01.example.com", "partition": "hundred-year/2001/01-01/computer01.example.com/01/01.logGLOB-2001010101.log.gz", "offset": 3}

0 commit comments

Comments
 (0)