Bump orc-rust (auron-project/datafusion-orc) from rev 9beb12c to 59bcd29 in
Cargo.toml and Cargo.lock, and add two ORC filter tests to AuronQuerySuite:
an IS NOT NULL filter on a string column, and equality / range filters on a
string column holding 2000-character values (the case the in-test comments
describe as truncated min/max statistics).

diff --git a/Cargo.lock b/Cargo.lock
index 2f188ef34..42d8c6cfd 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2770,7 +2770,7 @@ checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50"
 [[package]]
 name = "orc-rust"
 version = "0.7.0"
-source = "git+https://github.com/auron-project/datafusion-orc.git?rev=9beb12c#9beb12cbff3de5a6b8b4f6e4f4b602f9a762c9c7"
+source = "git+https://github.com/auron-project/datafusion-orc.git?rev=59bcd29#59bcd29a4c756ba91e47b11aab1b9312130580eb"
 dependencies = [
  "arrow",
  "async-trait",
diff --git a/Cargo.toml b/Cargo.toml
index dd52ef4bb..fa08d38af 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -206,7 +206,7 @@ datafusion-execution = { git = "https://github.com/auron-project/datafusion.git"
 datafusion-optimizer = { git = "https://github.com/auron-project/datafusion.git", rev = "9034aeffb"}
 datafusion-physical-expr = { git = "https://github.com/auron-project/datafusion.git", rev = "9034aeffb"}
 datafusion-spark = { git = "https://github.com/auron-project/datafusion.git", rev = "9034aeffb"}
-orc-rust = { git = "https://github.com/auron-project/datafusion-orc.git", rev = "9beb12c"}
+orc-rust = { git = "https://github.com/auron-project/datafusion-orc.git", rev = "59bcd29"}
 
 # arrow: branch=v55.2.0-blaze
 arrow = { git = "https://github.com/auron-project/arrow-rs.git", rev = "5de02520c"}
diff --git a/spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronQuerySuite.scala b/spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronQuerySuite.scala
index 56119a7cb..cf2943061 100644
--- a/spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronQuerySuite.scala
+++ b/spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronQuerySuite.scala
@@ -1014,4 +1014,23 @@ class AuronQuerySuite extends AuronQueryTest with BaseAuronSQLSuite with AuronSQ
           |FROM t_filter_agg_2289""".stripMargin)
     }
   }
+
+  test("test not null filter for orc table") {
+    withTable("orc_string_filter") {
+      sql("create table orc_string_filter(id int, b string) using orc")
+      sql("insert into orc_string_filter values (1, 'abc'), (2, null), (3, 'def')")
+      checkSparkAnswerAndOperator("select * from orc_string_filter where b is not null")
+    }
+  }
+
+  test("test string filter for orc table with truncated stats") {
+    withTable("orc_string_trunc") {
+      sql("create table orc_string_trunc(id int, b string) using orc")
+      val longA = "a" * 2000 // > 1024 bytes -> min/max truncated (lower_bound set)
+      val longB = "b" * 2000 // > 1024 bytes -> upper_bound set
+      sql(s"insert into orc_string_trunc values (1, '$longA'), (2, '$longB'), (3, 'mid')")
+      checkSparkAnswerAndOperator(s"select * from orc_string_trunc where b = '$longA'")
+      checkSparkAnswerAndOperator("select * from orc_string_trunc where b > 'a'")
+    }
+  }
 }