From cc42fc1a5ef93b3008f4acf61c00406c7f37a2ff Mon Sep 17 00:00:00 2001 From: geyanggang Date: Wed, 15 Apr 2026 19:22:09 +0800 Subject: [PATCH 1/3] improvement(deps): upgrade Hadoop from 2.10.2 to 3.3.6 and remove hadoop2 dependency line --- catalogs/catalog-hive/build.gradle.kts | 30 ++++++++++--------- .../catalog-lakehouse-hudi/build.gradle.kts | 20 +++++++++---- .../catalog-lakehouse-paimon/build.gradle.kts | 8 ++--- .../hive-metastore-common/build.gradle.kts | 11 +++++-- .../hive-metastore2-libs/build.gradle.kts | 4 +-- .../hive-metastore3-libs/build.gradle.kts | 2 +- flink-connector/flink/build.gradle.kts | 11 +++++-- gradle/libs.versions.toml | 21 +++++++------ .../integration-test/build.gradle.kts | 11 +++++-- 9 files changed, 72 insertions(+), 46 deletions(-) diff --git a/catalogs/catalog-hive/build.gradle.kts b/catalogs/catalog-hive/build.gradle.kts index dba710c5876..7c880de803e 100644 --- a/catalogs/catalog-hive/build.gradle.kts +++ b/catalogs/catalog-hive/build.gradle.kts @@ -49,12 +49,17 @@ dependencies { implementation(libs.commons.io) implementation(libs.commons.lang3) implementation(libs.guava) - implementation(libs.hadoop2.auth) { + implementation(libs.hadoop3.auth) { exclude("*") } - implementation(libs.hadoop2.common) { + implementation(libs.hadoop3.common) { exclude("*") } + // Hadoop 3.x runtime requirements (stripped by exclude("*") above) + implementation(libs.hadoop3.shaded.guava) + implementation(libs.hadoop3.shaded.protobuf) + implementation("org.apache.commons:commons-configuration2:2.8.0") + implementation(libs.re2j) implementation(libs.htrace.core4) implementation(libs.slf4j.api) implementation(libs.woodstox.core) @@ -89,14 +94,19 @@ dependencies { testImplementation(libs.bundles.jersey) testImplementation(libs.bundles.jetty) testImplementation(libs.bundles.log4j) - testImplementation(libs.hadoop2.aws) - testImplementation(libs.hadoop2.common) { + testImplementation(libs.hadoop3.aws) + testImplementation(libs.hadoop3.common) { 
exclude("*") } - testImplementation(libs.hadoop2.hdfs) - testImplementation(libs.hadoop2.mapreduce.client.core) { + testImplementation(libs.hadoop3.hdfs) + testImplementation(libs.hadoop3.mapreduce.client.core) { exclude("*") } + // Hadoop 3.x runtime requirements (stripped by exclude("*") above) + testImplementation(libs.hadoop3.shaded.guava) + testImplementation(libs.hadoop3.shaded.protobuf) + testImplementation("org.apache.commons:commons-configuration2:2.8.0") + testImplementation(libs.re2j) testImplementation(libs.hadoop3.abs) testImplementation(libs.hadoop3.gcs) testImplementation(libs.hive2.common) { @@ -112,14 +122,6 @@ dependencies { testImplementation(libs.testcontainers.localstack) testImplementation(libs.testcontainers.mysql) - // You need this to run test CatalogHiveABSIT as it required hadoop3 environment introduced by hadoop3.abs - // (The protocol `abfss` was first introduced in Hadoop 3.2.0), However, as the there already exists - // hadoop2.common in the test classpath, If we added the following dependencies directly, it will - // cause the conflict between hadoop2 and hadoop3, resulting test failures, so we comment the - // following line temporarily, if you want to run the test, please uncomment it. - // In the future, we may need to refactor the test to avoid the conflict. 
- // testImplementation(libs.hadoop3.common) - testRuntimeOnly(libs.junit.jupiter.engine) } diff --git a/catalogs/catalog-lakehouse-hudi/build.gradle.kts b/catalogs/catalog-lakehouse-hudi/build.gradle.kts index e50fed07784..be7b1026a5f 100644 --- a/catalogs/catalog-lakehouse-hudi/build.gradle.kts +++ b/catalogs/catalog-lakehouse-hudi/build.gradle.kts @@ -48,12 +48,17 @@ dependencies { implementation(libs.commons.io) implementation(libs.commons.lang3) implementation(libs.guava) - implementation(libs.hadoop2.auth) { + implementation(libs.hadoop3.auth) { exclude("*") } - implementation(libs.hadoop2.common) { + implementation(libs.hadoop3.common) { exclude("*") } + // Hadoop 3.x runtime requirements (stripped by exclude("*") above) + implementation(libs.hadoop3.shaded.guava) + implementation(libs.hadoop3.shaded.protobuf) + implementation("org.apache.commons:commons-configuration2:2.8.0") + implementation(libs.re2j) implementation(libs.htrace.core4) implementation(libs.slf4j.api) implementation(libs.woodstox.core) @@ -113,13 +118,18 @@ dependencies { testImplementation(libs.datanucleus.jdo) testImplementation(libs.datanucleus.rdbms) testImplementation(libs.derby) - testImplementation(libs.hadoop2.auth) { + testImplementation(libs.hadoop3.auth) { exclude("*") } - testImplementation(libs.hadoop2.hdfs) - testImplementation(libs.hadoop2.mapreduce.client.core) { + testImplementation(libs.hadoop3.hdfs) + testImplementation(libs.hadoop3.mapreduce.client.core) { exclude("*") } + // Hadoop 3.x runtime requirements (stripped by exclude("*") above) + testImplementation(libs.hadoop3.shaded.guava) + testImplementation(libs.hadoop3.shaded.protobuf) + testImplementation("org.apache.commons:commons-configuration2:2.8.0") + testImplementation(libs.re2j) testImplementation(libs.htrace.core4) testImplementation(libs.junit.jupiter.api) testImplementation(libs.mysql.driver) diff --git a/catalogs/catalog-lakehouse-paimon/build.gradle.kts b/catalogs/catalog-lakehouse-paimon/build.gradle.kts 
index 903814bd1c4..183b6e3379c 100644 --- a/catalogs/catalog-lakehouse-paimon/build.gradle.kts +++ b/catalogs/catalog-lakehouse-paimon/build.gradle.kts @@ -103,7 +103,7 @@ dependencies { } implementation(libs.commons.lang3) implementation(libs.guava) - implementation(libs.hadoop2.common) { + implementation(libs.hadoop3.common) { exclude("com.github.spotbugs") exclude("com.sun.jersey") exclude("javax.servlet") @@ -113,16 +113,16 @@ dependencies { exclude("org.apache.zookeeper") exclude("org.mortbay.jetty") } - implementation(libs.hadoop2.hdfs) { + implementation(libs.hadoop3.hdfs) { exclude("*") } - implementation(libs.hadoop2.hdfs.client) { + implementation(libs.hadoop3.hdfs.client) { exclude("com.sun.jersey") exclude("javax.servlet") exclude("org.fusesource.leveldbjni") exclude("org.mortbay.jetty") } - implementation(libs.hadoop2.mapreduce.client.core) { + implementation(libs.hadoop3.mapreduce.client.core) { exclude("*") } diff --git a/catalogs/hive-metastore-common/build.gradle.kts b/catalogs/hive-metastore-common/build.gradle.kts index aee7e7b8128..a5ec2db0163 100644 --- a/catalogs/hive-metastore-common/build.gradle.kts +++ b/catalogs/hive-metastore-common/build.gradle.kts @@ -55,15 +55,20 @@ dependencies { testImplementation(libs.datanucleus.jdo) testImplementation(libs.datanucleus.rdbms) testImplementation(libs.derby) - testImplementation(libs.hadoop2.auth) { + testImplementation(libs.hadoop3.auth) { exclude("*") } - testImplementation(libs.hadoop2.common) { + testImplementation(libs.hadoop3.common) { exclude("*") } - testImplementation(libs.hadoop2.mapreduce.client.core) { + testImplementation(libs.hadoop3.mapreduce.client.core) { exclude("*") } + // Hadoop 3.x runtime requirements (stripped by exclude("*") above) + testImplementation(libs.hadoop3.shaded.guava) + testImplementation(libs.hadoop3.shaded.protobuf) + testImplementation("org.apache.commons:commons-configuration2:2.8.0") + testImplementation(libs.re2j) testImplementation(libs.hive2.exec) { artifact 
{ classifier = "core" diff --git a/catalogs/hive-metastore2-libs/build.gradle.kts b/catalogs/hive-metastore2-libs/build.gradle.kts index c16395e77d6..4f3092aa245 100644 --- a/catalogs/hive-metastore2-libs/build.gradle.kts +++ b/catalogs/hive-metastore2-libs/build.gradle.kts @@ -29,7 +29,7 @@ plugins { // Guava and Logback are excluded because they are provided by the Gravitino runtime classpath. dependencies { - implementation(libs.hadoop2.common) { + implementation(libs.hadoop3.common) { exclude(group = "ch.qos.logback") exclude(group = "com.fasterxml.jackson.core") exclude(group = "com.github.spotbugs") @@ -43,7 +43,7 @@ dependencies { exclude(group = "org.eclipse.jetty.orbit", module = "javax.servlet") exclude(group = "org.slf4j") } - implementation(libs.hadoop2.mapreduce.client.core) { + implementation(libs.hadoop3.mapreduce.client.core) { exclude(group = "com.github.spotbugs") exclude(group = "com.google.code.findbugs") exclude(group = "com.google.guava") diff --git a/catalogs/hive-metastore3-libs/build.gradle.kts b/catalogs/hive-metastore3-libs/build.gradle.kts index bfb06c7d8d1..6ed634344ae 100644 --- a/catalogs/hive-metastore3-libs/build.gradle.kts +++ b/catalogs/hive-metastore3-libs/build.gradle.kts @@ -29,7 +29,7 @@ plugins { // Guava and Logback are excluded because they are provided by the Gravitino runtime classpath. 
dependencies { - implementation(libs.hadoop2.common) { + implementation(libs.hadoop3.common) { exclude(group = "ch.qos.logback") exclude(group = "com.fasterxml.jackson.core") exclude(group = "com.github.spotbugs") diff --git a/flink-connector/flink/build.gradle.kts b/flink-connector/flink/build.gradle.kts index da01614f51b..6288772d70e 100644 --- a/flink-connector/flink/build.gradle.kts +++ b/flink-connector/flink/build.gradle.kts @@ -129,10 +129,10 @@ dependencies { exclude("org.slf4j") } - testImplementation(libs.hadoop2.common) { + testImplementation(libs.hadoop3.common) { exclude("*") } - testImplementation(libs.hadoop2.hdfs) { + testImplementation(libs.hadoop3.hdfs) { exclude("com.sun.jersey") exclude("commons-cli", "commons-cli") exclude("commons-io", "commons-io") @@ -141,9 +141,14 @@ dependencies { exclude("javax.servlet", "servlet-api") exclude("org.mortbay.jetty") } - testImplementation(libs.hadoop2.mapreduce.client.core) { + testImplementation(libs.hadoop3.mapreduce.client.core) { exclude("*") } + // Hadoop 3.x runtime requirements (stripped by exclude("*") above) + testImplementation(libs.hadoop3.shaded.guava) + testImplementation(libs.hadoop3.shaded.protobuf) + testImplementation("org.apache.commons:commons-configuration2:2.8.0") + testImplementation(libs.re2j) testImplementation(libs.hive2.common) { exclude("org.eclipse.jetty.aggregate", "jetty-all") exclude("org.eclipse.jetty.orbit", "javax.servlet") diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 5fa2a69779d..361bb785c64 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -38,12 +38,11 @@ airlift-json = "237" airlift-resolver = "1.6" hive2 = "2.3.9" hive3 = "3.1.3" -hadoop2 = "2.10.2" -hadoop3 = "3.3.1" +hadoop3 = "3.3.6" hadoop3-gcs = "1.9.4-hadoop3" -hadoop3-abs = "3.3.1" -hadoop3-aliyun = "3.3.1" -hadoop-minikdc = "3.3.1" +hadoop3-abs = "3.3.6" +hadoop3-aliyun = "3.3.6" +hadoop-minikdc = "3.3.6" htrace-core4 = "4.1.0-incubating" httpclient = "4.4.1" 
httpclient5 = "5.4.4" @@ -197,22 +196,22 @@ hive2-common = { group = "org.apache.hive", name = "hive-common", version.ref = hive2-jdbc = { group = "org.apache.hive", name = "hive-jdbc", version.ref = "hive2"} hive3-metastore = { group = "org.apache.hive", name = "hive-metastore", version.ref = "hive3"} hive3-common = { group = "org.apache.hive", name = "hive-common", version.ref = "hive3"} -hadoop2-auth = { group = "org.apache.hadoop", name = "hadoop-auth", version.ref = "hadoop2" } -hadoop2-hdfs = { group = "org.apache.hadoop", name = "hadoop-hdfs", version.ref = "hadoop2" } -hadoop2-hdfs-client = { group = "org.apache.hadoop", name = "hadoop-hdfs-client", version.ref = "hadoop2" } -hadoop2-common = { group = "org.apache.hadoop", name = "hadoop-common", version.ref = "hadoop2"} -hadoop2-mapreduce-client-core = { group = "org.apache.hadoop", name = "hadoop-mapreduce-client-core", version.ref = "hadoop2"} -hadoop2-aws = { group = "org.apache.hadoop", name = "hadoop-aws", version.ref = "hadoop2"} hadoop3-aws = { group = "org.apache.hadoop", name = "hadoop-aws", version.ref = "hadoop3"} +hadoop3-auth = { group = "org.apache.hadoop", name = "hadoop-auth", version.ref = "hadoop3" } hadoop3-hdfs = { group = "org.apache.hadoop", name = "hadoop-hdfs", version.ref = "hadoop3" } +hadoop3-hdfs-client = { group = "org.apache.hadoop", name = "hadoop-hdfs-client", version.ref = "hadoop3" } hadoop3-common = { group = "org.apache.hadoop", name = "hadoop-common", version.ref = "hadoop3"} hadoop3-client = { group = "org.apache.hadoop", name = "hadoop-client", version.ref = "hadoop3"} hadoop3-client-api = { group = "org.apache.hadoop", name = "hadoop-client-api", version.ref = "hadoop3"} hadoop3-client-runtime = { group = "org.apache.hadoop", name = "hadoop-client-runtime", version.ref = "hadoop3"} +hadoop3-mapreduce-client-core = { group = "org.apache.hadoop", name = "hadoop-mapreduce-client-core", version.ref = "hadoop3"} hadoop3-minicluster = { group = "org.apache.hadoop", name = 
"hadoop-minicluster", version.ref = "hadoop-minikdc"} hadoop3-gcs = { group = "com.google.cloud.bigdataoss", name = "gcs-connector", version.ref = "hadoop3-gcs"} hadoop3-oss = { group = "org.apache.hadoop", name = "hadoop-aliyun", version.ref = "hadoop3-aliyun"} hadoop3-abs = { group = "org.apache.hadoop", name = "hadoop-azure", version.ref = "hadoop3-abs"} +hadoop3-shaded-guava = { group = "org.apache.hadoop.thirdparty", name = "hadoop-shaded-guava", version = "1.1.1" } +hadoop3-shaded-protobuf = { group = "org.apache.hadoop.thirdparty", name = "hadoop-shaded-protobuf_3_7", version = "1.1.1" } +re2j = { group = "com.google.re2j", name = "re2j", version = "1.7" } htrace-core4 = { group = "org.apache.htrace", name = "htrace-core4", version.ref = "htrace-core4" } airlift-json = { group = "io.airlift", name = "json", version.ref = "airlift-json"} airlift-resolver = { group = "io.airlift.resolver", name = "resolver", version.ref = "airlift-resolver"} diff --git a/trino-connector/integration-test/build.gradle.kts b/trino-connector/integration-test/build.gradle.kts index bb15ef356e5..9cf70199c30 100644 --- a/trino-connector/integration-test/build.gradle.kts +++ b/trino-connector/integration-test/build.gradle.kts @@ -41,13 +41,18 @@ dependencies { testImplementation(libs.bundles.jetty) testImplementation(libs.bundles.log4j) testImplementation(libs.commons.cli) - testImplementation(libs.hadoop2.common) { + testImplementation(libs.hadoop3.common) { exclude("*") } - testImplementation(libs.hadoop2.hdfs) - testImplementation(libs.hadoop2.mapreduce.client.core) { + testImplementation(libs.hadoop3.hdfs) + testImplementation(libs.hadoop3.mapreduce.client.core) { exclude("*") } + // Hadoop 3.x runtime requirements (stripped by exclude("*") above) + testImplementation(libs.hadoop3.shaded.guava) + testImplementation(libs.hadoop3.shaded.protobuf) + testImplementation("org.apache.commons:commons-configuration2:2.8.0") + testImplementation(libs.re2j) 
testImplementation(libs.hive2.common) { exclude("org.eclipse.jetty.aggregate", "jetty-all") exclude("org.eclipse.jetty.orbit", "javax.servlet") From fa70c78f5179078d63eabf8e5c6fb480ed8c4460 Mon Sep 17 00:00:00 2001 From: geyanggang Date: Thu, 16 Apr 2026 11:52:30 +0800 Subject: [PATCH 2/3] fix: add missing hadoop-hdfs-client dependencies for Hadoop 3.3.6(provided) --- authorizations/authorization-chain/build.gradle.kts | 1 + authorizations/authorization-ranger/build.gradle.kts | 1 + catalogs/catalog-hive/build.gradle.kts | 1 + catalogs/catalog-lakehouse-hudi/build.gradle.kts | 1 + clients/filesystem-hadoop3/build.gradle.kts | 1 + flink-connector/flink/build.gradle.kts | 1 + trino-connector/integration-test/build.gradle.kts | 1 + 7 files changed, 7 insertions(+) diff --git a/authorizations/authorization-chain/build.gradle.kts b/authorizations/authorization-chain/build.gradle.kts index cf19b238346..8ab42a9e4db 100644 --- a/authorizations/authorization-chain/build.gradle.kts +++ b/authorizations/authorization-chain/build.gradle.kts @@ -104,6 +104,7 @@ dependencies { exclude("javax.servlet", "servlet-api") exclude("io.netty") } + testImplementation(libs.hadoop3.hdfs.client) } tasks { diff --git a/authorizations/authorization-ranger/build.gradle.kts b/authorizations/authorization-ranger/build.gradle.kts index 4b6ee3b7c47..506466fa388 100644 --- a/authorizations/authorization-ranger/build.gradle.kts +++ b/authorizations/authorization-ranger/build.gradle.kts @@ -105,6 +105,7 @@ dependencies { exclude("javax.servlet", "servlet-api") exclude("io.netty") } + testImplementation(libs.hadoop3.hdfs.client) testImplementation("org.apache.iceberg:iceberg-spark-runtime-${sparkMajorVersion}_$scalaVersion:$icebergVersion") testImplementation("org.apache.paimon:paimon-spark-$sparkMajorVersion:$paimonVersion") } diff --git a/catalogs/catalog-hive/build.gradle.kts b/catalogs/catalog-hive/build.gradle.kts index 7c880de803e..3f6b387a07d 100644 --- a/catalogs/catalog-hive/build.gradle.kts 
+++ b/catalogs/catalog-hive/build.gradle.kts @@ -99,6 +99,7 @@ dependencies { exclude("*") } testImplementation(libs.hadoop3.hdfs) + testImplementation(libs.hadoop3.hdfs.client) testImplementation(libs.hadoop3.mapreduce.client.core) { exclude("*") } diff --git a/catalogs/catalog-lakehouse-hudi/build.gradle.kts b/catalogs/catalog-lakehouse-hudi/build.gradle.kts index be7b1026a5f..cbb3cfe99b1 100644 --- a/catalogs/catalog-lakehouse-hudi/build.gradle.kts +++ b/catalogs/catalog-lakehouse-hudi/build.gradle.kts @@ -122,6 +122,7 @@ dependencies { exclude("*") } testImplementation(libs.hadoop3.hdfs) + testImplementation(libs.hadoop3.hdfs.client) testImplementation(libs.hadoop3.mapreduce.client.core) { exclude("*") } diff --git a/clients/filesystem-hadoop3/build.gradle.kts b/clients/filesystem-hadoop3/build.gradle.kts index 48c2d9b2c6a..76000c6abe8 100644 --- a/clients/filesystem-hadoop3/build.gradle.kts +++ b/clients/filesystem-hadoop3/build.gradle.kts @@ -68,6 +68,7 @@ dependencies { exclude("javax.servlet", "servlet-api") exclude("io.netty") } + testImplementation(libs.hadoop3.hdfs.client) testImplementation(libs.httpclient5) testImplementation(libs.javax.jaxb.api) { exclude("*") diff --git a/flink-connector/flink/build.gradle.kts b/flink-connector/flink/build.gradle.kts index 6288772d70e..f1a8735224b 100644 --- a/flink-connector/flink/build.gradle.kts +++ b/flink-connector/flink/build.gradle.kts @@ -141,6 +141,7 @@ dependencies { exclude("javax.servlet", "servlet-api") exclude("org.mortbay.jetty") } + testImplementation(libs.hadoop3.hdfs.client) testImplementation(libs.hadoop3.mapreduce.client.core) { exclude("*") } diff --git a/trino-connector/integration-test/build.gradle.kts b/trino-connector/integration-test/build.gradle.kts index 9cf70199c30..cc1722c06cf 100644 --- a/trino-connector/integration-test/build.gradle.kts +++ b/trino-connector/integration-test/build.gradle.kts @@ -45,6 +45,7 @@ dependencies { exclude("*") } testImplementation(libs.hadoop3.hdfs) + 
testImplementation(libs.hadoop3.hdfs.client) testImplementation(libs.hadoop3.mapreduce.client.core) { exclude("*") } From c7b35847ff600cb45973dad67164ae7073e3b048 Mon Sep 17 00:00:00 2001 From: geyanggang Date: Thu, 16 Apr 2026 22:31:13 +0800 Subject: [PATCH 3/3] fix(hudi): disable hoodie.metadata.enable in HudiCatalogHMSIT to avoid Hudi 0.15.0 + Hadoop 3 incompatibility --- .../lakehouse/hudi/integration/test/HudiCatalogHMSIT.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/catalogs/catalog-lakehouse-hudi/src/test/java/org/apache/gravitino/catalog/lakehouse/hudi/integration/test/HudiCatalogHMSIT.java b/catalogs/catalog-lakehouse-hudi/src/test/java/org/apache/gravitino/catalog/lakehouse/hudi/integration/test/HudiCatalogHMSIT.java index c893997b4cf..dca8dbbc40b 100644 --- a/catalogs/catalog-lakehouse-hudi/src/test/java/org/apache/gravitino/catalog/lakehouse/hudi/integration/test/HudiCatalogHMSIT.java +++ b/catalogs/catalog-lakehouse-hudi/src/test/java/org/apache/gravitino/catalog/lakehouse/hudi/integration/test/HudiCatalogHMSIT.java @@ -469,6 +469,10 @@ private static void createHudiTables() { "org.apache.spark.sql.hudi.catalog.HoodieCatalog") .config("spark.kryo.registrator", "org.apache.spark.HoodieSparkKryoRegistrar") .config("dfs.replication", "1") + // Disable Hudi metadata table to avoid NoSuchMethodError caused by Hudi 0.15.0's + // shaded HBase code (HoodieHFileReader) being incompatible with hadoop-hdfs-client + // 3.3.6. See: https://github.com/apache/hudi/issues/5765 + .config("hoodie.metadata.enable", "false") .enableHiveSupport() .getOrCreate();