diff --git a/python/pyspark/install.py b/python/pyspark/install.py index f7d44d3b942af..55f3b0c2d8bcf 100644 --- a/python/pyspark/install.py +++ b/python/pyspark/install.py @@ -68,7 +68,7 @@ def checked_versions( fully-qualified versions of Spark, Hadoop and Hive in a tuple. For example, spark-3.2.0, hadoop3 and hive2.3. """ - if re.match("^[0-9]+\\.[0-9]+\\.[0-9]+$", spark_version): + if re.match("^[0-9]+\\.[0-9]+\\.[0-9]+(?:\\.dev[0-9]+)?$", spark_version): spark_version = "spark-%s" % spark_version if not spark_version.startswith("spark-"): raise RuntimeError( @@ -106,7 +106,9 @@ def convert_old_hadoop_version(spark_version: str, hadoop_version: str) -> str: "without": "without", "without-hadoop": "without-hadoop", } - spark_version_parts = re.search("^spark-([0-9]+)\\.([0-9]+)\\.[0-9]+$", spark_version) + spark_version_parts = re.search( + "^spark-([0-9]+)\\.([0-9]+)\\.[0-9]+(?:\\.dev[0-9]+)?$", spark_version + ) assert spark_version_parts is not None spark_major_version = int(spark_version_parts.group(1)) spark_minor_version = int(spark_version_parts.group(2)) diff --git a/python/pyspark/tests/test_install_spark.py b/python/pyspark/tests/test_install_spark.py index b977fdf510248..ca4ea60fb1144 100644 --- a/python/pyspark/tests/test_install_spark.py +++ b/python/pyspark/tests/test_install_spark.py @@ -106,6 +106,17 @@ def test_checked_versions(self): checked_versions("spark-3.3.0", "hadoop3", "hive2.3"), ) + # Prerelease version (e.g. pip dev builds) + self.assertEqual( + ("spark-4.2.0.dev4", "hadoop3", "hive2.3"), + checked_versions("4.2.0.dev4", "3", "2.3"), + ) + + self.assertEqual( + ("spark-4.2.0.dev4", "hadoop3", "hive2.3"), + checked_versions("spark-4.2.0.dev4", "hadoop3", "hive2.3"), + ) + # Negative test cases for hadoop_version, hive_version in UNSUPPORTED_COMBINATIONS: with self.assertRaisesRegex(RuntimeError, "Hive.*should.*Hadoop"):