From 48370b52dfc836358c5f2a7dce6e9d81989273dc Mon Sep 17 00:00:00 2001 From: Shreck Ye Date: Sat, 30 Dec 2023 17:50:08 +0700 Subject: [PATCH] Replace calls of `toList()` on `Array`s with `asList()` to improve performance of the affected functions by avoiding one copy of the array elements --- .../kotlin/org/jetbrains/kotlinx/spark/api/Dataset.kt | 8 ++++---- .../main/kotlin/org/jetbrains/kotlinx/spark/api/Rdd.kt | 2 +- .../org/jetbrains/kotlinx/spark/api/SparkSession.kt | 2 +- .../org/jetbrains/kotlinx/spark/api/TypeInferenceTest.kt | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/kotlin-spark-api/src/main/kotlin/org/jetbrains/kotlinx/spark/api/Dataset.kt b/kotlin-spark-api/src/main/kotlin/org/jetbrains/kotlinx/spark/api/Dataset.kt index fe936e58..65a54fc7 100644 --- a/kotlin-spark-api/src/main/kotlin/org/jetbrains/kotlinx/spark/api/Dataset.kt +++ b/kotlin-spark-api/src/main/kotlin/org/jetbrains/kotlinx/spark/api/Dataset.kt @@ -61,19 +61,19 @@ inline fun SparkSession.toDF(list: List, vararg colNames: String) * Utility method to create dataset from *array or vararg arguments */ inline fun SparkSession.dsOf(vararg t: T): Dataset = - createDataset(t.toList(), encoder()) + createDataset(t.asList(), encoder()) /** * Utility method to create dataframe from *array or vararg arguments */ inline fun SparkSession.dfOf(vararg t: T): Dataset = - createDataset(t.toList(), encoder()).toDF() + createDataset(t.asList(), encoder()).toDF() /** * Utility method to create dataframe from *array or vararg arguments with given column names */ inline fun SparkSession.dfOf(colNames: Array, vararg t: T): Dataset = - createDataset(t.toList(), encoder()) + createDataset(t.asList(), encoder()) .run { if (colNames.isEmpty()) toDF() else toDF(*colNames) } /** @@ -92,7 +92,7 @@ inline fun List.toDF(spark: SparkSession, vararg colNames: String * Utility method to create dataset from list */ inline fun Array.toDS(spark: SparkSession): Dataset = - 
toList().toDS(spark) + asList().toDS(spark) /** * Utility method to create dataframe from list diff --git a/kotlin-spark-api/src/main/kotlin/org/jetbrains/kotlinx/spark/api/Rdd.kt b/kotlin-spark-api/src/main/kotlin/org/jetbrains/kotlinx/spark/api/Rdd.kt index 8ba90d25..0ab701b4 100644 --- a/kotlin-spark-api/src/main/kotlin/org/jetbrains/kotlinx/spark/api/Rdd.kt +++ b/kotlin-spark-api/src/main/kotlin/org/jetbrains/kotlinx/spark/api/Rdd.kt @@ -11,7 +11,7 @@ import java.io.Serializable fun JavaSparkContext.rddOf( vararg elements: T, numSlices: Int = defaultParallelism(), -): JavaRDD = parallelize(elements.toList(), numSlices) +): JavaRDD = parallelize(elements.asList(), numSlices) /** * Utility method to create an RDD from a list. diff --git a/kotlin-spark-api/src/main/kotlin/org/jetbrains/kotlinx/spark/api/SparkSession.kt b/kotlin-spark-api/src/main/kotlin/org/jetbrains/kotlinx/spark/api/SparkSession.kt index f9a8b079..d2f79aca 100644 --- a/kotlin-spark-api/src/main/kotlin/org/jetbrains/kotlinx/spark/api/SparkSession.kt +++ b/kotlin-spark-api/src/main/kotlin/org/jetbrains/kotlinx/spark/api/SparkSession.kt @@ -114,7 +114,7 @@ class KSparkSession(val spark: SparkSession) { * NOTE: [T] must be [Serializable]. */ fun rddOf(vararg elements: T, numSlices: Int = sc.defaultParallelism()): JavaRDD = - sc.toRDD(elements.toList(), numSlices) + sc.toRDD(elements.asList(), numSlices) /** * A collection of methods for registering user-defined functions (UDF). 
diff --git a/kotlin-spark-api/src/test/kotlin/org/jetbrains/kotlinx/spark/api/TypeInferenceTest.kt b/kotlin-spark-api/src/test/kotlin/org/jetbrains/kotlinx/spark/api/TypeInferenceTest.kt index 1eb54ba4..0d65dafe 100644 --- a/kotlin-spark-api/src/test/kotlin/org/jetbrains/kotlinx/spark/api/TypeInferenceTest.kt +++ b/kotlin-spark-api/src/test/kotlin/org/jetbrains/kotlinx/spark/api/TypeInferenceTest.kt @@ -215,7 +215,7 @@ class TypeInferenceTest : ShouldSpec({ should("generate valid serializer schema") { expect(encoder().schema()) { this - .feature("data type", { this.fields()?.toList() }) { + .feature("data type", { this.fields()?.asList() }) { this.notToEqualNull().toContain.inOrder.only.entry { this .feature("element name", { name() }) { toEqual("optionList") }