Skip to content

Commit 3167692

Browse files
turboFeipan3793
authored andcommitted
[KYUUBI #6829] Add metrics for batch pending max elapse time
### Why are the changes needed? 1. add metrics `kyuubi.operartion.batch_pending_max_elapse` for the batch pending max elapse time, which is helpful for batch health monitoring, and we can send alert if the batch pending elapse time too long 2. For `GET /api/v1/batches` api, limit the max time window for listing batches, which is helpful that, we want to reserve more metadata in kyuubi server end, for example: 90 days, but for list batches, we just want to allow user to search the last 7 days. It is optional. And if `create_time` is specified, order by `create_time` instead of `key_id`. https://github.com/apache/kyuubi/blob/68a6f48da53dd0ad2e20b450a41ca600b8c1e1d2/kyuubi-server/src/main/resources/sql/mysql/metadata-store-schema-1.8.0.mysql.sql#L32 ### How was this patch tested? GA. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #6829 from turboFei/batch_pending_time. Closes #6829 ee4f931 [Wang, Fei] docs bf8169a [Wang, Fei] comments f493a2a [Wang, Fei] new config ab7b6db [Wang, Fei] ut 1680175 [Wang, Fei] in memory session 510a30b [Wang, Fei] batchSearchWindow opt 1e93dd2 [Wang, Fei] save Authored-by: Wang, Fei <[email protected]> Signed-off-by: Cheng Pan <[email protected]>
1 parent f2cacd3 commit 3167692

File tree

11 files changed

+125
-76
lines changed

11 files changed

+125
-76
lines changed

docs/configuration/settings.md

+19-18
Large diffs are not rendered by default.

docs/monitor/metrics.md

+51-50
Large diffs are not rendered by default.

kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala

+13
Original file line numberDiff line numberDiff line change
@@ -2023,6 +2023,19 @@ object KyuubiConf {
20232023
.intConf
20242024
.createWithDefault(65536)
20252025

2026+
val METADATA_SEARCH_WINDOW: OptionalConfigEntry[Long] =
2027+
buildConf("kyuubi.metadata.search.window")
2028+
.doc("The time window to restrict user queries to metadata within a specific period, " +
2029+
"starting from the current time to the past. It only affects `GET /api/v1/batches` API. " +
2030+
"You may want to set this to short period to improve query performance and reduce load " +
2031+
"on the metadata store when administer want to reserve the metadata for long time. " +
2032+
"The side-effects is that, the metadata created outside the window will not be " +
2033+
"invisible to users. If it is undefined, all metadata will be visible for users.")
2034+
.version("1.10.1")
2035+
.timeConf
2036+
.checkValue(_ > 0, "must be positive number")
2037+
.createOptional
2038+
20262039
val ENGINE_EXEC_WAIT_QUEUE_SIZE: ConfigEntry[Int] =
20272040
buildConf("kyuubi.backend.engine.exec.pool.wait.queue.size")
20282041
.doc("Size of the wait queue for the operation execution thread pool in SQL engine" +

kyuubi-metrics/src/main/scala/org/apache/kyuubi/metrics/MetricsConstants.scala

+1
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ object MetricsConstants {
6464
final val OPERATION_TOTAL: String = OPERATION + "total"
6565
final val OPERATION_STATE: String = OPERATION + "state"
6666
final val OPERATION_EXEC_TIME: String = OPERATION + "exec_time"
67+
final val OPERATION_BATCH_PENDING_MAX_ELAPSE: String = OPERATION + "batch_pending_max_elapse"
6768

6869
final private val BACKEND_SERVICE = KYUUBI + "backend_service."
6970
final val BS_FETCH_LOG_ROWS_RATE = BACKEND_SERVICE + "fetch_log_rows_rate"

kyuubi-server/src/main/scala/org/apache/kyuubi/operation/BatchJobSubmission.scala

+8
Original file line numberDiff line numberDiff line change
@@ -424,6 +424,14 @@ class BatchJobSubmission(
424424
Utils.deleteDirectoryRecursively(session.resourceUploadFolderPath.toFile)
425425
}
426426
}
427+
428+
def getPendingElapsedTime: Long = {
429+
if (state == OperationState.PENDING) {
430+
System.currentTimeMillis() - createTime
431+
} else {
432+
0L
433+
}
434+
}
427435
}
428436

429437
object BatchJobSubmission {

kyuubi-server/src/main/scala/org/apache/kyuubi/server/KyuubiRestFrontendService.scala

+14-1
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,14 @@ import org.eclipse.jetty.servlet.{ErrorPageErrorHandler, FilterHolder}
3131
import org.apache.kyuubi.{KyuubiException, Utils}
3232
import org.apache.kyuubi.config.KyuubiConf
3333
import org.apache.kyuubi.config.KyuubiConf._
34+
import org.apache.kyuubi.metrics.MetricsConstants.OPERATION_BATCH_PENDING_MAX_ELAPSE
35+
import org.apache.kyuubi.metrics.MetricsSystem
3436
import org.apache.kyuubi.server.api.v1.ApiRootResource
3537
import org.apache.kyuubi.server.http.authentication.{AuthenticationFilter, KyuubiHttpAuthenticationFactory}
3638
import org.apache.kyuubi.server.ui.{JettyServer, JettyUtils}
3739
import org.apache.kyuubi.service.{AbstractFrontendService, Serverable, Service, ServiceUtils}
3840
import org.apache.kyuubi.service.authentication.{AuthTypes, AuthUtils}
39-
import org.apache.kyuubi.session.{KyuubiSessionManager, SessionHandle}
41+
import org.apache.kyuubi.session.{KyuubiBatchSession, KyuubiSessionManager, SessionHandle}
4042
import org.apache.kyuubi.util.{JavaUtils, ThreadUtils}
4143
import org.apache.kyuubi.util.ThreadUtils.scheduleTolerableRunnableWithFixedDelay
4244

@@ -202,6 +204,14 @@ class KyuubiRestFrontendService(override val serverable: Serverable)
202204
}
203205
}
204206

207+
private def getBatchPendingMaxElapse(): Long = {
208+
val batchPendingElapseTimes = sessionManager.allSessions().map {
209+
case session: KyuubiBatchSession => session.batchJobSubmissionOp.getPendingElapsedTime
210+
case _ => 0L
211+
}
212+
if (batchPendingElapseTimes.isEmpty) 0L else batchPendingElapseTimes.max
213+
}
214+
205215
def waitForServerStarted(): Unit = {
206216
// block until the HTTP server is started, otherwise, we may get
207217
// the wrong HTTP server port -1
@@ -220,6 +230,9 @@ class KyuubiRestFrontendService(override val serverable: Serverable)
220230
isStarted.set(true)
221231
startBatchChecker()
222232
recoverBatchSessions()
233+
MetricsSystem.tracing { ms =>
234+
ms.registerGauge(OPERATION_BATCH_PENDING_MAX_ELAPSE, getBatchPendingMaxElapse, 0)
235+
}
223236
} catch {
224237
case e: Exception => throw new KyuubiException(s"Cannot start $getName", e)
225238
}

kyuubi-server/src/main/scala/org/apache/kyuubi/server/api/v1/BatchesResource.scala

+4-1
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ private[v1] class BatchesResource extends ApiRequestContext with Logging {
6767
fe.getConf.get(ENGINE_SECURITY_ENABLED)
6868
private lazy val resourceFileMaxSize = fe.getConf.get(BATCH_RESOURCE_FILE_MAX_SIZE)
6969
private lazy val extraResourceFileMaxSize = fe.getConf.get(BATCH_EXTRA_RESOURCE_FILE_MAX_SIZE)
70+
private lazy val metadataSearchWindow = fe.getConf.get(METADATA_SEARCH_WINDOW)
7071

7172
private def batchV2Enabled(reqConf: Map[String, String]): Boolean = {
7273
fe.getConf.get(BATCH_SUBMITTER_ENABLED) &&
@@ -420,13 +421,15 @@ private[v1] class BatchesResource extends ApiRequestContext with Logging {
420421
s"The valid batch state can be one of the following: ${VALID_BATCH_STATES.mkString(",")}")
421422
}
422423

424+
val createTimeFilter =
425+
math.max(createTime, metadataSearchWindow.map(System.currentTimeMillis() - _).getOrElse(0L))
423426
val filter = MetadataFilter(
424427
sessionType = SessionType.BATCH,
425428
engineType = batchType,
426429
username = batchUser,
427430
state = batchState,
428431
requestName = batchName,
429-
createTime = createTime,
432+
createTime = createTimeFilter,
430433
endTime = endTime)
431434
val batches = sessionManager.getBatchesFromMetadataStore(filter, from, size, desc)
432435
new GetBatchesResponse(from, batches.size, batches.asJava)

kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/MetadataManager.scala

+7-2
Original file line numberDiff line numberDiff line change
@@ -139,8 +139,13 @@ class MetadataManager extends AbstractService("MetadataManager") {
139139
from: Int,
140140
size: Int,
141141
desc: Boolean = false): Seq[Batch] = {
142-
withMetadataRequestMetrics(_metadataStore.getMetadataList(filter, from, size, desc)).map(
143-
buildBatch)
142+
withMetadataRequestMetrics(_metadataStore.getMetadataList(
143+
filter,
144+
from,
145+
size,
146+
// if create_file field is set, order by create_time, which is faster, otherwise by key_id
147+
orderBy = if (filter.createTime > 0) Some("create_time") else Some("key_id"),
148+
direction = if (desc) "DESC" else "ASC")).map(buildBatch)
144149
}
145150

146151
def countBatch(

kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/MetadataStore.scala

+4-1
Original file line numberDiff line numberDiff line change
@@ -58,13 +58,16 @@ trait MetadataStore extends Closeable {
5858
* @param from the metadata offset.
5959
* @param size the size to get.
6060
* @param desc the order of metadata list.
61+
* @param orderBy the order by column, default is the auto increment primary key, `key_id`.
62+
* @param direction the order direction, default is `ASC`.
6163
* @return selected metadata list.
6264
*/
6365
def getMetadataList(
6466
filter: MetadataFilter,
6567
from: Int,
6668
size: Int,
67-
desc: Boolean = false): Seq[Metadata]
69+
orderBy: Option[String] = Some("key_id"),
70+
direction: String = "ASC"): Seq[Metadata]
6871

6972
/**
7073
* Count the metadata list with filter conditions.

kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStore.scala

+3-3
Original file line numberDiff line numberDiff line change
@@ -257,15 +257,15 @@ class JDBCMetadataStore(conf: KyuubiConf) extends MetadataStore with Logging {
257257
filter: MetadataFilter,
258258
from: Int,
259259
size: Int,
260-
desc: Boolean = false): Seq[Metadata] = {
260+
orderBy: Option[String] = Some("key_id"),
261+
direction: String = "ASC"): Seq[Metadata] = {
261262
val queryBuilder = new StringBuilder
262263
val params = ListBuffer[Any]()
263264
queryBuilder.append("SELECT ")
264265
queryBuilder.append(METADATA_COLUMNS)
265266
queryBuilder.append(s" FROM $METADATA_TABLE")
266267
queryBuilder.append(s" ${assembleWhereClause(filter, params)}")
267-
queryBuilder.append(" ORDER BY key_id ")
268-
queryBuilder.append(if (desc) "DESC " else "ASC ")
268+
orderBy.foreach(o => queryBuilder.append(s" ORDER BY $o $direction "))
269269
queryBuilder.append(dialect.limitClause(size, from))
270270
val query = queryBuilder.toString
271271
JdbcUtils.withConnection { connection =>

kyuubi-server/src/test/scala/org/apache/kyuubi/server/api/v1/BatchesResourceSuite.scala

+1
Original file line numberDiff line numberDiff line change
@@ -486,6 +486,7 @@ abstract class BatchesResourceSuiteBase extends KyuubiFunSuite
486486
.queryParam("from", "0")
487487
.queryParam("size", "1")
488488
.queryParam("desc", "true")
489+
.queryParam("createTime", "1")
489490
.request(MediaType.APPLICATION_JSON_TYPE)
490491
.header(AUTHORIZATION_HEADER, basicAuthorizationHeader("anonymous"))
491492
.get()

0 commit comments

Comments
 (0)