From bd06f70ad0bd89b577f1c7a7ad5495c7ead049dd Mon Sep 17 00:00:00 2001 From: titaneric Date: Fri, 10 Oct 2025 17:55:17 +0800 Subject: [PATCH 01/32] publish and subscribe the pod info --- src/sources/kubernetes_logs/mod.rs | 84 +++++++++++++++++++++++++++++- 1 file changed, 83 insertions(+), 1 deletion(-) diff --git a/src/sources/kubernetes_logs/mod.rs b/src/sources/kubernetes_logs/mod.rs index 45f271831b0ac..a2927258fc509 100644 --- a/src/sources/kubernetes_logs/mod.rs +++ b/src/sources/kubernetes_logs/mod.rs @@ -278,6 +278,9 @@ pub struct Config { #[configurable(metadata(docs::type_unit = "seconds"))] #[serde(default = "default_rotate_wait", rename = "rotate_wait_secs")] rotate_wait: Duration, + + /// Whether use k8s logs API or not + api_log: bool, } const fn default_read_from() -> ReadFromConfig { @@ -326,6 +329,7 @@ impl Default for Config { log_namespace: None, internal_metrics: Default::default(), rotate_wait: default_rotate_wait(), + api_log: default_api_log(), } } } @@ -584,6 +588,7 @@ struct Source { delay_deletion: Duration, include_file_metric_tag: bool, rotate_wait: Duration, + api_log: bool, } impl Source { @@ -673,6 +678,7 @@ impl Source { delay_deletion, include_file_metric_tag: config.internal_metrics.include_file_tag, rotate_wait: config.rotate_wait, + api_log: config.api_log, }) } @@ -710,8 +716,11 @@ impl Source { delay_deletion, include_file_metric_tag, rotate_wait, + api_log, } = self; + info!(%api_log); + let mut reflectors = Vec::new(); let pods = Api::::all(client.clone()); @@ -801,7 +810,8 @@ impl Source { exclude_paths, insert_namespace_fields, ); - let annotator = PodMetadataAnnotator::new(pod_state, pod_fields_spec, log_namespace); + let annotator = + PodMetadataAnnotator::new(pod_state.clone(), pod_fields_spec, log_namespace); let ns_annotator = NamespaceMetadataAnnotator::new(ns_state, namespace_fields_spec, log_namespace); let node_annotator = NodeMetadataAnnotator::new(node_state, node_field_spec, log_namespace); @@ -875,6 +885,10 @@ impl Source { let (file_source_tx, file_source_rx) = futures::channel::mpsc::channel::>(2); + // Channel for communication between main task and pod monitoring task + // Similar to Docker logs source pattern: spawned task sends data to main task via channel + let (pod_info_tx, mut pod_info_rx) = tokio::sync::mpsc::unbounded_channel::(); + let checkpoints = checkpointer.view(); let events = file_source_rx.flat_map(futures::stream::iter); let bytes_received = register!(BytesReceived::from(Protocol::HTTP)); @@ -976,6 +990,71 @@ impl Source { }); slot.bind(Box::pin(fut)); } + { + // New task: Pod monitoring task - similar to Docker logs EventStreamBuilder pattern + // This task monitors pod_state changes and publishes pod information via channel + let (slot, shutdown) = lifecycle.add(); + let pod_state_clone = pod_state.clone(); + let fut = async move { + let mut interval = tokio::time::interval(Duration::from_secs(5)); + let mut shutdown = shutdown; + + loop { + tokio::select! 
{ + _ = interval.tick() => { + // Get all current pods + let pods = pod_state_clone.state(); + for pod in pods.iter() { + if let Some(name) = pod.metadata.name.as_ref() { + if let Err(_) = pod_info_tx.send(name.clone()) { + warn!("Failed to send pod info through channel"); + return; + } + } + } + } + _ = &mut shutdown => { + info!("Pod monitoring task shutting down"); + return; + } + } + } + } + .map(|_| { + info!(message = "Pod monitoring task completed gracefully."); + }); + slot.bind(Box::pin(fut)); + } + + // Spawn a task to consume from the pod info channel and print pod names + // Similar to Docker logs main future pattern: main task receives data from spawned tasks + let pod_consumer_task = { + let shutdown_signal = global_shutdown.clone(); + async move { + loop { + tokio::select! { + pod_name = pod_info_rx.recv() => { + match pod_name { + Some(name) => { + info!("Received pod name: {}", name); + } + None => { + info!("Pod info channel closed"); + break; + } + } + } + _ = shutdown_signal.clone() => { + info!("Pod consumer task shutting down"); + break; + } + } + } + } + }; + + // Spawn the consumer task + tokio::spawn(pod_consumer_task); lifecycle.run(global_shutdown).await; // Stop Kubernetes object reflectors to avoid their leak on vector reload. @@ -1094,6 +1173,9 @@ const fn default_delay_deletion_ms() -> Duration { const fn default_rotate_wait() -> Duration { Duration::from_secs(u64::MAX / 2) } +const fn default_api_log() -> bool { + false +} // This function constructs the patterns we include for file watching, created // from the defaults or user provided configuration. From 504e5a941b02724932adbadc0a4d398ccb535fe9 Mon Sep 17 00:00:00 2001 From: titaneric Date: Fri, 10 Oct 2025 18:05:32 +0800 Subject: [PATCH 02/32] refactor pod subscriber and publisher into its own module --- src/sources/kubernetes_logs/mod.rs | 66 +++--------------- src/sources/kubernetes_logs/pod_publisher.rs | 67 +++++++++++++++++++ src/sources/kubernetes_logs/pod_subscriber.rs | 56 ++++++++++++++++ 3 files changed, 134 insertions(+), 55 deletions(-) create mode 100644 src/sources/kubernetes_logs/pod_publisher.rs create mode 100644 src/sources/kubernetes_logs/pod_subscriber.rs diff --git a/src/sources/kubernetes_logs/mod.rs b/src/sources/kubernetes_logs/mod.rs index a2927258fc509..35bfa31a08492 100644 --- a/src/sources/kubernetes_logs/mod.rs +++ b/src/sources/kubernetes_logs/mod.rs @@ -66,13 +66,16 @@ mod parser; mod partial_events_merger; mod path_helpers; mod pod_metadata_annotator; +mod pod_publisher; +mod pod_subscriber; mod transform_utils; mod util; use self::{ namespace_metadata_annotator::NamespaceMetadataAnnotator, node_metadata_annotator::NodeMetadataAnnotator, parser::Parser, - pod_metadata_annotator::PodMetadataAnnotator, + pod_metadata_annotator::PodMetadataAnnotator, pod_publisher::PodPublisher, + pod_subscriber::PodSubscriber, }; /// The `self_node_name` value env var key. 
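The wiring these first two patches describe (a spawned producer task publishing into an unbounded mpsc channel while the consuming side select!s between the channel and shutdown) has the same shape in isolation. A minimal, self-contained sketch follows; the names, the 5-second interval, and the oneshot shutdown stand-in are illustrative only and are not taken from the patch:

use std::time::Duration;
use tokio::sync::{mpsc, oneshot};

#[tokio::main]
async fn main() {
    // Unbounded channel, like the pod_info_tx / pod_info_rx pair above:
    // the producer never blocks on a full buffer.
    let (tx, mut rx) = mpsc::unbounded_channel::<String>();
    let (_shutdown_tx, mut shutdown_rx) = oneshot::channel::<()>();

    // Producer: periodically publishes items, standing in for the pod
    // monitoring task that walks the reflector store on every tick.
    let producer = tokio::spawn(async move {
        let mut interval = tokio::time::interval(Duration::from_secs(5));
        for i in 0..3 {
            interval.tick().await;
            if tx.send(format!("pod-{i}")).is_err() {
                // Receiver dropped; nobody left to publish to.
                return;
            }
        }
        // tx is dropped here, which closes the channel for the consumer.
    });

    // Consumer: drains the channel until it closes or shutdown fires,
    // standing in for the spawned subscriber task.
    loop {
        tokio::select! {
            item = rx.recv() => match item {
                Some(name) => println!("received {name}"),
                None => break, // every sender has been dropped
            },
            _ = &mut shutdown_rx => break,
        }
    }

    producer.await.expect("producer task panicked");
}

An unbounded channel keeps the publishing side from ever blocking inside the reflector loop; the trade-off is that there is no backpressure, so the consumer has to keep up.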
@@ -588,6 +591,8 @@ struct Source { delay_deletion: Duration, include_file_metric_tag: bool, rotate_wait: Duration, + // TODO: This will be used when implementing K8s logs API integration + #[allow(dead_code)] api_log: bool, } @@ -887,7 +892,7 @@ impl Source { // Channel for communication between main task and pod monitoring task // Similar to Docker logs source pattern: spawned task sends data to main task via channel - let (pod_info_tx, mut pod_info_rx) = tokio::sync::mpsc::unbounded_channel::(); + let (pod_info_tx, pod_info_rx) = tokio::sync::mpsc::unbounded_channel::(); let checkpoints = checkpointer.view(); let events = file_source_rx.flat_map(futures::stream::iter); @@ -995,32 +1000,8 @@ impl Source { // This task monitors pod_state changes and publishes pod information via channel let (slot, shutdown) = lifecycle.add(); let pod_state_clone = pod_state.clone(); - let fut = async move { - let mut interval = tokio::time::interval(Duration::from_secs(5)); - let mut shutdown = shutdown; - - loop { - tokio::select! { - _ = interval.tick() => { - // Get all current pods - let pods = pod_state_clone.state(); - for pod in pods.iter() { - if let Some(name) = pod.metadata.name.as_ref() { - if let Err(_) = pod_info_tx.send(name.clone()) { - warn!("Failed to send pod info through channel"); - return; - } - } - } - } - _ = &mut shutdown => { - info!("Pod monitoring task shutting down"); - return; - } - } - } - } - .map(|_| { + let publisher = PodPublisher::new(pod_state_clone, pod_info_tx, shutdown); + let fut = publisher.run().map(|_| { info!(message = "Pod monitoring task completed gracefully."); }); slot.bind(Box::pin(fut)); @@ -1028,33 +1009,8 @@ impl Source { // Spawn a task to consume from the pod info channel and print pod names // Similar to Docker logs main future pattern: main task receives data from spawned tasks - let pod_consumer_task = { - let shutdown_signal = global_shutdown.clone(); - async move { - loop { - tokio::select! { - pod_name = pod_info_rx.recv() => { - match pod_name { - Some(name) => { - info!("Received pod name: {}", name); - } - None => { - info!("Pod info channel closed"); - break; - } - } - } - _ = shutdown_signal.clone() => { - info!("Pod consumer task shutting down"); - break; - } - } - } - } - }; - - // Spawn the consumer task - tokio::spawn(pod_consumer_task); + let subscriber = PodSubscriber::new(pod_info_rx, global_shutdown.clone()); + tokio::spawn(subscriber.run()); lifecycle.run(global_shutdown).await; // Stop Kubernetes object reflectors to avoid their leak on vector reload. diff --git a/src/sources/kubernetes_logs/pod_publisher.rs b/src/sources/kubernetes_logs/pod_publisher.rs new file mode 100644 index 0000000000000..b25d30bec6db8 --- /dev/null +++ b/src/sources/kubernetes_logs/pod_publisher.rs @@ -0,0 +1,67 @@ +use std::time::Duration; + +use k8s_openapi::api::core::v1::Pod; +use kube::runtime::reflector::Store; +use tokio::sync::mpsc; +use tracing::{info, warn}; + +use super::lifecycle::ShutdownHandle; + +/// Pod information publisher that monitors Kubernetes pod state changes +/// and publishes pod information through a channel. +/// +/// This follows the Docker logs EventStreamBuilder pattern where spawned tasks +/// communicate with the main task via channels. 
+pub struct PodPublisher { + pod_state: Store, + sender: mpsc::UnboundedSender, + shutdown: ShutdownHandle, +} +impl PodPublisher { + /// Create a new pod publisher + pub fn new( + pod_state: Store, + sender: mpsc::UnboundedSender, + shutdown: ShutdownHandle, + ) -> Self { + Self { + pod_state, + sender, + shutdown, + } + } + + /// Start the pod monitoring task that publishes pod information periodically + /// + /// This task: + /// - Monitors pod_state every 5 seconds + /// - Extracts pod names from the current pod state + /// - Sends pod names through the channel to the main task + /// - Handles shutdown signals gracefully + pub async fn run(mut self) { + let mut interval = tokio::time::interval(Duration::from_secs(5)); + + info!("Pod monitoring task started"); + + loop { + tokio::select! { + _ = interval.tick() => { + // Get all current pods and publish their names + let pods = self.pod_state.state(); + for pod in pods.iter() { + if let Some(name) = pod.metadata.name.as_ref() { + if let Err(_) = self.sender.send(name.clone()) { + warn!("Failed to send pod info through channel"); + return; + } + } + } + } + _ = &mut self.shutdown => { + info!("Pod monitoring task shutting down"); + return; + } + } + } + } +} diff --git a/src/sources/kubernetes_logs/pod_subscriber.rs b/src/sources/kubernetes_logs/pod_subscriber.rs new file mode 100644 index 0000000000000..8fba8bdc7444d --- /dev/null +++ b/src/sources/kubernetes_logs/pod_subscriber.rs @@ -0,0 +1,56 @@ +use tokio::sync::mpsc; +use tracing::info; + +use crate::shutdown::ShutdownSignal; + +/// Pod information subscriber that receives pod information from the publisher +/// and processes it (currently just logging). +/// +/// This follows the Docker logs main future pattern where the main task +/// receives data from spawned tasks via channels. +pub struct PodSubscriber { + receiver: mpsc::UnboundedReceiver, + shutdown: ShutdownSignal, +} + +impl PodSubscriber { + /// Create a new pod subscriber + pub fn new(receiver: mpsc::UnboundedReceiver, shutdown: ShutdownSignal) -> Self { + Self { receiver, shutdown } + } + + /// Start the pod consumer task that receives and processes pod information + /// + /// This task: + /// - Receives pod names from the channel + /// - Processes pod information (currently just prints names) + /// - Handles channel closure and shutdown gracefully + /// + /// In the future, this is where K8s logs API calls would be made + /// instead of just printing the pod names. + pub async fn run(mut self) { + info!("Pod consumer task started"); + + loop { + tokio::select! 
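// Whichever branch below is ready first is handled on each pass through the
// loop. recv() returns None only once every sender handle has been dropped
// (i.e. the publisher task has exited), which the None arm below reports as
// the channel being closed.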
{ + pod_name = self.receiver.recv() => { + match pod_name { + Some(name) => { + info!("Received pod name: {}", name); + // TODO: Here is where K8s logs API calls would be made + // instead of just logging the pod name + } + None => { + info!("Pod info channel closed"); + break; + } + } + } + _ = self.shutdown.clone() => { + info!("Pod consumer task shutting down"); + break; + } + } + } + } +} From 2942b5d3af65c038968524762d91be5818c6069c Mon Sep 17 00:00:00 2001 From: titaneric Date: Fri, 10 Oct 2025 20:44:51 +0800 Subject: [PATCH 03/32] implement `fetch_pod_logs` for pod subscriber --- src/sources/kubernetes_logs/mod.rs | 11 +- src/sources/kubernetes_logs/pod_info.rs | 95 +++++++++++++++ src/sources/kubernetes_logs/pod_publisher.rs | 19 +-- src/sources/kubernetes_logs/pod_subscriber.rs | 108 +++++++++++++++--- 4 files changed, 204 insertions(+), 29 deletions(-) create mode 100644 src/sources/kubernetes_logs/pod_info.rs diff --git a/src/sources/kubernetes_logs/mod.rs b/src/sources/kubernetes_logs/mod.rs index 35bfa31a08492..c79fcba73f2f8 100644 --- a/src/sources/kubernetes_logs/mod.rs +++ b/src/sources/kubernetes_logs/mod.rs @@ -65,6 +65,7 @@ mod node_metadata_annotator; mod parser; mod partial_events_merger; mod path_helpers; +mod pod_info; mod pod_metadata_annotator; mod pod_publisher; mod pod_subscriber; @@ -73,7 +74,7 @@ mod util; use self::{ namespace_metadata_annotator::NamespaceMetadataAnnotator, - node_metadata_annotator::NodeMetadataAnnotator, parser::Parser, + node_metadata_annotator::NodeMetadataAnnotator, parser::Parser, pod_info::PodInfo, pod_metadata_annotator::PodMetadataAnnotator, pod_publisher::PodPublisher, pod_subscriber::PodSubscriber, }; @@ -786,7 +787,7 @@ impl Source { // ----------------------------------------------------------------- - let nodes = Api::::all(client); + let nodes = Api::::all(client.clone()); let node_watcher = watcher( nodes, watcher::Config { @@ -892,7 +893,7 @@ impl Source { // Channel for communication between main task and pod monitoring task // Similar to Docker logs source pattern: spawned task sends data to main task via channel - let (pod_info_tx, pod_info_rx) = tokio::sync::mpsc::unbounded_channel::(); + let (pod_info_tx, pod_info_rx) = tokio::sync::mpsc::unbounded_channel::(); let checkpoints = checkpointer.view(); let events = file_source_rx.flat_map(futures::stream::iter); @@ -1007,9 +1008,9 @@ impl Source { slot.bind(Box::pin(fut)); } - // Spawn a task to consume from the pod info channel and print pod names + // Spawn a task to consume from the pod info channel and fetch K8s logs // Similar to Docker logs main future pattern: main task receives data from spawned tasks - let subscriber = PodSubscriber::new(pod_info_rx, global_shutdown.clone()); + let subscriber = PodSubscriber::new(pod_info_rx, global_shutdown.clone(), client.clone()); tokio::spawn(subscriber.run()); lifecycle.run(global_shutdown).await; diff --git a/src/sources/kubernetes_logs/pod_info.rs b/src/sources/kubernetes_logs/pod_info.rs new file mode 100644 index 0000000000000..e5026c3864f6f --- /dev/null +++ b/src/sources/kubernetes_logs/pod_info.rs @@ -0,0 +1,95 @@ +use k8s_openapi::api::core::v1::Pod; +use serde::{Deserialize, Serialize}; + +/// Pod information struct that contains essential details for log fetching +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct PodInfo { + /// Pod name + pub name: String, + /// Pod namespace + pub namespace: String, + /// Pod UID for uniqueness + pub uid: String, + /// Pod phase (Running, Pending, etc.) 
+ pub phase: Option, + /// Container names within the pod + pub containers: Vec, +} + +/// Error type for failed Pod to PodInfo conversion +#[derive(Debug, Clone)] +pub enum PodConversionError { + MissingName, + MissingUid, +} + +impl std::fmt::Display for PodConversionError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + PodConversionError::MissingName => write!(f, "Pod is missing required name field"), + PodConversionError::MissingUid => write!(f, "Pod is missing required UID field"), + } + } +} + +impl std::error::Error for PodConversionError {} + +impl TryFrom<&Pod> for PodInfo { + type Error = PodConversionError; + + fn try_from(pod: &Pod) -> Result { + let metadata = &pod.metadata; + + let name = metadata + .name + .as_ref() + .ok_or(PodConversionError::MissingName)? + .clone(); + + let namespace = metadata.namespace.as_ref().cloned().unwrap_or_default(); + + let uid = metadata + .uid + .as_ref() + .ok_or(PodConversionError::MissingUid)? + .clone(); + + let phase = pod.status.as_ref().and_then(|status| status.phase.clone()); + + let containers = pod + .spec + .as_ref() + .map(|spec| { + spec.containers + .iter() + .map(|container| container.name.clone()) + .collect() + }) + .unwrap_or_default(); + + Ok(PodInfo { + name, + namespace, + uid, + phase, + containers, + }) + } +} + +impl TryFrom for PodInfo { + type Error = PodConversionError; + + fn try_from(pod: Pod) -> Result { + Self::try_from(&pod) + } +} + +impl PodInfo { + /// Check if this pod is in Running phase + pub fn is_running(&self) -> bool { + self.phase + .as_ref() + .map_or(false, |phase| phase == "Running") + } +} diff --git a/src/sources/kubernetes_logs/pod_publisher.rs b/src/sources/kubernetes_logs/pod_publisher.rs index b25d30bec6db8..a835f9b47d3e7 100644 --- a/src/sources/kubernetes_logs/pod_publisher.rs +++ b/src/sources/kubernetes_logs/pod_publisher.rs @@ -5,7 +5,7 @@ use kube::runtime::reflector::Store; use tokio::sync::mpsc; use tracing::{info, warn}; -use super::lifecycle::ShutdownHandle; +use super::{lifecycle::ShutdownHandle, pod_info::PodInfo}; /// Pod information publisher that monitors Kubernetes pod state changes /// and publishes pod information through a channel. @@ -14,14 +14,14 @@ use super::lifecycle::ShutdownHandle; /// communicate with the main task via channels. pub struct PodPublisher { pod_state: Store, - sender: mpsc::UnboundedSender, + sender: mpsc::UnboundedSender, shutdown: ShutdownHandle, } impl PodPublisher { /// Create a new pod publisher pub fn new( pod_state: Store, - sender: mpsc::UnboundedSender, + sender: mpsc::UnboundedSender, shutdown: ShutdownHandle, ) -> Self { Self { @@ -46,13 +46,16 @@ impl PodPublisher { loop { tokio::select! 
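// On every tick the reflector store snapshot is re-walked, so the same pod
// is re-sent repeatedly; the receiving side de-duplicates by pod UID. A send
// error can only mean the receiver was dropped, so the task stops.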
{ _ = interval.tick() => { - // Get all current pods and publish their names + // Get all current pods and publish their detailed information let pods = self.pod_state.state(); for pod in pods.iter() { - if let Some(name) = pod.metadata.name.as_ref() { - if let Err(_) = self.sender.send(name.clone()) { - warn!("Failed to send pod info through channel"); - return; + if let Ok(pod_info) = PodInfo::try_from(pod.as_ref()) { + // Only publish running pods for log fetching + if pod_info.is_running() { + if let Err(_) = self.sender.send(pod_info) { + warn!("Failed to send pod info through channel"); + return; + } } } } diff --git a/src/sources/kubernetes_logs/pod_subscriber.rs b/src/sources/kubernetes_logs/pod_subscriber.rs index 8fba8bdc7444d..911c47dfc27c4 100644 --- a/src/sources/kubernetes_logs/pod_subscriber.rs +++ b/src/sources/kubernetes_logs/pod_subscriber.rs @@ -1,44 +1,65 @@ +use std::collections::HashSet; + +use k8s_openapi::api::core::v1::Pod; +use kube::{Api, Client, api::LogParams}; use tokio::sync::mpsc; -use tracing::info; +use tracing::{error, info, warn}; +use super::pod_info::PodInfo; use crate::shutdown::ShutdownSignal; /// Pod information subscriber that receives pod information from the publisher -/// and processes it (currently just logging). +/// and fetches logs from Kubernetes API. /// /// This follows the Docker logs main future pattern where the main task /// receives data from spawned tasks via channels. pub struct PodSubscriber { - receiver: mpsc::UnboundedReceiver, + receiver: mpsc::UnboundedReceiver, shutdown: ShutdownSignal, + client: Client, + /// Track pods we've already started processing to avoid duplicates + processed_pods: HashSet, } impl PodSubscriber { /// Create a new pod subscriber - pub fn new(receiver: mpsc::UnboundedReceiver, shutdown: ShutdownSignal) -> Self { - Self { receiver, shutdown } + pub fn new( + receiver: mpsc::UnboundedReceiver, + shutdown: ShutdownSignal, + client: Client, + ) -> Self { + Self { + receiver, + shutdown, + client, + processed_pods: HashSet::new(), + } } /// Start the pod consumer task that receives and processes pod information /// /// This task: - /// - Receives pod names from the channel - /// - Processes pod information (currently just prints names) + /// - Receives pod information from the channel + /// - Fetches logs from Kubernetes API for each pod /// - Handles channel closure and shutdown gracefully - /// - /// In the future, this is where K8s logs API calls would be made - /// instead of just printing the pod names. pub async fn run(mut self) { info!("Pod consumer task started"); loop { tokio::select! 
{ - pod_name = self.receiver.recv() => { - match pod_name { - Some(name) => { - info!("Received pod name: {}", name); - // TODO: Here is where K8s logs API calls would be made - // instead of just logging the pod name + pod_info = self.receiver.recv() => { + match pod_info { + Some(pod_info) => { + // Check if we've already processed this pod to avoid duplicates + if !self.processed_pods.contains(&pod_info.uid) { + self.processed_pods.insert(pod_info.uid.clone()); + info!("Processing new pod: {} in namespace {}", pod_info.name, pod_info.namespace); + + // Fetch logs for this pod + if let Err(e) = self.fetch_pod_logs(&pod_info).await { + error!("Failed to fetch logs for pod {}: {}", pod_info.name, e); + } + } } None => { info!("Pod info channel closed"); @@ -53,4 +74,59 @@ impl PodSubscriber { } } } + + /// Fetch logs from Kubernetes API for a specific pod + async fn fetch_pod_logs( + &self, + pod_info: &PodInfo, + ) -> Result<(), Box> { + let pods: Api = Api::namespaced(self.client.clone(), &pod_info.namespace); + + // For each container in the pod, fetch its logs + for container in &pod_info.containers { + info!( + "Fetching logs for container '{}' in pod '{}'", + container, pod_info.name + ); + + let log_params = LogParams { + container: Some(container.clone()), + follow: false, // For now, just get recent logs, not streaming + tail_lines: Some(10), // Get last 10 lines + timestamps: true, + ..Default::default() + }; + + match pods.logs(&pod_info.name, &log_params).await { + Ok(logs) => { + // Process the logs - for now just print them + // In a full implementation, these would be sent to the Vector event pipeline + if !logs.is_empty() { + info!( + "=== Logs from pod '{}', container '{}' ===", + pod_info.name, container + ); + for line in logs.lines().take(5) { + // Limit output for demo + info!("LOG: {}", line); + } + info!("=== End of logs ==="); + } else { + info!( + "No logs available for pod '{}', container '{}'", + pod_info.name, container + ); + } + } + Err(e) => { + warn!( + "Failed to fetch logs for pod '{}', container '{}': {}", + pod_info.name, container, e + ); + } + } + } + + Ok(()) + } } From ce3044ae27b5f783b4414426b7518d6f292104ac Mon Sep 17 00:00:00 2001 From: titaneric Date: Sat, 11 Oct 2025 14:25:36 +0800 Subject: [PATCH 04/32] add basic reconciler for running pods --- src/sources/kubernetes_logs/mod.rs | 43 +++--- src/sources/kubernetes_logs/pod_info.rs | 59 +------- src/sources/kubernetes_logs/pod_publisher.rs | 70 ---------- src/sources/kubernetes_logs/pod_subscriber.rs | 132 ------------------ src/sources/kubernetes_logs/reconciler.rs | 126 +++++++++++++++++ 5 files changed, 154 insertions(+), 276 deletions(-) delete mode 100644 src/sources/kubernetes_logs/pod_publisher.rs delete mode 100644 src/sources/kubernetes_logs/pod_subscriber.rs create mode 100644 src/sources/kubernetes_logs/reconciler.rs diff --git a/src/sources/kubernetes_logs/mod.rs b/src/sources/kubernetes_logs/mod.rs index c79fcba73f2f8..f580dca82d12e 100644 --- a/src/sources/kubernetes_logs/mod.rs +++ b/src/sources/kubernetes_logs/mod.rs @@ -53,8 +53,7 @@ use crate::{ }, kubernetes::{custom_reflector, meta_cache::MetaCache}, shutdown::ShutdownSignal, - sources, - sources::kubernetes_logs::partial_events_merger::merge_partial_events, + sources::{self, kubernetes_logs::partial_events_merger::merge_partial_events}, transforms::{FunctionTransform, OutputBuffer}, }; @@ -67,16 +66,14 @@ mod partial_events_merger; mod path_helpers; mod pod_info; mod pod_metadata_annotator; -mod pod_publisher; -mod 
pod_subscriber; +mod reconciler; mod transform_utils; mod util; use self::{ namespace_metadata_annotator::NamespaceMetadataAnnotator, - node_metadata_annotator::NodeMetadataAnnotator, parser::Parser, pod_info::PodInfo, - pod_metadata_annotator::PodMetadataAnnotator, pod_publisher::PodPublisher, - pod_subscriber::PodSubscriber, + node_metadata_annotator::NodeMetadataAnnotator, parser::Parser, + pod_metadata_annotator::PodMetadataAnnotator, }; /// The `self_node_name` value env var key. @@ -891,10 +888,6 @@ impl Source { let (file_source_tx, file_source_rx) = futures::channel::mpsc::channel::>(2); - // Channel for communication between main task and pod monitoring task - // Similar to Docker logs source pattern: spawned task sends data to main task via channel - let (pod_info_tx, pod_info_rx) = tokio::sync::mpsc::unbounded_channel::(); - let checkpoints = checkpointer.view(); let events = file_source_rx.flat_map(futures::stream::iter); let bytes_received = register!(BytesReceived::from(Protocol::HTTP)); @@ -960,6 +953,7 @@ impl Source { let event_processing_loop = out.send_event_stream(&mut stream); + let reconciler = reconciler::Reconciler::new(pod_state, client.clone()); let mut lifecycle = Lifecycle::new(); { let (slot, shutdown) = lifecycle.add(); @@ -996,23 +990,28 @@ impl Source { }); slot.bind(Box::pin(fut)); } + { - // New task: Pod monitoring task - similar to Docker logs EventStreamBuilder pattern - // This task monitors pod_state changes and publishes pod information via channel let (slot, shutdown) = lifecycle.add(); - let pod_state_clone = pod_state.clone(); - let publisher = PodPublisher::new(pod_state_clone, pod_info_tx, shutdown); - let fut = publisher.run().map(|_| { - info!(message = "Pod monitoring task completed gracefully."); + let fut = reconciler.reconcile(); + let fut = util::complete_with_deadline_on_signal( + fut, + shutdown, + Duration::from_secs(30), // more than enough time to propagate + ) + .map(|result| { + match result { + Ok(_) => info!(message = "Event processing loop completed gracefully."), + Err(error) => emit!(KubernetesLifecycleError { + error, + message: "Event processing loop timed out during the shutdown.", + count: events_count, + }), + }; }); slot.bind(Box::pin(fut)); } - // Spawn a task to consume from the pod info channel and fetch K8s logs - // Similar to Docker logs main future pattern: main task receives data from spawned tasks - let subscriber = PodSubscriber::new(pod_info_rx, global_shutdown.clone(), client.clone()); - tokio::spawn(subscriber.run()); - lifecycle.run(global_shutdown).await; // Stop Kubernetes object reflectors to avoid their leak on vector reload. 
for reflector in reflectors { diff --git a/src/sources/kubernetes_logs/pod_info.rs b/src/sources/kubernetes_logs/pod_info.rs index e5026c3864f6f..0d91a675d2730 100644 --- a/src/sources/kubernetes_logs/pod_info.rs +++ b/src/sources/kubernetes_logs/pod_info.rs @@ -2,7 +2,7 @@ use k8s_openapi::api::core::v1::Pod; use serde::{Deserialize, Serialize}; /// Pod information struct that contains essential details for log fetching -#[derive(Clone, Debug, Serialize, Deserialize)] +#[derive(Clone, Debug, Serialize, Deserialize, Eq, Hash, PartialEq)] pub struct PodInfo { /// Pod name pub name: String, @@ -16,43 +16,15 @@ pub struct PodInfo { pub containers: Vec, } -/// Error type for failed Pod to PodInfo conversion -#[derive(Debug, Clone)] -pub enum PodConversionError { - MissingName, - MissingUid, -} - -impl std::fmt::Display for PodConversionError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - PodConversionError::MissingName => write!(f, "Pod is missing required name field"), - PodConversionError::MissingUid => write!(f, "Pod is missing required UID field"), - } - } -} - -impl std::error::Error for PodConversionError {} - -impl TryFrom<&Pod> for PodInfo { - type Error = PodConversionError; - - fn try_from(pod: &Pod) -> Result { +impl From<&Pod> for PodInfo { + fn from(pod: &Pod) -> Self { let metadata = &pod.metadata; - let name = metadata - .name - .as_ref() - .ok_or(PodConversionError::MissingName)? - .clone(); + let name = metadata.name.as_ref().cloned().unwrap_or_default(); let namespace = metadata.namespace.as_ref().cloned().unwrap_or_default(); - let uid = metadata - .uid - .as_ref() - .ok_or(PodConversionError::MissingUid)? - .clone(); + let uid = metadata.uid.as_ref().cloned().unwrap_or_default(); let phase = pod.status.as_ref().and_then(|status| status.phase.clone()); @@ -67,29 +39,12 @@ impl TryFrom<&Pod> for PodInfo { }) .unwrap_or_default(); - Ok(PodInfo { + PodInfo { name, namespace, uid, phase, containers, - }) - } -} - -impl TryFrom for PodInfo { - type Error = PodConversionError; - - fn try_from(pod: Pod) -> Result { - Self::try_from(&pod) - } -} - -impl PodInfo { - /// Check if this pod is in Running phase - pub fn is_running(&self) -> bool { - self.phase - .as_ref() - .map_or(false, |phase| phase == "Running") + } } } diff --git a/src/sources/kubernetes_logs/pod_publisher.rs b/src/sources/kubernetes_logs/pod_publisher.rs deleted file mode 100644 index a835f9b47d3e7..0000000000000 --- a/src/sources/kubernetes_logs/pod_publisher.rs +++ /dev/null @@ -1,70 +0,0 @@ -use std::time::Duration; - -use k8s_openapi::api::core::v1::Pod; -use kube::runtime::reflector::Store; -use tokio::sync::mpsc; -use tracing::{info, warn}; - -use super::{lifecycle::ShutdownHandle, pod_info::PodInfo}; - -/// Pod information publisher that monitors Kubernetes pod state changes -/// and publishes pod information through a channel. -/// -/// This follows the Docker logs EventStreamBuilder pattern where spawned tasks -/// communicate with the main task via channels. 
-pub struct PodPublisher { - pod_state: Store, - sender: mpsc::UnboundedSender, - shutdown: ShutdownHandle, -} -impl PodPublisher { - /// Create a new pod publisher - pub fn new( - pod_state: Store, - sender: mpsc::UnboundedSender, - shutdown: ShutdownHandle, - ) -> Self { - Self { - pod_state, - sender, - shutdown, - } - } - - /// Start the pod monitoring task that publishes pod information periodically - /// - /// This task: - /// - Monitors pod_state every 5 seconds - /// - Extracts pod names from the current pod state - /// - Sends pod names through the channel to the main task - /// - Handles shutdown signals gracefully - pub async fn run(mut self) { - let mut interval = tokio::time::interval(Duration::from_secs(5)); - - info!("Pod monitoring task started"); - - loop { - tokio::select! { - _ = interval.tick() => { - // Get all current pods and publish their detailed information - let pods = self.pod_state.state(); - for pod in pods.iter() { - if let Ok(pod_info) = PodInfo::try_from(pod.as_ref()) { - // Only publish running pods for log fetching - if pod_info.is_running() { - if let Err(_) = self.sender.send(pod_info) { - warn!("Failed to send pod info through channel"); - return; - } - } - } - } - } - _ = &mut self.shutdown => { - info!("Pod monitoring task shutting down"); - return; - } - } - } - } -} diff --git a/src/sources/kubernetes_logs/pod_subscriber.rs b/src/sources/kubernetes_logs/pod_subscriber.rs deleted file mode 100644 index 911c47dfc27c4..0000000000000 --- a/src/sources/kubernetes_logs/pod_subscriber.rs +++ /dev/null @@ -1,132 +0,0 @@ -use std::collections::HashSet; - -use k8s_openapi::api::core::v1::Pod; -use kube::{Api, Client, api::LogParams}; -use tokio::sync::mpsc; -use tracing::{error, info, warn}; - -use super::pod_info::PodInfo; -use crate::shutdown::ShutdownSignal; - -/// Pod information subscriber that receives pod information from the publisher -/// and fetches logs from Kubernetes API. -/// -/// This follows the Docker logs main future pattern where the main task -/// receives data from spawned tasks via channels. -pub struct PodSubscriber { - receiver: mpsc::UnboundedReceiver, - shutdown: ShutdownSignal, - client: Client, - /// Track pods we've already started processing to avoid duplicates - processed_pods: HashSet, -} - -impl PodSubscriber { - /// Create a new pod subscriber - pub fn new( - receiver: mpsc::UnboundedReceiver, - shutdown: ShutdownSignal, - client: Client, - ) -> Self { - Self { - receiver, - shutdown, - client, - processed_pods: HashSet::new(), - } - } - - /// Start the pod consumer task that receives and processes pod information - /// - /// This task: - /// - Receives pod information from the channel - /// - Fetches logs from Kubernetes API for each pod - /// - Handles channel closure and shutdown gracefully - pub async fn run(mut self) { - info!("Pod consumer task started"); - - loop { - tokio::select! 
{ - pod_info = self.receiver.recv() => { - match pod_info { - Some(pod_info) => { - // Check if we've already processed this pod to avoid duplicates - if !self.processed_pods.contains(&pod_info.uid) { - self.processed_pods.insert(pod_info.uid.clone()); - info!("Processing new pod: {} in namespace {}", pod_info.name, pod_info.namespace); - - // Fetch logs for this pod - if let Err(e) = self.fetch_pod_logs(&pod_info).await { - error!("Failed to fetch logs for pod {}: {}", pod_info.name, e); - } - } - } - None => { - info!("Pod info channel closed"); - break; - } - } - } - _ = self.shutdown.clone() => { - info!("Pod consumer task shutting down"); - break; - } - } - } - } - - /// Fetch logs from Kubernetes API for a specific pod - async fn fetch_pod_logs( - &self, - pod_info: &PodInfo, - ) -> Result<(), Box> { - let pods: Api = Api::namespaced(self.client.clone(), &pod_info.namespace); - - // For each container in the pod, fetch its logs - for container in &pod_info.containers { - info!( - "Fetching logs for container '{}' in pod '{}'", - container, pod_info.name - ); - - let log_params = LogParams { - container: Some(container.clone()), - follow: false, // For now, just get recent logs, not streaming - tail_lines: Some(10), // Get last 10 lines - timestamps: true, - ..Default::default() - }; - - match pods.logs(&pod_info.name, &log_params).await { - Ok(logs) => { - // Process the logs - for now just print them - // In a full implementation, these would be sent to the Vector event pipeline - if !logs.is_empty() { - info!( - "=== Logs from pod '{}', container '{}' ===", - pod_info.name, container - ); - for line in logs.lines().take(5) { - // Limit output for demo - info!("LOG: {}", line); - } - info!("=== End of logs ==="); - } else { - info!( - "No logs available for pod '{}', container '{}'", - pod_info.name, container - ); - } - } - Err(e) => { - warn!( - "Failed to fetch logs for pod '{}', container '{}': {}", - pod_info.name, container, e - ); - } - } - } - - Ok(()) - } -} diff --git a/src/sources/kubernetes_logs/reconciler.rs b/src/sources/kubernetes_logs/reconciler.rs new file mode 100644 index 0000000000000..894f0ea24181b --- /dev/null +++ b/src/sources/kubernetes_logs/reconciler.rs @@ -0,0 +1,126 @@ +use super::pod_info::PodInfo; +use k8s_openapi::api::core::v1::Pod; +use kube::runtime::reflector::Store; +use kube::{Api, Client, api::LogParams}; +use tracing::{error, info, warn}; + +pub struct Reconciler { + pod_state: Store, + client: Client, +} + +impl Reconciler { + pub fn new(pod_state: Store, client: Client) -> Self { + Self { pod_state, client } + } + + pub async fn reconcile(&self) { + // TODO: replace timer with watcher for pod state changes and reconcile accordingly + let mut timer = tokio::time::interval(tokio::time::Duration::from_secs(10)); + loop { + tokio::select! 
{ + _ = timer.tick() => { + self.perform_reconciliation().await; + } + } + } + } + + async fn perform_reconciliation(&self) { + // Placeholder for reconciliation logic + info!("Performing reconciliation of pod states"); + // Actual reconciliation logic would go here + self.pod_state + .state() + .iter() + .map(|pod| PodInfo::from(pod.as_ref())) + .for_each(|pod_info| { + info!("PodInfo: {:?}", pod_info); + // self.fetch_pod_logs(&pod_info).await; + let tailer = LogTailer::new(self.client.clone()); + let _status = tailer.start(&pod_info); + // TODO: Store tailer status in a thread-safe way + }); + } +} + +struct LogTailer { + client: Client, +} + +enum TailStatus { + Running, + // Stopped, +} + +impl LogTailer { + pub fn new(client: Client) -> Self { + Self { client } + } + + pub fn start(&self, pod_info: &PodInfo) -> TailStatus { + let pod_info = pod_info.clone(); + let client = self.client.clone(); + tokio::spawn(async move { + let tailer = LogTailer { client }; + if let Err(e) = tailer.tail_log(&pod_info).await { + error!("Error tailing logs for pod '{}': {}", pod_info.name, e); + } + }); + TailStatus::Running + } + + pub async fn tail_log( + &self, + pod_info: &PodInfo, + ) -> Result<(), Box> { + let pods: Api = Api::namespaced(self.client.clone(), &pod_info.namespace); + + // For each container in the pod, fetch its logs + for container in &pod_info.containers { + info!( + "Fetching logs for container '{}' in pod '{}'", + container, pod_info.name + ); + + let log_params = LogParams { + container: Some(container.clone()), + follow: false, // For now, just get recent logs, not streaming + tail_lines: Some(10), // Get last 10 lines + timestamps: true, + ..Default::default() + }; + + match pods.logs(&pod_info.name, &log_params).await { + Ok(logs) => { + // Process the logs - for now just print them + // In a full implementation, these would be sent to the Vector event pipeline + if !logs.is_empty() { + info!( + "=== Logs from pod '{}', container '{}' ===", + pod_info.name, container + ); + for line in logs.lines().take(5) { + // Limit output for demo + info!("LOG: {}", line); + } + info!("=== End of logs ==="); + } else { + info!( + "No logs available for pod '{}', container '{}'", + pod_info.name, container + ); + } + } + Err(e) => { + warn!( + "Failed to fetch logs for pod '{}', container '{}': {}", + pod_info.name, container, e + ); + } + } + } + + Ok(()) + } +} From 0697f404af43cfa74fd01c0eb042029ad3150bc7 Mon Sep 17 00:00:00 2001 From: titaneric Date: Sat, 11 Oct 2025 15:04:05 +0800 Subject: [PATCH 05/32] start reconciler after pod state is not empty --- src/sources/kubernetes_logs/mod.rs | 34 ++++++--- src/sources/kubernetes_logs/reconciler.rs | 90 +++++++++++++++++++---- 2 files changed, 99 insertions(+), 25 deletions(-) diff --git a/src/sources/kubernetes_logs/mod.rs b/src/sources/kubernetes_logs/mod.rs index f580dca82d12e..c33a29dad105d 100644 --- a/src/sources/kubernetes_logs/mod.rs +++ b/src/sources/kubernetes_logs/mod.rs @@ -722,8 +722,6 @@ impl Source { api_log, } = self; - info!(%api_log); - let mut reflectors = Vec::new(); let pods = Api::::all(client.clone()); @@ -953,7 +951,24 @@ impl Source { let event_processing_loop = out.send_event_stream(&mut stream); - let reconciler = reconciler::Reconciler::new(pod_state, client.clone()); + // Only run reconciler when api_log is enabled + let reconciler_fut = if api_log { + let reconciler = reconciler::Reconciler::new(pod_state, client.clone()); + Some(async move { + // Give some time for the pod store to be populated + 
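// (a fixed delay is only a heuristic: on a slow API server the store can
// still be empty, which handle_running_pods tolerates by logging a warning
// and returning without starting any tailers)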
tokio::time::sleep(tokio::time::Duration::from_secs(2)).await; + match reconciler.handle_running_pods().await { + Ok(reconciler) => reconciler.run().await, + Err(error) => emit!(KubernetesLifecycleError { + error, + message: "Reconciler exited with an error.", + count: events_count, + }), + } + }) + } else { + None + }; let mut lifecycle = Lifecycle::new(); { let (slot, shutdown) = lifecycle.add(); @@ -991,20 +1006,21 @@ impl Source { slot.bind(Box::pin(fut)); } - { + // Only add reconciler to lifecycle if api_log is enabled + if let Some(reconciler_future) = reconciler_fut { let (slot, shutdown) = lifecycle.add(); - let fut = reconciler.reconcile(); + let fut = util::complete_with_deadline_on_signal( - fut, + reconciler_future, shutdown, Duration::from_secs(30), // more than enough time to propagate ) .map(|result| { match result { - Ok(_) => info!(message = "Event processing loop completed gracefully."), + Ok(_) => info!(message = "Reconciler completed gracefully."), Err(error) => emit!(KubernetesLifecycleError { error, - message: "Event processing loop timed out during the shutdown.", + message: "Reconciler timed out during the shutdown.", count: events_count, }), }; @@ -1130,7 +1146,7 @@ const fn default_rotate_wait() -> Duration { Duration::from_secs(u64::MAX / 2) } const fn default_api_log() -> bool { - false + true // Enable api_log by default for now to test reconciler functionality } // This function constructs the patterns we include for file watching, created diff --git a/src/sources/kubernetes_logs/reconciler.rs b/src/sources/kubernetes_logs/reconciler.rs index 894f0ea24181b..0585d1e20b93d 100644 --- a/src/sources/kubernetes_logs/reconciler.rs +++ b/src/sources/kubernetes_logs/reconciler.rs @@ -6,41 +6,85 @@ use tracing::{error, info, warn}; pub struct Reconciler { pod_state: Store, - client: Client, + tailer: LogTailer, } impl Reconciler { pub fn new(pod_state: Store, client: Client) -> Self { - Self { pod_state, client } + let tailer = LogTailer::new(client.clone()); + Self { pod_state, tailer } } - pub async fn reconcile(&self) { + pub async fn run(&self) { // TODO: replace timer with watcher for pod state changes and reconcile accordingly let mut timer = tokio::time::interval(tokio::time::Duration::from_secs(10)); loop { tokio::select! { _ = timer.tick() => { - self.perform_reconciliation().await; + // self.perform_reconciliation().await; } } } } - async fn perform_reconciliation(&self) { - // Placeholder for reconciliation logic + pub async fn handle_running_pods(self) -> crate::Result { info!("Performing reconciliation of pod states"); - // Actual reconciliation logic would go here - self.pod_state + + let pods: Vec<_> = self + .pod_state .state() .iter() .map(|pod| PodInfo::from(pod.as_ref())) - .for_each(|pod_info| { - info!("PodInfo: {:?}", pod_info); - // self.fetch_pod_logs(&pod_info).await; - let tailer = LogTailer::new(self.client.clone()); - let _status = tailer.start(&pod_info); + .collect(); + + if pods.is_empty() { + warn!("No pods found in pod store. 
The store might not be populated yet."); + return Ok(self); + } + + info!("Found {} pods in store", pods.len()); + + // Filter for running pods and start tailing their logs + let running_pods: Vec<_> = pods + .into_iter() + .filter(|pod_info| match &pod_info.phase { + Some(phase) if phase == "Running" => { + info!( + "Pod '{}' is running with {} containers", + pod_info.name, + pod_info.containers.len() + ); + true + } + Some(phase) => { + info!("Skipping pod '{}' in phase '{}'", pod_info.name, phase); + false + } + None => { + info!("Skipping pod '{}' with unknown phase", pod_info.name); + false + } + }) + .collect(); + + if running_pods.is_empty() { + info!("No running pods found to tail logs from"); + } else { + info!( + "Starting log tailing for {} running pods", + running_pods.len() + ); + for pod_info in running_pods { + info!( + "Starting tailer for pod '{}' in namespace '{}'", + pod_info.name, pod_info.namespace + ); + let _status = self.tailer.start(&pod_info); // TODO: Store tailer status in a thread-safe way - }); + } + } + + Ok(self) } } @@ -76,6 +120,13 @@ impl LogTailer { ) -> Result<(), Box> { let pods: Api = Api::namespaced(self.client.clone(), &pod_info.namespace); + info!( + "Starting log tailing for pod '{}' in namespace '{}' with {} containers", + pod_info.name, + pod_info.namespace, + pod_info.containers.len() + ); + // For each container in the pod, fetch its logs for container in &pod_info.containers { info!( @@ -100,9 +151,15 @@ impl LogTailer { "=== Logs from pod '{}', container '{}' ===", pod_info.name, container ); - for line in logs.lines().take(5) { + let line_count = logs.lines().count(); + info!("Retrieved {} lines of logs", line_count); + + for (idx, line) in logs.lines().take(5).enumerate() { // Limit output for demo - info!("LOG: {}", line); + info!("LOG[{}]: {}", idx + 1, line); + } + if line_count > 5 { + info!("... ({} more lines)", line_count - 5); } info!("=== End of logs ==="); } else { @@ -121,6 +178,7 @@ impl LogTailer { } } + info!("Completed log tailing for pod '{}'", pod_info.name); Ok(()) } } From 18f64724bb1b0f86ce1c6ed68bd9d0c9af68f76d Mon Sep 17 00:00:00 2001 From: titaneric Date: Sat, 11 Oct 2025 15:29:16 +0800 Subject: [PATCH 06/32] Run the reconciler once the pod state is initialized --- src/kubernetes/reflector.rs | 13 +++++++++- src/sources/kubernetes_logs/mod.rs | 41 ++++++++++++++++++++++-------- 2 files changed, 43 insertions(+), 11 deletions(-) diff --git a/src/kubernetes/reflector.rs b/src/kubernetes/reflector.rs index 0ddaa5242f6ba..80476a56f865d 100644 --- a/src/kubernetes/reflector.rs +++ b/src/kubernetes/reflector.rs @@ -8,17 +8,19 @@ use kube::{ Resource, runtime::{reflector::store, watcher}, }; -use tokio::pin; +use tokio::{pin, sync::oneshot}; use tokio_util::time::DelayQueue; use super::meta_cache::{MetaCache, MetaDescribe}; /// Handles events from a [`kube::runtime::watcher()`] to delay the application of Deletion events. +/// Optionally notifies via `init_notify` when the initial state is loaded (on InitDone event). pub async fn custom_reflector( mut store: store::Writer, mut meta_cache: MetaCache, stream: W, delay_deletion: Duration, + init_notify: Option>, ) where K: Resource + Clone + std::fmt::Debug, K::DynamicType: Eq + Hash + Clone, @@ -27,6 +29,7 @@ pub async fn custom_reflector( pin!(stream); let mut delay_queue = DelayQueue::default(); let mut init_buffer_meta = Vec::new(); + let mut init_notify = init_notify; loop { tokio::select! 
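// The new init_notify sender is consumed with take() when the watcher reports
// that the initial listing is complete (InitDone), so the readiness signal
// fires at most once even if the stream later restarts.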
{ result = stream.next() => { @@ -84,6 +87,11 @@ pub async fn custom_reflector( }); init_buffer_meta.clear(); + + // Notify initialization complete if requested + if let Some(tx) = init_notify.take() { + let _ = tx.send(()); + } } } }, @@ -159,6 +167,7 @@ mod tests { meta_cache, rx, Duration::from_secs(1), + None, )); tokio::time::sleep(Duration::from_secs(1)).await; assert_eq!(store.get(&ObjectRef::from_obj(&cm)).as_deref(), Some(&cm)); @@ -188,6 +197,7 @@ mod tests { meta_cache, rx, Duration::from_secs(2), + None, )); // Ensure the Resource is still available after deletion tokio::time::sleep(Duration::from_secs(1)).await; @@ -225,6 +235,7 @@ mod tests { meta_cache, rx, Duration::from_secs(2), + None, )); tokio::time::sleep(Duration::from_secs(1)).await; // Ensure the Resource is still available after deletion diff --git a/src/sources/kubernetes_logs/mod.rs b/src/sources/kubernetes_logs/mod.rs index c33a29dad105d..d30d30b41b73d 100644 --- a/src/sources/kubernetes_logs/mod.rs +++ b/src/sources/kubernetes_logs/mod.rs @@ -21,6 +21,7 @@ use kube::{ }; use lifecycle::Lifecycle; use serde_with::serde_as; +use tokio::sync::oneshot; use vector_lib::{ EstimatedJsonEncodedSizeOf, TimeZone, codecs::{BytesDeserializer, BytesDeserializerConfig}, @@ -748,11 +749,20 @@ impl Source { let pod_state = pod_store_w.as_reader(); let pod_cacher = MetaCache::new(); + // Create oneshot channel to notify when pod store is initialized + let (pod_init_tx, pod_init_rx) = if api_log { + let (tx, rx) = oneshot::channel(); + (Some(tx), Some(rx)) + } else { + (None, None) + }; + reflectors.push(tokio::spawn(custom_reflector( pod_store_w, pod_cacher, pod_watcher, delay_deletion, + pod_init_tx, ))); // ----------------------------------------------------------------- @@ -777,6 +787,7 @@ impl Source { MetaCache::new(), ns_watcher, delay_deletion, + None, ))); } @@ -802,6 +813,7 @@ impl Source { node_cacher, node_watcher, delay_deletion, + None, ))); let paths_provider = K8sPathsProvider::new( @@ -952,18 +964,27 @@ impl Source { let event_processing_loop = out.send_event_stream(&mut stream); // Only run reconciler when api_log is enabled - let reconciler_fut = if api_log { + let reconciler_fut = if let Some(pod_init_rx) = pod_init_rx { let reconciler = reconciler::Reconciler::new(pod_state, client.clone()); Some(async move { - // Give some time for the pod store to be populated - tokio::time::sleep(tokio::time::Duration::from_secs(2)).await; - match reconciler.handle_running_pods().await { - Ok(reconciler) => reconciler.run().await, - Err(error) => emit!(KubernetesLifecycleError { - error, - message: "Reconciler exited with an error.", - count: events_count, - }), + // Wait for pod store to be initialized + match pod_init_rx.await { + Ok(_) => { + info!("Pod store initialized, starting reconciler"); + match reconciler.handle_running_pods().await { + Ok(reconciler) => reconciler.run().await, + Err(error) => emit!(KubernetesLifecycleError { + error, + message: "Reconciler exited with an error.", + count: events_count, + }), + } + } + Err(_) => { + warn!( + "Pod store initialization signal was dropped, reconciler will not start" + ); + } } }) } else { From 43dd5cac8cec0ca8c8e08e6df377be5f23844a5b Mon Sep 17 00:00:00 2001 From: titaneric Date: Sat, 11 Oct 2025 15:43:42 +0800 Subject: [PATCH 07/32] launch LogTailer for container instead of pod level --- src/sources/kubernetes_logs/reconciler.rs | 167 +++++++++++++--------- 1 file changed, 100 insertions(+), 67 deletions(-) diff --git a/src/sources/kubernetes_logs/reconciler.rs 
b/src/sources/kubernetes_logs/reconciler.rs index 0585d1e20b93d..d6c198bca4128 100644 --- a/src/sources/kubernetes_logs/reconciler.rs +++ b/src/sources/kubernetes_logs/reconciler.rs @@ -4,15 +4,32 @@ use kube::runtime::reflector::Store; use kube::{Api, Client, api::LogParams}; use tracing::{error, info, warn}; +/// Container information for log tailing +#[derive(Clone, Debug)] +pub struct ContainerInfo { + /// Pod name containing this container + pub pod_name: String, + /// Pod namespace + pub namespace: String, + /// Container name + pub container_name: String, + /// Pod UID for tracking (will be used for future state tracking) + #[allow(dead_code)] + pub pod_uid: String, +} + pub struct Reconciler { pod_state: Store, - tailer: LogTailer, + container_tailer: ContainerLogTailer, } impl Reconciler { pub fn new(pod_state: Store, client: Client) -> Self { - let tailer = LogTailer::new(client.clone()); - Self { pod_state, tailer } + let container_tailer = ContainerLogTailer::new(client.clone()); + Self { + pod_state, + container_tailer, + } } pub async fn run(&self) { @@ -70,16 +87,36 @@ impl Reconciler { if running_pods.is_empty() { info!("No running pods found to tail logs from"); } else { + // Convert pods to container info and start tailers + let containers: Vec = running_pods + .iter() + .flat_map(|pod_info| { + pod_info + .containers + .iter() + .map(|container_name| ContainerInfo { + pod_name: pod_info.name.clone(), + namespace: pod_info.namespace.clone(), + container_name: container_name.clone(), + pod_uid: pod_info.uid.clone(), + }) + }) + .collect(); + info!( - "Starting log tailing for {} running pods", + "Starting log tailing for {} containers across {} running pods", + containers.len(), running_pods.len() ); - for pod_info in running_pods { + + for container_info in containers { info!( - "Starting tailer for pod '{}' in namespace '{}'", - pod_info.name, pod_info.namespace + "Starting tailer for container '{}' in pod '{}' (namespace '{}')", + container_info.container_name, + container_info.pod_name, + container_info.namespace ); - let _status = self.tailer.start(&pod_info); + let _status = self.container_tailer.start(&container_info); // TODO: Store tailer status in a thread-safe way } } @@ -88,7 +125,7 @@ impl Reconciler { } } -struct LogTailer { +struct ContainerLogTailer { client: Client, } @@ -97,88 +134,84 @@ enum TailStatus { // Stopped, } -impl LogTailer { +impl ContainerLogTailer { pub fn new(client: Client) -> Self { Self { client } } - pub fn start(&self, pod_info: &PodInfo) -> TailStatus { - let pod_info = pod_info.clone(); + pub fn start(&self, container_info: &ContainerInfo) -> TailStatus { + let container_info = container_info.clone(); let client = self.client.clone(); tokio::spawn(async move { - let tailer = LogTailer { client }; - if let Err(e) = tailer.tail_log(&pod_info).await { - error!("Error tailing logs for pod '{}': {}", pod_info.name, e); + let tailer = ContainerLogTailer { client }; + if let Err(e) = tailer.tail_container_logs(&container_info).await { + error!( + "Error tailing logs for container '{}' in pod '{}': {}", + container_info.container_name, container_info.pod_name, e + ); } }); TailStatus::Running } - pub async fn tail_log( + pub async fn tail_container_logs( &self, - pod_info: &PodInfo, + container_info: &ContainerInfo, ) -> Result<(), Box> { - let pods: Api = Api::namespaced(self.client.clone(), &pod_info.namespace); + let pods: Api = Api::namespaced(self.client.clone(), &container_info.namespace); info!( - "Starting log tailing for pod '{}' in 
namespace '{}' with {} containers", - pod_info.name, - pod_info.namespace, - pod_info.containers.len() + "Starting log tailing for container '{}' in pod '{}' (namespace '{}')", + container_info.container_name, container_info.pod_name, container_info.namespace ); - // For each container in the pod, fetch its logs - for container in &pod_info.containers { - info!( - "Fetching logs for container '{}' in pod '{}'", - container, pod_info.name - ); + let log_params = LogParams { + container: Some(container_info.container_name.clone()), + follow: false, // For now, just get recent logs, not streaming + tail_lines: Some(10), // Get last 10 lines + timestamps: true, + ..Default::default() + }; + + match pods.logs(&container_info.pod_name, &log_params).await { + Ok(logs) => { + // Process the logs - for now just print them + // In a full implementation, these would be sent to the Vector event pipeline + if !logs.is_empty() { + info!( + "=== Logs from container '{}' in pod '{}' ===", + container_info.container_name, container_info.pod_name + ); + let line_count = logs.lines().count(); + info!("Retrieved {} lines of logs", line_count); - let log_params = LogParams { - container: Some(container.clone()), - follow: false, // For now, just get recent logs, not streaming - tail_lines: Some(10), // Get last 10 lines - timestamps: true, - ..Default::default() - }; - - match pods.logs(&pod_info.name, &log_params).await { - Ok(logs) => { - // Process the logs - for now just print them - // In a full implementation, these would be sent to the Vector event pipeline - if !logs.is_empty() { - info!( - "=== Logs from pod '{}', container '{}' ===", - pod_info.name, container - ); - let line_count = logs.lines().count(); - info!("Retrieved {} lines of logs", line_count); - - for (idx, line) in logs.lines().take(5).enumerate() { - // Limit output for demo - info!("LOG[{}]: {}", idx + 1, line); - } - if line_count > 5 { - info!("... ({} more lines)", line_count - 5); - } - info!("=== End of logs ==="); - } else { - info!( - "No logs available for pod '{}', container '{}'", - pod_info.name, container - ); + for (idx, line) in logs.lines().take(5).enumerate() { + // Limit output for demo + info!("LOG[{}]: {}", idx + 1, line); } - } - Err(e) => { - warn!( - "Failed to fetch logs for pod '{}', container '{}': {}", - pod_info.name, container, e + if line_count > 5 { + info!("... 
({} more lines)", line_count - 5); + } + info!("=== End of logs ==="); + } else { + info!( + "No logs available for container '{}' in pod '{}'", + container_info.container_name, container_info.pod_name ); } } + Err(e) => { + warn!( + "Failed to fetch logs for container '{}' in pod '{}': {}", + container_info.container_name, container_info.pod_name, e + ); + } } - info!("Completed log tailing for pod '{}'", pod_info.name); + info!( + "Completed log tailing for container '{}' in pod '{}'", + container_info.container_name, container_info.pod_name + ); Ok(()) } } From bdc90e41d7b841065c66842f455ab2aa66bd1db8 Mon Sep 17 00:00:00 2001 From: titaneric Date: Sat, 11 Oct 2025 16:13:59 +0800 Subject: [PATCH 08/32] send the logs into channel --- src/sources/kubernetes_logs/mod.rs | 33 +++++++++++---- src/sources/kubernetes_logs/reconciler.rs | 50 +++++++++++++++-------- 2 files changed, 58 insertions(+), 25 deletions(-) diff --git a/src/sources/kubernetes_logs/mod.rs b/src/sources/kubernetes_logs/mod.rs index d30d30b41b73d..e2343279d6437 100644 --- a/src/sources/kubernetes_logs/mod.rs +++ b/src/sources/kubernetes_logs/mod.rs @@ -965,19 +965,38 @@ impl Source { // Only run reconciler when api_log is enabled let reconciler_fut = if let Some(pod_init_rx) = pod_init_rx { - let reconciler = reconciler::Reconciler::new(pod_state, client.clone()); + let (api_logs_tx, mut api_logs_rx) = futures::channel::mpsc::unbounded::(); + let reconciler = reconciler::Reconciler::new(pod_state, client.clone(), api_logs_tx); Some(async move { // Wait for pod store to be initialized match pod_init_rx.await { Ok(_) => { info!("Pod store initialized, starting reconciler"); + + // Start the reconciler match reconciler.handle_running_pods().await { - Ok(reconciler) => reconciler.run().await, - Err(error) => emit!(KubernetesLifecycleError { - error, - message: "Reconciler exited with an error.", - count: events_count, - }), + Ok(reconciler) => { + // Spawn reconciler run task + tokio::spawn(async move { + reconciler.run().await; + }); + + // Process incoming logs from the channel + while let Some(log_line) = api_logs_rx.next().await { + info!("API Log: {}", log_line); + // TODO: Convert log_line to Vector event and send to output + // This is where you would parse the log and send it through the Vector pipeline + } + + info!("Reconciler log processing completed"); + } + Err(error) => { + emit!(KubernetesLifecycleError { + error, + message: "Reconciler exited with an error.", + count: events_count, + }); + } } } Err(_) => { diff --git a/src/sources/kubernetes_logs/reconciler.rs b/src/sources/kubernetes_logs/reconciler.rs index d6c198bca4128..2f2b5aaa702a6 100644 --- a/src/sources/kubernetes_logs/reconciler.rs +++ b/src/sources/kubernetes_logs/reconciler.rs @@ -1,7 +1,10 @@ use super::pod_info::PodInfo; +use futures::SinkExt; +use futures::channel::mpsc; use k8s_openapi::api::core::v1::Pod; use kube::runtime::reflector::Store; use kube::{Api, Client, api::LogParams}; +use std::collections::HashMap; use tracing::{error, info, warn}; /// Container information for log tailing @@ -21,14 +24,20 @@ pub struct ContainerInfo { pub struct Reconciler { pod_state: Store, container_tailer: ContainerLogTailer, + tailer_state: HashMap, // Keyed by "namespace/pod/container" } impl Reconciler { - pub fn new(pod_state: Store, client: Client) -> Self { - let container_tailer = ContainerLogTailer::new(client.clone()); + pub fn new( + pod_state: Store, + client: Client, + log_sender: mpsc::UnboundedSender, + ) -> Self { + let container_tailer = 
ContainerLogTailer::new(client.clone(), log_sender); Self { pod_state, container_tailer, + tailer_state: HashMap::new(), } } @@ -44,7 +53,7 @@ impl Reconciler { } } - pub async fn handle_running_pods(self) -> crate::Result { + pub async fn handle_running_pods(mut self) -> crate::Result { info!("Performing reconciliation of pod states"); let pods: Vec<_> = self @@ -116,8 +125,16 @@ impl Reconciler { container_info.pod_name, container_info.namespace ); - let _status = self.container_tailer.start(&container_info); - // TODO: Store tailer status in a thread-safe way + let status = self.container_tailer.start(&container_info); + self.tailer_state.insert( + format!( + "{}/{}/{}", + container_info.namespace, + container_info.pod_name, + container_info.container_name + ), + status, + ); } } @@ -125,25 +142,29 @@ impl Reconciler { } } +#[derive(Clone)] struct ContainerLogTailer { client: Client, + log_sender: mpsc::UnboundedSender, } +#[derive(Clone)] enum TailStatus { Running, // Stopped, } impl ContainerLogTailer { - pub fn new(client: Client) -> Self { - Self { client } + pub fn new(client: Client, log_sender: mpsc::UnboundedSender) -> Self { + Self { client, log_sender } } pub fn start(&self, container_info: &ContainerInfo) -> TailStatus { let container_info = container_info.clone(); let client = self.client.clone(); + let log_sender = self.log_sender.clone(); tokio::spawn(async move { - let tailer = ContainerLogTailer { client }; + let mut tailer = ContainerLogTailer { client, log_sender }; if let Err(e) = tailer.tail_container_logs(&container_info).await { error!( "Error tailing logs for container '{}' in pod '{}': {}", @@ -155,7 +176,7 @@ impl ContainerLogTailer { } pub async fn tail_container_logs( - &self, + &mut self, container_info: &ContainerInfo, ) -> Result<(), Box> { let pods: Api = Api::namespaced(self.client.clone(), &container_info.namespace); @@ -182,17 +203,10 @@ impl ContainerLogTailer { "=== Logs from container '{}' in pod '{}' ===", container_info.container_name, container_info.pod_name ); - let line_count = logs.lines().count(); - info!("Retrieved {} lines of logs", line_count); - for (idx, line) in logs.lines().take(5).enumerate() { - // Limit output for demo - info!("LOG[{}]: {}", idx + 1, line); - } - if line_count > 5 { - info!("... 
({} more lines)", line_count - 5); + for (_, line) in logs.lines().take(5).enumerate() { + let _ = self.log_sender.send(String::from(line)).await; } - info!("=== End of logs ==="); } else { info!( "No logs available for container '{}' in pod '{}'", From e42a5e43e3bd699d879005e39efc261d1a739e66 Mon Sep 17 00:00:00 2001 From: titaneric Date: Sat, 11 Oct 2025 18:22:19 +0800 Subject: [PATCH 09/32] stream log and record its timestamp --- src/sources/kubernetes_logs/reconciler.rs | 212 +++++++++++++++++----- 1 file changed, 164 insertions(+), 48 deletions(-) diff --git a/src/sources/kubernetes_logs/reconciler.rs b/src/sources/kubernetes_logs/reconciler.rs index 2f2b5aaa702a6..e85daea88ac40 100644 --- a/src/sources/kubernetes_logs/reconciler.rs +++ b/src/sources/kubernetes_logs/reconciler.rs @@ -1,11 +1,13 @@ use super::pod_info::PodInfo; +use chrono::{DateTime, FixedOffset, Utc}; use futures::SinkExt; use futures::channel::mpsc; +use futures::{AsyncBufReadExt, TryStreamExt}; use k8s_openapi::api::core::v1::Pod; use kube::runtime::reflector::Store; use kube::{Api, Client, api::LogParams}; use std::collections::HashMap; -use tracing::{error, info, warn}; +use tracing::{error, info, trace, warn}; /// Container information for log tailing #[derive(Clone, Debug)] @@ -21,10 +23,81 @@ pub struct ContainerInfo { pub pod_uid: String, } +/// Container log information with timestamp tracking +/// Similar to docker_logs ContainerLogInfo for position tracking +#[derive(Clone, Debug)] +struct ContainerLogInfo { + /// Container information + container_info: ContainerInfo, + /// Timestamp of when this tracking started + created: DateTime, + /// Timestamp of last log message processed + last_log: Option>, +} + +impl ContainerLogInfo { + fn new(container_info: ContainerInfo, created: DateTime) -> Self { + Self { + container_info, + created, + last_log: None, + } + } + + /// Get the timestamp from which logs should be fetched + /// Only logs after this point need to be fetched + fn log_since(&self) -> DateTime { + self.last_log + .map(|dt| dt.with_timezone(&Utc)) + .unwrap_or(self.created) + } + + /// Update the last log timestamp when processing a log line + /// Returns true if the timestamp was successfully parsed and updated + fn update_last_log_timestamp(&mut self, log_line: &str) -> bool { + // Kubernetes log format typically starts with RFC3339 timestamp + // e.g., "2023-10-11T10:30:00.123456789Z message content" + if let Some(timestamp_end) = log_line.find(' ') { + let timestamp_str = &log_line[..timestamp_end]; + if let Ok(timestamp) = DateTime::parse_from_rfc3339(timestamp_str) { + // Only update if this timestamp is newer than our last recorded timestamp + if let Some(last) = self.last_log { + if timestamp > last { + self.last_log = Some(timestamp); + return true; + } + } else { + // First timestamp we've seen + self.last_log = Some(timestamp); + return true; + } + } else { + // Try to parse ISO 8601 format without timezone (common in k8s logs) + if let Ok(naive_dt) = + chrono::NaiveDateTime::parse_from_str(timestamp_str, "%Y-%m-%dT%H:%M:%S%.f") + { + let timestamp = + DateTime::::from_naive_utc_and_offset(naive_dt, Utc).fixed_offset(); + if let Some(last) = self.last_log { + if timestamp > last { + self.last_log = Some(timestamp); + return true; + } + } else { + self.last_log = Some(timestamp); + return true; + } + } + } + } + false + } +} + pub struct Reconciler { pod_state: Store, container_tailer: ContainerLogTailer, - tailer_state: HashMap, // Keyed by "namespace/pod/container" + tailer_state: 
HashMap, // Keyed by "namespace/pod/container" } impl Reconciler { @@ -125,16 +198,25 @@ impl Reconciler { container_info.pod_name, container_info.namespace ); - let status = self.container_tailer.start(&container_info); - self.tailer_state.insert( - format!( - "{}/{}/{}", - container_info.namespace, - container_info.pod_name, - container_info.container_name - ), - status, + + let key = format!( + "{}/{}/{}", + container_info.namespace, + container_info.pod_name, + container_info.container_name ); + + // Check if we already have tracking info for this container + let log_info = if let Some(existing_info) = self.tailer_state.get(&key) { + // Reuse existing timestamp tracking + existing_info.clone() + } else { + // Create new tracking info starting from now + ContainerLogInfo::new(container_info.clone(), Utc::now()) + }; + + self.container_tailer.start(&log_info); + self.tailer_state.insert(key, log_info); } } @@ -148,83 +230,117 @@ struct ContainerLogTailer { log_sender: mpsc::UnboundedSender, } -#[derive(Clone)] -enum TailStatus { - Running, - // Stopped, -} +// #[derive(Clone)] +// enum TailStatus { +// Running, +// // Stopped, +// } impl ContainerLogTailer { pub fn new(client: Client, log_sender: mpsc::UnboundedSender) -> Self { Self { client, log_sender } } - pub fn start(&self, container_info: &ContainerInfo) -> TailStatus { - let container_info = container_info.clone(); + pub fn start(&self, log_info: &ContainerLogInfo) { + let mut log_info = log_info.clone(); let client = self.client.clone(); let log_sender = self.log_sender.clone(); tokio::spawn(async move { let mut tailer = ContainerLogTailer { client, log_sender }; - if let Err(e) = tailer.tail_container_logs(&container_info).await { + if let Err(e) = tailer.tail_container_logs(&mut log_info).await { error!( "Error tailing logs for container '{}' in pod '{}': {}", - container_info.container_name, container_info.pod_name, e + log_info.container_info.container_name, log_info.container_info.pod_name, e ); } }); - TailStatus::Running } pub async fn tail_container_logs( &mut self, - container_info: &ContainerInfo, + log_info: &mut ContainerLogInfo, ) -> Result<(), Box> { - let pods: Api = Api::namespaced(self.client.clone(), &container_info.namespace); + let pods: Api = + Api::namespaced(self.client.clone(), &log_info.container_info.namespace); info!( - "Starting log tailing for container '{}' in pod '{}' (namespace '{}')", - container_info.container_name, container_info.pod_name, container_info.namespace + "Starting streaming log tail for container '{}' in pod '{}' (namespace '{}') from timestamp {}", + log_info.container_info.container_name, + log_info.container_info.pod_name, + log_info.container_info.namespace, + log_info.log_since() ); let log_params = LogParams { - container: Some(container_info.container_name.clone()), - follow: false, // For now, just get recent logs, not streaming - tail_lines: Some(10), // Get last 10 lines + container: Some(log_info.container_info.container_name.clone()), + follow: true, + since_time: Some(log_info.log_since()), timestamps: true, ..Default::default() }; - match pods.logs(&container_info.pod_name, &log_params).await { - Ok(logs) => { - // Process the logs - for now just print them - // In a full implementation, these would be sent to the Vector event pipeline - if !logs.is_empty() { - info!( - "=== Logs from container '{}' in pod '{}' ===", - container_info.container_name, container_info.pod_name - ); + // Use log_stream for continuous streaming instead of one-shot logs + match pods + 
.log_stream(&log_info.container_info.pod_name, &log_params) + .await + { + Ok(log_stream) => { + info!( + "Started streaming logs from container '{}' in pod '{}'", + log_info.container_info.container_name, log_info.container_info.pod_name + ); - for (_, line) in logs.lines().take(5).enumerate() { - let _ = self.log_sender.send(String::from(line)).await; + let mut lines = log_stream.lines(); + let mut log_count = 0; + + // Process the stream of log lines continuously + while let Some(line_result) = lines.try_next().await? { + // Update timestamp tracking before sending + let timestamp_updated = log_info.update_last_log_timestamp(&line_result); + if timestamp_updated { + trace!( + "Updated last log timestamp for container '{}' in pod '{}' to: {:?}", + log_info.container_info.container_name, + log_info.container_info.pod_name, + log_info.last_log + ); + } + + // Send the log line to the channel + if let Err(_) = self.log_sender.send(line_result).await { + warn!( + "Log channel closed for container '{}' in pod '{}', stopping stream", + log_info.container_info.container_name, + log_info.container_info.pod_name + ); + break; + } + + log_count += 1; + + // Log progress periodically + if log_count % 100 == 0 { + trace!( + "Processed {} log lines from container '{}' in pod '{}'. Last timestamp: {:?}", + log_count, + log_info.container_info.container_name, + log_info.container_info.pod_name, + log_info.last_log + ); } - } else { - info!( - "No logs available for container '{}' in pod '{}'", - container_info.container_name, container_info.pod_name - ); } } Err(e) => { warn!( - "Failed to fetch logs for container '{}' in pod '{}': {}", - container_info.container_name, container_info.pod_name, e + "Failed to start log stream for container '{}' in pod '{}': {}", + log_info.container_info.container_name, log_info.container_info.pod_name, e ); } } info!( - "Completed log tailing for container '{}' in pod '{}'", - container_info.container_name, container_info.pod_name + "Completed streaming log tail for container '{}' in pod '{}'", + log_info.container_info.container_name, log_info.container_info.pod_name ); Ok(()) } From bceca79f2381293e5b0d26650d6a87984a17f2e3 Mon Sep 17 00:00:00 2001 From: titaneric Date: Sat, 11 Oct 2025 21:49:05 +0800 Subject: [PATCH 10/32] rename symbol --- src/sources/kubernetes_logs/reconciler.rs | 94 +++++++++-------------- 1 file changed, 35 insertions(+), 59 deletions(-) diff --git a/src/sources/kubernetes_logs/reconciler.rs b/src/sources/kubernetes_logs/reconciler.rs index e85daea88ac40..34adb19336ccf 100644 --- a/src/sources/kubernetes_logs/reconciler.rs +++ b/src/sources/kubernetes_logs/reconciler.rs @@ -7,7 +7,7 @@ use k8s_openapi::api::core::v1::Pod; use kube::runtime::reflector::Store; use kube::{Api, Client, api::LogParams}; use std::collections::HashMap; -use tracing::{error, info, trace, warn}; +use tracing::{info, trace, warn}; /// Container information for log tailing #[derive(Clone, Debug)] @@ -96,8 +96,8 @@ impl ContainerLogInfo { pub struct Reconciler { pod_state: Store, - container_tailer: ContainerLogTailer, - tailer_state: HashMap, // Keyed by "namespace/pod/container" + esb: EventStreamBuilder, + states: HashMap, // Keyed by "namespace/pod/container" } impl Reconciler { @@ -106,11 +106,14 @@ impl Reconciler { client: Client, log_sender: mpsc::UnboundedSender, ) -> Self { - let container_tailer = ContainerLogTailer::new(client.clone(), log_sender); + let esb = EventStreamBuilder { + client: client.clone(), + log_sender, + }; Self { pod_state, - container_tailer, 
- tailer_state: HashMap::new(), + esb, + states: HashMap::new(), } } @@ -206,17 +209,17 @@ impl Reconciler { container_info.container_name ); - // Check if we already have tracking info for this container - let log_info = if let Some(existing_info) = self.tailer_state.get(&key) { - // Reuse existing timestamp tracking - existing_info.clone() - } else { - // Create new tracking info starting from now - ContainerLogInfo::new(container_info.clone(), Utc::now()) - }; - - self.container_tailer.start(&log_info); - self.tailer_state.insert(key, log_info); + // // Check if we already have tracking info for this container + // let log_info = if let Some(existing_info) = self.states.get(&key) { + // // Reuse existing timestamp tracking + // existing_info.clone() + // } else { + // // Create new tracking info starting from now + // ContainerLogInfo::new(container_info.clone(), Utc::now()) + // }; + + self.states + .insert(key, self.esb.start(container_info.clone())); } } @@ -225,41 +228,29 @@ impl Reconciler { } #[derive(Clone)] -struct ContainerLogTailer { +struct EventStreamBuilder { client: Client, log_sender: mpsc::UnboundedSender, } -// #[derive(Clone)] -// enum TailStatus { -// Running, -// // Stopped, -// } - -impl ContainerLogTailer { - pub fn new(client: Client, log_sender: mpsc::UnboundedSender) -> Self { - Self { client, log_sender } - } +#[derive(Clone)] +enum TailerState { + Running, + // Stopped, +} - pub fn start(&self, log_info: &ContainerLogInfo) { - let mut log_info = log_info.clone(); - let client = self.client.clone(); - let log_sender = self.log_sender.clone(); +impl EventStreamBuilder { + pub fn start(&self, container_info: ContainerInfo) -> TailerState { + let this = self.clone(); tokio::spawn(async move { - let mut tailer = ContainerLogTailer { client, log_sender }; - if let Err(e) = tailer.tail_container_logs(&mut log_info).await { - error!( - "Error tailing logs for container '{}' in pod '{}': {}", - log_info.container_info.container_name, log_info.container_info.pod_name, e - ); - } + let log_info = ContainerLogInfo::new(container_info, Utc::now()); + this.run_event_stream(log_info).await; + return; }); + TailerState::Running } - pub async fn tail_container_logs( - &mut self, - log_info: &mut ContainerLogInfo, - ) -> Result<(), Box> { + pub async fn run_event_stream(mut self, mut log_info: ContainerLogInfo) { let pods: Api = Api::namespaced(self.client.clone(), &log_info.container_info.namespace); @@ -291,10 +282,9 @@ impl ContainerLogTailer { ); let mut lines = log_stream.lines(); - let mut log_count = 0; // Process the stream of log lines continuously - while let Some(line_result) = lines.try_next().await? { + while let Ok(Some(line_result)) = lines.try_next().await { // Update timestamp tracking before sending let timestamp_updated = log_info.update_last_log_timestamp(&line_result); if timestamp_updated { @@ -315,19 +305,6 @@ impl ContainerLogTailer { ); break; } - - log_count += 1; - - // Log progress periodically - if log_count % 100 == 0 { - trace!( - "Processed {} log lines from container '{}' in pod '{}'. 
Last timestamp: {:?}", - log_count, - log_info.container_info.container_name, - log_info.container_info.pod_name, - log_info.last_log - ); - } } } Err(e) => { @@ -342,6 +319,5 @@ impl ContainerLogTailer { "Completed streaming log tail for container '{}' in pod '{}'", log_info.container_info.container_name, log_info.container_info.pod_name ); - Ok(()) } } From ea5f81e475a5fa8667ff244ae97925c301a25066 Mon Sep 17 00:00:00 2001 From: titaneric Date: Wed, 15 Oct 2025 21:45:48 +0800 Subject: [PATCH 11/32] simplify key generation used in tracking container logs status --- src/sources/kubernetes_logs/reconciler.rs | 39 +++++++++++++++++++---- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/src/sources/kubernetes_logs/reconciler.rs b/src/sources/kubernetes_logs/reconciler.rs index 34adb19336ccf..f99878af0cf01 100644 --- a/src/sources/kubernetes_logs/reconciler.rs +++ b/src/sources/kubernetes_logs/reconciler.rs @@ -7,8 +7,38 @@ use k8s_openapi::api::core::v1::Pod; use kube::runtime::reflector::Store; use kube::{Api, Client, api::LogParams}; use std::collections::HashMap; +use std::fmt; use tracing::{info, trace, warn}; +/// Container key for identifying unique container instances +/// Format: "{namespace}/{pod_name}/{container_name}" +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +pub struct ContainerKey(String); + +impl fmt::Display for ContainerKey { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +impl From<&ContainerInfo> for ContainerKey { + fn from(container_info: &ContainerInfo) -> Self { + ContainerKey(format!( + "{}/{}/{}", + container_info.namespace, container_info.pod_name, container_info.container_name + )) + } +} + +impl From<(&PodInfo, &str)> for ContainerKey { + fn from((pod_info, container_name): (&PodInfo, &str)) -> Self { + ContainerKey(format!( + "{}/{}/{}", + pod_info.namespace, pod_info.name, container_name + )) + } +} + /// Container information for log tailing #[derive(Clone, Debug)] pub struct ContainerInfo { @@ -97,7 +127,7 @@ impl ContainerLogInfo { pub struct Reconciler { pod_state: Store, esb: EventStreamBuilder, - states: HashMap, // Keyed by "namespace/pod/container" + states: HashMap, // Keyed by ContainerKey } impl Reconciler { @@ -202,12 +232,7 @@ impl Reconciler { container_info.namespace ); - let key = format!( - "{}/{}/{}", - container_info.namespace, - container_info.pod_name, - container_info.container_name - ); + let key = ContainerKey::from(&container_info); // // Check if we already have tracking info for this container // let log_info = if let Some(existing_info) = self.states.get(&key) { From 96b71119b10e4a78b00d7e4b61f9e6c4a44ff4e2 Mon Sep 17 00:00:00 2001 From: titaneric Date: Sun, 12 Oct 2025 00:57:38 +0800 Subject: [PATCH 12/32] create new pod watcher for reconcile the logs --- src/sources/kubernetes_logs/mod.rs | 58 ++++-- src/sources/kubernetes_logs/reconciler.rs | 215 +++++++++++++--------- 2 files changed, 172 insertions(+), 101 deletions(-) diff --git a/src/sources/kubernetes_logs/mod.rs b/src/sources/kubernetes_logs/mod.rs index e2343279d6437..27b81b681cbea 100644 --- a/src/sources/kubernetes_logs/mod.rs +++ b/src/sources/kubernetes_logs/mod.rs @@ -745,6 +745,21 @@ impl Source { ) .backoff(watcher::DefaultBackoff::default()); + // Create a separate watcher for the reconciler if api_log is enabled + let reconciler_pod_watcher = if api_log { + let reconciler_pods = Api::::all(client.clone()); + let reconciler_watcher = watcher( + reconciler_pods, + watcher::Config { + ..Default::default() 
+ }, + ) + .backoff(watcher::DefaultBackoff::default()); + Some(reconciler_watcher) + } else { + None + }; + let pod_store_w = reflector::store::Writer::default(); let pod_state = pod_store_w.as_reader(); let pod_cacher = MetaCache::new(); @@ -964,36 +979,49 @@ impl Source { let event_processing_loop = out.send_event_stream(&mut stream); // Only run reconciler when api_log is enabled - let reconciler_fut = if let Some(pod_init_rx) = pod_init_rx { + let reconciler_fut = if let (Some(pod_init_rx), Some(reconciler_watcher)) = + (pod_init_rx, reconciler_pod_watcher) + { let (api_logs_tx, mut api_logs_rx) = futures::channel::mpsc::unbounded::(); - let reconciler = reconciler::Reconciler::new(pod_state, client.clone(), api_logs_tx); + let reconciler = reconciler::Reconciler::new( + pod_state.clone(), + client.clone(), + api_logs_tx, + reconciler_watcher, + ); Some(async move { // Wait for pod store to be initialized match pod_init_rx.await { Ok(_) => { info!("Pod store initialized, starting reconciler"); - // Start the reconciler - match reconciler.handle_running_pods().await { - Ok(reconciler) => { - // Spawn reconciler run task - tokio::spawn(async move { + // Start the reconciler with initial full reconciliation + let mut reconciler = reconciler; + match reconciler.perform_full_reconciliation().await { + Ok(_) => { + info!("Initial reconciliation completed, starting event-driven reconciler"); + // Spawn reconciler run task and log processing in parallel + let reconciler_task = tokio::spawn(async move { reconciler.run().await; }); - // Process incoming logs from the channel - while let Some(log_line) = api_logs_rx.next().await { - info!("API Log: {}", log_line); - // TODO: Convert log_line to Vector event and send to output - // This is where you would parse the log and send it through the Vector pipeline - } + let log_processing_task = tokio::spawn(async move { + // Process incoming logs from the channel + while let Some(log_line) = api_logs_rx.next().await { + info!("API Log: {}", log_line); + // TODO: Convert log_line to Vector event and send to output + // This is where you would parse the log and send it through the Vector pipeline + } + info!("Reconciler log processing completed"); + }); - info!("Reconciler log processing completed"); + // Wait for both tasks to complete + let _ = tokio::try_join!(reconciler_task, log_processing_task); } Err(error) => { emit!(KubernetesLifecycleError { error, - message: "Reconciler exited with an error.", + message: "Initial reconciliation failed.", count: events_count, }); } diff --git a/src/sources/kubernetes_logs/reconciler.rs b/src/sources/kubernetes_logs/reconciler.rs index f99878af0cf01..251f975a576d9 100644 --- a/src/sources/kubernetes_logs/reconciler.rs +++ b/src/sources/kubernetes_logs/reconciler.rs @@ -2,12 +2,15 @@ use super::pod_info::PodInfo; use chrono::{DateTime, FixedOffset, Utc}; use futures::SinkExt; use futures::channel::mpsc; -use futures::{AsyncBufReadExt, TryStreamExt}; +use futures::{AsyncBufReadExt, StreamExt, TryStreamExt}; +use futures_util::Stream; use k8s_openapi::api::core::v1::Pod; use kube::runtime::reflector::Store; +use kube::runtime::watcher; use kube::{Api, Client, api::LogParams}; use std::collections::HashMap; use std::fmt; +use std::pin::Pin; use tracing::{info, trace, warn}; /// Container key for identifying unique container instances @@ -128,14 +131,19 @@ pub struct Reconciler { pod_state: Store, esb: EventStreamBuilder, states: HashMap, // Keyed by ContainerKey + pod_watcher: Pin>> + Send>>, } impl Reconciler { - 
pub fn new( + pub fn new( pod_state: Store, client: Client, log_sender: mpsc::UnboundedSender, - ) -> Self { + pod_watcher: S, + ) -> Self + where + S: Stream>> + Send + 'static, + { let esb = EventStreamBuilder { client: client.clone(), log_sender, @@ -144,23 +152,117 @@ impl Reconciler { pod_state, esb, states: HashMap::new(), + pod_watcher: Box::pin(pod_watcher), } } - pub async fn run(&self) { - // TODO: replace timer with watcher for pod state changes and reconcile accordingly - let mut timer = tokio::time::interval(tokio::time::Duration::from_secs(10)); - loop { - tokio::select! { - _ = timer.tick() => { - // self.perform_reconciliation().await; + pub async fn run(mut self) { + info!("Starting reconciler with pod watcher integration"); + + // Listen to pod watcher events for real-time reconciliation + while let Some(event) = self.pod_watcher.next().await { + match event { + Ok(watcher::Event::Apply(pod)) => { + let pod_info = PodInfo::from(&pod); + if let Some(phase) = &pod_info.phase { + if phase == "Running" { + info!( + "Pod '{}' is now running, starting log reconciliation", + pod_info.name + ); + if let Err(e) = self.reconcile_pod_containers(&pod_info).await { + warn!("Failed to reconcile pod '{}': {}", pod_info.name, e); + } + } + } } + Ok(watcher::Event::Delete(pod)) => { + let pod_info = PodInfo::from(&pod); + info!("Pod '{}' deleted, cleaning up log tailers", pod_info.name); + self.cleanup_pod_tailers(&pod_info).await; + } + Ok(watcher::Event::Init) => { + info!("Pod watcher initialized, performing full reconciliation"); + if let Err(e) = self.perform_full_reconciliation().await { + warn!("Failed to perform full reconciliation: {}", e); + } + } + Ok(watcher::Event::InitApply(pod)) => { + let pod_info = PodInfo::from(&pod); + if let Some(phase) = &pod_info.phase { + if phase == "Running" { + info!( + "Pod '{}' is running during init, starting log reconciliation", + pod_info.name + ); + if let Err(e) = self.reconcile_pod_containers(&pod_info).await { + warn!( + "Failed to reconcile pod '{}' during init: {}", + pod_info.name, e + ); + } + } + } + } + Ok(watcher::Event::InitDone) => { + info!("Pod watcher init complete, performing final reconciliation"); + if let Err(e) = self.perform_full_reconciliation().await { + warn!("Failed to perform final reconciliation: {}", e); + } + } + Err(e) => { + warn!("Pod watcher error: {}", e); + } + } + } + + info!("Reconciler pod watcher stream ended"); + } + + /// Reconcile containers for a specific pod + async fn reconcile_pod_containers(&mut self, pod_info: &PodInfo) -> crate::Result<()> { + for container_name in &pod_info.containers { + let container_info = ContainerInfo { + pod_name: pod_info.name.clone(), + namespace: pod_info.namespace.clone(), + container_name: container_name.clone(), + pod_uid: pod_info.uid.clone(), + }; + + let key = ContainerKey::from(&container_info); + + // Only start tailer if not already running + if !self.states.contains_key(&key) { + info!( + "Starting tailer for container '{}' in pod '{}' (namespace '{}')", + container_info.container_name, + container_info.pod_name, + container_info.namespace + ); + + self.states.insert(key, self.esb.start(container_info)); } } + Ok(()) } - pub async fn handle_running_pods(mut self) -> crate::Result { - info!("Performing reconciliation of pod states"); + /// Clean up tailers for a deleted pod + async fn cleanup_pod_tailers(&mut self, pod_info: &PodInfo) { + for container_name in &pod_info.containers { + let key = ContainerKey::from((pod_info, container_name.as_str())); + + if 
self.states.remove(&key).is_some() { + info!( + "Cleaned up tailer for container '{}' in deleted pod '{}'", + container_name, pod_info.name + ); + } + } + } + + /// Perform full reconciliation of all running pods + pub async fn perform_full_reconciliation(&mut self) -> crate::Result<()> { + info!("Performing full reconciliation of pod states"); let pods: Vec<_> = self .pod_state @@ -170,85 +272,27 @@ impl Reconciler { .collect(); if pods.is_empty() { - warn!("No pods found in pod store. The store might not be populated yet."); - return Ok(self); + warn!("No pods found in pod store during full reconciliation"); + return Ok(()); } - info!("Found {} pods in store", pods.len()); - - // Filter for running pods and start tailing their logs - let running_pods: Vec<_> = pods - .into_iter() - .filter(|pod_info| match &pod_info.phase { - Some(phase) if phase == "Running" => { - info!( - "Pod '{}' is running with {} containers", - pod_info.name, - pod_info.containers.len() - ); - true - } - Some(phase) => { - info!("Skipping pod '{}' in phase '{}'", pod_info.name, phase); - false - } - None => { - info!("Skipping pod '{}' with unknown phase", pod_info.name); - false - } - }) - .collect(); - - if running_pods.is_empty() { - info!("No running pods found to tail logs from"); - } else { - // Convert pods to container info and start tailers - let containers: Vec = running_pods - .iter() - .flat_map(|pod_info| { - pod_info - .containers - .iter() - .map(|container_name| ContainerInfo { - pod_name: pod_info.name.clone(), - namespace: pod_info.namespace.clone(), - container_name: container_name.clone(), - pod_uid: pod_info.uid.clone(), - }) - }) - .collect(); - - info!( - "Starting log tailing for {} containers across {} running pods", - containers.len(), - running_pods.len() - ); - - for container_info in containers { - info!( - "Starting tailer for container '{}' in pod '{}' (namespace '{}')", - container_info.container_name, - container_info.pod_name, - container_info.namespace - ); + info!("Found {} pods in store for full reconciliation", pods.len()); - let key = ContainerKey::from(&container_info); - - // // Check if we already have tracking info for this container - // let log_info = if let Some(existing_info) = self.states.get(&key) { - // // Reuse existing timestamp tracking - // existing_info.clone() - // } else { - // // Create new tracking info starting from now - // ContainerLogInfo::new(container_info.clone(), Utc::now()) - // }; - - self.states - .insert(key, self.esb.start(container_info.clone())); + // Filter for running pods and reconcile their containers + for pod_info in pods { + if let Some(phase) = &pod_info.phase { + if phase == "Running" { + if let Err(e) = self.reconcile_pod_containers(&pod_info).await { + warn!( + "Failed to reconcile pod '{}' during full reconciliation: {}", + pod_info.name, e + ); + } + } } } - Ok(self) + Ok(()) } } @@ -295,7 +339,6 @@ impl EventStreamBuilder { ..Default::default() }; - // Use log_stream for continuous streaming instead of one-shot logs match pods .log_stream(&log_info.container_info.pod_name, &log_params) .await From b84761852351be6ebd3ad9490a8c4c696816ef6a Mon Sep 17 00:00:00 2001 From: titaneric Date: Sun, 12 Oct 2025 01:11:16 +0800 Subject: [PATCH 13/32] remove earlier introduced `init_notify` used to block the reconciler --- src/kubernetes/reflector.rs | 13 +--- src/sources/kubernetes_logs/mod.rs | 85 ++++++----------------- src/sources/kubernetes_logs/reconciler.rs | 57 ++------------- 3 files changed, 28 insertions(+), 127 deletions(-) diff 
--git a/src/kubernetes/reflector.rs b/src/kubernetes/reflector.rs index 80476a56f865d..0ddaa5242f6ba 100644 --- a/src/kubernetes/reflector.rs +++ b/src/kubernetes/reflector.rs @@ -8,19 +8,17 @@ use kube::{ Resource, runtime::{reflector::store, watcher}, }; -use tokio::{pin, sync::oneshot}; +use tokio::pin; use tokio_util::time::DelayQueue; use super::meta_cache::{MetaCache, MetaDescribe}; /// Handles events from a [`kube::runtime::watcher()`] to delay the application of Deletion events. -/// Optionally notifies via `init_notify` when the initial state is loaded (on InitDone event). pub async fn custom_reflector( mut store: store::Writer, mut meta_cache: MetaCache, stream: W, delay_deletion: Duration, - init_notify: Option>, ) where K: Resource + Clone + std::fmt::Debug, K::DynamicType: Eq + Hash + Clone, @@ -29,7 +27,6 @@ pub async fn custom_reflector( pin!(stream); let mut delay_queue = DelayQueue::default(); let mut init_buffer_meta = Vec::new(); - let mut init_notify = init_notify; loop { tokio::select! { result = stream.next() => { @@ -87,11 +84,6 @@ pub async fn custom_reflector( }); init_buffer_meta.clear(); - - // Notify initialization complete if requested - if let Some(tx) = init_notify.take() { - let _ = tx.send(()); - } } } }, @@ -167,7 +159,6 @@ mod tests { meta_cache, rx, Duration::from_secs(1), - None, )); tokio::time::sleep(Duration::from_secs(1)).await; assert_eq!(store.get(&ObjectRef::from_obj(&cm)).as_deref(), Some(&cm)); @@ -197,7 +188,6 @@ mod tests { meta_cache, rx, Duration::from_secs(2), - None, )); // Ensure the Resource is still available after deletion tokio::time::sleep(Duration::from_secs(1)).await; @@ -235,7 +225,6 @@ mod tests { meta_cache, rx, Duration::from_secs(2), - None, )); tokio::time::sleep(Duration::from_secs(1)).await; // Ensure the Resource is still available after deletion diff --git a/src/sources/kubernetes_logs/mod.rs b/src/sources/kubernetes_logs/mod.rs index 27b81b681cbea..e2c88928ea6f0 100644 --- a/src/sources/kubernetes_logs/mod.rs +++ b/src/sources/kubernetes_logs/mod.rs @@ -21,7 +21,7 @@ use kube::{ }; use lifecycle::Lifecycle; use serde_with::serde_as; -use tokio::sync::oneshot; + use vector_lib::{ EstimatedJsonEncodedSizeOf, TimeZone, codecs::{BytesDeserializer, BytesDeserializerConfig}, @@ -764,20 +764,11 @@ impl Source { let pod_state = pod_store_w.as_reader(); let pod_cacher = MetaCache::new(); - // Create oneshot channel to notify when pod store is initialized - let (pod_init_tx, pod_init_rx) = if api_log { - let (tx, rx) = oneshot::channel(); - (Some(tx), Some(rx)) - } else { - (None, None) - }; - reflectors.push(tokio::spawn(custom_reflector( pod_store_w, pod_cacher, pod_watcher, delay_deletion, - pod_init_tx, ))); // ----------------------------------------------------------------- @@ -802,7 +793,6 @@ impl Source { MetaCache::new(), ns_watcher, delay_deletion, - None, ))); } @@ -828,7 +818,6 @@ impl Source { node_cacher, node_watcher, delay_deletion, - None, ))); let paths_provider = K8sPathsProvider::new( @@ -979,60 +968,30 @@ impl Source { let event_processing_loop = out.send_event_stream(&mut stream); // Only run reconciler when api_log is enabled - let reconciler_fut = if let (Some(pod_init_rx), Some(reconciler_watcher)) = - (pod_init_rx, reconciler_pod_watcher) - { + let reconciler_fut = if let Some(reconciler_watcher) = reconciler_pod_watcher { let (api_logs_tx, mut api_logs_rx) = futures::channel::mpsc::unbounded::(); - let reconciler = reconciler::Reconciler::new( - pod_state.clone(), - client.clone(), - api_logs_tx, - 
reconciler_watcher, - ); + let reconciler = + reconciler::Reconciler::new(client.clone(), api_logs_tx, reconciler_watcher); Some(async move { - // Wait for pod store to be initialized - match pod_init_rx.await { - Ok(_) => { - info!("Pod store initialized, starting reconciler"); - - // Start the reconciler with initial full reconciliation - let mut reconciler = reconciler; - match reconciler.perform_full_reconciliation().await { - Ok(_) => { - info!("Initial reconciliation completed, starting event-driven reconciler"); - // Spawn reconciler run task and log processing in parallel - let reconciler_task = tokio::spawn(async move { - reconciler.run().await; - }); - - let log_processing_task = tokio::spawn(async move { - // Process incoming logs from the channel - while let Some(log_line) = api_logs_rx.next().await { - info!("API Log: {}", log_line); - // TODO: Convert log_line to Vector event and send to output - // This is where you would parse the log and send it through the Vector pipeline - } - info!("Reconciler log processing completed"); - }); - - // Wait for both tasks to complete - let _ = tokio::try_join!(reconciler_task, log_processing_task); - } - Err(error) => { - emit!(KubernetesLifecycleError { - error, - message: "Initial reconciliation failed.", - count: events_count, - }); - } - } - } - Err(_) => { - warn!( - "Pod store initialization signal was dropped, reconciler will not start" - ); + info!("Starting event-driven reconciler"); + + // Spawn reconciler run task and log processing in parallel + let reconciler_task = tokio::spawn(async move { + reconciler.run().await; + }); + + let log_processing_task = tokio::spawn(async move { + // Process incoming logs from the channel + while let Some(log_line) = api_logs_rx.next().await { + info!("API Log: {}", log_line); + // TODO: Convert log_line to Vector event and send to output + // This is where you would parse the log and send it through the Vector pipeline } - } + info!("Reconciler log processing completed"); + }); + + // Wait for both tasks to complete + let _ = tokio::try_join!(reconciler_task, log_processing_task); }) } else { None diff --git a/src/sources/kubernetes_logs/reconciler.rs b/src/sources/kubernetes_logs/reconciler.rs index 251f975a576d9..893ca822d9287 100644 --- a/src/sources/kubernetes_logs/reconciler.rs +++ b/src/sources/kubernetes_logs/reconciler.rs @@ -5,7 +5,6 @@ use futures::channel::mpsc; use futures::{AsyncBufReadExt, StreamExt, TryStreamExt}; use futures_util::Stream; use k8s_openapi::api::core::v1::Pod; -use kube::runtime::reflector::Store; use kube::runtime::watcher; use kube::{Api, Client, api::LogParams}; use std::collections::HashMap; @@ -128,19 +127,13 @@ impl ContainerLogInfo { } pub struct Reconciler { - pod_state: Store, esb: EventStreamBuilder, states: HashMap, // Keyed by ContainerKey pod_watcher: Pin>> + Send>>, } impl Reconciler { - pub fn new( - pod_state: Store, - client: Client, - log_sender: mpsc::UnboundedSender, - pod_watcher: S, - ) -> Self + pub fn new(client: Client, log_sender: mpsc::UnboundedSender, pod_watcher: S) -> Self where S: Stream>> + Send + 'static, { @@ -149,7 +142,6 @@ impl Reconciler { log_sender, }; Self { - pod_state, esb, states: HashMap::new(), pod_watcher: Box::pin(pod_watcher), @@ -182,10 +174,7 @@ impl Reconciler { self.cleanup_pod_tailers(&pod_info).await; } Ok(watcher::Event::Init) => { - info!("Pod watcher initialized, performing full reconciliation"); - if let Err(e) = self.perform_full_reconciliation().await { - warn!("Failed to perform full reconciliation: 
{}", e); - } + info!("Pod watcher initialized - ready for event-driven reconciliation"); } Ok(watcher::Event::InitApply(pod)) => { let pod_info = PodInfo::from(&pod); @@ -205,10 +194,9 @@ impl Reconciler { } } Ok(watcher::Event::InitDone) => { - info!("Pod watcher init complete, performing final reconciliation"); - if let Err(e) = self.perform_full_reconciliation().await { - warn!("Failed to perform final reconciliation: {}", e); - } + info!( + "Pod watcher init complete - fully ready for event-driven reconciliation" + ); } Err(e) => { warn!("Pod watcher error: {}", e); @@ -259,41 +247,6 @@ impl Reconciler { } } } - - /// Perform full reconciliation of all running pods - pub async fn perform_full_reconciliation(&mut self) -> crate::Result<()> { - info!("Performing full reconciliation of pod states"); - - let pods: Vec<_> = self - .pod_state - .state() - .iter() - .map(|pod| PodInfo::from(pod.as_ref())) - .collect(); - - if pods.is_empty() { - warn!("No pods found in pod store during full reconciliation"); - return Ok(()); - } - - info!("Found {} pods in store for full reconciliation", pods.len()); - - // Filter for running pods and reconcile their containers - for pod_info in pods { - if let Some(phase) = &pod_info.phase { - if phase == "Running" { - if let Err(e) = self.reconcile_pod_containers(&pod_info).await { - warn!( - "Failed to reconcile pod '{}' during full reconciliation: {}", - pod_info.name, e - ); - } - } - } - } - - Ok(()) - } } #[derive(Clone)] From 759528683fe99bd2052cca614402815db1274fde Mon Sep 17 00:00:00 2001 From: titaneric Date: Sun, 12 Oct 2025 01:24:29 +0800 Subject: [PATCH 14/32] simplify key generation used in tracking container logs status --- src/sources/kubernetes_logs/reconciler.rs | 32 +++-------------------- 1 file changed, 4 insertions(+), 28 deletions(-) diff --git a/src/sources/kubernetes_logs/reconciler.rs b/src/sources/kubernetes_logs/reconciler.rs index 893ca822d9287..ae9f5eaac90f8 100644 --- a/src/sources/kubernetes_logs/reconciler.rs +++ b/src/sources/kubernetes_logs/reconciler.rs @@ -154,50 +154,26 @@ impl Reconciler { // Listen to pod watcher events for real-time reconciliation while let Some(event) = self.pod_watcher.next().await { match event { - Ok(watcher::Event::Apply(pod)) => { - let pod_info = PodInfo::from(&pod); - if let Some(phase) = &pod_info.phase { - if phase == "Running" { - info!( - "Pod '{}' is now running, starting log reconciliation", - pod_info.name - ); - if let Err(e) = self.reconcile_pod_containers(&pod_info).await { - warn!("Failed to reconcile pod '{}': {}", pod_info.name, e); - } - } - } - } Ok(watcher::Event::Delete(pod)) => { let pod_info = PodInfo::from(&pod); info!("Pod '{}' deleted, cleaning up log tailers", pod_info.name); self.cleanup_pod_tailers(&pod_info).await; } - Ok(watcher::Event::Init) => { - info!("Pod watcher initialized - ready for event-driven reconciliation"); - } - Ok(watcher::Event::InitApply(pod)) => { + Ok(watcher::Event::InitApply(pod)) | Ok(watcher::Event::Apply(pod)) => { let pod_info = PodInfo::from(&pod); if let Some(phase) = &pod_info.phase { if phase == "Running" { info!( - "Pod '{}' is running during init, starting log reconciliation", + "Pod '{}' is running, starting log reconciliation", pod_info.name ); if let Err(e) = self.reconcile_pod_containers(&pod_info).await { - warn!( - "Failed to reconcile pod '{}' during init: {}", - pod_info.name, e - ); + warn!("Failed to reconcile pod '{}': {}", pod_info.name, e); } } } } - Ok(watcher::Event::InitDone) => { - info!( - "Pod watcher init complete 
- fully ready for event-driven reconciliation" - ); - } + Ok(_) => {} Err(e) => { warn!("Pod watcher error: {}", e); } From 2578ed59b624dc142cf7918585b444682c9f3c69 Mon Sep 17 00:00:00 2001 From: titaneric Date: Tue, 14 Oct 2025 22:18:32 +0800 Subject: [PATCH 15/32] transform the API logs into `Line` and send to out stream --- src/sources/kubernetes_logs/mod.rs | 75 +++++++++++++++++++----------- 1 file changed, 48 insertions(+), 27 deletions(-) diff --git a/src/sources/kubernetes_logs/mod.rs b/src/sources/kubernetes_logs/mod.rs index e2c88928ea6f0..cf61c6c0fa779 100644 --- a/src/sources/kubernetes_logs/mod.rs +++ b/src/sources/kubernetes_logs/mod.rs @@ -9,7 +9,7 @@ use std::{cmp::min, path::PathBuf, time::Duration}; use bytes::Bytes; use chrono::Utc; use futures::{future::FutureExt, stream::StreamExt}; -use futures_util::Stream; +use futures_util::{SinkExt, Stream}; use http_1::{HeaderName, HeaderValue}; use k8s_openapi::api::core::v1::{Namespace, Node, Pod}; use k8s_paths_provider::K8sPathsProvider; @@ -916,33 +916,34 @@ impl Source { log_namespace, ); - let file_info = annotator.annotate(&mut event, &line.filename); - - emit!(KubernetesLogsEventsReceived { - file: &line.filename, - byte_size: event.estimated_json_encoded_size_of(), - pod_info: file_info.as_ref().map(|info| KubernetesLogsPodInfo { - name: info.pod_name.to_owned(), - namespace: info.pod_namespace.to_owned(), - }), - }); + if !api_log { + let file_info = annotator.annotate(&mut event, &line.filename); + emit!(KubernetesLogsEventsReceived { + file: &line.filename, + byte_size: event.estimated_json_encoded_size_of(), + pod_info: file_info.as_ref().map(|info| KubernetesLogsPodInfo { + name: info.pod_name.to_owned(), + namespace: info.pod_namespace.to_owned(), + }), + }); - if file_info.is_none() { - emit!(KubernetesLogsEventAnnotationError { event: &event }); - } else { - let namespace = file_info.as_ref().map(|info| info.pod_namespace); + if file_info.is_none() { + emit!(KubernetesLogsEventAnnotationError { event: &event }); + } else { + let namespace = file_info.as_ref().map(|info| info.pod_namespace); - if insert_namespace_fields - && let Some(name) = namespace - && ns_annotator.annotate(&mut event, name).is_none() - { - emit!(KubernetesLogsEventNamespaceAnnotationError { event: &event }); - } + if insert_namespace_fields + && let Some(name) = namespace + && ns_annotator.annotate(&mut event, name).is_none() + { + emit!(KubernetesLogsEventNamespaceAnnotationError { event: &event }); + } - let node_info = node_annotator.annotate(&mut event, self_node_name.as_str()); + let node_info = node_annotator.annotate(&mut event, self_node_name.as_str()); - if node_info.is_none() { - emit!(KubernetesLogsEventNodeAnnotationError { event: &event }); + if node_info.is_none() { + emit!(KubernetesLogsEventNodeAnnotationError { event: &event }); + } } } @@ -972,6 +973,9 @@ impl Source { let (api_logs_tx, mut api_logs_rx) = futures::channel::mpsc::unbounded::(); let reconciler = reconciler::Reconciler::new(client.clone(), api_logs_tx, reconciler_watcher); + + let file_source_tx_clone = file_source_tx.clone(); + Some(async move { info!("Starting event-driven reconciler"); @@ -981,11 +985,28 @@ impl Source { }); let log_processing_task = tokio::spawn(async move { + let mut file_source_tx = file_source_tx_clone; // Process incoming logs from the channel while let Some(log_line) = api_logs_rx.next().await { - info!("API Log: {}", log_line); - // TODO: Convert log_line to Vector event and send to output - // This is where you would parse the log 
and send it through the Vector pipeline + // Create a simple Line struct to reuse the existing pipeline + // TODO: Extract proper metadata from reconciler context + let filename = "k8s-api://unknown/unknown/unknown".to_string(); + let text = Bytes::from(log_line); + let text_len = text.len() as u64; + + let line = vector_lib::file_source::file_server::Line { + text, + filename, + file_id: vector_lib::file_source_common::FileFingerprint::Unknown(0), + start_offset: 0, + end_offset: text_len, + }; + + // Send through existing file processing pipeline + if let Err(e) = file_source_tx.send(vec![line]).await { + warn!("Failed to send API log through file pipeline: {}", e); + break; + } } info!("Reconciler log processing completed"); }); From 2e34730d158c2fc673007a3c39e261e0aada0e49 Mon Sep 17 00:00:00 2001 From: titaneric Date: Wed, 15 Oct 2025 01:35:16 +0800 Subject: [PATCH 16/32] pass log line by Bytes instead of String --- src/sources/kubernetes_logs/mod.rs | 20 +-- src/sources/kubernetes_logs/reconciler.rs | 148 +++++++++++++++------- 2 files changed, 113 insertions(+), 55 deletions(-) diff --git a/src/sources/kubernetes_logs/mod.rs b/src/sources/kubernetes_logs/mod.rs index cf61c6c0fa779..38e4598ad673f 100644 --- a/src/sources/kubernetes_logs/mod.rs +++ b/src/sources/kubernetes_logs/mod.rs @@ -74,7 +74,7 @@ mod util; use self::{ namespace_metadata_annotator::NamespaceMetadataAnnotator, node_metadata_annotator::NodeMetadataAnnotator, parser::Parser, - pod_metadata_annotator::PodMetadataAnnotator, + pod_metadata_annotator::PodMetadataAnnotator, reconciler::LogWithMetadata, }; /// The `self_node_name` value env var key. @@ -970,7 +970,8 @@ impl Source { // Only run reconciler when api_log is enabled let reconciler_fut = if let Some(reconciler_watcher) = reconciler_pod_watcher { - let (api_logs_tx, mut api_logs_rx) = futures::channel::mpsc::unbounded::(); + let (api_logs_tx, mut api_logs_rx) = + futures::channel::mpsc::unbounded::(); let reconciler = reconciler::Reconciler::new(client.clone(), api_logs_tx, reconciler_watcher); @@ -987,13 +988,16 @@ impl Source { let log_processing_task = tokio::spawn(async move { let mut file_source_tx = file_source_tx_clone; // Process incoming logs from the channel - while let Some(log_line) = api_logs_rx.next().await { - // Create a simple Line struct to reuse the existing pipeline - // TODO: Extract proper metadata from reconciler context - let filename = "k8s-api://unknown/unknown/unknown".to_string(); - let text = Bytes::from(log_line); + while let Some(log_with_metadata) = api_logs_rx.next().await { + // Create a filename that includes the container metadata for proper annotation + let filename = format!( + "k8s-api://{}/{}/{}", + log_with_metadata.namespace_name, + log_with_metadata.pod_name, + log_with_metadata.container_name + ); + let text = log_with_metadata.log_line; let text_len = text.len() as u64; - let line = vector_lib::file_source::file_server::Line { text, filename, diff --git a/src/sources/kubernetes_logs/reconciler.rs b/src/sources/kubernetes_logs/reconciler.rs index ae9f5eaac90f8..a348e65899698 100644 --- a/src/sources/kubernetes_logs/reconciler.rs +++ b/src/sources/kubernetes_logs/reconciler.rs @@ -1,8 +1,9 @@ use super::pod_info::PodInfo; +use bytes::Bytes; use chrono::{DateTime, FixedOffset, Utc}; use futures::SinkExt; use futures::channel::mpsc; -use futures::{AsyncBufReadExt, StreamExt, TryStreamExt}; +use futures::{AsyncBufReadExt, StreamExt}; use futures_util::Stream; use k8s_openapi::api::core::v1::Pod; use 
kube::runtime::watcher; @@ -12,6 +13,19 @@ use std::fmt; use std::pin::Pin; use tracing::{info, trace, warn}; +/// Log line with associated container metadata +#[derive(Clone, Debug)] +pub struct LogWithMetadata { + /// The actual log content + pub log_line: Bytes, + /// Pod name + pub pod_name: String, + /// Namespace name + pub namespace_name: String, + /// Container name + pub container_name: String, +} + /// Container key for identifying unique container instances /// Format: "{namespace}/{pod_name}/{container_name}" #[derive(Clone, Debug, PartialEq, Eq, Hash)] @@ -86,39 +100,41 @@ impl ContainerLogInfo { /// Update the last log timestamp when processing a log line /// Returns true if the timestamp was successfully parsed and updated - fn update_last_log_timestamp(&mut self, log_line: &str) -> bool { + fn update_last_log_timestamp(&mut self, log_line: &[u8]) -> bool { // Kubernetes log format typically starts with RFC3339 timestamp // e.g., "2023-10-11T10:30:00.123456789Z message content" - if let Some(timestamp_end) = log_line.find(' ') { - let timestamp_str = &log_line[..timestamp_end]; - if let Ok(timestamp) = DateTime::parse_from_rfc3339(timestamp_str) { - // Only update if this timestamp is newer than our last recorded timestamp - if let Some(last) = self.last_log { - if timestamp > last { - self.last_log = Some(timestamp); - return true; - } - } else { - // First timestamp we've seen - self.last_log = Some(timestamp); - return true; - } - } else { - // Try to parse ISO 8601 format without timezone (common in k8s logs) - if let Ok(naive_dt) = - chrono::NaiveDateTime::parse_from_str(timestamp_str, "%Y-%m-%dT%H:%M:%S%.f") - { - let timestamp = - DateTime::::from_naive_utc_and_offset(naive_dt, Utc).fixed_offset(); + if let Some(timestamp_end) = log_line.iter().position(|&b| b == b' ') { + let timestamp_bytes = &log_line[..timestamp_end]; + if let Ok(timestamp_str) = std::str::from_utf8(timestamp_bytes) { + if let Ok(timestamp) = DateTime::parse_from_rfc3339(timestamp_str) { + // Only update if this timestamp is newer than our last recorded timestamp if let Some(last) = self.last_log { if timestamp > last { self.last_log = Some(timestamp); return true; } } else { + // First timestamp we've seen self.last_log = Some(timestamp); return true; } + } else { + // Try to parse ISO 8601 format without timezone (common in k8s logs) + if let Ok(naive_dt) = + chrono::NaiveDateTime::parse_from_str(timestamp_str, "%Y-%m-%dT%H:%M:%S%.f") + { + let timestamp = DateTime::::from_naive_utc_and_offset(naive_dt, Utc) + .fixed_offset(); + if let Some(last) = self.last_log { + if timestamp > last { + self.last_log = Some(timestamp); + return true; + } + } else { + self.last_log = Some(timestamp); + return true; + } + } } } } @@ -133,7 +149,11 @@ pub struct Reconciler { } impl Reconciler { - pub fn new(client: Client, log_sender: mpsc::UnboundedSender, pod_watcher: S) -> Self + pub fn new( + client: Client, + log_sender: mpsc::UnboundedSender, + pod_watcher: S, + ) -> Self where S: Stream>> + Send + 'static, { @@ -228,7 +248,7 @@ impl Reconciler { #[derive(Clone)] struct EventStreamBuilder { client: Client, - log_sender: mpsc::UnboundedSender, + log_sender: mpsc::UnboundedSender, } #[derive(Clone)] @@ -278,29 +298,63 @@ impl EventStreamBuilder { log_info.container_info.container_name, log_info.container_info.pod_name ); - let mut lines = log_stream.lines(); - - // Process the stream of log lines continuously - while let Ok(Some(line_result)) = lines.try_next().await { - // Update timestamp tracking before 
sending - let timestamp_updated = log_info.update_last_log_timestamp(&line_result); - if timestamp_updated { - trace!( - "Updated last log timestamp for container '{}' in pod '{}' to: {:?}", - log_info.container_info.container_name, - log_info.container_info.pod_name, - log_info.last_log - ); - } + let mut buffer = Vec::new(); + let mut log_stream = log_stream; + + // Process the stream by reading line by line + loop { + buffer.clear(); + match log_stream.read_until(b'\n', &mut buffer).await { + Ok(0) => break, // EOF + Ok(_) => { + // Remove trailing newline if present + if buffer.ends_with(&[b'\n']) { + buffer.pop(); + } + // Remove trailing carriage return if present (for CRLF) + if buffer.ends_with(&[b'\r']) { + buffer.pop(); + } + + let line_bytes = Bytes::from(buffer.clone()); - // Send the log line to the channel - if let Err(_) = self.log_sender.send(line_result).await { - warn!( - "Log channel closed for container '{}' in pod '{}', stopping stream", - log_info.container_info.container_name, - log_info.container_info.pod_name - ); - break; + // Update timestamp tracking before sending + let timestamp_updated = log_info.update_last_log_timestamp(&buffer); + if timestamp_updated { + trace!( + "Updated last log timestamp for container '{}' in pod '{}' to: {:?}", + log_info.container_info.container_name, + log_info.container_info.pod_name, + log_info.last_log + ); + } + + // Send the log line with metadata to the channel + let log_with_metadata = LogWithMetadata { + log_line: line_bytes, + pod_name: log_info.container_info.pod_name.clone(), + namespace_name: log_info.container_info.namespace.clone(), + container_name: log_info.container_info.container_name.clone(), + }; + + if let Err(_) = self.log_sender.send(log_with_metadata).await { + warn!( + "Log channel closed for container '{}' in pod '{}', stopping stream", + log_info.container_info.container_name, + log_info.container_info.pod_name + ); + break; + } + } + Err(e) => { + warn!( + "Error reading from log stream for container '{}' in pod '{}': {}", + log_info.container_info.container_name, + log_info.container_info.pod_name, + e + ); + break; + } } } } From 8a26708761ac6e5ccba4dda174d40d295514e6f3 Mon Sep 17 00:00:00 2001 From: titaneric Date: Wed, 15 Oct 2025 01:52:51 +0800 Subject: [PATCH 17/32] convert log line in the reconciler --- src/sources/kubernetes_logs/mod.rs | 54 ++++------------------- src/sources/kubernetes_logs/reconciler.rs | 48 +++++++++----------- 2 files changed, 28 insertions(+), 74 deletions(-) diff --git a/src/sources/kubernetes_logs/mod.rs b/src/sources/kubernetes_logs/mod.rs index 38e4598ad673f..e94eeba9bed4b 100644 --- a/src/sources/kubernetes_logs/mod.rs +++ b/src/sources/kubernetes_logs/mod.rs @@ -9,7 +9,7 @@ use std::{cmp::min, path::PathBuf, time::Duration}; use bytes::Bytes; use chrono::Utc; use futures::{future::FutureExt, stream::StreamExt}; -use futures_util::{SinkExt, Stream}; +use futures_util::Stream; use http_1::{HeaderName, HeaderValue}; use k8s_openapi::api::core::v1::{Namespace, Node, Pod}; use k8s_paths_provider::K8sPathsProvider; @@ -74,7 +74,7 @@ mod util; use self::{ namespace_metadata_annotator::NamespaceMetadataAnnotator, node_metadata_annotator::NodeMetadataAnnotator, parser::Parser, - pod_metadata_annotator::PodMetadataAnnotator, reconciler::LogWithMetadata, + pod_metadata_annotator::PodMetadataAnnotator, }; /// The `self_node_name` value env var key. 
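// ---------------------------------------------------------------------------
// Editorial sketch (not part of the patch series): this commit moves the Line
// conversion out of mod.rs and into the reconciler, so the reconciler writes
// Vec<Line> straight into the existing `file_source_tx` channel instead of a
// separate API-log channel. The shape of that conversion is taken from the
// reconciler.rs hunk further below; the helper name `api_log_to_line` is
// hypothetical and only illustrates the mapping under those assumptions.
// ---------------------------------------------------------------------------
use bytes::Bytes;
use vector_lib::{file_source::file_server::Line, file_source_common::FileFingerprint};

fn api_log_to_line(namespace: &str, pod: &str, container: &str, text: Bytes) -> Line {
    // A synthetic "k8s-api://" filename carries the pod coordinates through the
    // existing file pipeline so downstream annotation can later recover them.
    let filename = format!("k8s-api://{}/{}/{}", namespace, pod, container);
    let end_offset = text.len() as u64;
    Line {
        text,
        filename,
        // API logs have no backing file, so a placeholder fingerprint is used,
        // mirroring the FileFingerprint::Unknown(0) choice in the patch.
        file_id: FileFingerprint::Unknown(0),
        start_offset: 0,
        end_offset,
    }
}
// ---------------------------------------------------------------------------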
@@ -970,53 +970,15 @@ impl Source { // Only run reconciler when api_log is enabled let reconciler_fut = if let Some(reconciler_watcher) = reconciler_pod_watcher { - let (api_logs_tx, mut api_logs_rx) = - futures::channel::mpsc::unbounded::(); - let reconciler = - reconciler::Reconciler::new(client.clone(), api_logs_tx, reconciler_watcher); - - let file_source_tx_clone = file_source_tx.clone(); + let reconciler = reconciler::Reconciler::new( + client.clone(), + file_source_tx.clone(), + reconciler_watcher, + ); Some(async move { info!("Starting event-driven reconciler"); - - // Spawn reconciler run task and log processing in parallel - let reconciler_task = tokio::spawn(async move { - reconciler.run().await; - }); - - let log_processing_task = tokio::spawn(async move { - let mut file_source_tx = file_source_tx_clone; - // Process incoming logs from the channel - while let Some(log_with_metadata) = api_logs_rx.next().await { - // Create a filename that includes the container metadata for proper annotation - let filename = format!( - "k8s-api://{}/{}/{}", - log_with_metadata.namespace_name, - log_with_metadata.pod_name, - log_with_metadata.container_name - ); - let text = log_with_metadata.log_line; - let text_len = text.len() as u64; - let line = vector_lib::file_source::file_server::Line { - text, - filename, - file_id: vector_lib::file_source_common::FileFingerprint::Unknown(0), - start_offset: 0, - end_offset: text_len, - }; - - // Send through existing file processing pipeline - if let Err(e) = file_source_tx.send(vec![line]).await { - warn!("Failed to send API log through file pipeline: {}", e); - break; - } - } - info!("Reconciler log processing completed"); - }); - - // Wait for both tasks to complete - let _ = tokio::try_join!(reconciler_task, log_processing_task); + reconciler.run().await; }) } else { None diff --git a/src/sources/kubernetes_logs/reconciler.rs b/src/sources/kubernetes_logs/reconciler.rs index a348e65899698..3c4608f046fde 100644 --- a/src/sources/kubernetes_logs/reconciler.rs +++ b/src/sources/kubernetes_logs/reconciler.rs @@ -12,19 +12,7 @@ use std::collections::HashMap; use std::fmt; use std::pin::Pin; use tracing::{info, trace, warn}; - -/// Log line with associated container metadata -#[derive(Clone, Debug)] -pub struct LogWithMetadata { - /// The actual log content - pub log_line: Bytes, - /// Pod name - pub pod_name: String, - /// Namespace name - pub namespace_name: String, - /// Container name - pub container_name: String, -} +use vector_lib::{file_source::file_server::Line, file_source_common::FileFingerprint}; /// Container key for identifying unique container instances /// Format: "{namespace}/{pod_name}/{container_name}" @@ -149,17 +137,13 @@ pub struct Reconciler { } impl Reconciler { - pub fn new( - client: Client, - log_sender: mpsc::UnboundedSender, - pod_watcher: S, - ) -> Self + pub fn new(client: Client, line_sender: mpsc::Sender>, pod_watcher: S) -> Self where S: Stream>> + Send + 'static, { let esb = EventStreamBuilder { client: client.clone(), - log_sender, + line_sender, }; Self { esb, @@ -248,7 +232,7 @@ impl Reconciler { #[derive(Clone)] struct EventStreamBuilder { client: Client, - log_sender: mpsc::UnboundedSender, + line_sender: mpsc::Sender>, } #[derive(Clone)] @@ -329,17 +313,25 @@ impl EventStreamBuilder { ); } - // Send the log line with metadata to the channel - let log_with_metadata = LogWithMetadata { - log_line: line_bytes, - pod_name: log_info.container_info.pod_name.clone(), - namespace_name: 
log_info.container_info.namespace.clone(), - container_name: log_info.container_info.container_name.clone(), + // Create filename for proper annotation + let filename = format!( + "k8s-api://{}/{}/{}", + log_info.container_info.namespace, + log_info.container_info.pod_name, + log_info.container_info.container_name + ); + let text_len = line_bytes.len() as u64; + let line = Line { + text: line_bytes, + filename, + file_id: FileFingerprint::Unknown(0), + start_offset: 0, + end_offset: text_len, }; - if let Err(_) = self.log_sender.send(log_with_metadata).await { + if let Err(_) = self.line_sender.send(vec![line]).await { warn!( - "Log channel closed for container '{}' in pod '{}', stopping stream", + "Line channel closed for container '{}' in pod '{}', stopping stream", log_info.container_info.container_name, log_info.container_info.pod_name ); From 3cc048c5e9b18fce4e09faca42f8fcb3bd7990e6 Mon Sep 17 00:00:00 2001 From: titaneric Date: Thu, 16 Oct 2025 00:09:21 +0800 Subject: [PATCH 18/32] tidy up change for easier review --- src/sources/kubernetes_logs/mod.rs | 50 ++++++++-------- src/sources/kubernetes_logs/parser/cri.rs | 9 ++- src/sources/kubernetes_logs/reconciler.rs | 69 ++--------------------- 3 files changed, 34 insertions(+), 94 deletions(-) diff --git a/src/sources/kubernetes_logs/mod.rs b/src/sources/kubernetes_logs/mod.rs index e94eeba9bed4b..ab2f8263d9bd2 100644 --- a/src/sources/kubernetes_logs/mod.rs +++ b/src/sources/kubernetes_logs/mod.rs @@ -916,34 +916,36 @@ impl Source { log_namespace, ); - if !api_log { - let file_info = annotator.annotate(&mut event, &line.filename); - emit!(KubernetesLogsEventsReceived { - file: &line.filename, - byte_size: event.estimated_json_encoded_size_of(), - pod_info: file_info.as_ref().map(|info| KubernetesLogsPodInfo { - name: info.pod_name.to_owned(), - namespace: info.pod_namespace.to_owned(), - }), - }); + // TODO: annotate the logs with pods's metadata + if api_log { + return event; + } + let file_info = annotator.annotate(&mut event, &line.filename); + emit!(KubernetesLogsEventsReceived { + file: &line.filename, + byte_size: event.estimated_json_encoded_size_of(), + pod_info: file_info.as_ref().map(|info| KubernetesLogsPodInfo { + name: info.pod_name.to_owned(), + namespace: info.pod_namespace.to_owned(), + }), + }); - if file_info.is_none() { - emit!(KubernetesLogsEventAnnotationError { event: &event }); - } else { - let namespace = file_info.as_ref().map(|info| info.pod_namespace); + if file_info.is_none() { + emit!(KubernetesLogsEventAnnotationError { event: &event }); + } else { + let namespace = file_info.as_ref().map(|info| info.pod_namespace); - if insert_namespace_fields - && let Some(name) = namespace - && ns_annotator.annotate(&mut event, name).is_none() - { - emit!(KubernetesLogsEventNamespaceAnnotationError { event: &event }); - } + if insert_namespace_fields + && let Some(name) = namespace + && ns_annotator.annotate(&mut event, name).is_none() + { + emit!(KubernetesLogsEventNamespaceAnnotationError { event: &event }); + } - let node_info = node_annotator.annotate(&mut event, self_node_name.as_str()); + let node_info = node_annotator.annotate(&mut event, self_node_name.as_str()); - if node_info.is_none() { - emit!(KubernetesLogsEventNodeAnnotationError { event: &event }); - } + if node_info.is_none() { + emit!(KubernetesLogsEventNodeAnnotationError { event: &event }); } } diff --git a/src/sources/kubernetes_logs/parser/cri.rs b/src/sources/kubernetes_logs/parser/cri.rs index 2d9741dd46c30..795907cf88dd2 100644 --- 
a/src/sources/kubernetes_logs/parser/cri.rs +++ b/src/sources/kubernetes_logs/parser/cri.rs @@ -8,9 +8,7 @@ use vector_lib::{ use crate::{ event::{self, Event, Value}, - internal_events::{ - DROP_EVENT, ParserConversionError, ParserMatchError, ParserMissingFieldError, - }, + internal_events::{DROP_EVENT, ParserConversionError, ParserMissingFieldError}, sources::kubernetes_logs::{Config, transform_utils::get_message_path}, transforms::{FunctionTransform, OutputBuffer}, }; @@ -58,8 +56,9 @@ impl FunctionTransform for Cri { } Some(s) => match parse_log_line(&s) { None => { - emit!(ParserMatchError { value: &s[..] }); - return; + // TODO: fix it until `FunctionTransform` supports Api logs + // emit!(ParserMatchError { value: &s[..] }); + drop(log.insert(&message_path, Value::Bytes(s))); } Some(parsed_log) => { // For all fields except `timestamp`, simply treat them as `Value::Bytes`. For diff --git a/src/sources/kubernetes_logs/reconciler.rs b/src/sources/kubernetes_logs/reconciler.rs index 3c4608f046fde..c8c0e57dd511b 100644 --- a/src/sources/kubernetes_logs/reconciler.rs +++ b/src/sources/kubernetes_logs/reconciler.rs @@ -11,7 +11,7 @@ use kube::{Api, Client, api::LogParams}; use std::collections::HashMap; use std::fmt; use std::pin::Pin; -use tracing::{info, trace, warn}; +use tracing::{info, warn}; use vector_lib::{file_source::file_server::Line, file_source_common::FileFingerprint}; /// Container key for identifying unique container instances @@ -78,56 +78,11 @@ impl ContainerLogInfo { } } - /// Get the timestamp from which logs should be fetched - /// Only logs after this point need to be fetched fn log_since(&self) -> DateTime { self.last_log .map(|dt| dt.with_timezone(&Utc)) .unwrap_or(self.created) } - - /// Update the last log timestamp when processing a log line - /// Returns true if the timestamp was successfully parsed and updated - fn update_last_log_timestamp(&mut self, log_line: &[u8]) -> bool { - // Kubernetes log format typically starts with RFC3339 timestamp - // e.g., "2023-10-11T10:30:00.123456789Z message content" - if let Some(timestamp_end) = log_line.iter().position(|&b| b == b' ') { - let timestamp_bytes = &log_line[..timestamp_end]; - if let Ok(timestamp_str) = std::str::from_utf8(timestamp_bytes) { - if let Ok(timestamp) = DateTime::parse_from_rfc3339(timestamp_str) { - // Only update if this timestamp is newer than our last recorded timestamp - if let Some(last) = self.last_log { - if timestamp > last { - self.last_log = Some(timestamp); - return true; - } - } else { - // First timestamp we've seen - self.last_log = Some(timestamp); - return true; - } - } else { - // Try to parse ISO 8601 format without timezone (common in k8s logs) - if let Ok(naive_dt) = - chrono::NaiveDateTime::parse_from_str(timestamp_str, "%Y-%m-%dT%H:%M:%S%.f") - { - let timestamp = DateTime::::from_naive_utc_and_offset(naive_dt, Utc) - .fixed_offset(); - if let Some(last) = self.last_log { - if timestamp > last { - self.last_log = Some(timestamp); - return true; - } - } else { - self.last_log = Some(timestamp); - return true; - } - } - } - } - } - false - } } pub struct Reconciler { @@ -252,7 +207,7 @@ impl EventStreamBuilder { TailerState::Running } - pub async fn run_event_stream(mut self, mut log_info: ContainerLogInfo) { + pub async fn run_event_stream(mut self, log_info: ContainerLogInfo) { let pods: Api = Api::namespaced(self.client.clone(), &log_info.container_info.namespace); @@ -302,28 +257,12 @@ impl EventStreamBuilder { let line_bytes = Bytes::from(buffer.clone()); - // Update 
timestamp tracking before sending - let timestamp_updated = log_info.update_last_log_timestamp(&buffer); - if timestamp_updated { - trace!( - "Updated last log timestamp for container '{}' in pod '{}' to: {:?}", - log_info.container_info.container_name, - log_info.container_info.pod_name, - log_info.last_log - ); - } + // TODO: track last log timestamp - // Create filename for proper annotation - let filename = format!( - "k8s-api://{}/{}/{}", - log_info.container_info.namespace, - log_info.container_info.pod_name, - log_info.container_info.container_name - ); let text_len = line_bytes.len() as u64; let line = Line { text: line_bytes, - filename, + filename: String::new(), // Filename is not applicable for k8s logs file_id: FileFingerprint::Unknown(0), start_offset: 0, end_offset: text_len, From 598b6f7cc9e3d0696f18053280d02c5578b7e7cf Mon Sep 17 00:00:00 2001 From: titaneric Date: Thu, 16 Oct 2025 00:39:32 +0800 Subject: [PATCH 19/32] reference ContainerInfo in ContainerLogInfo --- src/sources/kubernetes_logs/reconciler.rs | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/sources/kubernetes_logs/reconciler.rs b/src/sources/kubernetes_logs/reconciler.rs index c8c0e57dd511b..672728ec0291e 100644 --- a/src/sources/kubernetes_logs/reconciler.rs +++ b/src/sources/kubernetes_logs/reconciler.rs @@ -59,18 +59,18 @@ pub struct ContainerInfo { /// Container log information with timestamp tracking /// Similar to docker_logs ContainerLogInfo for position tracking -#[derive(Clone, Debug)] -struct ContainerLogInfo { - /// Container information - container_info: ContainerInfo, +#[derive(Debug)] +struct ContainerLogInfo<'a> { + /// Container information reference + container_info: &'a ContainerInfo, /// Timestamp of when this tracking started created: DateTime, /// Timestamp of last log message processed last_log: Option>, } -impl ContainerLogInfo { - fn new(container_info: ContainerInfo, created: DateTime) -> Self { +impl<'a> ContainerLogInfo<'a> { + fn new(container_info: &'a ContainerInfo, created: DateTime) -> Self { Self { container_info, created, @@ -193,21 +193,20 @@ struct EventStreamBuilder { #[derive(Clone)] enum TailerState { Running, - // Stopped, } impl EventStreamBuilder { pub fn start(&self, container_info: ContainerInfo) -> TailerState { let this = self.clone(); tokio::spawn(async move { - let log_info = ContainerLogInfo::new(container_info, Utc::now()); + let log_info = ContainerLogInfo::new(&container_info, Utc::now()); this.run_event_stream(log_info).await; return; }); TailerState::Running } - pub async fn run_event_stream(mut self, log_info: ContainerLogInfo) { + pub async fn run_event_stream(mut self, log_info: ContainerLogInfo<'_>) { let pods: Api = Api::namespaced(self.client.clone(), &log_info.container_info.namespace); From 9cfd21a4036bf790551cd465ce5b05350901f177 Mon Sep 17 00:00:00 2001 From: titaneric Date: Thu, 16 Oct 2025 01:00:36 +0800 Subject: [PATCH 20/32] update file_id used in API logs's reconciler --- src/sources/kubernetes_logs/reconciler.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sources/kubernetes_logs/reconciler.rs b/src/sources/kubernetes_logs/reconciler.rs index 672728ec0291e..a9ae2a850d0f8 100644 --- a/src/sources/kubernetes_logs/reconciler.rs +++ b/src/sources/kubernetes_logs/reconciler.rs @@ -262,7 +262,7 @@ impl EventStreamBuilder { let line = Line { text: line_bytes, filename: String::new(), // Filename is not applicable for k8s logs - file_id: FileFingerprint::Unknown(0), + file_id: 
FileFingerprint::FirstLinesChecksum(0), start_offset: 0, end_offset: text_len, }; From 8a1174c7b00faf25cf4e964b8d4483eecc650af5 Mon Sep 17 00:00:00 2001 From: titaneric Date: Thu, 16 Oct 2025 01:34:52 +0800 Subject: [PATCH 21/32] fix clippy error --- src/sources/kubernetes_logs/reconciler.rs | 27 +++++++++++------------ 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/src/sources/kubernetes_logs/reconciler.rs b/src/sources/kubernetes_logs/reconciler.rs index a9ae2a850d0f8..eed17c0df23db 100644 --- a/src/sources/kubernetes_logs/reconciler.rs +++ b/src/sources/kubernetes_logs/reconciler.rs @@ -70,7 +70,7 @@ struct ContainerLogInfo<'a> { } impl<'a> ContainerLogInfo<'a> { - fn new(container_info: &'a ContainerInfo, created: DateTime) -> Self { + const fn new(container_info: &'a ContainerInfo, created: DateTime) -> Self { Self { container_info, created, @@ -120,15 +120,15 @@ impl Reconciler { } Ok(watcher::Event::InitApply(pod)) | Ok(watcher::Event::Apply(pod)) => { let pod_info = PodInfo::from(&pod); - if let Some(phase) = &pod_info.phase { - if phase == "Running" { - info!( - "Pod '{}' is running, starting log reconciliation", - pod_info.name - ); - if let Err(e) = self.reconcile_pod_containers(&pod_info).await { - warn!("Failed to reconcile pod '{}': {}", pod_info.name, e); - } + if let Some(phase) = &pod_info.phase + && phase == "Running" + { + info!( + "Pod '{}' is running, starting log reconciliation", + pod_info.name + ); + if let Err(e) = self.reconcile_pod_containers(&pod_info).await { + warn!("Failed to reconcile pod '{}': {}", pod_info.name, e); } } } @@ -201,7 +201,6 @@ impl EventStreamBuilder { tokio::spawn(async move { let log_info = ContainerLogInfo::new(&container_info, Utc::now()); this.run_event_stream(log_info).await; - return; }); TailerState::Running } @@ -246,11 +245,11 @@ impl EventStreamBuilder { Ok(0) => break, // EOF Ok(_) => { // Remove trailing newline if present - if buffer.ends_with(&[b'\n']) { + if buffer.ends_with(b"\n") { buffer.pop(); } // Remove trailing carriage return if present (for CRLF) - if buffer.ends_with(&[b'\r']) { + if buffer.ends_with(b"\r") { buffer.pop(); } @@ -267,7 +266,7 @@ impl EventStreamBuilder { end_offset: text_len, }; - if let Err(_) = self.line_sender.send(vec![line]).await { + if self.line_sender.send(vec![line]).await.is_err() { warn!( "Line channel closed for container '{}' in pod '{}', stopping stream", log_info.container_info.container_name, From 8faaaf83cb277cc6196250b6b3898a0313d54294 Mon Sep 17 00:00:00 2001 From: titaneric Date: Fri, 17 Oct 2025 22:38:14 +0800 Subject: [PATCH 22/32] replace api_log with log_collection_strategy enum --- src/sources/kubernetes_logs/mod.rs | 35 +++++++++++++++++++----------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/src/sources/kubernetes_logs/mod.rs b/src/sources/kubernetes_logs/mod.rs index ee457fca67222..9fd383d3c3aa6 100644 --- a/src/sources/kubernetes_logs/mod.rs +++ b/src/sources/kubernetes_logs/mod.rs @@ -281,8 +281,19 @@ pub struct Config { #[serde(default = "default_rotate_wait", rename = "rotate_wait_secs")] rotate_wait: Duration, - /// Whether use k8s logs API or not - api_log: bool, + /// The strategy to use for log collection. + log_collection_strategy: LogCollectionStrategy, +} + +/// Configuration for the log collection strategy. +#[configurable_component] +#[derive(Clone, Debug, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +enum LogCollectionStrategy { + /// Collect logs by reading log files from the filesystem. 
+ File, + /// Collect logs via the Kubernetes Logs API. + Api, } const fn default_read_from() -> ReadFromConfig { @@ -331,7 +342,7 @@ impl Default for Config { log_namespace: None, internal_metrics: Default::default(), rotate_wait: default_rotate_wait(), - api_log: default_api_log(), + log_collection_strategy: default_log_collection_strategy(), } } } @@ -590,9 +601,7 @@ struct Source { delay_deletion: Duration, include_file_metric_tag: bool, rotate_wait: Duration, - // TODO: This will be used when implementing K8s logs API integration - #[allow(dead_code)] - api_log: bool, + log_collection_strategy: LogCollectionStrategy, } impl Source { @@ -682,7 +691,7 @@ impl Source { delay_deletion, include_file_metric_tag: config.internal_metrics.include_file_tag, rotate_wait: config.rotate_wait, - api_log: config.api_log, + log_collection_strategy: config.log_collection_strategy.clone(), }) } @@ -720,7 +729,7 @@ impl Source { delay_deletion, include_file_metric_tag, rotate_wait, - api_log, + log_collection_strategy, } = self; let mut reflectors = Vec::new(); @@ -745,8 +754,8 @@ impl Source { ) .backoff(watcher::DefaultBackoff::default()); - // Create a separate watcher for the reconciler if api_log is enabled - let reconciler_pod_watcher = if api_log { + // Create a separate watcher for the reconciler if log_collection_strategy is Api + let reconciler_pod_watcher = if log_collection_strategy == LogCollectionStrategy::Api { let reconciler_pods = Api::::all(client.clone()); let reconciler_watcher = watcher( reconciler_pods, @@ -916,7 +925,7 @@ impl Source { ); // TODO: annotate the logs with pods's metadata - if api_log { + if log_collection_strategy == LogCollectionStrategy::Api { return event; } let file_info = annotator.annotate(&mut event, &line.filename); @@ -1160,8 +1169,8 @@ const fn default_delay_deletion_ms() -> Duration { const fn default_rotate_wait() -> Duration { Duration::from_secs(u64::MAX / 2) } -const fn default_api_log() -> bool { - true // Enable api_log by default for now to test reconciler functionality +const fn default_log_collection_strategy() -> LogCollectionStrategy { + LogCollectionStrategy::File } // This function constructs the patterns we include for file watching, created From 0483a77b63447897304ab4c6feb907dba35ca6f3 Mon Sep 17 00:00:00 2001 From: titaneric Date: Fri, 17 Oct 2025 23:07:19 +0800 Subject: [PATCH 23/32] execute file server or logs reconciler according to strategy setting --- src/sources/kubernetes_logs/mod.rs | 96 +++++++++++++----------------- 1 file changed, 42 insertions(+), 54 deletions(-) diff --git a/src/sources/kubernetes_logs/mod.rs b/src/sources/kubernetes_logs/mod.rs index 9fd383d3c3aa6..fe90e8610cbfa 100644 --- a/src/sources/kubernetes_logs/mod.rs +++ b/src/sources/kubernetes_logs/mod.rs @@ -287,7 +287,7 @@ pub struct Config { /// Configuration for the log collection strategy. #[configurable_component] -#[derive(Clone, Debug, PartialEq, Eq)] +#[derive(Clone, Copy, Debug, PartialEq, Eq)] #[serde(rename_all = "snake_case")] enum LogCollectionStrategy { /// Collect logs by reading log files from the filesystem. 
@@ -701,6 +701,9 @@ impl Source { global_shutdown: ShutdownSignal, log_namespace: LogNamespace, ) -> crate::Result<()> { + // Store log_collection_strategy before destructuring self + let strategy = self.log_collection_strategy.clone(); + let Self { client, data_dir, @@ -729,7 +732,7 @@ impl Source { delay_deletion, include_file_metric_tag, rotate_wait, - log_collection_strategy, + log_collection_strategy: _, } = self; let mut reflectors = Vec::new(); @@ -755,19 +758,14 @@ impl Source { .backoff(watcher::DefaultBackoff::default()); // Create a separate watcher for the reconciler if log_collection_strategy is Api - let reconciler_pod_watcher = if log_collection_strategy == LogCollectionStrategy::Api { - let reconciler_pods = Api::::all(client.clone()); - let reconciler_watcher = watcher( - reconciler_pods, - watcher::Config { - ..Default::default() - }, - ) - .backoff(watcher::DefaultBackoff::default()); - Some(reconciler_watcher) - } else { - None - }; + let reconciler_pods = Api::::all(client.clone()); + let reconciler_watcher = watcher( + reconciler_pods, + watcher::Config { + ..Default::default() + }, + ) + .backoff(watcher::DefaultBackoff::default()); let pod_store_w = reflector::store::Writer::default(); let pod_state = pod_store_w.as_reader(); @@ -925,7 +923,7 @@ impl Source { ); // TODO: annotate the logs with pods's metadata - if log_collection_strategy == LogCollectionStrategy::Api { + if strategy == LogCollectionStrategy::Api { return event; } let file_info = annotator.annotate(&mut event, &line.filename); @@ -978,73 +976,63 @@ impl Source { let event_processing_loop = out.send_event_stream(&mut stream); - // Only run reconciler when api_log is enabled - let reconciler_fut = if let Some(reconciler_watcher) = reconciler_pod_watcher { - let reconciler = reconciler::Reconciler::new( - client.clone(), - file_source_tx.clone(), - reconciler_watcher, - ); + let reconciler = + reconciler::Reconciler::new(client.clone(), file_source_tx.clone(), reconciler_watcher); - Some(async move { - info!("Starting event-driven reconciler"); - reconciler.run().await; - }) - } else { - None - }; let mut lifecycle = Lifecycle::new(); - { + // Only add file server when log_collection_strategy is File + if strategy == LogCollectionStrategy::File { let (slot, shutdown) = lifecycle.add(); - let fut = util::run_file_server(file_server, file_source_tx, shutdown, checkpointer) - .map(|result| match result { - Ok(FileServerShutdown) => info!(message = "File server completed gracefully."), - Err(error) => emit!(KubernetesLifecycleError { - message: "File server exited with an error.", - error, - count: events_count, - }), - }); + let fut = + util::run_file_server(file_server, file_source_tx.clone(), shutdown, checkpointer) + .map(|result| match result { + Ok(FileServerShutdown) => { + info!(message = "File server completed gracefully.") + } + Err(error) => emit!(KubernetesLifecycleError { + message: "File server exited with an error.", + error, + count: events_count, + }), + }); slot.bind(Box::pin(fut)); } - { + // Only add reconciler to lifecycle if log_collection_strategy is Api + if strategy == LogCollectionStrategy::Api { let (slot, shutdown) = lifecycle.add(); let fut = util::complete_with_deadline_on_signal( - event_processing_loop, + reconciler.run(), shutdown, Duration::from_secs(30), // more than enough time to propagate ) .map(|result| { match result { - Ok(Ok(())) => info!(message = "Event processing loop completed gracefully."), - Ok(Err(_)) => emit!(StreamClosedError { - count: events_count - }), + 
Ok(_) => info!(message = "Reconciler completed gracefully."), Err(error) => emit!(KubernetesLifecycleError { error, - message: "Event processing loop timed out during the shutdown.", + message: "Reconciler timed out during the shutdown.", count: events_count, }), }; }); slot.bind(Box::pin(fut)); } - - // Only add reconciler to lifecycle if api_log is enabled - if let Some(reconciler_future) = reconciler_fut { + { let (slot, shutdown) = lifecycle.add(); - let fut = util::complete_with_deadline_on_signal( - reconciler_future, + event_processing_loop, shutdown, Duration::from_secs(30), // more than enough time to propagate ) .map(|result| { match result { - Ok(_) => info!(message = "Reconciler completed gracefully."), + Ok(Ok(())) => info!(message = "Event processing loop completed gracefully."), + Ok(Err(_)) => emit!(StreamClosedError { + count: events_count + }), Err(error) => emit!(KubernetesLifecycleError { error, - message: "Reconciler timed out during the shutdown.", + message: "Event processing loop timed out during the shutdown.", count: events_count, }), }; From ffd36abebe8341c48b31fa6fa8ca2eed49c96729 Mon Sep 17 00:00:00 2001 From: titaneric Date: Fri, 17 Oct 2025 23:32:53 +0800 Subject: [PATCH 24/32] broadcast pod events to reconciler --- src/sources/kubernetes_logs/mod.rs | 81 +++++++++++++++++++++++++----- 1 file changed, 69 insertions(+), 12 deletions(-) diff --git a/src/sources/kubernetes_logs/mod.rs b/src/sources/kubernetes_logs/mod.rs index fe90e8610cbfa..b5c466331ba31 100644 --- a/src/sources/kubernetes_logs/mod.rs +++ b/src/sources/kubernetes_logs/mod.rs @@ -21,6 +21,7 @@ use kube::{ }; use lifecycle::Lifecycle; use serde_with::serde_as; +use tokio_stream::wrappers::BroadcastStream; use vector_lib::{ EstimatedJsonEncodedSizeOf, TimeZone, @@ -757,15 +758,50 @@ impl Source { ) .backoff(watcher::DefaultBackoff::default()); - // Create a separate watcher for the reconciler if log_collection_strategy is Api - let reconciler_pods = Api::::all(client.clone()); - let reconciler_watcher = watcher( - reconciler_pods, - watcher::Config { - ..Default::default() + // Create shared broadcast channel for pod events + let (pod_event_tx, _) = tokio::sync::broadcast::channel(1000); + let reflector_rx = pod_event_tx.subscribe(); + let reconciler_rx = if strategy == LogCollectionStrategy::Api { + Some(pod_event_tx.subscribe()) + } else { + None + }; + + // Spawn task to forward pod events to broadcast channel + tokio::spawn(async move { + use futures_util::StreamExt; + use tokio::pin; + pin!(pod_watcher); + while let Some(event_result) = pod_watcher.next().await { + match event_result { + Ok(event) => { + // Only broadcast successful events + if pod_event_tx.send(event).is_err() { + // All receivers have been dropped + break; + } + } + Err(e) => { + warn!("Pod watcher error: {}", e); + // Continue on errors to maintain resilience + } + } + } + }); + + // Convert broadcast receiver to stream for reflector + let reflector_stream = futures_util::StreamExt::filter_map( + BroadcastStream::new(reflector_rx), + |result| async move { + match result { + Ok(event) => Some(Ok(event)), + Err(tokio_stream::wrappers::errors::BroadcastStreamRecvError::Lagged(_)) => { + warn!("Reflector lagged behind pod events"); + None + } + } }, - ) - .backoff(watcher::DefaultBackoff::default()); + ); let pod_store_w = reflector::store::Writer::default(); let pod_state = pod_store_w.as_reader(); @@ -774,7 +810,7 @@ impl Source { reflectors.push(tokio::spawn(custom_reflector( pod_store_w, pod_cacher, - pod_watcher, + 
reflector_stream, delay_deletion, ))); @@ -976,8 +1012,29 @@ impl Source { let event_processing_loop = out.send_event_stream(&mut stream); - let reconciler = - reconciler::Reconciler::new(client.clone(), file_source_tx.clone(), reconciler_watcher); + let reconciler = if let Some(rx) = reconciler_rx { + let reconciler_stream = futures_util::StreamExt::filter_map( + BroadcastStream::new(rx), + |result| async move { + match result { + Ok(event) => Some(Ok(event)), + Err(tokio_stream::wrappers::errors::BroadcastStreamRecvError::Lagged( + _, + )) => { + warn!("Reconciler lagged behind pod events"); + None + } + } + }, + ); + Some(reconciler::Reconciler::new( + client.clone(), + file_source_tx.clone(), + reconciler_stream, + )) + } else { + None + }; let mut lifecycle = Lifecycle::new(); // Only add file server when log_collection_strategy is File @@ -998,7 +1055,7 @@ impl Source { slot.bind(Box::pin(fut)); } // Only add reconciler to lifecycle if log_collection_strategy is Api - if strategy == LogCollectionStrategy::Api { + if let Some(reconciler) = reconciler { let (slot, shutdown) = lifecycle.add(); let fut = util::complete_with_deadline_on_signal( reconciler.run(), From 9bcddfd12f15c4a88e3f80839aee1b8f94057fea Mon Sep 17 00:00:00 2001 From: titaneric Date: Sat, 18 Oct 2025 00:20:57 +0800 Subject: [PATCH 25/32] simplify the trait import --- src/sources/kubernetes_logs/mod.rs | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/src/sources/kubernetes_logs/mod.rs b/src/sources/kubernetes_logs/mod.rs index b5c466331ba31..d10a9871a99c0 100644 --- a/src/sources/kubernetes_logs/mod.rs +++ b/src/sources/kubernetes_logs/mod.rs @@ -8,8 +8,7 @@ use std::{cmp::min, path::PathBuf, time::Duration}; use bytes::Bytes; use chrono::Utc; -use futures::{future::FutureExt, stream::StreamExt}; -use futures_util::Stream; +use futures_util::{Stream, future::FutureExt, stream::StreamExt}; use http_1::{HeaderName, HeaderValue}; use k8s_openapi::api::core::v1::{Namespace, Node, Pod}; use k8s_paths_provider::K8sPathsProvider; @@ -21,6 +20,7 @@ use kube::{ }; use lifecycle::Lifecycle; use serde_with::serde_as; +use tokio::pin; use tokio_stream::wrappers::BroadcastStream; use vector_lib::{ @@ -769,8 +769,6 @@ impl Source { // Spawn task to forward pod events to broadcast channel tokio::spawn(async move { - use futures_util::StreamExt; - use tokio::pin; pin!(pod_watcher); while let Some(event_result) = pod_watcher.next().await { match event_result { @@ -790,18 +788,15 @@ impl Source { }); // Convert broadcast receiver to stream for reflector - let reflector_stream = futures_util::StreamExt::filter_map( - BroadcastStream::new(reflector_rx), - |result| async move { - match result { - Ok(event) => Some(Ok(event)), - Err(tokio_stream::wrappers::errors::BroadcastStreamRecvError::Lagged(_)) => { - warn!("Reflector lagged behind pod events"); - None - } + let reflector_stream = BroadcastStream::new(reflector_rx).filter_map(|result| async move { + match result { + Ok(event) => Some(Ok(event)), + Err(tokio_stream::wrappers::errors::BroadcastStreamRecvError::Lagged(_)) => { + warn!("Reflector lagged behind pod events"); + None } - }, - ); + } + }); let pod_store_w = reflector::store::Writer::default(); let pod_state = pod_store_w.as_reader(); From d9e7c9ade4f261435877970ca332aa85b4f08710 Mon Sep 17 00:00:00 2001 From: titaneric Date: Sat, 18 Oct 2025 00:56:20 +0800 Subject: [PATCH 26/32] simplify the BroadcastStream --- src/sources/kubernetes_logs/mod.rs | 16 +----------- 
src/sources/kubernetes_logs/reconciler.rs | 30 +++++++++++++---------- 2 files changed, 18 insertions(+), 28 deletions(-) diff --git a/src/sources/kubernetes_logs/mod.rs b/src/sources/kubernetes_logs/mod.rs index d10a9871a99c0..f40bd1d163c05 100644 --- a/src/sources/kubernetes_logs/mod.rs +++ b/src/sources/kubernetes_logs/mod.rs @@ -1008,24 +1008,10 @@ impl Source { let event_processing_loop = out.send_event_stream(&mut stream); let reconciler = if let Some(rx) = reconciler_rx { - let reconciler_stream = futures_util::StreamExt::filter_map( - BroadcastStream::new(rx), - |result| async move { - match result { - Ok(event) => Some(Ok(event)), - Err(tokio_stream::wrappers::errors::BroadcastStreamRecvError::Lagged( - _, - )) => { - warn!("Reconciler lagged behind pod events"); - None - } - } - }, - ); Some(reconciler::Reconciler::new( client.clone(), file_source_tx.clone(), - reconciler_stream, + rx, )) } else { None diff --git a/src/sources/kubernetes_logs/reconciler.rs b/src/sources/kubernetes_logs/reconciler.rs index eed17c0df23db..16cc1c1f7c2d4 100644 --- a/src/sources/kubernetes_logs/reconciler.rs +++ b/src/sources/kubernetes_logs/reconciler.rs @@ -4,13 +4,15 @@ use chrono::{DateTime, FixedOffset, Utc}; use futures::SinkExt; use futures::channel::mpsc; use futures::{AsyncBufReadExt, StreamExt}; -use futures_util::Stream; +use futures_util::{Stream, future::ready}; use k8s_openapi::api::core::v1::Pod; use kube::runtime::watcher; use kube::{Api, Client, api::LogParams}; use std::collections::HashMap; use std::fmt; use std::pin::Pin; +use tokio::sync::broadcast; +use tokio_stream::wrappers::BroadcastStream; use tracing::{info, warn}; use vector_lib::{file_source::file_server::Line, file_source_common::FileFingerprint}; @@ -88,22 +90,27 @@ impl<'a> ContainerLogInfo<'a> { pub struct Reconciler { esb: EventStreamBuilder, states: HashMap, // Keyed by ContainerKey - pod_watcher: Pin>> + Send>>, + pod_watcher: Pin> + Send>>, } impl Reconciler { - pub fn new(client: Client, line_sender: mpsc::Sender>, pod_watcher: S) -> Self - where - S: Stream>> + Send + 'static, - { + pub fn new( + client: Client, + line_sender: mpsc::Sender>, + pod_receiver: broadcast::Receiver>, + ) -> Self { let esb = EventStreamBuilder { client: client.clone(), line_sender, }; + + // Convert broadcast receiver to stream, ignoring errors like TraceSubscription + let pod_stream = BroadcastStream::new(pod_receiver).filter_map(|event| ready(event.ok())); + Self { esb, states: HashMap::new(), - pod_watcher: Box::pin(pod_watcher), + pod_watcher: Box::pin(pod_stream), } } @@ -113,12 +120,12 @@ impl Reconciler { // Listen to pod watcher events for real-time reconciliation while let Some(event) = self.pod_watcher.next().await { match event { - Ok(watcher::Event::Delete(pod)) => { + watcher::Event::Delete(pod) => { let pod_info = PodInfo::from(&pod); info!("Pod '{}' deleted, cleaning up log tailers", pod_info.name); self.cleanup_pod_tailers(&pod_info).await; } - Ok(watcher::Event::InitApply(pod)) | Ok(watcher::Event::Apply(pod)) => { + watcher::Event::InitApply(pod) | watcher::Event::Apply(pod) => { let pod_info = PodInfo::from(&pod); if let Some(phase) = &pod_info.phase && phase == "Running" @@ -132,10 +139,7 @@ impl Reconciler { } } } - Ok(_) => {} - Err(e) => { - warn!("Pod watcher error: {}", e); - } + _ => {} } } From 8ebba320abb5816503ddf8e8609cdcae5c99dd06 Mon Sep 17 00:00:00 2001 From: titaneric Date: Sat, 18 Oct 2025 01:21:45 +0800 Subject: [PATCH 27/32] push the pod forwarder into reflectors to abort if needed --- 
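Keeping the forwarder's `JoinHandle` in `reflectors` lets the existing reflector teardown abort it on reload instead of leaving a detached task running after the source shuts down. A minimal sketch of the pattern (illustrative names, assuming every handle pushed into `reflectors` is aborted once the source lifecycle finishes):

    // Sketch only; names mirror the surrounding code but are illustrative.
    async fn shutdown_example() {
        let mut reflectors: Vec<tokio::task::JoinHandle<()>> = Vec::new();

        let pod_forwarder = tokio::spawn(async {
            // forward pod watcher events into the broadcast channel
            // until every receiver has been dropped
        });
        reflectors.push(pod_forwarder);

        // ... run the source lifecycle ...

        // a single teardown path now covers the forwarder as well
        for handle in reflectors {
            handle.abort();
        }
    }
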
src/sources/kubernetes_logs/mod.rs | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/sources/kubernetes_logs/mod.rs b/src/sources/kubernetes_logs/mod.rs index f40bd1d163c05..09920d910653f 100644 --- a/src/sources/kubernetes_logs/mod.rs +++ b/src/sources/kubernetes_logs/mod.rs @@ -8,7 +8,11 @@ use std::{cmp::min, path::PathBuf, time::Duration}; use bytes::Bytes; use chrono::Utc; -use futures_util::{Stream, future::FutureExt, stream::StreamExt}; +use futures_util::{ + Stream, + future::{FutureExt, ready}, + stream::StreamExt, +}; use http_1::{HeaderName, HeaderValue}; use k8s_openapi::api::core::v1::{Namespace, Node, Pod}; use k8s_paths_provider::K8sPathsProvider; @@ -768,7 +772,7 @@ impl Source { }; // Spawn task to forward pod events to broadcast channel - tokio::spawn(async move { + let pod_forwarder = tokio::spawn(async move { pin!(pod_watcher); while let Some(event_result) = pod_watcher.next().await { match event_result { @@ -786,16 +790,14 @@ impl Source { } } }); + reflectors.push(pod_forwarder); // Convert broadcast receiver to stream for reflector - let reflector_stream = BroadcastStream::new(reflector_rx).filter_map(|result| async move { - match result { + let reflector_stream = BroadcastStream::new(reflector_rx).filter_map(|result| { + ready(match result { Ok(event) => Some(Ok(event)), - Err(tokio_stream::wrappers::errors::BroadcastStreamRecvError::Lagged(_)) => { - warn!("Reflector lagged behind pod events"); - None - } - } + Err(_) => None, + }) }); let pod_store_w = reflector::store::Writer::default(); From 0667bcd654911a9bee76086348db09f9d39b2033 Mon Sep 17 00:00:00 2001 From: titaneric Date: Sat, 18 Oct 2025 01:41:38 +0800 Subject: [PATCH 28/32] add batched log TODO --- src/sources/kubernetes_logs/reconciler.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/sources/kubernetes_logs/reconciler.rs b/src/sources/kubernetes_logs/reconciler.rs index 16cc1c1f7c2d4..f98a7cc36acef 100644 --- a/src/sources/kubernetes_logs/reconciler.rs +++ b/src/sources/kubernetes_logs/reconciler.rs @@ -270,6 +270,7 @@ impl EventStreamBuilder { end_offset: text_len, }; + // TODO: Send batches of lines instead of one by one if self.line_sender.send(vec![line]).await.is_err() { warn!( "Line channel closed for container '{}' in pod '{}', stopping stream", From 1ae23326b085eb96310795bfc451f14f5c5d9f80 Mon Sep 17 00:00:00 2001 From: titaneric Date: Sat, 18 Oct 2025 02:16:29 +0800 Subject: [PATCH 29/32] refactor the reconciler's pod state handling --- src/sources/kubernetes_logs/mod.rs | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/src/sources/kubernetes_logs/mod.rs b/src/sources/kubernetes_logs/mod.rs index 09920d910653f..13cee828fcab2 100644 --- a/src/sources/kubernetes_logs/mod.rs +++ b/src/sources/kubernetes_logs/mod.rs @@ -765,20 +765,16 @@ impl Source { // Create shared broadcast channel for pod events let (pod_event_tx, _) = tokio::sync::broadcast::channel(1000); let reflector_rx = pod_event_tx.subscribe(); - let reconciler_rx = if strategy == LogCollectionStrategy::Api { - Some(pod_event_tx.subscribe()) - } else { - None - }; // Spawn task to forward pod events to broadcast channel + let pod_forwarder_tx = pod_event_tx.clone(); let pod_forwarder = tokio::spawn(async move { pin!(pod_watcher); while let Some(event_result) = pod_watcher.next().await { match event_result { Ok(event) => { // Only broadcast successful events - if pod_event_tx.send(event).is_err() { + if pod_forwarder_tx.send(event).is_err() { // 
All receivers have been dropped break; } @@ -1009,16 +1005,6 @@ impl Source { let event_processing_loop = out.send_event_stream(&mut stream); - let reconciler = if let Some(rx) = reconciler_rx { - Some(reconciler::Reconciler::new( - client.clone(), - file_source_tx.clone(), - rx, - )) - } else { - None - }; - let mut lifecycle = Lifecycle::new(); // Only add file server when log_collection_strategy is File if strategy == LogCollectionStrategy::File { @@ -1037,8 +1023,10 @@ impl Source { }); slot.bind(Box::pin(fut)); } - // Only add reconciler to lifecycle if log_collection_strategy is Api - if let Some(reconciler) = reconciler { + if strategy == LogCollectionStrategy::Api { + let reconciler_rx = pod_event_tx.subscribe(); + let reconciler = + reconciler::Reconciler::new(client.clone(), file_source_tx.clone(), reconciler_rx); let (slot, shutdown) = lifecycle.add(); let fut = util::complete_with_deadline_on_signal( reconciler.run(), From 255bf93e32b1cafe5b89aeac112d22563f086e1b Mon Sep 17 00:00:00 2001 From: titaneric Date: Sat, 18 Oct 2025 02:27:05 +0800 Subject: [PATCH 30/32] use destructured `log_collection_strategy` --- src/sources/kubernetes_logs/mod.rs | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/sources/kubernetes_logs/mod.rs b/src/sources/kubernetes_logs/mod.rs index 13cee828fcab2..e3d01ad58a34f 100644 --- a/src/sources/kubernetes_logs/mod.rs +++ b/src/sources/kubernetes_logs/mod.rs @@ -706,9 +706,6 @@ impl Source { global_shutdown: ShutdownSignal, log_namespace: LogNamespace, ) -> crate::Result<()> { - // Store log_collection_strategy before destructuring self - let strategy = self.log_collection_strategy.clone(); - let Self { client, data_dir, @@ -737,7 +734,7 @@ impl Source { delay_deletion, include_file_metric_tag, rotate_wait, - log_collection_strategy: _, + log_collection_strategy, } = self; let mut reflectors = Vec::new(); @@ -952,7 +949,7 @@ impl Source { ); // TODO: annotate the logs with pods's metadata - if strategy == LogCollectionStrategy::Api { + if log_collection_strategy == LogCollectionStrategy::Api { return event; } let file_info = annotator.annotate(&mut event, &line.filename); @@ -1007,7 +1004,7 @@ impl Source { let mut lifecycle = Lifecycle::new(); // Only add file server when log_collection_strategy is File - if strategy == LogCollectionStrategy::File { + if log_collection_strategy == LogCollectionStrategy::File { let (slot, shutdown) = lifecycle.add(); let fut = util::run_file_server(file_server, file_source_tx.clone(), shutdown, checkpointer) @@ -1023,7 +1020,7 @@ impl Source { }); slot.bind(Box::pin(fut)); } - if strategy == LogCollectionStrategy::Api { + if log_collection_strategy == LogCollectionStrategy::Api { let reconciler_rx = pod_event_tx.subscribe(); let reconciler = reconciler::Reconciler::new(client.clone(), file_source_tx.clone(), reconciler_rx); From cbfb10adf0713615e4774e12263c218ed704b544 Mon Sep 17 00:00:00 2001 From: titaneric Date: Sat, 18 Oct 2025 02:38:22 +0800 Subject: [PATCH 31/32] remove dead code an unnecessary clone --- src/sources/kubernetes_logs/mod.rs | 3 +-- src/sources/kubernetes_logs/pod_info.rs | 5 ----- src/sources/kubernetes_logs/reconciler.rs | 5 ----- 3 files changed, 1 insertion(+), 12 deletions(-) diff --git a/src/sources/kubernetes_logs/mod.rs b/src/sources/kubernetes_logs/mod.rs index e3d01ad58a34f..7a02b43b05496 100644 --- a/src/sources/kubernetes_logs/mod.rs +++ b/src/sources/kubernetes_logs/mod.rs @@ -860,8 +860,7 @@ impl Source { exclude_paths, insert_namespace_fields, ); - let 
annotator = - PodMetadataAnnotator::new(pod_state.clone(), pod_fields_spec, log_namespace); + let annotator = PodMetadataAnnotator::new(pod_state, pod_fields_spec, log_namespace); let ns_annotator = NamespaceMetadataAnnotator::new(ns_state, namespace_fields_spec, log_namespace); let node_annotator = NodeMetadataAnnotator::new(node_state, node_field_spec, log_namespace); diff --git a/src/sources/kubernetes_logs/pod_info.rs b/src/sources/kubernetes_logs/pod_info.rs index 0d91a675d2730..751bef7d19e16 100644 --- a/src/sources/kubernetes_logs/pod_info.rs +++ b/src/sources/kubernetes_logs/pod_info.rs @@ -8,8 +8,6 @@ pub struct PodInfo { pub name: String, /// Pod namespace pub namespace: String, - /// Pod UID for uniqueness - pub uid: String, /// Pod phase (Running, Pending, etc.) pub phase: Option, /// Container names within the pod @@ -24,8 +22,6 @@ impl From<&Pod> for PodInfo { let namespace = metadata.namespace.as_ref().cloned().unwrap_or_default(); - let uid = metadata.uid.as_ref().cloned().unwrap_or_default(); - let phase = pod.status.as_ref().and_then(|status| status.phase.clone()); let containers = pod @@ -42,7 +38,6 @@ impl From<&Pod> for PodInfo { PodInfo { name, namespace, - uid, phase, containers, } diff --git a/src/sources/kubernetes_logs/reconciler.rs b/src/sources/kubernetes_logs/reconciler.rs index f98a7cc36acef..f5215e6390df8 100644 --- a/src/sources/kubernetes_logs/reconciler.rs +++ b/src/sources/kubernetes_logs/reconciler.rs @@ -54,9 +54,6 @@ pub struct ContainerInfo { pub namespace: String, /// Container name pub container_name: String, - /// Pod UID for tracking (will be used for future state tracking) - #[allow(dead_code)] - pub pod_uid: String, } /// Container log information with timestamp tracking @@ -104,7 +101,6 @@ impl Reconciler { line_sender, }; - // Convert broadcast receiver to stream, ignoring errors like TraceSubscription let pod_stream = BroadcastStream::new(pod_receiver).filter_map(|event| ready(event.ok())); Self { @@ -153,7 +149,6 @@ impl Reconciler { pod_name: pod_info.name.clone(), namespace: pod_info.namespace.clone(), container_name: container_name.clone(), - pod_uid: pod_info.uid.clone(), }; let key = ContainerKey::from(&container_info); From 0f0a38edf2fddc08767af7474bbc03bb58f4065f Mon Sep 17 00:00:00 2001 From: titaneric Date: Sat, 18 Oct 2025 02:45:31 +0800 Subject: [PATCH 32/32] take the buffer to avoid unnecessary clone --- src/sources/kubernetes_logs/reconciler.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/sources/kubernetes_logs/reconciler.rs b/src/sources/kubernetes_logs/reconciler.rs index f5215e6390df8..902cf43561c49 100644 --- a/src/sources/kubernetes_logs/reconciler.rs +++ b/src/sources/kubernetes_logs/reconciler.rs @@ -239,7 +239,6 @@ impl EventStreamBuilder { // Process the stream by reading line by line loop { - buffer.clear(); match log_stream.read_until(b'\n', &mut buffer).await { Ok(0) => break, // EOF Ok(_) => { @@ -252,7 +251,7 @@ impl EventStreamBuilder { buffer.pop(); } - let line_bytes = Bytes::from(buffer.clone()); + let line_bytes = Bytes::from(std::mem::take(&mut buffer)); // TODO: track last log timestamp
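// Note on the change above: `std::mem::take` swaps `buffer` with a fresh empty Vec
// and hands the filled one to `Bytes::from`, which takes ownership without copying.
// The next `read_until` call therefore appends into an empty buffer again, so the
// explicit `buffer.clear()` at the top of the loop and the per-line `buffer.clone()`
// are no longer needed.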