
Commit 3f82e61

Author: Justin Marney (committed)
Merge branch 'master' into jm-handle-replaced-coordinator
2 parents: 9b6e9d9 + d08ab57

27 files changed, +361 -30 lines

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
@@ -4,6 +4,13 @@ Changes and additions to the library will be listed here.
 
 ## Unreleased
 
+## v0.5.0
+
+- Drops support for Kafka 0.9 in favor of Kafka 0.10 (#381)!
+- Handle cases where there are no partitions to fetch from by sleeping a bit (#439).
+- Handle problems with the broker cache (#440).
+- Shut down more quickly (#438).
+
 ## v0.4.3
 
 - Restart the async producer thread automatically after errors.

circle.yml

Lines changed: 1 addition & 1 deletion
@@ -11,7 +11,7 @@ machine:
 dependencies:
   pre:
     - docker -v
-    - docker pull ches/kafka:0.9.0.1
+    - docker pull ches/kafka:0.10.0.0
     - docker pull jplock/zookeeper:3.4.6
 
 test:

examples/consumer-group.rb

Lines changed: 1 addition & 0 deletions
@@ -20,6 +20,7 @@
 consumer.subscribe(topic)
 
 trap("TERM") { consumer.stop }
+trap("INT") { consumer.stop }
 
 consumer.each_message do |message|
   puts message.value

lib/kafka.rb

Lines changed: 4 additions & 3 deletions
@@ -17,6 +17,10 @@ def initialize(topic, partition, offset)
     end
   end
 
+  # A fetch operation was executed with no partitions specified.
+  class NoPartitionsToFetchFrom < Error
+  end
+
   # Subclasses of this exception class map to an error code described in the
   # Kafka protocol specification.
   #
@@ -221,9 +225,6 @@ class OffsetCommitError < Error
   class FetchError < Error
   end
 
-  class NoPartitionsAssignedError < Error
-  end
-
   # Initializes a new Kafka client.
   #
   # @see Client#initialize

lib/kafka/broker.rb

Lines changed: 4 additions & 0 deletions
@@ -10,6 +10,10 @@ def initialize(connection:, node_id: nil, logger:)
       @logger = logger
     end
 
+    def address_match?(host, port)
+      @connection.address_match?(host, port)
+    end
+
     # @return [String]
     def to_s
       "#{@connection} (node_id=#{@node_id.inspect})"

lib/kafka/broker_pool.rb

Lines changed: 6 additions & 1 deletion
@@ -9,7 +9,12 @@ def initialize(connection_builder:, logger:)
     end
 
     def connect(host, port, node_id: nil)
-      return @brokers.fetch(node_id) if @brokers.key?(node_id)
+      if @brokers.key?(node_id)
+        broker = @brokers.fetch(node_id)
+        return broker if broker.address_match?(host, port)
+        broker.disconnect
+        @brokers[node_id] = nil
+      end
 
       broker = Broker.new(
         connection: @connection_builder.build_connection(host, port),
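
Not part of this commit, but for context: a minimal standalone sketch of the cache check that connect now performs, with TinyBrokerPool as a hypothetical stand-in for Kafka::BrokerPool. The point of address_match? is that a cached node id may now resolve to a different host and port, for example after a broker has been replaced, in which case the stale connection is dropped before reconnecting.

  # Hypothetical sketch, not the library's actual class.
  class TinyBrokerPool
    def initialize(connection_builder:)
      @connection_builder = connection_builder
      @brokers = {}
    end

    def connect(host, port, node_id: nil)
      if @brokers.key?(node_id)
        broker = @brokers.fetch(node_id)
        # Reuse the cached broker only while it still points at the same address.
        return broker if broker.address_match?(host, port)
        # Same node id, different address: drop the stale connection first.
        broker.disconnect
        @brokers[node_id] = nil
      end

      broker = @connection_builder.call(host, port)
      @brokers[node_id] = broker unless node_id.nil?
      broker
    end
  end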

lib/kafka/cluster.rb

Lines changed: 13 additions & 2 deletions
@@ -93,9 +93,20 @@ def get_group_coordinator(group_id:)
        Protocol.handle_error(response.error_code)
 
        coordinator_id = response.coordinator_id
-        coordinator = connect_to_broker(coordinator_id)
 
-        @logger.debug "Coordinator for group `#{group_id}` is #{coordinator}"
+        @logger.debug "Coordinator for group `#{group_id}` is #{coordinator_id}. Connecting..."
+
+        # It's possible that a new broker is introduced to the cluster and
+        # becomes the coordinator before we have a chance to refresh_metadata.
+        coordinator = begin
+          connect_to_broker(coordinator_id)
+        rescue Kafka::NoSuchBroker
+          @logger.debug "Broker #{coordinator_id} missing from broker cache, refreshing"
+          refresh_metadata!
+          connect_to_broker(coordinator_id)
+        end
+
+        @logger.debug "Connected to coordinator: #{coordinator} for group `#{group_id}`"
 
        return coordinator
      rescue GroupCoordinatorNotAvailable
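
The rescue above is a "retry once after refreshing the cache" pattern: if the reported coordinator id is a broker we have never seen (for example, one that joined the cluster after our last metadata fetch), refresh and look it up again. A rough standalone sketch of that flow, with illustrative names rather than the real API:

  # Hypothetical sketch of the lookup-refresh-retry flow; names are illustrative.
  class StaleBrokerCache < StandardError; end

  def find_coordinator(coordinator_id, cache)
    cache.fetch(coordinator_id) { raise StaleBrokerCache }
  rescue StaleBrokerCache
    # The coordinator may be a broker that joined after our last metadata refresh.
    cache.merge!(refresh_metadata)
    cache.fetch(coordinator_id) # raises KeyError if the broker is still unknown
  end

  def refresh_metadata
    # Stub: the real client re-fetches cluster metadata from a seed broker.
    {}
  end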

lib/kafka/connection.rb

Lines changed: 6 additions & 2 deletions
@@ -59,6 +59,10 @@ def initialize(host:, port:, client_id:, logger:, instrumenter:, sasl_authentica
      @sasl_authenticator = sasl_authenticator
    end
 
+    def address_match?(host, port)
+      @host == host && @port == port
+    end
+
    def to_s
      "#{@host}:#{@port}"
    end
@@ -105,10 +109,10 @@ def send_request(request)
 
        response
      end
-    rescue Errno::EPIPE, Errno::ECONNRESET, Errno::ETIMEDOUT, EOFError => e
+    rescue SystemCallError, EOFError => e
      close
 
-      raise ConnectionError, "Connection error: #{e}"
+      raise ConnectionError, "Connection error #{e.class}: #{e}"
    end
 
    private
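
The rescue in send_request is broadened from three specific Errno classes to SystemCallError, the common parent of all Errno::* exceptions, so any socket-level error now closes the connection; the new message also includes the exception class for easier debugging. A quick irb check (not part of the commit) that the new clause covers the old list:

  # All Errno::* classes descend from SystemCallError, so rescuing SystemCallError
  # is a strict superset of rescuing EPIPE/ECONNRESET/ETIMEDOUT individually.
  [Errno::EPIPE, Errno::ECONNRESET, Errno::ETIMEDOUT].all? { |klass| klass < SystemCallError }
  # => true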

lib/kafka/consumer.rb

Lines changed: 52 additions & 6 deletions
@@ -57,6 +57,17 @@ def initialize(cluster:, logger:, instrumenter:, group:, offset_manager:, sessio
 
      # The maximum number of bytes to fetch from a single partition, by topic.
      @max_bytes = {}
+
+      # Offsets for each topic and partition when the automatically_mark_as_processed
+      # feature is disabled. The offset manager is only active when everything is
+      # supposed to happen automatically; otherwise we need to keep track of the
+      # offsets manually in memory the whole time.
+      # The keys are topics; the values are hashes mapping each partition to the
+      # offset of the last message we've received from it.
+      # @note It won't be updated when the user marks a message as processed, because
+      # if the user commits a message other than the last one in a batch, that would
+      # make ruby-kafka refetch some already consumed messages.
+      @current_offsets = Hash.new { |h, k| h[k] = {} }
    end
 
    # Subscribes the consumer to a topic.
@@ -94,6 +105,7 @@ def subscribe(topic, default_offset: nil, start_from_beginning: true, max_bytes_
    # @return [nil]
    def stop
      @running = false
+      @cluster.disconnect
    end
 
    # Pause processing of a specific topic partition.
@@ -180,7 +192,11 @@ def paused?(topic, partition)
    # @return [nil]
    def each_message(min_bytes: 1, max_wait_time: 1, automatically_mark_as_processed: true)
      consumer_loop do
-        batches = fetch_batches(min_bytes: min_bytes, max_wait_time: max_wait_time)
+        batches = fetch_batches(
+          min_bytes: min_bytes,
+          max_wait_time: max_wait_time,
+          automatically_mark_as_processed: automatically_mark_as_processed
+        )
 
        batches.each do |batch|
          batch.messages.each do |message|
@@ -196,6 +212,7 @@ def each_message(min_bytes: 1, max_wait_time: 1, automatically_mark_as_processed
 
            begin
              yield message
+              @current_offsets[message.topic][message.partition] = message.offset
            rescue => e
              location = "#{message.topic}/#{message.partition} at offset #{message.offset}"
              backtrace = e.backtrace.join("\n")
@@ -216,6 +233,8 @@ def each_message(min_bytes: 1, max_wait_time: 1, automatically_mark_as_processed
 
        # We may not have received any messages, but it's still a good idea to
        # commit offsets if we've processed messages in the last set of batches.
+        # This also ensures the offsets are retained if we haven't read any messages
+        # since the offset retention period has elapsed.
        @offset_manager.commit_offsets_if_necessary
      end
    end
@@ -244,7 +263,11 @@ def each_message(min_bytes: 1, max_wait_time: 1, automatically_mark_as_processed
    # @return [nil]
    def each_batch(min_bytes: 1, max_wait_time: 1, automatically_mark_as_processed: true)
      consumer_loop do
-        batches = fetch_batches(min_bytes: min_bytes, max_wait_time: max_wait_time)
+        batches = fetch_batches(
+          min_bytes: min_bytes,
+          max_wait_time: max_wait_time,
+          automatically_mark_as_processed: automatically_mark_as_processed
+        )
 
        batches.each do |batch|
          unless batch.empty?
@@ -259,6 +282,7 @@ def each_batch(min_bytes: 1, max_wait_time: 1, automatically_mark_as_processed:
 
            begin
              yield batch
+              @current_offsets[batch.topic][batch.partition] = batch.last_offset
            rescue => e
              offset_range = (batch.first_offset..batch.last_offset)
              location = "#{batch.topic}/#{batch.partition} in offset range #{offset_range}"
@@ -279,6 +303,12 @@ def each_batch(min_bytes: 1, max_wait_time: 1, automatically_mark_as_processed:
 
        return if !@running
      end
+
+      # We may not have received any messages, but it's still a good idea to
+      # commit offsets if we've processed messages in the last set of batches.
+      # This also ensures the offsets are retained if we haven't read any messages
+      # since the offset retention period has elapsed.
+      @offset_manager.commit_offsets_if_necessary
    end
  end

@@ -370,15 +400,13 @@ def join_group
      end
    end
 
-    def fetch_batches(min_bytes:, max_wait_time:)
+    def fetch_batches(min_bytes:, max_wait_time:, automatically_mark_as_processed:)
      join_group unless @group.member?
 
      subscribed_partitions = @group.subscribed_partitions
 
      @heartbeat.send_if_necessary
 
-      raise NoPartitionsAssignedError if subscribed_partitions.empty?
-
      operation = FetchOperation.new(
        cluster: @cluster,
        logger: @logger,
@@ -388,7 +416,18 @@ def fetch_batches(min_bytes:, max_wait_time:)
 
      subscribed_partitions.each do |topic, partitions|
        partitions.each do |partition|
-          offset = @offset_manager.next_offset_for(topic, partition)
+          if automatically_mark_as_processed
+            offset = @offset_manager.next_offset_for(topic, partition)
+          else
+            # When automatic marking is off, the first poll needs to be based on the
+            # last committed offset from Kafka; that's why we fall back when there is
+            # no locally tracked offset yet (it may not be 0).
+            if @current_offsets[topic].key?(partition)
+              offset = @current_offsets[topic][partition] + 1
+            else
+              offset = @offset_manager.next_offset_for(topic, partition)
+            end
+          end
+
          max_bytes = @max_bytes.fetch(topic)
 
          if paused?(topic, partition)
@@ -401,6 +440,13 @@ def fetch_batches(min_bytes:, max_wait_time:)
      end
 
      operation.execute
+    rescue NoPartitionsToFetchFrom
+      backoff = max_wait_time > 0 ? max_wait_time : 1
+
+      @logger.info "There are no partitions to fetch from, sleeping for #{backoff}s"
+      sleep backoff
+
+      retry
    rescue OffsetOutOfRange => e
      @logger.error "Invalid offset for #{e.topic}/#{e.partition}, resetting to default offset"

lib/kafka/fetch_operation.rb

Lines changed: 5 additions & 0 deletions
@@ -46,6 +46,10 @@ def execute
 
      topics_by_broker = {}
 
+      if @topics.none? {|topic, partitions| partitions.any? }
+        raise NoPartitionsToFetchFrom
+      end
+
      @topics.each do |topic, partitions|
        partitions.each do |partition, options|
          broker = @cluster.get_leader(topic, partition)
@@ -90,6 +94,7 @@ def execute
            topic: fetched_topic.name,
            partition: fetched_partition.partition,
            offset: message.offset,
+            create_time: message.create_time,
          )
        }
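
The new guard raises NoPartitionsToFetchFrom when every subscribed topic has an empty partition list, and the consumer (see lib/kafka/consumer.rb above) turns that into a bounded sleep before retrying instead of spinning or crashing. A rough standalone sketch of the interplay, with illustrative names rather than the real API:

  require "logger"

  # Illustrative sketch; not the actual Kafka::FetchOperation / Consumer API.
  class NoPartitionsToFetchFrom < StandardError; end

  def fetch_once(assignments)
    # Mirrors the guard added above: nothing to fetch if every topic's list is empty.
    if assignments.none? { |_topic, partitions| partitions.any? }
      raise NoPartitionsToFetchFrom
    end
    [] # here the real operation would issue fetch requests to the partition leaders
  end

  def fetch_with_backoff(assignments, max_wait_time: 1, logger: Logger.new($stdout))
    fetch_once(assignments)
  rescue NoPartitionsToFetchFrom
    # Sleep at least a second, then try again; partitions may be assigned after a rebalance.
    backoff = max_wait_time > 0 ? max_wait_time : 1
    logger.info "There are no partitions to fetch from, sleeping for #{backoff}s"
    sleep backoff
    retry
  end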
