
Commit 3f82e61

Author: Justin Marney (committed)
Merge branch 'master' into jm-handle-replaced-coordinator
2 parents: 9b6e9d9 + d08ab57

27 files changed, +361 -30 lines

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
@@ -4,6 +4,13 @@ Changes and additions to the library will be listed here.
 
 ## Unreleased
 
+## v0.5.0
+
+- Drops support for Kafka 0.9 in favor of Kafka 0.10 (#381)!
+- Handle cases where there are no partitions to fetch from by sleeping a bit (#439).
+- Handle problems with the broker cache (#440).
+- Shut down more quickly (#438).
+
 ## v0.4.3
 
 - Restart the async producer thread automatically after errors.

circle.yml

Lines changed: 1 addition & 1 deletion
@@ -11,7 +11,7 @@ machine:
 dependencies:
   pre:
     - docker -v
-    - docker pull ches/kafka:0.9.0.1
+    - docker pull ches/kafka:0.10.0.0
     - docker pull jplock/zookeeper:3.4.6
 
 test:

examples/consumer-group.rb

Lines changed: 1 addition & 0 deletions
@@ -20,6 +20,7 @@
 consumer.subscribe(topic)
 
 trap("TERM") { consumer.stop }
+trap("INT") { consumer.stop }
 
 consumer.each_message do |message|
   puts message.value

lib/kafka.rb

Lines changed: 4 additions & 3 deletions
@@ -17,6 +17,10 @@ def initialize(topic, partition, offset)
     end
   end
 
+  # A fetch operation was executed with no partitions specified.
+  class NoPartitionsToFetchFrom < Error
+  end
+
   # Subclasses of this exception class map to an error code described in the
   # Kafka protocol specification.
   #
@@ -221,9 +225,6 @@ class OffsetCommitError < Error
   class FetchError < Error
   end
 
-  class NoPartitionsAssignedError < Error
-  end
-
   # Initializes a new Kafka client.
   #
   # @see Client#initialize

lib/kafka/broker.rb

Lines changed: 4 additions & 0 deletions
@@ -10,6 +10,10 @@ def initialize(connection:, node_id: nil, logger:)
       @logger = logger
     end
 
+    def address_match?(host, port)
+      @connection.address_match?(host, port)
+    end
+
     # @return [String]
     def to_s
       "#{@connection} (node_id=#{@node_id.inspect})"

lib/kafka/broker_pool.rb

Lines changed: 6 additions & 1 deletion
@@ -9,7 +9,12 @@ def initialize(connection_builder:, logger:)
     end
 
     def connect(host, port, node_id: nil)
-      return @brokers.fetch(node_id) if @brokers.key?(node_id)
+      if @brokers.key?(node_id)
+        broker = @brokers.fetch(node_id)
+        return broker if broker.address_match?(host, port)
+        broker.disconnect
+        @brokers[node_id] = nil
+      end
 
       broker = Broker.new(
         connection: @connection_builder.build_connection(host, port),
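
Not part of this commit, but for context: a minimal standalone sketch of the cache check that connect now performs, with TinyBrokerPool as a hypothetical stand-in for Kafka::BrokerPool. The point of address_match? is that a cached node id may now resolve to a different host and port, for example after a broker has been replaced, in which case the stale connection is dropped before reconnecting.

  # Hypothetical sketch, not the library's actual class.
  class TinyBrokerPool
    def initialize(connection_builder:)
      @connection_builder = connection_builder
      @brokers = {}
    end

    def connect(host, port, node_id: nil)
      if @brokers.key?(node_id)
        broker = @brokers.fetch(node_id)
        # Reuse the cached broker only while it still points at the same address.
        return broker if broker.address_match?(host, port)
        # Same node id, different address: drop the stale connection first.
        broker.disconnect
        @brokers[node_id] = nil
      end

      broker = @connection_builder.call(host, port)
      @brokers[node_id] = broker unless node_id.nil?
      broker
    end
  end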

lib/kafka/cluster.rb

Lines changed: 13 additions & 2 deletions
@@ -93,9 +93,20 @@ def get_group_coordinator(group_id:)
        Protocol.handle_error(response.error_code)
 
        coordinator_id = response.coordinator_id
-        coordinator = connect_to_broker(coordinator_id)
 
-        @logger.debug "Coordinator for group `#{group_id}` is #{coordinator}"
+        @logger.debug "Coordinator for group `#{group_id}` is #{coordinator_id}. Connecting..."
+
+        # It's possible that a new broker is introduced to the cluster and
+        # becomes the coordinator before we have a chance to refresh_metadata.
+        coordinator = begin
+          connect_to_broker(coordinator_id)
+        rescue Kafka::NoSuchBroker
+          @logger.debug "Broker #{coordinator_id} missing from broker cache, refreshing"
+          refresh_metadata!
+          connect_to_broker(coordinator_id)
+        end
+
+        @logger.debug "Connected to coordinator: #{coordinator} for group `#{group_id}`"
 
        return coordinator
      rescue GroupCoordinatorNotAvailable
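
The rescue above is a "retry once after refreshing the cache" pattern: if the reported coordinator id is a broker we have never seen (for example, one that joined the cluster after our last metadata fetch), refresh and look it up again. A rough standalone sketch of that flow, with illustrative names rather than the real API:

  # Hypothetical sketch of the lookup-refresh-retry flow; names are illustrative.
  class StaleBrokerCache < StandardError; end

  def find_coordinator(coordinator_id, cache)
    cache.fetch(coordinator_id) { raise StaleBrokerCache }
  rescue StaleBrokerCache
    # The coordinator may be a broker that joined after our last metadata refresh.
    cache.merge!(refresh_metadata)
    cache.fetch(coordinator_id) # raises KeyError if the broker is still unknown
  end

  def refresh_metadata
    # Stub: the real client re-fetches cluster metadata from a seed broker.
    {}
  end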

lib/kafka/connection.rb

Lines changed: 6 additions & 2 deletions
@@ -59,6 +59,10 @@ def initialize(host:, port:, client_id:, logger:, instrumenter:, sasl_authentica
      @sasl_authenticator = sasl_authenticator
    end
 
+    def address_match?(host, port)
+      @host == host && @port == port
+    end
+
    def to_s
      "#{@host}:#{@port}"
    end
@@ -105,10 +109,10 @@ def send_request(request)
 
        response
      end
-    rescue Errno::EPIPE, Errno::ECONNRESET, Errno::ETIMEDOUT, EOFError => e
+    rescue SystemCallError, EOFError => e
      close
 
-      raise ConnectionError, "Connection error: #{e}"
+      raise ConnectionError, "Connection error #{e.class}: #{e}"
    end
 
    private
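
The rescue in send_request is broadened from three specific Errno classes to SystemCallError, the common parent of all Errno::* exceptions, so any socket-level error now closes the connection; the new message also includes the exception class for easier debugging. A quick irb check (not part of the commit) that the new clause covers the old list:

  # All Errno::* classes descend from SystemCallError, so rescuing SystemCallError
  # is a strict superset of rescuing EPIPE/ECONNRESET/ETIMEDOUT individually.
  [Errno::EPIPE, Errno::ECONNRESET, Errno::ETIMEDOUT].all? { |klass| klass < SystemCallError }
  # => true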

lib/kafka/consumer.rb

Lines changed: 52 additions & 6 deletions
@@ -57,6 +57,17 @@ def initialize(cluster:, logger:, instrumenter:, group:, offset_manager:, sessio
 
      # The maximum number of bytes to fetch from a single partition, by topic.
      @max_bytes = {}
+
+      # Offsets for each topic and partition when the automatically_mark_as_processed
+      # feature is disabled. The offset manager is only active when everything is
+      # supposed to happen automatically; otherwise we need to keep track of the
+      # offsets manually in memory the whole time.
+      # The keys are topics; the values are hashes mapping each partition to the
+      # offset of the last message we've received from it.
+      # @note It won't be updated when the user marks a message as processed, because
+      # if the user commits a message other than the last one in a batch, that would
+      # make ruby-kafka refetch some already consumed messages.
+      @current_offsets = Hash.new { |h, k| h[k] = {} }
    end
 
    # Subscribes the consumer to a topic.
@@ -94,6 +105,7 @@ def subscribe(topic, default_offset: nil, start_from_beginning: true, max_bytes_
    # @return [nil]
    def stop
      @running = false
+      @cluster.disconnect
    end
 
    # Pause processing of a specific topic partition.
@@ -180,7 +192,11 @@ def paused?(topic, partition)
    # @return [nil]
    def each_message(min_bytes: 1, max_wait_time: 1, automatically_mark_as_processed: true)
      consumer_loop do
-        batches = fetch_batches(min_bytes: min_bytes, max_wait_time: max_wait_time)
+        batches = fetch_batches(
+          min_bytes: min_bytes,
+          max_wait_time: max_wait_time,
+          automatically_mark_as_processed: automatically_mark_as_processed
+        )
 
        batches.each do |batch|
          batch.messages.each do |message|
@@ -196,6 +212,7 @@ def each_message(min_bytes: 1, max_wait_time: 1, automatically_mark_as_processed
 
            begin
              yield message
+              @current_offsets[message.topic][message.partition] = message.offset
            rescue => e
              location = "#{message.topic}/#{message.partition} at offset #{message.offset}"
              backtrace = e.backtrace.join("\n")
@@ -216,6 +233,8 @@ def each_message(min_bytes: 1, max_wait_time: 1, automatically_mark_as_processed
 
        # We may not have received any messages, but it's still a good idea to
        # commit offsets if we've processed messages in the last set of batches.
+        # This also ensures the offsets are retained if we haven't read any messages
+        # since the offset retention period has elapsed.
        @offset_manager.commit_offsets_if_necessary
      end
    end
@@ -244,7 +263,11 @@ def each_message(min_bytes: 1, max_wait_time: 1, automatically_mark_as_processed
    # @return [nil]
    def each_batch(min_bytes: 1, max_wait_time: 1, automatically_mark_as_processed: true)
      consumer_loop do
-        batches = fetch_batches(min_bytes: min_bytes, max_wait_time: max_wait_time)
+        batches = fetch_batches(
+          min_bytes: min_bytes,
+          max_wait_time: max_wait_time,
+          automatically_mark_as_processed: automatically_mark_as_processed
+        )
 
        batches.each do |batch|
          unless batch.empty?
@@ -259,6 +282,7 @@ def each_batch(min_bytes: 1, max_wait_time: 1, automatically_mark_as_processed:
 
            begin
              yield batch
+              @current_offsets[batch.topic][batch.partition] = batch.last_offset
            rescue => e
              offset_range = (batch.first_offset..batch.last_offset)
              location = "#{batch.topic}/#{batch.partition} in offset range #{offset_range}"
@@ -279,6 +303,12 @@ def each_batch(min_bytes: 1, max_wait_time: 1, automatically_mark_as_processed:
 
        return if !@running
      end
+
+      # We may not have received any messages, but it's still a good idea to
+      # commit offsets if we've processed messages in the last set of batches.
+      # This also ensures the offsets are retained if we haven't read any messages
+      # since the offset retention period has elapsed.
+      @offset_manager.commit_offsets_if_necessary
    end
  end

@@ -370,15 +400,13 @@ def join_group
      end
    end
 
-    def fetch_batches(min_bytes:, max_wait_time:)
+    def fetch_batches(min_bytes:, max_wait_time:, automatically_mark_as_processed:)
      join_group unless @group.member?
 
      subscribed_partitions = @group.subscribed_partitions
 
      @heartbeat.send_if_necessary
 
-      raise NoPartitionsAssignedError if subscribed_partitions.empty?
-
      operation = FetchOperation.new(
        cluster: @cluster,
        logger: @logger,
@@ -388,7 +416,18 @@ def fetch_batches(min_bytes:, max_wait_time:)
 
      subscribed_partitions.each do |topic, partitions|
        partitions.each do |partition|
-          offset = @offset_manager.next_offset_for(topic, partition)
+          if automatically_mark_as_processed
+            offset = @offset_manager.next_offset_for(topic, partition)
+          else
+            # When automatic marking is off, the first poll needs to be based on the
+            # last committed offset from Kafka; that's why we fall back when there is
+            # no locally tracked offset yet (it may not be 0).
+            if @current_offsets[topic].key?(partition)
+              offset = @current_offsets[topic][partition] + 1
+            else
+              offset = @offset_manager.next_offset_for(topic, partition)
+            end
+          end
+
          max_bytes = @max_bytes.fetch(topic)
 
          if paused?(topic, partition)
@@ -401,6 +440,13 @@ def fetch_batches(min_bytes:, max_wait_time:)
      end
 
      operation.execute
+    rescue NoPartitionsToFetchFrom
+      backoff = max_wait_time > 0 ? max_wait_time : 1
+
+      @logger.info "There are no partitions to fetch from, sleeping for #{backoff}s"
+      sleep backoff
+
+      retry
    rescue OffsetOutOfRange => e
      @logger.error "Invalid offset for #{e.topic}/#{e.partition}, resetting to default offset"

lib/kafka/fetch_operation.rb

Lines changed: 5 additions & 0 deletions
@@ -46,6 +46,10 @@ def execute
 
      topics_by_broker = {}
 
+      if @topics.none? {|topic, partitions| partitions.any? }
+        raise NoPartitionsToFetchFrom
+      end
+
      @topics.each do |topic, partitions|
        partitions.each do |partition, options|
          broker = @cluster.get_leader(topic, partition)
@@ -90,6 +94,7 @@ def execute
            topic: fetched_topic.name,
            partition: fetched_partition.partition,
            offset: message.offset,
+            create_time: message.create_time,
          )
        }
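
The new guard raises NoPartitionsToFetchFrom when every subscribed topic has an empty partition list, and the consumer (see lib/kafka/consumer.rb above) turns that into a bounded sleep before retrying instead of spinning or crashing. A rough standalone sketch of the interplay, with illustrative names rather than the real API:

  require "logger"

  # Illustrative sketch; not the actual Kafka::FetchOperation / Consumer API.
  class NoPartitionsToFetchFrom < StandardError; end

  def fetch_once(assignments)
    # Mirrors the guard added above: nothing to fetch if every topic's list is empty.
    if assignments.none? { |_topic, partitions| partitions.any? }
      raise NoPartitionsToFetchFrom
    end
    [] # here the real operation would issue fetch requests to the partition leaders
  end

  def fetch_with_backoff(assignments, max_wait_time: 1, logger: Logger.new($stdout))
    fetch_once(assignments)
  rescue NoPartitionsToFetchFrom
    # Sleep at least a second, then try again; partitions may be assigned after a rebalance.
    backoff = max_wait_time > 0 ? max_wait_time : 1
    logger.info "There are no partitions to fetch from, sleeping for #{backoff}s"
    sleep backoff
    retry
  end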
