Skip to content

Commit

Permalink
Fix reading strings in proper character sets
Browse files Browse the repository at this point in the history
  • Loading branch information
jeremycole committed Aug 3, 2023
1 parent b8e8960 commit eec600a
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 20 deletions.
44 changes: 28 additions & 16 deletions lib/mysql_binlog/binlog_event_parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ def event_header
def format_description_event(header)
fields = {}
fields[:binlog_version] = parser.read_uint16
fields[:server_version] = parser.read_nstringz(50)
fields[:server_version] = parser.read_nstringz(50).force_encoding("UTF-8")
fields[:create_timestamp] = parser.read_uint32
fields[:header_length] = parser.read_uint8
fields
Expand All @@ -239,7 +239,7 @@ def rotate_event(header)
fields = {}
fields[:pos] = parser.read_uint64
name_length = reader.remaining(header)
fields[:name] = parser.read_nstring(name_length)
fields[:name] = parser.read_nstring(name_length).force_encoding("UTF-8")
fields
end

Expand All @@ -255,7 +255,7 @@ def _query_event_status_updated_db_names
break if c == "\0"
db_name << c
end
db_names << db_name
db_names << db_name.force_encoding("UTF-8")
end

db_names
Expand All @@ -281,7 +281,7 @@ def _query_event_status(header, fields)
when :sql_mode
parser.read_uint64
when :catalog_deprecated
parser.read_lpstringz
parser.read_lpstringz.force_encoding("UTF-8")
when :auto_increment
{
:increment => parser.read_uint16,
Expand All @@ -294,9 +294,9 @@ def _query_event_status(header, fields)
:collation_server => COLLATION[parser.read_uint16],
}
when :time_zone
parser.read_lpstring
parser.read_lpstring.force_encoding("UTF-8")
when :catalog
parser.read_lpstring
parser.read_lpstring.force_encoding("UTF-8")
when :lc_time_names
parser.read_uint16
when :charset_database
Expand All @@ -307,8 +307,8 @@ def _query_event_status(header, fields)
parser.read_uint32
when :invoker
{
:user => parser.read_lpstring,
:host => parser.read_lpstring,
:user => parser.read_lpstring.force_encoding("UTF-8"),
:host => parser.read_lpstring.force_encoding("UTF-8"),
}
when :updated_db_names
_query_event_status_updated_db_names
Expand Down Expand Up @@ -344,9 +344,20 @@ def query_event(header)
db_length = parser.read_uint8
fields[:error_code] = parser.read_uint16
fields[:status] = _query_event_status(header, fields)
fields[:db] = parser.read_nstringz(db_length + 1)
fields[:db] = parser.read_nstringz(db_length + 1).force_encoding("UTF-8")
query_length = reader.remaining(header)
fields[:query] = reader.read([query_length, binlog.max_query_length].min)

character_set = fields[:status][:charset][:character_set_client][:character_set]
query = reader.read([query_length, binlog.max_query_length].min)
case character_set
when :utf8mb4, :utf8
fields[:query] = query.force_encoding("UTF-8")
when :latin1 # Note: Used for internally-generated query events such as "BEGIN".
fields[:query] = query.force_encoding("ISO-8859-1")
else
raise "Unsupported character set #{character_set} for query event in #{reader.filename} at offset #{reader.position}"
end

fields
end

Expand Down Expand Up @@ -445,8 +456,8 @@ def table_map_event(header)
fields[:table_id] = parser.read_uint48
fields[:flags] = parser.read_uint_bitmap_by_size_and_name(2, TABLE_MAP_EVENT_FLAGS)
map_entry = @table_map[fields[:table_id]] = {}
map_entry[:db] = parser.read_lpstringz
map_entry[:table] = parser.read_lpstringz
map_entry[:db] = parser.read_lpstringz.force_encoding("UTF-8")
map_entry[:table] = parser.read_lpstringz.force_encoding("UTF-8")
columns = parser.read_varint
columns_type = parser.read_uint8_array(columns).map { |c| MYSQL_TYPES[c] || "unknown_#{c}".to_sym }
columns_metadata = _table_map_event_column_metadata(columns_type)
Expand Down Expand Up @@ -495,9 +506,9 @@ def table_metadata_event(header)
:character_set => COLLATION[parser.read_uint16],
:flags => parser.read_uint_bitmap_by_size_and_name(2,
TABLE_METADATA_EVENT_COLUMN_FLAGS),
:name => parser.read_varstring,
:type_name => parser.read_varstring,
:comment => parser.read_varstring,
:name => parser.read_varstring.force_encoding("UTF-8"),
:type_name => parser.read_varstring.force_encoding("UTF-8"),
:comment => parser.read_varstring.force_encoding("UTF-8"),
}
end
fields
Expand Down Expand Up @@ -639,6 +650,7 @@ def generic_rows_event_v2(header)

# Parse a rows-query log event and return its query text.
#
# Reads from +reader+: first a single leading byte, which is discarded,
# then the remaining <tt>header[:payload_length] - 1</tt> bytes.
#
# header:: event header hash; only +:payload_length+ is consulted here.
#
# Returns a hash with one key, +:query+, holding exactly what
# +reader.read+ returned (no encoding is applied to it here).
def rows_query_log_event(header)
  # The payload starts with a one-byte length that is not used; consume it
  # so the reader is positioned at the start of the query text.
  reader.read(1)

  query_length = header[:payload_length] - 1

  # TODO: What character set? The query is returned as read, untagged.
  { :query => reader.read(query_length) }
end

Expand Down Expand Up @@ -678,7 +690,7 @@ def previous_gtids_log_event(header)

def gtid_log_event(header)
flags = parser.read_uint8
sid = parser.read_nstring(16)
sid = parser.read_nstring(16).force_encoding("UTF-8")
gno = parser.read_uint64
lts_type = parser.read_uint8
lts_last_committed = parser.read_uint64
Expand Down
10 changes: 6 additions & 4 deletions lib/mysql_binlog/binlog_field_parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -452,12 +452,14 @@ def read_mysql_type(type, metadata=nil)
when :double
read_double
when :var_string
read_varstring
read_varstring.force_encoding('UTF-8')
when :varchar, :string
prefix_size = (metadata[:max_length] > 255) ? 2 : 1
read_lpstring(prefix_size)
when :blob, :geometry, :json
read_lpstring(metadata[:length_size])
read_lpstring(prefix_size).force_encoding('UTF-8')
when :blob, :geometry
read_lpstring(metadata[:length_size]).force_encoding('binary')
when :json
read_lpstring(metadata[:length_size]).force_encoding('UTF-8')
when :timestamp
read_uint32
when :timestamp2
Expand Down

0 comments on commit eec600a

Please sign in to comment.