From 3520318c3984eb5a413edbbdb2508de4cba48e94 Mon Sep 17 00:00:00 2001 From: Aaron Patterson Date: Wed, 13 Apr 2016 17:05:06 -0700 Subject: [PATCH 1/6] move the decoder callbacks to the buf pointer this way we can configure the callbacks to something else at runtime. --- ext/bert/c/decode.c | 9 +++++++-- test/bert_test.rb | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/ext/bert/c/decode.c b/ext/bert/c/decode.c index dee9926..c2a8cbd 100644 --- a/ext/bert/c/decode.c +++ b/ext/bert/c/decode.c @@ -23,9 +23,14 @@ static VALUE rb_mBERT; static VALUE rb_cDecode; static VALUE rb_cTuple; +typedef struct bert_buf bert_buf; + +typedef VALUE (*bert_ptr)(struct bert_buf *buf); + struct bert_buf { const uint8_t *data; const uint8_t *end; + bert_ptr *callbacks; }; static VALUE bert_read_invalid(struct bert_buf *buf); @@ -43,7 +48,6 @@ static VALUE bert_read_bin(struct bert_buf *buf); static VALUE bert_read_sbignum(struct bert_buf *buf); static VALUE bert_read_lbignum(struct bert_buf *buf); -typedef VALUE (*bert_ptr)(struct bert_buf *buf); static bert_ptr bert_callbacks[] = { &bert_read_sint, &bert_read_int, @@ -105,7 +109,7 @@ static VALUE bert_read(struct bert_buf *buf) if (!BERT_VALID_TYPE(type)) rb_raise(rb_eRuntimeError, "Invalid tag '%d' for term", type); - return bert_callbacks[type - BERT_TYPE_OFFSET](buf); + return buf->callbacks[type - BERT_TYPE_OFFSET](buf); } static VALUE bert_read_dict(struct bert_buf *buf) @@ -477,6 +481,7 @@ static VALUE rb_bert_decode(VALUE klass, VALUE rb_string) if (bert_buf_read8(&buf) != ERL_VERSION) rb_raise(rb_eTypeError, "Invalid magic value for BERT string"); + buf.callbacks = bert_callbacks; return bert_read(&buf); } diff --git a/test/bert_test.rb b/test/bert_test.rb index 10d3f46..78746c0 100644 --- a/test/bert_test.rb +++ b/test/bert_test.rb @@ -5,7 +5,7 @@ class BertTest < Test::Unit::TestCase setup do time = Time.at(1254976067) @ruby = t[:user, {:name => 'TPW'}, [/cat/i, 9.9], time, nil, true, false, :true, 
:false] - @bert = "\203h\td\000\004userh\003d\000\004bertd\000\004dictl\000\000\000\001h\002d\000\004namem\000\000\000\003TPWjl\000\000\000\002h\004d\000\004bertd\000\005regexm\000\000\000\003catl\000\000\000\001d\000\bcaselessjc9.900000000000000e+00\000\000\000\000\000\000\000\000\000\000jh\005d\000\004bertd\000\004timeb\000\000\004\346b\000\016\344\303a\000h\002d\000\004bertd\000\003nilh\002d\000\004bertd\000\004trueh\002d\000\004bertd\000\005falsed\000\004trued\000\005false" + @bert = "\203h\td\000\004userh\003d\000\004bertd\000\004dictl\000\000\000\001h\002d\000\004namem\000\000\000\003TPWjl\000\000\000\002h\004d\000\004bertd\000\005regexm\000\000\000\003catl\000\000\000\001d\000\bcaselessjc9.900000000000000e+00\000\000\000\000\000\000\000\000\000\000jh\005d\000\004bertd\000\004timeb\000\000\004\346b\000\016\344\303a\000h\002d\000\004bertd\000\003nilh\002d\000\004bertd\000\004trueh\002d\000\004bertd\000\005falsed\000\004trued\000\005false".b @ebin = "<<131,104,9,100,0,4,117,115,101,114,104,3,100,0,4,98,101,114,116,100,0,4,100,105,99,116,108,0,0,0,1,104,2,100,0,4,110,97,109,101,109,0,0,0,3,84,80,87,106,108,0,0,0,2,104,4,100,0,4,98,101,114,116,100,0,5,114,101,103,101,120,109,0,0,0,3,99,97,116,108,0,0,0,1,100,0,8,99,97,115,101,108,101,115,115,106,99,57,46,57,48,48,48,48,48,48,48,48,48,48,48,48,48,48,101,43,48,48,0,0,0,0,0,0,0,0,0,0,106,104,5,100,0,4,98,101,114,116,100,0,4,116,105,109,101,98,0,0,4,230,98,0,14,228,195,97,0,104,2,100,0,4,98,101,114,116,100,0,3,110,105,108,104,2,100,0,4,98,101,114,116,100,0,4,116,114,117,101,104,2,100,0,4,98,101,114,116,100,0,5,102,97,108,115,101,100,0,4,116,114,117,101,100,0,5,102,97,108,115,101>>" end From 70fd1ff6c030873833b0265748fdde440d40430a Mon Sep 17 00:00:00 2001 From: Aaron Patterson Date: Wed, 13 Apr 2016 17:57:00 -0700 Subject: [PATCH 2/6] make BERT aware of string encodings This adds an encoding field after the string so that you can apply an encoding to the string sent across the wire. 
--- ext/bert/c/decode.c | 53 +++++++++++++++++++++++++++++++++++++++++--- lib/bert.rb | 3 ++- lib/bert/decode.rb | 26 +++++++++++++++++++--- lib/bert/encode.rb | 6 ++++- lib/bert/types.rb | 3 ++- test/bert_test.rb | 24 ++++++++++++++++++-- test/encoder_test.rb | 9 ++++---- 7 files changed, 109 insertions(+), 15 deletions(-) diff --git a/ext/bert/c/decode.c b/ext/bert/c/decode.c index c2a8cbd..4368816 100644 --- a/ext/bert/c/decode.c +++ b/ext/bert/c/decode.c @@ -1,4 +1,5 @@ #include "ruby.h" +#include "ruby/encoding.h" #include #include @@ -15,6 +16,7 @@ #define ERL_SMALL_BIGNUM 110 #define ERL_LARGE_BIGNUM 111 #define ERL_VERSION 131 +#define ERL_VERSION2 132 #define BERT_VALID_TYPE(t) ((t) >= ERL_SMALL_INT && (t) <= ERL_LARGE_BIGNUM) #define BERT_TYPE_OFFSET (ERL_SMALL_INT) @@ -45,6 +47,7 @@ static VALUE bert_read_nil(struct bert_buf *buf); static VALUE bert_read_string(struct bert_buf *buf); static VALUE bert_read_list(struct bert_buf *buf); static VALUE bert_read_bin(struct bert_buf *buf); +static VALUE bert_read_bin_v2(struct bert_buf *buf); static VALUE bert_read_sbignum(struct bert_buf *buf); static VALUE bert_read_lbignum(struct bert_buf *buf); @@ -66,6 +69,24 @@ static bert_ptr bert_callbacks[] = { &bert_read_lbignum }; +static bert_ptr bert_callbacks_v2[] = { + &bert_read_sint, + &bert_read_int, + &bert_read_float, + &bert_read_atom, + &bert_read_invalid, + &bert_read_invalid, + &bert_read_invalid, + &bert_read_stuple, + &bert_read_ltuple, + &bert_read_nil, + &bert_read_string, + &bert_read_list, + &bert_read_bin_v2, + &bert_read_sbignum, + &bert_read_lbignum +}; + static inline uint8_t bert_buf_read8(struct bert_buf *buf) { return *buf->data++; @@ -297,6 +318,24 @@ static VALUE bert_read_bin(struct bert_buf *buf) return rb_bin; } +static VALUE bert_read_bin_v2(struct bert_buf *buf) +{ + uint8_t type; + VALUE rb_bin, enc; + + rb_bin = bert_read_bin(buf); + + bert_buf_ensure(buf, 1); + type = bert_buf_read8(buf); + if (ERL_BIN != type) + 
rb_raise(rb_eRuntimeError, "Invalid tag '%d' for term", type); + + enc = bert_read_bin(buf); + rb_enc_associate(rb_bin, rb_find_encoding(enc)); + + return rb_bin; +} + static VALUE bert_read_string(struct bert_buf *buf) { uint16_t i, length; @@ -471,6 +510,7 @@ static VALUE bert_read_invalid(struct bert_buf *buf) static VALUE rb_bert_decode(VALUE klass, VALUE rb_string) { struct bert_buf buf; + uint8_t proto_version; Check_Type(rb_string, T_STRING); buf.data = (uint8_t *)RSTRING_PTR(rb_string); @@ -478,10 +518,17 @@ static VALUE rb_bert_decode(VALUE klass, VALUE rb_string) bert_buf_ensure(&buf, 1); - if (bert_buf_read8(&buf) != ERL_VERSION) + proto_version = bert_buf_read8(&buf); + switch(proto_version) { + case ERL_VERSION: + buf.callbacks = bert_callbacks; + break; + case ERL_VERSION2: + buf.callbacks = bert_callbacks_v2; + break; + default: rb_raise(rb_eTypeError, "Invalid magic value for BERT string"); - - buf.callbacks = bert_callbacks; + } return bert_read(&buf); } diff --git a/lib/bert.rb b/lib/bert.rb index 527113b..e4c8e66 100644 --- a/lib/bert.rb +++ b/lib/bert.rb @@ -8,6 +8,7 @@ begin # try to load the C extension + raise LoadError require 'bert/c/decode' rescue LoadError # fall back on the pure ruby version @@ -22,4 +23,4 @@ # Global method for specifying that an array should be encoded as a tuple. 
def t BERT::Tuple -end \ No newline at end of file +end diff --git a/lib/bert/decode.rb b/lib/bert/decode.rb index 8fe056e..b9b2369 100644 --- a/lib/bert/decode.rb +++ b/lib/bert/decode.rb @@ -3,6 +3,14 @@ class Decode attr_accessor :in include Types + class V1 < Decode + def read_bin + fail("Invalid Type, not an erlang binary") unless read_1 == BIN + length = read_4 + read_string(length) + end + end + def self.impl 'Ruby' end @@ -10,7 +18,15 @@ def self.impl def self.decode(string) io = StringIO.new(string) io.set_encoding('binary') if io.respond_to?(:set_encoding) - new(io).read_any + header = io.getbyte + case header + when MAGIC + Decode::V1.new(io).read_any + when VERSION_2 + new(io).read_any + else + fail("Bad Magic") + end end def initialize(ins) @@ -19,7 +35,6 @@ def initialize(ins) end def read_any - fail("Bad Magic") unless read_1 == MAGIC read_any_raw end @@ -240,7 +255,12 @@ def read_list def read_bin fail("Invalid Type, not an erlang binary") unless read_1 == BIN length = read_4 - read_string(length) + x = read_string(length) + + fail("Invalid Type, not an erlang binary") unless read_1 == BIN + length = read_4 + x.force_encoding read_string(length) + x end def fail(str) diff --git a/lib/bert/encode.rb b/lib/bert/encode.rb index ddcc895..381cf49 100644 --- a/lib/bert/encode.rb +++ b/lib/bert/encode.rb @@ -16,7 +16,7 @@ def self.encode(data) end def write_any obj - write_1 MAGIC + write_1 VERSION_2 write_any_raw obj end @@ -128,6 +128,10 @@ def write_binary(data) write_1 BIN write_4 data.bytesize write_string data + enc = data.encoding.name + write_1 BIN + write_4 enc.bytesize + write_string enc end private diff --git a/lib/bert/types.rb b/lib/bert/types.rb index 09bcd00..257b834 100644 --- a/lib/bert/types.rb +++ b/lib/bert/types.rb @@ -15,7 +15,8 @@ module Types FUN = 117 NEW_FUN = 112 MAGIC = 131 + VERSION_2 = 132 MAX_INT = (1 << 27) -1 MIN_INT = -(1 << 27) end -end \ No newline at end of file +end diff --git a/test/bert_test.rb b/test/bert_test.rb 
index 78746c0..f908917 100644 --- a/test/bert_test.rb +++ b/test/bert_test.rb @@ -5,8 +5,28 @@ class BertTest < Test::Unit::TestCase setup do time = Time.at(1254976067) @ruby = t[:user, {:name => 'TPW'}, [/cat/i, 9.9], time, nil, true, false, :true, :false] - @bert = "\203h\td\000\004userh\003d\000\004bertd\000\004dictl\000\000\000\001h\002d\000\004namem\000\000\000\003TPWjl\000\000\000\002h\004d\000\004bertd\000\005regexm\000\000\000\003catl\000\000\000\001d\000\bcaselessjc9.900000000000000e+00\000\000\000\000\000\000\000\000\000\000jh\005d\000\004bertd\000\004timeb\000\000\004\346b\000\016\344\303a\000h\002d\000\004bertd\000\003nilh\002d\000\004bertd\000\004trueh\002d\000\004bertd\000\005falsed\000\004trued\000\005false".b - @ebin = "<<131,104,9,100,0,4,117,115,101,114,104,3,100,0,4,98,101,114,116,100,0,4,100,105,99,116,108,0,0,0,1,104,2,100,0,4,110,97,109,101,109,0,0,0,3,84,80,87,106,108,0,0,0,2,104,4,100,0,4,98,101,114,116,100,0,5,114,101,103,101,120,109,0,0,0,3,99,97,116,108,0,0,0,1,100,0,8,99,97,115,101,108,101,115,115,106,99,57,46,57,48,48,48,48,48,48,48,48,48,48,48,48,48,48,101,43,48,48,0,0,0,0,0,0,0,0,0,0,106,104,5,100,0,4,98,101,114,116,100,0,4,116,105,109,101,98,0,0,4,230,98,0,14,228,195,97,0,104,2,100,0,4,98,101,114,116,100,0,3,110,105,108,104,2,100,0,4,98,101,114,116,100,0,4,116,114,117,101,104,2,100,0,4,98,101,114,116,100,0,5,102,97,108,115,101,100,0,4,116,114,117,101,100,0,5,102,97,108,115,101>>" + @bert = "\x84h\td\x00\x04userh\x03d\x00\x04bertd\x00\x04dictl\x00\x00\x00\x01h\x02d\x00\x04namem\x00\x00\x00\x03TPWm\x00\x00\x00\x05UTF-8jl\x00\x00\x00\x02h\x04d\x00\x04bertd\x00\x05regexm\x00\x00\x00\x03catm\x00\x00\x00\bUS-ASCIIl\x00\x00\x00\x01d\x00\bcaselessjc9.900000000000000e+00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00jh\x05d\x00\x04bertd\x00\x04timeb\x00\x00\x04\xE6b\x00\x0E\xE4\xC3a\x00h\x02d\x00\x04bertd\x00\x03nilh\x02d\x00\x04bertd\x00\x04trueh\x02d\x00\x04bertd\x00\x05falsed\x00\x04trued\x00\x05false".b + @bert_old = 
"\203h\td\000\004userh\003d\000\004bertd\000\004dictl\000\000\000\001h\002d\000\004namem\000\000\000\003TPWjl\000\000\000\002h\004d\000\004bertd\000\005regexm\000\000\000\003catl\000\000\000\001d\000\bcaselessjc9.900000000000000e+00\000\000\000\000\000\000\000\000\000\000jh\005d\000\004bertd\000\004timeb\000\000\004\346b\000\016\344\303a\000h\002d\000\004bertd\000\003nilh\002d\000\004bertd\000\004trueh\002d\000\004bertd\000\005falsed\000\004trued\000\005false".b + @ebin = "<<132,104,9,100,0,4,117,115,101,114,104,3,100,0,4,98,101,114,116,100,0,4,100,105,99,116,108,0,0,0,1,104,2,100,0,4,110,97,109,101,109,0,0,0,3,84,80,87,109,0,0,0,5,85,84,70,45,56,106,108,0,0,0,2,104,4,100,0,4,98,101,114,116,100,0,5,114,101,103,101,120,109,0,0,0,3,99,97,116,109,0,0,0,8,85,83,45,65,83,67,73,73,108,0,0,0,1,100,0,8,99,97,115,101,108,101,115,115,106,99,57,46,57,48,48,48,48,48,48,48,48,48,48,48,48,48,48,101,43,48,48,0,0,0,0,0,0,0,0,0,0,106,104,5,100,0,4,98,101,114,116,100,0,4,116,105,109,101,98,0,0,4,230,98,0,14,228,195,97,0,104,2,100,0,4,98,101,114,116,100,0,3,110,105,108,104,2,100,0,4,98,101,114,116,100,0,4,116,114,117,101,104,2,100,0,4,98,101,114,116,100,0,5,102,97,108,115,101,100,0,4,116,114,117,101,100,0,5,102,97,108,115,101>>" + @ebin_old = "<<131,104,9,100,0,4,117,115,101,114,104,3,100,0,4,98,101,114,116,100,0,4,100,105,99,116,108,0,0,0,1,104,2,100,0,4,110,97,109,101,109,0,0,0,3,84,80,87,106,108,0,0,0,2,104,4,100,0,4,98,101,114,116,100,0,5,114,101,103,101,120,109,0,0,0,3,99,97,116,108,0,0,0,1,100,0,8,99,97,115,101,108,101,115,115,106,99,57,46,57,48,48,48,48,48,48,48,48,48,48,48,48,48,48,101,43,48,48,0,0,0,0,0,0,0,0,0,0,106,104,5,100,0,4,98,101,114,116,100,0,4,116,105,109,101,98,0,0,4,230,98,0,14,228,195,97,0,104,2,100,0,4,98,101,114,116,100,0,3,110,105,108,104,2,100,0,4,98,101,114,116,100,0,4,116,114,117,101,104,2,100,0,4,98,101,114,116,100,0,5,102,97,108,115,101,100,0,4,116,114,117,101,100,0,5,102,97,108,115,101>>" + end + + should "decode the old format" do + assert_equal @ruby, 
BERT.decode(@bert_old) + end + + should "roundtrip string and maintain encoding" do + str = "日本語".encode 'EUC-JP' + round = BERT.decode(BERT.encode(str)) + assert_equal str, round + assert_equal str.encoding, round.encoding + end + + should "roundtrip binary string" do + str = "日本語".b + round = BERT.decode(BERT.encode(str)) + assert_equal str, round + assert_equal str.encoding, round.encoding end should "encode" do diff --git a/test/encoder_test.rb b/test/encoder_test.rb index 75290a7..6b120fd 100644 --- a/test/encoder_test.rb +++ b/test/encoder_test.rb @@ -82,20 +82,21 @@ class EncoderTest < Test::Unit::TestCase end should 'handle utf8 strings' do - bert = [131, 109, 0, 0, 0, 5, 195, 169, 116, 195, 169].pack('C*') + str = "été".encode 'UTF-8' + bert = [132, 109, 0, 0, 0, 5, 195, 169, 116, 195, 169, 109, 0, 0, 0, 5, 85, 84, 70, 45, 56].pack('C*') assert_equal bert, BERT::Encoder.encode("été") end should 'handle utf8 symbols' do - bert = [131, 100, 0, 5, 195, 169, 116, 195, 169].pack('C*') + bert = [132, 100, 0, 5, 195, 169, 116, 195, 169].pack('C*') assert_equal bert, BERT::Encoder.encode(:'été') end should "handle bignums" do - bert = [131,110,8,0,0,0,232,137,4,35,199,138].pack('c*') + bert = [132,110,8,0,0,0,232,137,4,35,199,138].pack('c*') assert_equal bert, BERT::Encoder.encode(10_000_000_000_000_000_000) - bert = [131,110,8,1,0,0,232,137,4,35,199,138].pack('c*') + bert = [132,110,8,1,0,0,232,137,4,35,199,138].pack('c*') assert_equal bert, BERT::Encoder.encode(-10_000_000_000_000_000_000) end From 4e78dc448a415a54e2122aaf5051ab2d2c5b39fb Mon Sep 17 00:00:00 2001 From: Aaron Patterson Date: Wed, 13 Apr 2016 18:09:52 -0700 Subject: [PATCH 3/6] default encoder to version 1, enable version 2 with a flag This commit makes the encoder default to version 1 of the BERT encoding scheme, but allows you to turn on version 2 via a feature flag. 
--- lib/bert.rb | 1 - lib/bert/encode.rb | 39 ++++++++++++++++++++++++----- test/bert_test.rb | 59 +++++++++++++++++++++++++++----------------- test/encoder_test.rb | 38 +++++++++++++++++++++++++--- 4 files changed, 104 insertions(+), 33 deletions(-) diff --git a/lib/bert.rb b/lib/bert.rb index e4c8e66..08b3016 100644 --- a/lib/bert.rb +++ b/lib/bert.rb @@ -8,7 +8,6 @@ begin # try to load the C extension - raise LoadError require 'bert/c/decode' rescue LoadError # fall back on the pure ruby version diff --git a/lib/bert/encode.rb b/lib/bert/encode.rb index 381cf49..e243fae 100644 --- a/lib/bert/encode.rb +++ b/lib/bert/encode.rb @@ -2,6 +2,27 @@ module BERT class Encode include Types + class V2 < Encode + def write_binary(data) + super + enc = data.encoding.name + write_1 BIN + write_4 enc.bytesize + write_string enc + end + + private + + def version_header + VERSION_2 + end + end + + class << self + attr_accessor :version + end + self.version = :v1 + attr_accessor :out def initialize(out) @@ -11,12 +32,18 @@ def initialize(out) def self.encode(data) io = StringIO.new io.set_encoding('binary') if io.respond_to?(:set_encoding) - self.new(io).write_any(data) + + if version == :v2 + Encode::V2.new(io).write_any(data) + else + new(io).write_any(data) + end + io.string end def write_any obj - write_1 VERSION_2 + write_1 version_header write_any_raw obj end @@ -128,14 +155,14 @@ def write_binary(data) write_1 BIN write_4 data.bytesize write_string data - enc = data.encoding.name - write_1 BIN - write_4 enc.bytesize - write_string enc end private + def version_header + MAGIC + end + def fail(obj) raise "Cannot encode to erlang external format: #{obj.inspect}" end diff --git a/test/bert_test.rb b/test/bert_test.rb index f908917..a4be99a 100644 --- a/test/bert_test.rb +++ b/test/bert_test.rb @@ -5,40 +5,55 @@ class BertTest < Test::Unit::TestCase setup do time = Time.at(1254976067) @ruby = t[:user, {:name => 'TPW'}, [/cat/i, 9.9], time, nil, true, false, :true, :false] - 
@bert = "\x84h\td\x00\x04userh\x03d\x00\x04bertd\x00\x04dictl\x00\x00\x00\x01h\x02d\x00\x04namem\x00\x00\x00\x03TPWm\x00\x00\x00\x05UTF-8jl\x00\x00\x00\x02h\x04d\x00\x04bertd\x00\x05regexm\x00\x00\x00\x03catm\x00\x00\x00\bUS-ASCIIl\x00\x00\x00\x01d\x00\bcaselessjc9.900000000000000e+00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00jh\x05d\x00\x04bertd\x00\x04timeb\x00\x00\x04\xE6b\x00\x0E\xE4\xC3a\x00h\x02d\x00\x04bertd\x00\x03nilh\x02d\x00\x04bertd\x00\x04trueh\x02d\x00\x04bertd\x00\x05falsed\x00\x04trued\x00\x05false".b @bert_old = "\203h\td\000\004userh\003d\000\004bertd\000\004dictl\000\000\000\001h\002d\000\004namem\000\000\000\003TPWjl\000\000\000\002h\004d\000\004bertd\000\005regexm\000\000\000\003catl\000\000\000\001d\000\bcaselessjc9.900000000000000e+00\000\000\000\000\000\000\000\000\000\000jh\005d\000\004bertd\000\004timeb\000\000\004\346b\000\016\344\303a\000h\002d\000\004bertd\000\003nilh\002d\000\004bertd\000\004trueh\002d\000\004bertd\000\005falsed\000\004trued\000\005false".b - @ebin = "<<132,104,9,100,0,4,117,115,101,114,104,3,100,0,4,98,101,114,116,100,0,4,100,105,99,116,108,0,0,0,1,104,2,100,0,4,110,97,109,101,109,0,0,0,3,84,80,87,109,0,0,0,5,85,84,70,45,56,106,108,0,0,0,2,104,4,100,0,4,98,101,114,116,100,0,5,114,101,103,101,120,109,0,0,0,3,99,97,116,109,0,0,0,8,85,83,45,65,83,67,73,73,108,0,0,0,1,100,0,8,99,97,115,101,108,101,115,115,106,99,57,46,57,48,48,48,48,48,48,48,48,48,48,48,48,48,48,101,43,48,48,0,0,0,0,0,0,0,0,0,0,106,104,5,100,0,4,98,101,114,116,100,0,4,116,105,109,101,98,0,0,4,230,98,0,14,228,195,97,0,104,2,100,0,4,98,101,114,116,100,0,3,110,105,108,104,2,100,0,4,98,101,114,116,100,0,4,116,114,117,101,104,2,100,0,4,98,101,114,116,100,0,5,102,97,108,115,101,100,0,4,116,114,117,101,100,0,5,102,97,108,115,101>>" @ebin_old = 
"<<131,104,9,100,0,4,117,115,101,114,104,3,100,0,4,98,101,114,116,100,0,4,100,105,99,116,108,0,0,0,1,104,2,100,0,4,110,97,109,101,109,0,0,0,3,84,80,87,106,108,0,0,0,2,104,4,100,0,4,98,101,114,116,100,0,5,114,101,103,101,120,109,0,0,0,3,99,97,116,108,0,0,0,1,100,0,8,99,97,115,101,108,101,115,115,106,99,57,46,57,48,48,48,48,48,48,48,48,48,48,48,48,48,48,101,43,48,48,0,0,0,0,0,0,0,0,0,0,106,104,5,100,0,4,98,101,114,116,100,0,4,116,105,109,101,98,0,0,4,230,98,0,14,228,195,97,0,104,2,100,0,4,98,101,114,116,100,0,3,110,105,108,104,2,100,0,4,98,101,114,116,100,0,4,116,114,117,101,104,2,100,0,4,98,101,114,116,100,0,5,102,97,108,115,101,100,0,4,116,114,117,101,100,0,5,102,97,108,115,101>>" end - should "decode the old format" do - assert_equal @ruby, BERT.decode(@bert_old) - end + context "v2 encoder" do + setup do + @old_version = BERT::Encode.version + BERT::Encode.version = :v2 + @bert = "\x84h\td\x00\x04userh\x03d\x00\x04bertd\x00\x04dictl\x00\x00\x00\x01h\x02d\x00\x04namem\x00\x00\x00\x03TPWm\x00\x00\x00\x05UTF-8jl\x00\x00\x00\x02h\x04d\x00\x04bertd\x00\x05regexm\x00\x00\x00\x03catm\x00\x00\x00\bUS-ASCIIl\x00\x00\x00\x01d\x00\bcaselessjc9.900000000000000e+00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00jh\x05d\x00\x04bertd\x00\x04timeb\x00\x00\x04\xE6b\x00\x0E\xE4\xC3a\x00h\x02d\x00\x04bertd\x00\x03nilh\x02d\x00\x04bertd\x00\x04trueh\x02d\x00\x04bertd\x00\x05falsed\x00\x04trued\x00\x05false".b + @ebin = 
"<<132,104,9,100,0,4,117,115,101,114,104,3,100,0,4,98,101,114,116,100,0,4,100,105,99,116,108,0,0,0,1,104,2,100,0,4,110,97,109,101,109,0,0,0,3,84,80,87,109,0,0,0,5,85,84,70,45,56,106,108,0,0,0,2,104,4,100,0,4,98,101,114,116,100,0,5,114,101,103,101,120,109,0,0,0,3,99,97,116,109,0,0,0,8,85,83,45,65,83,67,73,73,108,0,0,0,1,100,0,8,99,97,115,101,108,101,115,115,106,99,57,46,57,48,48,48,48,48,48,48,48,48,48,48,48,48,48,101,43,48,48,0,0,0,0,0,0,0,0,0,0,106,104,5,100,0,4,98,101,114,116,100,0,4,116,105,109,101,98,0,0,4,230,98,0,14,228,195,97,0,104,2,100,0,4,98,101,114,116,100,0,3,110,105,108,104,2,100,0,4,98,101,114,116,100,0,4,116,114,117,101,104,2,100,0,4,98,101,114,116,100,0,5,102,97,108,115,101,100,0,4,116,114,117,101,100,0,5,102,97,108,115,101>>" + end - should "roundtrip string and maintain encoding" do - str = "日本語".encode 'EUC-JP' - round = BERT.decode(BERT.encode(str)) - assert_equal str, round - assert_equal str.encoding, round.encoding - end + teardown do + BERT::Encode.version = @old_version + end - should "roundtrip binary string" do - str = "日本語".b - round = BERT.decode(BERT.encode(str)) - assert_equal str, round - assert_equal str.encoding, round.encoding - end + should "decode new format" do + assert_equal @ruby, BERT.decode(@bert) + end - should "encode" do - assert_equal @bert, BERT.encode(@ruby) + should "roundtrip string and maintain encoding" do + str = "日本語".encode 'EUC-JP' + round = BERT.decode(BERT.encode(str)) + assert_equal str, round + assert_equal str.encoding, round.encoding + end + + should "roundtrip binary string" do + str = "日本語".b + round = BERT.decode(BERT.encode(str)) + assert_equal str, round + assert_equal str.encoding, round.encoding + end + + should "encode" do + assert_equal @bert, BERT.encode(@ruby) + end + + should "ebin" do + assert_equal @ebin, BERT.ebin(@bert) + end end - should "decode" do - assert_equal @ruby, BERT.decode(@bert) + should "decode the old format" do + assert_equal @ruby, BERT.decode(@bert_old) end should "ebin" 
do - assert_equal @ebin, BERT.ebin(@bert) + assert_equal @ebin_old, BERT.ebin(@bert_old) end should "do roundtrips" do diff --git a/test/encoder_test.rb b/test/encoder_test.rb index 6b120fd..1b6cb9b 100644 --- a/test/encoder_test.rb +++ b/test/encoder_test.rb @@ -83,23 +83,53 @@ class EncoderTest < Test::Unit::TestCase should 'handle utf8 strings' do str = "été".encode 'UTF-8' - bert = [132, 109, 0, 0, 0, 5, 195, 169, 116, 195, 169, 109, 0, 0, 0, 5, 85, 84, 70, 45, 56].pack('C*') + bert = [131, 109, 0, 0, 0, 5, 195, 169, 116, 195, 169].pack('C*') assert_equal bert, BERT::Encoder.encode("été") end should 'handle utf8 symbols' do - bert = [132, 100, 0, 5, 195, 169, 116, 195, 169].pack('C*') + bert = [131, 100, 0, 5, 195, 169, 116, 195, 169].pack('C*') assert_equal bert, BERT::Encoder.encode(:'été') end should "handle bignums" do - bert = [132,110,8,0,0,0,232,137,4,35,199,138].pack('c*') + bert = [131,110,8,0,0,0,232,137,4,35,199,138].pack('c*') assert_equal bert, BERT::Encoder.encode(10_000_000_000_000_000_000) - bert = [132,110,8,1,0,0,232,137,4,35,199,138].pack('c*') + bert = [131,110,8,1,0,0,232,137,4,35,199,138].pack('c*') assert_equal bert, BERT::Encoder.encode(-10_000_000_000_000_000_000) end + context "v2" do + setup do + @old_version = BERT::Encode.version + BERT::Encode.version = :v2 + end + + teardown do + BERT::Encode.version = @old_version + end + + should 'handle utf8 strings' do + str = "été".encode 'UTF-8' + bert = [132, 109, 0, 0, 0, 5, 195, 169, 116, 195, 169, 109, 0, 0, 0, 5, 85, 84, 70, 45, 56].pack('C*') + assert_equal bert, BERT::Encoder.encode("été") + end + + should 'handle utf8 symbols' do + bert = [132, 100, 0, 5, 195, 169, 116, 195, 169].pack('C*') + assert_equal bert, BERT::Encoder.encode(:'été') + end + + should "handle bignums" do + bert = [132,110,8,0,0,0,232,137,4,35,199,138].pack('c*') + assert_equal bert, BERT::Encoder.encode(10_000_000_000_000_000_000) + + bert = [132,110,8,1,0,0,232,137,4,35,199,138].pack('c*') + assert_equal bert, 
BERT::Encoder.encode(-10_000_000_000_000_000_000) + end + end + should "leave other stuff alone" do before = [1, 2.0, [:foo, 'bar']] assert_equal before, BERT::Encoder.convert(before) From aa084e74f02213ad2729823fa9fa2f8ab356f0fd Mon Sep 17 00:00:00 2001 From: Aaron Patterson Date: Thu, 14 Apr 2016 15:00:46 -0700 Subject: [PATCH 4/6] add two new types, unicode strings, and other encoded strings This commit adds two new types, one for unicode strings and one for other encoded strings. Unicode strings have no extra wire protocol overhead, whereas "other" strings send the encoding name along with the string. --- ext/bert/c/decode.c | 44 +++++++++++++++++++++----------------------- lib/bert/decode.rb | 32 ++++++++++++++++++++++++++++---- lib/bert/encode.rb | 26 +++++++++++++++++++++++--- lib/bert/types.rb | 3 ++- test/bert_test.rb | 4 ++-- test/encoder_test.rb | 2 +- 6 files changed, 77 insertions(+), 34 deletions(-) diff --git a/ext/bert/c/decode.c b/ext/bert/c/decode.c index 4368816..85c08e1 100644 --- a/ext/bert/c/decode.c +++ b/ext/bert/c/decode.c @@ -15,10 +15,12 @@ #define ERL_BIN 109 #define ERL_SMALL_BIGNUM 110 #define ERL_LARGE_BIGNUM 111 +#define ERL_ENC_STRING 112 +#define ERL_UNICODE_STRING 113 #define ERL_VERSION 131 #define ERL_VERSION2 132 -#define BERT_VALID_TYPE(t) ((t) >= ERL_SMALL_INT && (t) <= ERL_LARGE_BIGNUM) +#define BERT_VALID_TYPE(t) ((t) >= ERL_SMALL_INT && (t) <= ERL_UNICODE_STRING) #define BERT_TYPE_OFFSET (ERL_SMALL_INT) static VALUE rb_mBERT; @@ -47,7 +49,9 @@ static VALUE bert_read_nil(struct bert_buf *buf); static VALUE bert_read_string(struct bert_buf *buf); static VALUE bert_read_list(struct bert_buf *buf); static VALUE bert_read_bin(struct bert_buf *buf); -static VALUE bert_read_bin_v2(struct bert_buf *buf); +static VALUE bert_read_enc_string(struct bert_buf *buf); +static VALUE bert_read_unicode_string(struct bert_buf *buf); +static VALUE bert_read_unicode_string(struct bert_buf *buf); static VALUE bert_read_sbignum(struct bert_buf
*buf); static VALUE bert_read_lbignum(struct bert_buf *buf); @@ -66,25 +70,9 @@ static bert_ptr bert_callbacks[] = { &bert_read_list, &bert_read_bin, &bert_read_sbignum, - &bert_read_lbignum -}; - -static bert_ptr bert_callbacks_v2[] = { - &bert_read_sint, - &bert_read_int, - &bert_read_float, - &bert_read_atom, - &bert_read_invalid, - &bert_read_invalid, - &bert_read_invalid, - &bert_read_stuple, - &bert_read_ltuple, - &bert_read_nil, - &bert_read_string, - &bert_read_list, - &bert_read_bin_v2, - &bert_read_sbignum, - &bert_read_lbignum + &bert_read_lbignum, + &bert_read_enc_string, + &bert_read_unicode_string }; static inline uint8_t bert_buf_read8(struct bert_buf *buf) @@ -318,7 +306,17 @@ static VALUE bert_read_bin(struct bert_buf *buf) return rb_bin; } -static VALUE bert_read_bin_v2(struct bert_buf *buf) +static VALUE bert_read_unicode_string(struct bert_buf *buf) +{ + VALUE rb_str; + + rb_str = bert_read_bin(buf); + rb_enc_associate(rb_str, rb_utf8_encoding()); + + return rb_str; +} + +static VALUE bert_read_enc_string(struct bert_buf *buf) { uint8_t type; VALUE rb_bin, enc; @@ -524,7 +522,7 @@ static VALUE rb_bert_decode(VALUE klass, VALUE rb_string) buf.callbacks = bert_callbacks; break; case ERL_VERSION2: - buf.callbacks = bert_callbacks_v2; + buf.callbacks = bert_callbacks; break; default: rb_raise(rb_eTypeError, "Invalid magic value for BERT string"); diff --git a/lib/bert/decode.rb b/lib/bert/decode.rb index b9b2369..04c62bf 100644 --- a/lib/bert/decode.rb +++ b/lib/bert/decode.rb @@ -9,6 +9,12 @@ def read_bin length = read_4 read_string(length) end + + def read_erl_string + fail("Invalid Type, not an erlang string") unless read_1 == STRING + length = read_2 + read_string(length).unpack('C' * length) + end end def self.impl @@ -52,6 +58,8 @@ def read_any_raw when STRING then read_erl_string when LIST then read_list when BIN then read_bin + when ENC_STRING then read_enc_string + when UNICODE_STRING then read_unicode_string else fail("Unknown term tag: 
#{peek_1}") end @@ -238,6 +246,14 @@ def read_nil [] end + def read_unicode_string + fail("Invalid Type, not a unicode string") unless read_1 == UNICODE_STRING + length = read_4 + str = read_string(length) + str.force_encoding "UTF-8" + str + end + def read_erl_string fail("Invalid Type, not an erlang string") unless read_1 == STRING length = read_2 @@ -255,6 +271,18 @@ def read_list def read_bin fail("Invalid Type, not an erlang binary") unless read_1 == BIN length = read_4 + read_string(length) + end + + def fail(str) + raise str + end + + private + + def read_enc_string + fail("Invalid Type, not an erlang binary") unless read_1 == ENC_STRING + length = read_4 x = read_string(length) fail("Invalid Type, not an erlang binary") unless read_1 == BIN @@ -262,9 +290,5 @@ def read_bin x.force_encoding read_string(length) x end - - def fail(str) - raise str - end end end diff --git a/lib/bert/encode.rb b/lib/bert/encode.rb index e243fae..4c610d7 100644 --- a/lib/bert/encode.rb +++ b/lib/bert/encode.rb @@ -4,15 +4,35 @@ class Encode class V2 < Encode def write_binary(data) - super + enc = data.encoding + case enc + when ::Encoding::UTF_8, ::Encoding::US_ASCII + write_unicode_string data + when ::Encoding::ASCII_8BIT + super + else + write_enc_string data + end + end + + private + + def write_unicode_string(data) + write_1 UNICODE_STRING + write_4 data.bytesize + write_string data + end + + def write_enc_string(data) + write_1 ENC_STRING + write_4 data.bytesize + write_string data enc = data.encoding.name write_1 BIN write_4 enc.bytesize write_string enc end - private - def version_header VERSION_2 end diff --git a/lib/bert/types.rb b/lib/bert/types.rb index 257b834..0b74abb 100644 --- a/lib/bert/types.rb +++ b/lib/bert/types.rb @@ -12,8 +12,9 @@ module Types STRING = 107 LIST = 108 BIN = 109 + ENC_STRING = 112 + UNICODE_STRING = 113 FUN = 117 - NEW_FUN = 112 MAGIC = 131 VERSION_2 = 132 MAX_INT = (1 << 27) -1 diff --git a/test/bert_test.rb b/test/bert_test.rb index 
a4be99a..04e608e 100644 --- a/test/bert_test.rb +++ b/test/bert_test.rb @@ -13,8 +13,8 @@ class BertTest < Test::Unit::TestCase setup do @old_version = BERT::Encode.version BERT::Encode.version = :v2 - @bert = "\x84h\td\x00\x04userh\x03d\x00\x04bertd\x00\x04dictl\x00\x00\x00\x01h\x02d\x00\x04namem\x00\x00\x00\x03TPWm\x00\x00\x00\x05UTF-8jl\x00\x00\x00\x02h\x04d\x00\x04bertd\x00\x05regexm\x00\x00\x00\x03catm\x00\x00\x00\bUS-ASCIIl\x00\x00\x00\x01d\x00\bcaselessjc9.900000000000000e+00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00jh\x05d\x00\x04bertd\x00\x04timeb\x00\x00\x04\xE6b\x00\x0E\xE4\xC3a\x00h\x02d\x00\x04bertd\x00\x03nilh\x02d\x00\x04bertd\x00\x04trueh\x02d\x00\x04bertd\x00\x05falsed\x00\x04trued\x00\x05false".b - @ebin = "<<132,104,9,100,0,4,117,115,101,114,104,3,100,0,4,98,101,114,116,100,0,4,100,105,99,116,108,0,0,0,1,104,2,100,0,4,110,97,109,101,109,0,0,0,3,84,80,87,109,0,0,0,5,85,84,70,45,56,106,108,0,0,0,2,104,4,100,0,4,98,101,114,116,100,0,5,114,101,103,101,120,109,0,0,0,3,99,97,116,109,0,0,0,8,85,83,45,65,83,67,73,73,108,0,0,0,1,100,0,8,99,97,115,101,108,101,115,115,106,99,57,46,57,48,48,48,48,48,48,48,48,48,48,48,48,48,48,101,43,48,48,0,0,0,0,0,0,0,0,0,0,106,104,5,100,0,4,98,101,114,116,100,0,4,116,105,109,101,98,0,0,4,230,98,0,14,228,195,97,0,104,2,100,0,4,98,101,114,116,100,0,3,110,105,108,104,2,100,0,4,98,101,114,116,100,0,4,116,114,117,101,104,2,100,0,4,98,101,114,116,100,0,5,102,97,108,115,101,100,0,4,116,114,117,101,100,0,5,102,97,108,115,101>>" + @bert = "\x84h\td\x00\x04userh\x03d\x00\x04bertd\x00\x04dictl\x00\x00\x00\x01h\x02d\x00\x04nameq\x00\x00\x00\x03TPWjl\x00\x00\x00\x02h\x04d\x00\x04bertd\x00\x05regexq\x00\x00\x00\x03catl\x00\x00\x00\x01d\x00\bcaselessjc9.900000000000000e+00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00jh\x05d\x00\x04bertd\x00\x04timeb\x00\x00\x04\xE6b\x00\x0E\xE4\xC3a\x00h\x02d\x00\x04bertd\x00\x03nilh\x02d\x00\x04bertd\x00\x04trueh\x02d\x00\x04bertd\x00\x05falsed\x00\x04trued\x00\x05false".b + @ebin = 
"<<132,104,9,100,0,4,117,115,101,114,104,3,100,0,4,98,101,114,116,100,0,4,100,105,99,116,108,0,0,0,1,104,2,100,0,4,110,97,109,101,113,0,0,0,3,84,80,87,106,108,0,0,0,2,104,4,100,0,4,98,101,114,116,100,0,5,114,101,103,101,120,113,0,0,0,3,99,97,116,108,0,0,0,1,100,0,8,99,97,115,101,108,101,115,115,106,99,57,46,57,48,48,48,48,48,48,48,48,48,48,48,48,48,48,101,43,48,48,0,0,0,0,0,0,0,0,0,0,106,104,5,100,0,4,98,101,114,116,100,0,4,116,105,109,101,98,0,0,4,230,98,0,14,228,195,97,0,104,2,100,0,4,98,101,114,116,100,0,3,110,105,108,104,2,100,0,4,98,101,114,116,100,0,4,116,114,117,101,104,2,100,0,4,98,101,114,116,100,0,5,102,97,108,115,101,100,0,4,116,114,117,101,100,0,5,102,97,108,115,101>>" end teardown do diff --git a/test/encoder_test.rb b/test/encoder_test.rb index 1b6cb9b..7d4c62c 100644 --- a/test/encoder_test.rb +++ b/test/encoder_test.rb @@ -112,7 +112,7 @@ class EncoderTest < Test::Unit::TestCase should 'handle utf8 strings' do str = "été".encode 'UTF-8' - bert = [132, 109, 0, 0, 0, 5, 195, 169, 116, 195, 169, 109, 0, 0, 0, 5, 85, 84, 70, 45, 56].pack('C*') + bert = [132, 113, 0, 0, 0, 5, 195, 169, 116, 195, 169].pack('C*') assert_equal bert, BERT::Encoder.encode("été") end From 31ab6593386f44eae71b681abd0c6dd2a9e030ca Mon Sep 17 00:00:00 2001 From: Aaron Patterson Date: Thu, 14 Apr 2016 15:02:07 -0700 Subject: [PATCH 5/6] reduce diff --- ext/bert/c/decode.c | 23 ++++++----------------- lib/bert/decode.rb | 18 +----------------- 2 files changed, 7 insertions(+), 34 deletions(-) diff --git a/ext/bert/c/decode.c b/ext/bert/c/decode.c index 85c08e1..3e492f0 100644 --- a/ext/bert/c/decode.c +++ b/ext/bert/c/decode.c @@ -27,14 +27,9 @@ static VALUE rb_mBERT; static VALUE rb_cDecode; static VALUE rb_cTuple; -typedef struct bert_buf bert_buf; - -typedef VALUE (*bert_ptr)(struct bert_buf *buf); - struct bert_buf { const uint8_t *data; const uint8_t *end; - bert_ptr *callbacks; }; static VALUE bert_read_invalid(struct bert_buf *buf); @@ -51,10 +46,10 @@ static VALUE 
bert_read_list(struct bert_buf *buf); static VALUE bert_read_bin(struct bert_buf *buf); static VALUE bert_read_enc_string(struct bert_buf *buf); static VALUE bert_read_unicode_string(struct bert_buf *buf); -static VALUE bert_read_unicode_string(struct bert_buf *buf); static VALUE bert_read_sbignum(struct bert_buf *buf); static VALUE bert_read_lbignum(struct bert_buf *buf); +typedef VALUE (*bert_ptr)(struct bert_buf *buf); static bert_ptr bert_callbacks[] = { &bert_read_sint, &bert_read_int, @@ -118,7 +113,7 @@ static VALUE bert_read(struct bert_buf *buf) if (!BERT_VALID_TYPE(type)) rb_raise(rb_eRuntimeError, "Invalid tag '%d' for term", type); - return buf->callbacks[type - BERT_TYPE_OFFSET](buf); + return bert_callbacks[type - BERT_TYPE_OFFSET](buf); } static VALUE bert_read_dict(struct bert_buf *buf) @@ -517,17 +512,11 @@ static VALUE rb_bert_decode(VALUE klass, VALUE rb_string) bert_buf_ensure(&buf, 1); proto_version = bert_buf_read8(&buf); - switch(proto_version) { - case ERL_VERSION: - buf.callbacks = bert_callbacks; - break; - case ERL_VERSION2: - buf.callbacks = bert_callbacks; - break; - default: - rb_raise(rb_eTypeError, "Invalid magic value for BERT string"); + if (proto_version == ERL_VERSION || proto_version == ERL_VERSION2) { + return bert_read(&buf); + } else { + rb_raise(rb_eTypeError, "Invalid magic value for BERT string"); } - return bert_read(&buf); } static VALUE rb_bert_impl(VALUE klass) diff --git a/lib/bert/decode.rb b/lib/bert/decode.rb index 04c62bf..b8aeeb2 100644 --- a/lib/bert/decode.rb +++ b/lib/bert/decode.rb @@ -3,20 +3,6 @@ class Decode attr_accessor :in include Types - class V1 < Decode - def read_bin - fail("Invalid Type, not an erlang binary") unless read_1 == BIN - length = read_4 - read_string(length) - end - - def read_erl_string - fail("Invalid Type, not an erlang string") unless read_1 == STRING - length = read_2 - read_string(length).unpack('C' * length) - end - end - def self.impl 'Ruby' end @@ -26,9 +12,7 @@ def 
self.decode(string) io.set_encoding('binary') if io.respond_to?(:set_encoding) header = io.getbyte case header - when MAGIC - Decode::V1.new(io).read_any - when VERSION_2 + when MAGIC, VERSION_2 new(io).read_any else fail("Bad Magic") From 3113c6f3953c2314cae2e08db4b6a40579a37b02 Mon Sep 17 00:00:00 2001 From: Aaron Patterson Date: Mon, 18 Apr 2016 08:19:34 -0700 Subject: [PATCH 6/6] add a comment about ERL types The two new types are extensions, so this commit adds a comment documenting what these extensions are for (namely so that we can support string encodings over the wire). --- ext/bert/c/decode.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/ext/bert/c/decode.c b/ext/bert/c/decode.c index 3e492f0..3b610f9 100644 --- a/ext/bert/c/decode.c +++ b/ext/bert/c/decode.c @@ -15,12 +15,17 @@ #define ERL_BIN 109 #define ERL_SMALL_BIGNUM 110 #define ERL_LARGE_BIGNUM 111 -#define ERL_ENC_STRING 112 -#define ERL_UNICODE_STRING 113 + +/* These two types are specific to version 2 of the protocol. They diverge + * from Erlang, but allow us to pass string encodings across the wire. */ +#define ERLEXT_ENC_STRING 112 +#define ERLEXT_UNICODE_STRING 113 + +/* Protocol version constants. */ #define ERL_VERSION 131 #define ERL_VERSION2 132 -#define BERT_VALID_TYPE(t) ((t) >= ERL_SMALL_INT && (t) <= ERL_UNICODE_STRING) +#define BERT_VALID_TYPE(t) ((t) >= ERL_SMALL_INT && (t) <= ERLEXT_UNICODE_STRING) #define BERT_TYPE_OFFSET (ERL_SMALL_INT) static VALUE rb_mBERT;