Skip to content

Commit 4e78dc4

Browse files
committed
default encoder to version 1, enable version 2 with a flag
This commit makes the encoder default to version 1 of the BERT encoding scheme, but allows you to turn on version 2 via a feature flag (`BERT::Encode.version = :v2`).
1 parent 70fd1ff commit 4e78dc4

File tree

4 files changed

+104
-33
lines changed

4 files changed

+104
-33
lines changed

lib/bert.rb

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,6 @@
88

99
begin
1010
# try to load the C extension
11-
raise LoadError
1211
require 'bert/c/decode'
1312
rescue LoadError
1413
# fall back on the pure ruby version

lib/bert/encode.rb

Lines changed: 33 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,27 @@ module BERT
22
class Encode
33
include Types
44

5+
class V2 < Encode
6+
def write_binary(data)
7+
super
8+
enc = data.encoding.name
9+
write_1 BIN
10+
write_4 enc.bytesize
11+
write_string enc
12+
end
13+
14+
private
15+
16+
def version_header
17+
VERSION_2
18+
end
19+
end
20+
21+
class << self
22+
attr_accessor :version
23+
end
24+
self.version = :v1
25+
526
attr_accessor :out
627

728
def initialize(out)
@@ -11,12 +32,18 @@ def initialize(out)
1132
def self.encode(data)
1233
io = StringIO.new
1334
io.set_encoding('binary') if io.respond_to?(:set_encoding)
14-
self.new(io).write_any(data)
35+
36+
if version == :v2
37+
Encode::V2.new(io).write_any(data)
38+
else
39+
new(io).write_any(data)
40+
end
41+
1542
io.string
1643
end
1744

1845
def write_any obj
19-
write_1 VERSION_2
46+
write_1 version_header
2047
write_any_raw obj
2148
end
2249

@@ -128,14 +155,14 @@ def write_binary(data)
128155
write_1 BIN
129156
write_4 data.bytesize
130157
write_string data
131-
enc = data.encoding.name
132-
write_1 BIN
133-
write_4 enc.bytesize
134-
write_string enc
135158
end
136159

137160
private
138161

162+
def version_header
163+
MAGIC
164+
end
165+
139166
def fail(obj)
140167
raise "Cannot encode to erlang external format: #{obj.inspect}"
141168
end

test/bert_test.rb

Lines changed: 37 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -5,40 +5,55 @@ class BertTest < Test::Unit::TestCase
55
setup do
66
time = Time.at(1254976067)
77
@ruby = t[:user, {:name => 'TPW'}, [/cat/i, 9.9], time, nil, true, false, :true, :false]
8-
@bert = "\x84h\td\x00\x04userh\x03d\x00\x04bertd\x00\x04dictl\x00\x00\x00\x01h\x02d\x00\x04namem\x00\x00\x00\x03TPWm\x00\x00\x00\x05UTF-8jl\x00\x00\x00\x02h\x04d\x00\x04bertd\x00\x05regexm\x00\x00\x00\x03catm\x00\x00\x00\bUS-ASCIIl\x00\x00\x00\x01d\x00\bcaselessjc9.900000000000000e+00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00jh\x05d\x00\x04bertd\x00\x04timeb\x00\x00\x04\xE6b\x00\x0E\xE4\xC3a\x00h\x02d\x00\x04bertd\x00\x03nilh\x02d\x00\x04bertd\x00\x04trueh\x02d\x00\x04bertd\x00\x05falsed\x00\x04trued\x00\x05false".b
98
@bert_old = "\203h\td\000\004userh\003d\000\004bertd\000\004dictl\000\000\000\001h\002d\000\004namem\000\000\000\003TPWjl\000\000\000\002h\004d\000\004bertd\000\005regexm\000\000\000\003catl\000\000\000\001d\000\bcaselessjc9.900000000000000e+00\000\000\000\000\000\000\000\000\000\000jh\005d\000\004bertd\000\004timeb\000\000\004\346b\000\016\344\303a\000h\002d\000\004bertd\000\003nilh\002d\000\004bertd\000\004trueh\002d\000\004bertd\000\005falsed\000\004trued\000\005false".b
10-
@ebin = "<<132,104,9,100,0,4,117,115,101,114,104,3,100,0,4,98,101,114,116,100,0,4,100,105,99,116,108,0,0,0,1,104,2,100,0,4,110,97,109,101,109,0,0,0,3,84,80,87,109,0,0,0,5,85,84,70,45,56,106,108,0,0,0,2,104,4,100,0,4,98,101,114,116,100,0,5,114,101,103,101,120,109,0,0,0,3,99,97,116,109,0,0,0,8,85,83,45,65,83,67,73,73,108,0,0,0,1,100,0,8,99,97,115,101,108,101,115,115,106,99,57,46,57,48,48,48,48,48,48,48,48,48,48,48,48,48,48,101,43,48,48,0,0,0,0,0,0,0,0,0,0,106,104,5,100,0,4,98,101,114,116,100,0,4,116,105,109,101,98,0,0,4,230,98,0,14,228,195,97,0,104,2,100,0,4,98,101,114,116,100,0,3,110,105,108,104,2,100,0,4,98,101,114,116,100,0,4,116,114,117,101,104,2,100,0,4,98,101,114,116,100,0,5,102,97,108,115,101,100,0,4,116,114,117,101,100,0,5,102,97,108,115,101>>"
119
@ebin_old = "<<131,104,9,100,0,4,117,115,101,114,104,3,100,0,4,98,101,114,116,100,0,4,100,105,99,116,108,0,0,0,1,104,2,100,0,4,110,97,109,101,109,0,0,0,3,84,80,87,106,108,0,0,0,2,104,4,100,0,4,98,101,114,116,100,0,5,114,101,103,101,120,109,0,0,0,3,99,97,116,108,0,0,0,1,100,0,8,99,97,115,101,108,101,115,115,106,99,57,46,57,48,48,48,48,48,48,48,48,48,48,48,48,48,48,101,43,48,48,0,0,0,0,0,0,0,0,0,0,106,104,5,100,0,4,98,101,114,116,100,0,4,116,105,109,101,98,0,0,4,230,98,0,14,228,195,97,0,104,2,100,0,4,98,101,114,116,100,0,3,110,105,108,104,2,100,0,4,98,101,114,116,100,0,4,116,114,117,101,104,2,100,0,4,98,101,114,116,100,0,5,102,97,108,115,101,100,0,4,116,114,117,101,100,0,5,102,97,108,115,101>>"
1210
end
1311

14-
should "decode the old format" do
15-
assert_equal @ruby, BERT.decode(@bert_old)
16-
end
12+
context "v2 encoder" do
13+
setup do
14+
@old_version = BERT::Encode.version
15+
BERT::Encode.version = :v2
16+
@bert = "\x84h\td\x00\x04userh\x03d\x00\x04bertd\x00\x04dictl\x00\x00\x00\x01h\x02d\x00\x04namem\x00\x00\x00\x03TPWm\x00\x00\x00\x05UTF-8jl\x00\x00\x00\x02h\x04d\x00\x04bertd\x00\x05regexm\x00\x00\x00\x03catm\x00\x00\x00\bUS-ASCIIl\x00\x00\x00\x01d\x00\bcaselessjc9.900000000000000e+00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00jh\x05d\x00\x04bertd\x00\x04timeb\x00\x00\x04\xE6b\x00\x0E\xE4\xC3a\x00h\x02d\x00\x04bertd\x00\x03nilh\x02d\x00\x04bertd\x00\x04trueh\x02d\x00\x04bertd\x00\x05falsed\x00\x04trued\x00\x05false".b
17+
@ebin = "<<132,104,9,100,0,4,117,115,101,114,104,3,100,0,4,98,101,114,116,100,0,4,100,105,99,116,108,0,0,0,1,104,2,100,0,4,110,97,109,101,109,0,0,0,3,84,80,87,109,0,0,0,5,85,84,70,45,56,106,108,0,0,0,2,104,4,100,0,4,98,101,114,116,100,0,5,114,101,103,101,120,109,0,0,0,3,99,97,116,109,0,0,0,8,85,83,45,65,83,67,73,73,108,0,0,0,1,100,0,8,99,97,115,101,108,101,115,115,106,99,57,46,57,48,48,48,48,48,48,48,48,48,48,48,48,48,48,101,43,48,48,0,0,0,0,0,0,0,0,0,0,106,104,5,100,0,4,98,101,114,116,100,0,4,116,105,109,101,98,0,0,4,230,98,0,14,228,195,97,0,104,2,100,0,4,98,101,114,116,100,0,3,110,105,108,104,2,100,0,4,98,101,114,116,100,0,4,116,114,117,101,104,2,100,0,4,98,101,114,116,100,0,5,102,97,108,115,101,100,0,4,116,114,117,101,100,0,5,102,97,108,115,101>>"
18+
end
1719

18-
should "roundtrip string and maintain encoding" do
19-
str = "日本語".encode 'EUC-JP'
20-
round = BERT.decode(BERT.encode(str))
21-
assert_equal str, round
22-
assert_equal str.encoding, round.encoding
23-
end
20+
teardown do
21+
BERT::Encode.version = @old_version
22+
end
2423

25-
should "roundtrip binary string" do
26-
str = "日本語".b
27-
round = BERT.decode(BERT.encode(str))
28-
assert_equal str, round
29-
assert_equal str.encoding, round.encoding
30-
end
24+
should "decode new format" do
25+
assert_equal @ruby, BERT.decode(@bert)
26+
end
3127

32-
should "encode" do
33-
assert_equal @bert, BERT.encode(@ruby)
28+
should "roundtrip string and maintain encoding" do
29+
str = "日本語".encode 'EUC-JP'
30+
round = BERT.decode(BERT.encode(str))
31+
assert_equal str, round
32+
assert_equal str.encoding, round.encoding
33+
end
34+
35+
should "roundtrip binary string" do
36+
str = "日本語".b
37+
round = BERT.decode(BERT.encode(str))
38+
assert_equal str, round
39+
assert_equal str.encoding, round.encoding
40+
end
41+
42+
should "encode" do
43+
assert_equal @bert, BERT.encode(@ruby)
44+
end
45+
46+
should "ebin" do
47+
assert_equal @ebin, BERT.ebin(@bert)
48+
end
3449
end
3550

36-
should "decode" do
37-
assert_equal @ruby, BERT.decode(@bert)
51+
should "decode the old format" do
52+
assert_equal @ruby, BERT.decode(@bert_old)
3853
end
3954

4055
should "ebin" do
41-
assert_equal @ebin, BERT.ebin(@bert)
56+
assert_equal @ebin_old, BERT.ebin(@bert_old)
4257
end
4358

4459
should "do roundtrips" do

test/encoder_test.rb

Lines changed: 34 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -83,23 +83,53 @@ class EncoderTest < Test::Unit::TestCase
8383

8484
should 'handle utf8 strings' do
8585
str = "été".encode 'UTF-8'
86-
bert = [132, 109, 0, 0, 0, 5, 195, 169, 116, 195, 169, 109, 0, 0, 0, 5, 85, 84, 70, 45, 56].pack('C*')
86+
bert = [131, 109, 0, 0, 0, 5, 195, 169, 116, 195, 169].pack('C*')
8787
assert_equal bert, BERT::Encoder.encode("été")
8888
end
8989

9090
should 'handle utf8 symbols' do
91-
bert = [132, 100, 0, 5, 195, 169, 116, 195, 169].pack('C*')
91+
bert = [131, 100, 0, 5, 195, 169, 116, 195, 169].pack('C*')
9292
assert_equal bert, BERT::Encoder.encode(:'été')
9393
end
9494

9595
should "handle bignums" do
96-
bert = [132,110,8,0,0,0,232,137,4,35,199,138].pack('c*')
96+
bert = [131,110,8,0,0,0,232,137,4,35,199,138].pack('c*')
9797
assert_equal bert, BERT::Encoder.encode(10_000_000_000_000_000_000)
9898

99-
bert = [132,110,8,1,0,0,232,137,4,35,199,138].pack('c*')
99+
bert = [131,110,8,1,0,0,232,137,4,35,199,138].pack('c*')
100100
assert_equal bert, BERT::Encoder.encode(-10_000_000_000_000_000_000)
101101
end
102102

103+
context "v2" do
104+
setup do
105+
@old_version = BERT::Encode.version
106+
BERT::Encode.version = :v2
107+
end
108+
109+
teardown do
110+
BERT::Encode.version = @old_version
111+
end
112+
113+
should 'handle utf8 strings' do
114+
str = "été".encode 'UTF-8'
115+
bert = [132, 109, 0, 0, 0, 5, 195, 169, 116, 195, 169, 109, 0, 0, 0, 5, 85, 84, 70, 45, 56].pack('C*')
116+
assert_equal bert, BERT::Encoder.encode("été")
117+
end
118+
119+
should 'handle utf8 symbols' do
120+
bert = [132, 100, 0, 5, 195, 169, 116, 195, 169].pack('C*')
121+
assert_equal bert, BERT::Encoder.encode(:'été')
122+
end
123+
124+
should "handle bignums" do
125+
bert = [132,110,8,0,0,0,232,137,4,35,199,138].pack('c*')
126+
assert_equal bert, BERT::Encoder.encode(10_000_000_000_000_000_000)
127+
128+
bert = [132,110,8,1,0,0,232,137,4,35,199,138].pack('c*')
129+
assert_equal bert, BERT::Encoder.encode(-10_000_000_000_000_000_000)
130+
end
131+
end
132+
103133
should "leave other stuff alone" do
104134
before = [1, 2.0, [:foo, 'bar']]
105135
assert_equal before, BERT::Encoder.convert(before)

0 commit comments

Comments
 (0)