From f4c5a0bbc22a17da6e46d799daa4f0fd79e9f2fc Mon Sep 17 00:00:00 2001
From: ChristianRomberg
Date: Sat, 28 Apr 2018 10:49:31 +0200
Subject: [PATCH] Support binary format

Should fix #221
---
 train.lua | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/train.lua b/train.lua
index 52210ec8..12a06297 100644
--- a/train.lua
+++ b/train.lua
@@ -76,7 +76,14 @@ end
 local loader = DataLoader(opt)
 local vocab = utils.read_json(opt.input_json)
 local idx_to_token = {}
+local binary_pattern = "^%[(%d+)%]$"
 for k, v in pairs(vocab.idx_to_token) do
+  -- A token of the exact form "[N]" (0 <= N <= 255) is an escaped raw byte: decode it
+  local digits = v:match(binary_pattern)
+  local ordinal = digits and tonumber(digits)
+  if ordinal and ordinal <= 255 then
+    v = string.char(ordinal)
+  end
   idx_to_token[tonumber(k)] = v
 end
 