Commit b6bf5b4 (parent: 7d017f0)
Showing 24 changed files with 3,872 additions and 207 deletions.
download_iwslt15.sh (new file, +34 lines):
#!/bin/sh
# Copied from https://github.com/tensorflow/nmt/blob/master/nmt/scripts/download_iwslt15.sh
#
# Download the small-scale IWSLT15 Vietnamese-to-English translation data for
# NMT model training.
#
# Usage:
#   ./download_iwslt15.sh path-to-output-dir
#
# If the output directory is not specified, "./iwslt15" is used as the
# default output directory.

set -e  # Abort on the first failed command.

OUT_DIR="${1:-iwslt15}"
SITE_PREFIX="https://nlp.stanford.edu/projects/nmt/data"

mkdir -v -p "$OUT_DIR"

# Download the small IWSLT15 dataset from the Stanford NLP website.
echo "Downloading training data train.en and train.vi."
curl -o "$OUT_DIR/train.en" "$SITE_PREFIX/iwslt15.en-vi/train.en"
curl -o "$OUT_DIR/train.vi" "$SITE_PREFIX/iwslt15.en-vi/train.vi"

echo "Downloading dev data tst2012.en and tst2012.vi."
curl -o "$OUT_DIR/tst2012.en" "$SITE_PREFIX/iwslt15.en-vi/tst2012.en"
curl -o "$OUT_DIR/tst2012.vi" "$SITE_PREFIX/iwslt15.en-vi/tst2012.vi"

echo "Downloading test data tst2013.en and tst2013.vi."
curl -o "$OUT_DIR/tst2013.en" "$SITE_PREFIX/iwslt15.en-vi/tst2013.en"
curl -o "$OUT_DIR/tst2013.vi" "$SITE_PREFIX/iwslt15.en-vi/tst2013.vi"

echo "Downloading vocab files vocab.en and vocab.vi."
curl -o "$OUT_DIR/vocab.en" "$SITE_PREFIX/iwslt15.en-vi/vocab.en"
curl -o "$OUT_DIR/vocab.vi" "$SITE_PREFIX/iwslt15.en-vi/vocab.vi"
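For example, running "./download_iwslt15.sh data/iwslt15" fetches the train/dev/test splits (train, tst2012, tst2013) and both vocabulary files into data/iwslt15/; invoked with no argument, everything lands in ./iwslt15/.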
nn.DummyCriterion (new Lua file, +25 lines):
-- Copyright (c) Microsoft Corporation. All rights reserved.
-- Licensed under the MIT License.
--
--[[
--
-- Dummy Criterion
--
-- A no-op training criterion: the "loss" is simply the mean of the input,
-- and the gradient w.r.t. every input element is the constant 1/n.
--
--]]

local DummyCriterion, parent = torch.class('nn.DummyCriterion', 'nn.Criterion')

function DummyCriterion:__init()
    parent.__init(self)
end

-- The target is ignored; the output is the mean over all input elements.
function DummyCriterion:updateOutput(input, target)
    self.output = torch.mean(input)
    return self.output
end

-- Since output = (1/n) * sum_i input_i, the gradient of the output with
-- respect to each input element is 1/n.
function DummyCriterion:updateGradInput(input, target)
    local n = input:nElement()
    self.gradInput = input.new(input:size()):fill(1.0 / n)
    return self.gradInput
end
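For illustration, a minimal usage sketch of the criterion above. It assumes the file has already been loaded (the package's require path is not shown in this diff) so that torch.class has registered nn.DummyCriterion:

require 'nn'
-- Assumes the DummyCriterion file above has been loaded, registering
-- nn.DummyCriterion; the exact require path is not shown in this commit.

local crit = nn.DummyCriterion()
local input = torch.randn(4, 5)       -- 20 elements in total
local loss = crit:forward(input)      -- equals torch.mean(input); target unused
local grad = crit:backward(input)     -- a 4x5 tensor, every entry 1/20 = 0.05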
BGRUModel (new Lua file, +174 lines):
-- Copyright (c) 2017-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the license found in the LICENSE file in
-- the root directory of this source tree. An additional grant of patent rights
-- can be found in the PATENTS file in the same directory.
--
-- Copyright (c) Microsoft Corporation. All rights reserved.
-- Licensed under the BSD License.
--
--[[
--
-- This model uses a bi-directional GRU encoder. The direction is reversed
-- between layers and two separate columns run in parallel: one on the normal
-- input and one on the reversed input (as described in
-- http://arxiv.org/abs/1606.04199).
--
-- The attention mechanism and the decoder setup are identical to the avgpool
-- model.
--
--]]

require 'nn'
require 'rnnlib'
local usecudnn = pcall(require, 'cudnn')
local argcheck = require 'argcheck'
local mutils = require 'fairseq.models.utils'
local rutils = require 'rnnlib.mutils'

local BGRUModel = torch.class('BGRUModel', 'AvgpoolModel')

BGRUModel.makeEncoderColumn = argcheck{
    {name='self', type='BGRUModel'},
    {name='config', type='table'},
    {name='inith', type='nngraph.Node'},
    {name='input', type='nngraph.Node'},
    {name='nlayers', type='number'},
    call = function(self, config, inith, input, nlayers)
        local rnnconfig = {
            inputsize = config.nembed,
            hidsize = config.nhid,
            nlayer = 1,
            winitfun = function(network)
                rutils.defwinitfun(network, config.init_range)
            end,
            usecudnn = usecudnn,
        }

        local rnn = nn.GRU(rnnconfig)
        rnn.saveHidden = false
        local output = nn.SelectTable(-1)(nn.SelectTable(2)(
            rnn({inith, input}):annotate{name = 'encoderRNN'}
        ))
        rnnconfig.inputsize = config.nhid

        for i = 2, nlayers do
            if config.dropout_hid > 0 then
                output = nn.MapTable(nn.Dropout(config.dropout_hid))(output)
            end
            local rnn = nn.GRU(rnnconfig)
            rnn.saveHidden = false
            -- Reverse the sequence so that each layer runs in the opposite
            -- direction of the layer below it.
            output = nn.SelectTable(-1)(nn.SelectTable(2)(
                rnn({
                    inith,
                    nn.ReverseTable()(output),
                })
            ))
        end
        return output
    end
}

BGRUModel.makeEncoder = argcheck{
    doc=[[
This encoder runs a forward and a backward GRU network and concatenates their
top-most hidden states.
]],
    {name='self', type='BGRUModel'},
    {name='config', type='table'},
    call = function(self, config)
        local sourceIn = nn.Identity()()
        local inith, tokens = sourceIn:split(2)

        local dict = config.srcdict
        local lut = mutils.makeLookupTable(config, dict:size(),
            dict.pad_index)
        local embed
        if config.dropout_src > 0 then
            embed = nn.MapTable(nn.Sequential()
                :add(lut)
                :add(nn.Dropout(config.dropout_src)))(tokens)
        else
            embed = nn.MapTable(lut)(tokens)
        end

        local col1 = self:makeEncoderColumn{
            config = config,
            inith = inith,
            input = embed,
            nlayers = config.nenclayer,
        }
        local col2 = self:makeEncoderColumn{
            config = config,
            inith = inith,
            input = nn.ReverseTable()(embed),
            nlayers = config.nenclayer,
        }

        -- Each column switches direction between layers. Before merging,
        -- they should both run in the same direction (here: forward).
        if config.nenclayer % 2 == 0 then
            col1 = nn.ReverseTable()(col1)
        else
            col2 = nn.ReverseTable()(col2)
        end

        local prepare = nn.Sequential()
        -- Concatenate forward and backward states
        prepare:add(nn.JoinTable(2, 2))
        -- Scale down to nembed for further processing
        prepare:add(nn.Linear(config.nhid * 2, config.nembed, false))
        -- Add singleton dimension for subsequent joining
        prepare:add(nn.View(-1, 1, config.nembed))

        local joinedOutput = nn.JoinTable(1, 2)(
            nn.MapTable(prepare)(
                nn.ZipTable()({col1, col2})
            )
        )
        if config.dropout_hid > 0 then
            joinedOutput = nn.Dropout(config.dropout_hid)(joinedOutput)
        end

        -- avgpool_model.makeDecoder() expects two encoder outputs, one for
        -- attention score computation and the other one for applying them.
        -- We'll just use the same output for both.
        return nn.gModule({sourceIn}, {
            joinedOutput, nn.Identity()(joinedOutput)
        })
    end
}

BGRUModel.prepareSource = argcheck{
    {name='self', type='BGRUModel'},
    call = function(self)
        -- Device buffers for samples
        local buffers = {
            source = {},
        }

        -- NOTE: It's assumed that all encoders start from the same hidden
        -- state.
        local encoderRNN = mutils.findAnnotatedNode(
            self:network(), 'encoderRNN'
        )
        assert(encoderRNN ~= nil)

        return function(sample)
            -- Copy each timestep of the encoder input into a reusable
            -- device-resident buffer.
            local source = {}
            for i = 1, sample.source:size(1) do
                buffers.source[i] = buffers.source[i]
                    or torch.Tensor():type(self:type())
                source[i] = mutils.sendtobuf(sample.source[i],
                    buffers.source[i])
            end

            local initialHidden = encoderRNN:initializeHidden(sample.bsz)
            return {initialHidden, source}
        end
    end
}

return BGRUModel
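One detail worth calling out: in this codebase a sequence is represented as a Lua table with one tensor per timestep, so reversing the processing direction of a layer is just a table reversal. A minimal sketch of that mechanism in isolation (it assumes nn.ReverseTable is on the load path, as it evidently is for the model above):

require 'nn'

-- A "sequence" is a table with one tensor per timestep.
local seq = {torch.Tensor{1}, torch.Tensor{2}, torch.Tensor{3}}

-- nn.ReverseTable flips the timestep order to {3, 2, 1}. Feeding the
-- reversed table into the next recurrent layer makes that layer run
-- backwards; doing this between every pair of layers is how each encoder
-- column alternates direction, and how the second column consumes the
-- reversed source sentence.
local rev = nn.ReverseTable():forward(seq)
print(rev[1][1], rev[2][1], rev[3][1])  -- prints 3 2 1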