From 84ee65055124659c9f26a906eadd575e71f01b8a Mon Sep 17 00:00:00 2001 From: Alexander Shtuchkin Date: Wed, 15 Jul 2020 18:08:49 -0400 Subject: [PATCH] Implement UTF-16LE encoding, update tests, adjust codec interface Three major reasons for reimplementing UTF-16 and not use native codec: 1. We want to remove StringDecoder & Buffer references due to #235. 2. StringDecoder is inconsistent with handling surrogates on Node v6-9 3. NPM module string_decoder gives strange results when processing chunks - it sometimes prepends '\u0000', likely due to a bug. Performance was and is a major concern here. Decoder shouldn't be affected because it uses backend methods directly. Encoder is affected due to introducing character-level loop. It's still very fast (~450Mb/s), so I'm not too worried. If needed, we can make it about 4x faster in Node.js by introducing a dedicated backend method. Browser speeds will be the same. --- encodings/internal.js | 3 +- encodings/utf16.js | 177 +++++++++++++++++++++++++++++------ lib/index.js | 14 ++- test/streams-test.js | 16 +--- test/utf16-test.js | 211 ++++++++++++++++++++++++++++++++++-------- test/utils.js | 26 ++++++ 6 files changed, 360 insertions(+), 87 deletions(-) diff --git a/encodings/internal.js b/encodings/internal.js index dc1074f..d04ed2f 100644 --- a/encodings/internal.js +++ b/encodings/internal.js @@ -9,8 +9,7 @@ module.exports = { cesu8: { type: "_internal", bomAware: true}, unicode11utf8: "utf8", - ucs2: { type: "_internal", bomAware: true}, - utf16le: "ucs2", + // NOTE: utf-16le/ucs2 are in utf16.js. binary: { type: "_internal" }, base64: { type: "_internal" }, diff --git a/encodings/utf16.js b/encodings/utf16.js index ba23f9a..ed9bb09 100644 --- a/encodings/utf16.js +++ b/encodings/utf16.js @@ -1,17 +1,123 @@ "use strict"; -// Note: UTF16-LE (or UCS2) codec is Node.js native. See encodings/internal.js +// == UTF16-LE codec. 
==========================================================
+// Note: We're not using Node.js native codec because StringDecoder implementation is buggy
+// (adds \0 in some chunks; doesn't flag non-even number of bytes). We do use raw encoding/decoding
+// routines for performance, though.
+
+exports.utf16le = class Utf16LECodec {
+    createEncoder(options, iconv) {
+        return new Utf16LEEncoder(iconv.backend);
+    }
+    createDecoder(options, iconv) {
+        return new Utf16LEDecoder(iconv.backend, iconv.defaultCharUnicode);
+    }
+    get bomAware() { return true; }
+}
+
+class Utf16LEEncoder {
+    constructor(backend) {
+        this.backend = backend;
+    }
+
+    write(str) {
+        const bytes = this.backend.allocBytes(str.length * 2);
+        const chars = new Uint16Array(bytes.buffer, bytes.byteOffset, str.length);
+        for (let i = 0; i < str.length; i++) {
+            chars[i] = str.charCodeAt(i);
+        }
+        return this.backend.bytesToResult(bytes, bytes.length);
+    }
+
+    end() {}
+}
+
+class Utf16LEDecoder {
+    constructor(backend, defaultChar) {
+        this.backend = backend;
+        this.defaultChar = defaultChar;
+        this.overflowByte = -1;
+        this.prefixSurrogate = undefined;
+    }
+
+    write(buf) {
+        if (buf.length == 0) {
+            return '';
+        }
+        let byteOffset = buf.byteOffset;
+        let byteLen = buf.length;
+
+        // Process previous overflowByte
+        let prefix = '';
+        if (this.overflowByte !== -1) {
+            byteOffset++; byteLen--;
+            prefix = String.fromCharCode(this.overflowByte + (buf[0] << 8));
+        }
+
+        // Set new overflowByte
+        if (byteLen & 1) {
+            this.overflowByte = buf[buf.length-1];
+            byteLen--;
+        } else {
+            this.overflowByte = -1;
+        }
+
+        let chars;
+        if ((byteOffset & 1) === 0) {
+            // If byteOffset is aligned, just use the ArrayBuffer from input buf.
+            chars = new Uint16Array(buf.buffer, byteOffset, byteLen >> 1);
+        } else {
+            // If byteOffset is NOT aligned, create a new aligned buffer and copy the data.
+ chars = this.backend.allocRawChars(byteLen >> 1); + const srcByteView = new Uint8Array(buf.buffer, byteOffset, byteLen); + const destByteView = new Uint8Array(chars.buffer, chars.byteOffset, byteLen); + destByteView.set(srcByteView); + } + + let res = prefix + this.backend.rawCharsToResult(chars, chars.length); + if (res) { + // Add high surrogate from previous chunk. + if (this.prefixSurrogate) { + res = this.prefixSurrogate + res; + this.prefixSurrogate = undefined; + } + + // Slice off a new high surrogate at the end of the current chunk. + const lastChar = res.charCodeAt(res.length-1); + if (0xD800 <= lastChar && lastChar < 0xDC00) { + this.prefixSurrogate = res[res.length-1]; + res = res.slice(0, -1); + } + } + return res; + } + + end() { + if (this.prefixSurrogate || this.overflowByte !== -1) { + const res = (this.prefixSurrogate ? this.prefixSurrogate : '') + (this.overflowByte !== -1 ? this.defaultChar : ''); + this.prefixSurrogate = undefined; + this.overflowByte = -1; + return res; + } + } +} +exports.ucs2 = "utf16le"; // Alias + // == UTF16-BE codec. 
========================================================== exports.utf16be = class Utf16BECodec { - get encoder() { return Utf16BEEncoder; } - get decoder() { return Utf16BEDecoder; } + createEncoder(options, iconv) { + return new Utf16BEEncoder(iconv.backend); + } + createDecoder(options, iconv) { + return new Utf16BEDecoder(iconv.backend, iconv.defaultCharUnicode); + } get bomAware() { return true; } } class Utf16BEEncoder { - constructor(opts, codec, backend) { + constructor(backend) { this.backend = backend; } @@ -30,30 +136,59 @@ class Utf16BEEncoder { } class Utf16BEDecoder { - constructor(opts, codec, backend) { + constructor(backend, defaultChar) { this.backend = backend; + this.defaultChar = defaultChar; this.overflowByte = -1; + this.prefixSurrogate = undefined; } write(buf) { + if (buf.length === 0) { + return ''; + } + const chars = this.backend.allocRawChars((buf.length+1) >> 1); let charsPos = 0, i = 0; - if (this.overflowByte !== -1 && i < buf.length) { + if (this.overflowByte !== -1) { chars[charsPos++] = (this.overflowByte << 8) + buf[i++]; } + // NOTE: we can win another 10% perf by using chars[i >> 1]. + // NOTE: the double-reverse method takes almost the same time. for (; i < buf.length-1; i += 2) { chars[charsPos++] = (buf[i] << 8) + buf[i+1]; } this.overflowByte = (i == buf.length-1) ? buf[i] : -1; - return this.backend.rawCharsToResult(chars, charsPos); + let res = this.backend.rawCharsToResult(chars, charsPos); + if (res) { + // Add high surrogate from previous chunk. + if (this.prefixSurrogate) { + res = this.prefixSurrogate + res; + this.prefixSurrogate = undefined; + } + + // Slice off a new high surrogate at the end of the current chunk. 
+ const lastChar = res.charCodeAt(res.length-1); + if (0xD800 <= lastChar && lastChar < 0xDC00) { + this.prefixSurrogate = res[res.length-1]; + res = res.slice(0, -1); + } + } + return res; + } end() { - this.overflowByte = -1; + if (this.prefixSurrogate || this.overflowByte !== -1) { + const res = (this.prefixSurrogate ? this.prefixSurrogate : '') + (this.overflowByte !== -1 ? this.defaultChar : ''); + this.prefixSurrogate = undefined; + this.overflowByte = -1; + return res; + } } } @@ -67,39 +202,25 @@ class Utf16BEDecoder { // Encoder uses UTF-16LE and prepends BOM (which can be overridden with addBOM: false). exports.utf16 = class Utf16Codec { - constructor(opts, iconv) { - this.iconv = iconv; - } - get encoder() { return Utf16Encoder; } - get decoder() { return Utf16Decoder; } -} - -class Utf16Encoder { - constructor(options, codec) { + createEncoder(options, iconv) { options = options || {}; if (options.addBOM === undefined) options.addBOM = true; - this.encoder = codec.iconv.getEncoder(options.use || 'utf-16le', options); + return iconv.getEncoder('utf-16le', options); } - - // Pass-through to this.encoder - write(str) { - return this.encoder.write(str); - } - - end() { - return this.encoder.end(); + createDecoder(options, iconv) { + return new Utf16Decoder(options, iconv); } } class Utf16Decoder { - constructor(options, codec) { + constructor(options, iconv) { this.decoder = null; this.initialBufs = []; this.initialBufsLen = 0; this.options = options || {}; - this.iconv = codec.iconv; + this.iconv = iconv; } write(buf) { diff --git a/lib/index.js b/lib/index.js index f85d007..89b5890 100644 --- a/lib/index.js +++ b/lib/index.js @@ -105,8 +105,11 @@ iconv._canonicalizeEncoding = function(encoding) { } iconv.getEncoder = function getEncoder(encoding, options) { - var codec = iconv.getCodec(encoding), - encoder = new codec.encoder(options, codec, iconv.backend); + const codec = iconv.getCodec(encoding); + + let encoder = codec.createEncoder + ? 
codec.createEncoder(options, iconv) + : new codec.encoder(options, codec, iconv.backend); if (codec.bomAware && options && options.addBOM) encoder = new bomHandling.PrependBOM(encoder, options); @@ -115,8 +118,11 @@ iconv.getEncoder = function getEncoder(encoding, options) { } iconv.getDecoder = function getDecoder(encoding, options) { - var codec = iconv.getCodec(encoding), - decoder = new codec.decoder(options, codec, iconv.backend); + const codec = iconv.getCodec(encoding); + + let decoder = codec.createDecoder + ? codec.createDecoder(options, iconv) + : new codec.decoder(options, codec, iconv.backend); if (codec.bomAware && !(options && options.stripBOM === false)) decoder = new bomHandling.StripBOM(decoder, options); diff --git a/test/streams-test.js b/test/streams-test.js index 202781f..1de95bd 100644 --- a/test/streams-test.js +++ b/test/streams-test.js @@ -213,17 +213,7 @@ describe("Streaming mode", function() { encoding: "ucs2", input: [[0x3D], [0xD8, 0x3B], [0xDE]], // U+1F63B, 😻, SMILING CAT FACE WITH HEART-SHAPED EYES outputType: false, // Don't concat - checkOutput: function(res) { - if (semver.satisfies(process.version, '>= 6.2.1 < 10.0.0')) { - // After a string_decoder rewrite in https://github.com/nodejs/node/pull/6777, which - // was merged in Node v6.2.1, we don't merge chunks anymore. - // Not really correct, but it seems we cannot do anything with it. - // Though it has been fixed again in Node v10.0.0 - assert.deepEqual(res, ["\uD83D", "\uDE3B"]); - } else { - assert.deepEqual(res, ["\uD83D\uDE3B"]); // We should have only 1 chunk. - } - }, + checkOutput: function(res) { assert.deepEqual(res, ["\uD83D\uDE3B"]); }, // We should have only 1 chunk. 
})); it("Encoding using internal modules: utf8", checkEncodeStream({ @@ -264,13 +254,13 @@ describe("Streaming mode", function() { it("Decoding of uneven length buffers from UTF-16BE - 2", checkDecodeStream({ encoding: "UTF-16BE", - input: [[0x00, 0x61, 0x00], [0x62, 0x00, 0x63]], + input: [[0x00, 0x61, 0x00], [0x62, 0x00], [0x63]], output: "abc" })); it("Decoding of uneven length buffers from UTF-16", checkDecodeStream({ encoding: "UTF-16", - input: [[0x61], [0x0], [0x20], [0x0]], + input: [[0x61], [0x0, 0x20], [0x0]], output: "a " })); diff --git a/test/utf16-test.js b/test/utf16-test.js index 7a18188..013bb84 100644 --- a/test/utf16-test.js +++ b/test/utf16-test.js @@ -1,80 +1,211 @@ -var assert = require('assert'), - utils = require('./utils'), - iconv = utils.requireIconv(), - hex = utils.hex; - -var testStr = "1aя中文☃💩"; - utf16beBuf = utils.bytesFrom([0, 0x31, 0, 0x61, 0x04, 0x4f, 0x4e, 0x2d, 0x65, 0x87, 0x26, 0x03, 0xd8, 0x3d, 0xdc, 0xa9]), - utf16leBuf = utils.bytesFrom([0x31, 0, 0x61, 0, 0x4f, 0x04, 0x2d, 0x4e, 0x87, 0x65, 0x03, 0x26, 0x3d, 0xd8, 0xa9, 0xdc]), - utf16beBOM = utils.bytesFrom([0xFE, 0xFF]), - utf16leBOM = utils.bytesFrom([0xFF, 0xFE]), - sampleStr = '\n<俄语>данные'; - -describe("UTF-16LE codec #node-web", function() { +"use strict"; + +const assert = require('assert'), + utils = require('./utils'), + iconv = utils.requireIconv(), + hex = utils.hex; + +const testStr = "1aя中文☃💩", + utf16beBuf = utils.bytesFrom([0, 0x31, 0, 0x61, 0x04, 0x4f, 0x4e, 0x2d, 0x65, 0x87, 0x26, 0x03, 0xd8, 0x3d, 0xdc, 0xa9]), + utf16leBuf = utils.bytesFrom([0x31, 0, 0x61, 0, 0x4f, 0x04, 0x2d, 0x4e, 0x87, 0x65, 0x03, 0x26, 0x3d, 0xd8, 0xa9, 0xdc]), + utf16beBOM = utils.bytesFrom([0xFE, 0xFF]), + utf16leBOM = utils.bytesFrom([0xFF, 0xFE]), + sampleStr = '\n<数据>נְתוּנִים', + weirdBuf = utils.bytesFrom([0x15, 0x16, 0x17, 0x18]); // Can't automatically detect whether it's LE or BE. 
+ + +describe("UTF-16LE encoder #node-web", function() { + const enc = 'utf16-le'; + it("encodes basic strings correctly", function() { + assert.equal(hex(iconv.encode('', enc)), ''); + assert.equal(hex(iconv.encode(testStr, enc)), hex(utf16leBuf)); + }); + + it("adds BOM if asked", function() { + assert.equal(hex(iconv.encode(testStr, enc, {addBOM: true})), hex(utf16leBOM) + hex(utf16leBuf)); + }); + + // NOTE: I'm not sure what the right behavior is here. Node.js keeps all invalid surrogates as-is for + // both utf-16le and ucs2 encodings. TextEncoder can't encode utf-16, but when using utf-8, replaces + // these with '�'. Leaning towards Node side for now. + it("keeps single and invalid surrogates as-is", function() { + assert.equal(hex(iconv.encode(' \uD800 \uDE00 \uDE00\uD800 \uD800', enc)), + "2000 00d8 2000 00de 2000 00de 00d8 2000 00d8".replace(/ /g, "")); + }); + + it("keeps valid surrogate pairs split on a chunk boundary unchanged", function() { + const encoder = iconv.getEncoder(enc); + assert.equal(hex(encoder.write('\uD83D')), '3dd8'); + assert.equal(hex(encoder.write('\uDCA9')), 'a9dc'); + assert.strictEqual(encoder.end(), undefined); + }); +}); + +describe("UTF-16LE decoder #node-web", function() { + const enc = 'utf16-le'; + it("decodes basic buffers correctly", function() { + assert.equal(iconv.decode(utf16leBuf, enc), testStr); + }); + + it("decodes uneven length buffers showing an error", function() { + assert.equal(iconv.decode(utils.bytesFrom([0x61, 0, 0]), enc), "a�"); + }); + it("decodes very short buffers correctly", function() { - assert.equal(iconv.decode(utils.bytesFrom([]), 'utf-16le'), ''); - - // Looks like StringDecoder doesn't do the right thing here, returning '\u0000'. TODO: fix. 
- //assert.equal(iconv.decode(utils.bytesFrom([0x61]), 'utf-16le'), ''); + assert.equal(iconv.decode(utils.bytesFrom([]), enc), ''); + assert.equal(iconv.decode(utils.bytesFrom([0x61]), enc), '�'); }); + + it("handles chunks with uneven lengths correctly", utils.checkDecoderChunks(enc, { + inputs: [[], [0x61], [], [0x00], [0x61], [0x00, 0x61], [0x00, 0x00]], + outputs: ['', '', '', 'a', '', 'a', 'a', '�'], + })); + + it("doesn't split valid surrogate pairs between chunks", utils.checkDecoderChunks(enc, [{ + inputs: [[0x3D, 0xD8, 0x3B], [0xDE]], + outputs: [ '', "\uD83D\uDE3B"], + }, { + inputs: [[0x3D, 0xD8], [0x3B], [0xDE]], + outputs: [ '', '', "\uD83D\uDE3B"], + }, { + inputs: [[0x3D], [0xD8, 0x3B], [0xDE]], + outputs: [ '', '', "\uD83D\uDE3B"], + }, { + inputs: [[0x3D], [0xD8], [0x3B], [0xDE]], + outputs: [ '', '', '', "\uD83D\uDE3B"], + }])); + + it("handles complex surrogate pairs cases", utils.checkDecoderChunks(enc, [{ + inputs: [[0x3E], [0xD9], [0x3D], [0xD8], [0x3B], [0xDE]], + outputs: [ '', '', '', '\uD93E', '', "\uD83D\uDE3B"] + }, { + inputs: [[0x3E, 0xD9, 0x3D], [0xD8], [0x3B, 0xDE]], + outputs: [ '', '\uD93E', "\uD83D\uDE3B"], + }, { + inputs: [[0x3E, 0xD9, 0x3D]], + outputs: [ '', '\uD93E�'], + }, { + inputs: [[0x3E, 0xD9], [0x3D]], + outputs: [ '', '', '\uD93E�'], + }, { + inputs: [[0x3E, 0xD9]], + outputs: [ '', '\uD93E'], + }])); }); -describe("UTF-16BE codec #node-web", function() { +describe("UTF-16BE encoder #node-web", function() { + const enc = 'utf16-be'; it("encodes basic strings correctly", function() { - assert.equal(hex(iconv.encode(testStr, 'utf16-be')), hex(utf16beBuf)); + assert.equal(hex(iconv.encode('', enc)), ''); + assert.equal(hex(iconv.encode(testStr, enc)), hex(utf16beBuf)); }); + it("adds BOM if asked", function() { + assert.equal(hex(iconv.encode(testStr, enc, {addBOM: true})), hex(utf16beBOM) + hex(utf16beBuf)); + }); + + // See note in UTF-16LE encoder above; we need to keep them consistent. 
+ it("keeps single and invalid surrogates as-is", function() { + assert.equal(hex(iconv.encode(' \uD800 \uDE00 \uDE00\uD800 \uD800', enc)), + "0020 d800 0020 de00 0020 de00 d800 0020 d800".replace(/ /g, "")); + }); + + it("handles valid surrogate pairs on chunk boundary correctly", function() { + const encoder = iconv.getEncoder(enc); + assert.equal(hex(encoder.write('\uD83D')), 'd83d'); + assert.equal(hex(encoder.write('\uDCA9')), 'dca9'); + assert.strictEqual(encoder.end(), undefined); + }); +}); + +describe("UTF-16BE decoder #node-web", function() { + const enc = 'utf16-be'; it("decodes basic buffers correctly", function() { - assert.equal(iconv.decode(utf16beBuf, 'utf16-be'), testStr); + assert.equal(iconv.decode(utf16beBuf, enc), testStr); }); - it("decodes uneven length buffers with no error", function() { - assert.equal(iconv.decode(utils.bytesFrom([0, 0x61, 0]), 'utf16-be'), "a"); + it("decodes uneven length buffers showing an error", function() { + assert.equal(iconv.decode(utils.bytesFrom([0, 0x61, 0]), enc), "a�"); }); it("decodes very short buffers correctly", function() { - assert.equal(iconv.decode(utils.bytesFrom([]), 'utf-16be'), ''); - assert.equal(iconv.decode(utils.bytesFrom([0x61]), 'utf-16be'), ''); + assert.equal(iconv.decode(utils.bytesFrom([]), enc), ''); + assert.equal(iconv.decode(utils.bytesFrom([0x61]), enc), '�'); }); + + it("handles chunks with uneven lengths correctly", utils.checkDecoderChunks(enc, { + inputs: [[], [0x00], [], [0x61], [0x00], [0x61, 0x00], [0x61, 0x00]], + outputs: ['', '', '', 'a', '', 'a', 'a', '�'], + })); + + it("doesn't split valid surrogate pairs between chunks", utils.checkDecoderChunks(enc, [{ + inputs: [[0xD8, 0x3D, 0xDE], [0x3B]], + outputs: [ '', "\uD83D\uDE3B"], + }, { + inputs: [[0xD8, 0x3D], [0xDE], [0x3B]], + outputs: [ '', '', "\uD83D\uDE3B"], + }, { + inputs: [[0xD8], [0x3D, 0xDE], [0x3B]], + outputs: [ '', '', "\uD83D\uDE3B"], + }, { + inputs: [[0xD8], [0x3D], [0xDE], [0x3B]], + outputs: [ '', '', 
'', "\uD83D\uDE3B"], + }])); + + it("handles complex surrogate pairs cases", utils.checkDecoderChunks(enc, [{ + inputs: [[0xD9], [0x3E], [0xD8], [0x3D], [0xDE], [0x3B]], + outputs: [ '', '', '', '\uD93E', '', "\uD83D\uDE3B"] + }, { + inputs: [[0xD9, 0x3E, 0xD8], [0x3D], [0xDE, 0x3B]], + outputs: [ '', '\uD93E', "\uD83D\uDE3B"], + }, { + inputs: [[0xD9, 0x3E, 0xD8]], + outputs: [ '', '\uD93E�'], + }, { + inputs: [[0xD9, 0x3E], [0xD8]], + outputs: [ '', '', '\uD93E�'], + }, { + inputs: [[0xD9, 0x3E]], + outputs: [ '', '\uD93E'], + }])); }); describe("UTF-16 encoder #node-web", function() { + const enc = 'utf-16'; it("uses UTF-16LE and adds BOM when encoding", function() { - assert.equal(hex(iconv.encode(testStr, "utf-16")), hex(utf16leBOM) + hex(utf16leBuf)); + assert.equal(hex(iconv.encode(testStr, enc)), hex(utf16leBOM) + hex(utf16leBuf)); }); it("can skip BOM", function() { - assert.equal(hex(iconv.encode(testStr, "utf-16", {addBOM: false})), hex(utf16leBuf)); - }); - - it("can use other encodings, for example UTF-16BE, with BOM", function() { - assert.equal(hex(iconv.encode(testStr, "utf-16", {use: 'UTF-16BE'})), hex(utf16beBOM) + hex(utf16beBuf)); + assert.equal(hex(iconv.encode(testStr, enc, {addBOM: false})), hex(utf16leBuf)); }); }); describe("UTF-16 decoder #node-web", function() { + const enc = 'utf-16', + encLE = 'utf-16le', + encBE = 'utf-16be'; + it("uses BOM to determine encoding", function() { - assert.equal(iconv.decode(utils.concatBufs([utf16leBOM, utf16leBuf]), "utf-16"), testStr); - assert.equal(iconv.decode(utils.concatBufs([utf16beBOM, utf16beBuf]), "utf-16"), testStr); + assert.equal(iconv.decode(utils.concatBufs([utf16leBOM, utf16leBuf]), enc), testStr); + assert.equal(iconv.decode(utils.concatBufs([utf16beBOM, utf16beBuf]), enc), testStr); }); it("handles very short buffers", function() { - assert.equal(iconv.decode(utils.bytesFrom([]), 'utf-16'), ''); - - // Looks like StringDecoder doesn't do the right thing here. TODO: fix. 
- //assert.equal(iconv.decode(utils.bytesFrom([0x61]), 'utf-16'), ''); + assert.equal(iconv.decode(utils.bytesFrom([]), enc), ''); + assert.equal(iconv.decode(utils.bytesFrom([0x61]), enc), '�'); }); it("uses spaces when there is no BOM to determine encoding", function() { - assert.equal(iconv.decode(iconv.encode(sampleStr, 'utf-16le'), 'utf-16'), sampleStr); - assert.equal(iconv.decode(iconv.encode(sampleStr, 'utf-16be'), 'utf-16'), sampleStr); + assert.equal(iconv.decode(iconv.encode(sampleStr, encLE), enc), sampleStr); + assert.equal(iconv.decode(iconv.encode(sampleStr, encBE), enc), sampleStr); }); it("uses UTF-16LE if no BOM and heuristics failed", function() { - assert.equal(iconv.decode(utf16leBuf, 'utf-16'), testStr); + assert.equal(iconv.decode(weirdBuf, enc), iconv.decode(weirdBuf, encLE)); }); it("can be given a different default encoding", function() { - assert.equal(iconv.decode(utf16leBuf, 'utf-16', {default: 'utf-16le'}), testStr); + assert.equal(iconv.decode(weirdBuf, enc, {defaultEncoding: encBE}), iconv.decode(weirdBuf, encBE)); }); }); diff --git a/test/utils.js b/test/utils.js index dc9e3c8..f0a430b 100644 --- a/test/utils.js +++ b/test/utils.js @@ -1,3 +1,5 @@ +"use strict"; + const assert = require("assert"); const utils = module.exports = { @@ -34,5 +36,29 @@ const utils = module.exports = { assert(nonStrict || (bytes instanceof utils.BytesType)); return bytes.reduce((output, byte) => (output + ('0' + (byte & 0xFF).toString(16)).slice(-2)), ''); }, + + checkDecoderChunks(encoding, cases) { + return () => { + const decoder = utils.iconv.getDecoder(encoding); + if (!Array.isArray(cases)) { + cases = [cases]; + } + + for (let idx = 0; idx < cases.length; idx++) { + const inputs = cases[idx].inputs, + outputs = cases[idx].outputs; + for (let i = 0; i < inputs.length; i++) + assert.strictEqual(decoder.write(utils.bytesFrom(inputs[i])), outputs[i], `position ${i} in case ${idx}`); + + if (outputs.length === inputs.length) { + assert(!decoder.end(), 
`end is not empty in case ${idx}`); + } else if (outputs.length === inputs.length + 1) { + assert.strictEqual(decoder.end(), outputs[outputs.length-1], `end result unexpected in case ${idx}`); + } else { + assert(false, `invalid outputs array size in case ${idx}`); + } + } + } + }, };