From f0dc58ddf81c6110d5e0364a3b87bd175d9e7611 Mon Sep 17 00:00:00 2001 From: alexleech Date: Mon, 9 Jul 2018 13:43:20 +0800 Subject: [PATCH] Fix Capture.watchStream to respect default text encoding Previously, the byte buffer could end with a truncated multi-byte UTF-8 character, which produced invalid characters when the bytes were passed to the String constructor. Reading through an InputStreamReader that uses the default charset (the same encoding the String constructor uses) decodes bytes into characters before they are buffered, so the bytes of a single character are never split across reads. --- kernel/src/main/scala/jupyter/scala/Capture.scala | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) mode change 100644 => 100755 kernel/src/main/scala/jupyter/scala/Capture.scala diff --git a/kernel/src/main/scala/jupyter/scala/Capture.scala b/kernel/src/main/scala/jupyter/scala/Capture.scala old mode 100644 new mode 100755 index d320f9dbf..61f52c681 --- a/kernel/src/main/scala/jupyter/scala/Capture.scala +++ b/kernel/src/main/scala/jupyter/scala/Capture.scala @@ -3,6 +3,7 @@ package jupyter.scala // Extracted from IScala, and refactored a bit import java.io.{ Console => _, _ } +import java.nio.charset.Charset object Capture { private def watchStream( @@ -13,11 +14,12 @@ object Capture { ) = new Thread(name) { override def run() = { - val buffer = Array.ofDim[Byte](size) + val buffer = Array.ofDim[Char](size) + val reader = new InputStreamReader(input, Charset.defaultCharset()) try { while (true) { - val n = input read buffer + val n = reader.read(buffer, 0, size) if (n > 0) fn(new String(buffer take n)) if (n < size) Thread.sleep(50) // little delay to accumulate output }