diff --git a/colorama/initialise.py b/colorama/initialise.py
index d5fd4b71..ffed55fc 100644
--- a/colorama/initialise.py
+++ b/colorama/initialise.py
@@ -1,9 +1,54 @@
 # Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
 import atexit
+import codecs
 import contextlib
+import io
+import logging
+import platform
 import sys
 
 from .ansitowin32 import AnsiToWin32
+
+_logger = logging.getLogger(__name__)
+
+
+def _match_stream_to_console_codepage(stream, get_codepage):
+    """Best effort: switch *stream* to the encoding of the console code page.
+
+    *get_codepage* is a zero-argument callable returning the code page number
+    (``GetConsoleCP`` for input streams, ``GetConsoleOutputCP`` for output).
+    Streams that are missing, closed, redirected or not reconfigurable are
+    left untouched, as are code pages for which Python has no codec.
+    """
+    # Local import: only needed on this Windows-only code path.
+    from .win32 import MICROSOFT_CODEPAGE_ENCODING
+
+    # sys.std* can be None or any file-like object, and reconfigure() only
+    # exists on io.TextIOWrapper from Python 3.7 onwards.
+    reconfigure = getattr(stream, 'reconfigure', None)
+    raw = getattr(getattr(stream, 'buffer', None), 'raw', None)
+    if reconfigure is None or not isinstance(raw, io.FileIO):
+        return
+    try:
+        if not stream.isatty():
+            return
+        codepage = get_codepage()
+    except (OSError, ValueError):
+        # The stream is closed, or the code page query failed.
+        return
+
+    # Code pages missing from the table (850, 866, ...) usually still have a
+    # Python codec named "cp<number>".
+    encoding = MICROSOFT_CODEPAGE_ENCODING.get(codepage, 'cp%d' % codepage)
+    try:
+        codecs.lookup(encoding)
+        reconfigure(encoding=encoding)
+    except LookupError:
+        _logger.warning('no codec for console code page %d', codepage)
+    except io.UnsupportedOperation as exc:
+        # Only worth reporting if the stream is not already correctly encoded.
+        if stream.encoding != encoding:
+            _logger.warning(exc)
 
 
 def _wipe_internal_state_for_tests():
@@ -74,6 +119,25 @@ def just_fix_windows_console():
 
     if sys.platform != "win32":
         return
+
+    # allow this fix to be run multiple times (hence its position before the
+    # `fixed_windows_console` check below)
+    if not (platform.python_implementation() == 'CPython' and sys.version_info >= (3, 6)):
+        # CPython is hard-coded to use UTF-16 for Windows Console IO:
+        # https://github.com/python/cpython/blob/v3.13.2/Modules/_io/winconsoleio.c#L1092
+        # But other implementations tend not to handle this at all:
+        # https://github.com/pypy/pypy/issues/2999
+        try:
+            from .win32 import GetConsoleCP, GetConsoleOutputCP
+        except ImportError:
+            # .win32 defines these conditionally (indented block there):
+            # without them the code page cannot be queried, so do nothing.
+            pass
+        else:
+            _match_stream_to_console_codepage(sys.stderr, GetConsoleOutputCP)
+            _match_stream_to_console_codepage(sys.stdout, GetConsoleOutputCP)
+            _match_stream_to_console_codepage(sys.stdin, GetConsoleCP)
+
     if fixed_windows_console:
         return
     if wrapped_stdout is not None or wrapped_stderr is not None:
diff --git a/colorama/win32.py b/colorama/win32.py
index 841b0e27..9ed1d6a0 100644
--- a/colorama/win32.py
+++ b/colorama/win32.py
@@ -1,5 +1,9 @@
 # Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
 
+import sys
+
+from . import windows_437  # noqa: F401 -- imported to register the 'x-windows-437' codec
+
 # from winbase.h
 STDOUT = -11
 STDERR = -12
@@ -105,6 +109,14 @@ def __str__(self):
     ]
     _SetConsoleMode.restype = wintypes.BOOL
 
+    _GetConsoleOutputCP = windll.kernel32.GetConsoleOutputCP
+    _GetConsoleOutputCP.argtypes = []
+    _GetConsoleOutputCP.restype = wintypes.UINT
+
+    _GetConsoleCP = windll.kernel32.GetConsoleCP
+    _GetConsoleCP.argtypes = []
+    _GetConsoleCP.restype = wintypes.UINT
+
     def _winapi_test(handle):
         csbi = CONSOLE_SCREEN_BUFFER_INFO()
         success = _GetConsoleScreenBufferInfo(
@@ -178,3 +190,129 @@ def SetConsoleMode(handle, mode):
         success = _SetConsoleMode(handle, mode)
         if not success:
             raise ctypes.WinError()
+
+    def GetConsoleCP():
+        """Return the console input code page, raising the Win32 error on failure."""
+        codepage = _GetConsoleCP()
+        if not codepage:
+            raise ctypes.WinError()
+        return codepage
+
+    def GetConsoleOutputCP():
+        """Return the console output code page, raising the Win32 error on failure."""
+        codepage = _GetConsoleOutputCP()
+        if not codepage:
+            raise ctypes.WinError()
+        return codepage
+
+# Windows code page identifier -> encoding name handed to Python's codecs.
+# NOTE(review): several names (e.g. the x-ebcdic-* ones) are not codecs that
+# Python ships, so callers must be prepared for LookupError.
+# https://learn.microsoft.com/en-us/windows/win32/intl/code-page-identifiers
+MICROSOFT_CODEPAGE_ENCODING = {
+    437: 'x-windows-437',
+
+    708: 'iso-8859-6',
+    709: 'iso-9036',
+    932: 'cp932',  # Python's codec for Windows code page 932 (not plain shift_jis)
+    936: 'gbk',  # Python's codec for Windows code page 936 (not plain gb2312)
+    950: 'cp950',  # Python's codec for Windows code page 950 (not plain big5)
+    1047: 'x-ebcdic-latin1',
+    1140: 'x-ebcdic-us-ca-eu',
+    1141: 'x-ebcdic-de-eu',
+    1142: 'x-ebcdic-dk-no-eu',
+    1143: 'x-ebcdic-fi-se-eu',
+    1144: 'x-ebcdic-it-eu',
+    1145: 'x-ebcdic-es-eu',
+    1146: 'x-ebcdic-gb-eu',
+    1147: 'x-ebcdic-fr-eu',
+    1148: 'x-ebcdic-int-eu',
+    1149: 'x-ebcdic-is-eu',
+    1200: 'utf-16le',
+    1201: 'utf-16be',
+    1250: 'windows-1250',
+    1251: 'windows-1251',
+    1252: 'windows-1252',
+    1253: 'windows-1253',
+    1254: 'windows-1254',
+    1255: 'windows-1255',
+    1256: 'windows-1256',
+    1257: 'windows-1257',
+    1258: 'windows-1258',
+    1361: 'johab',
+    10000: 'macintosh',
+    10001: 'x-mac-japanese',
+    10002: 'x-mac-trad-chinese',
+    10003: 'x-mac-korean',
+    10004: 'mac-arabic',
+    10005: 'x-mac-hebrew',
+    10006: 'mac-greek',
+    10007: 'mac-cyrillic',
+    10008: 'x-mac-simp-chinese',
+    10010: 'mac-romanian',
+    10017: 'x-mac-ukrainian',
+    10021: 'x-mac-thai',
+    10029: 'mac-centeuro',
+    10079: 'mac-iceland',
+    10081: 'mac-turkish',
+    10082: 'mac-croatian',
+    12000: 'utf-32le',
+    12001: 'utf-32be',
+    20000: 'x-chinese-cns',
+    20001: 'x-cp20001',
+    20002: 'x-chinese-eten',
+    20105: 'x-ia5',
+    20106: 'x-ia5-german',
+    20107: 'x-ia5-swedish',
+    20108: 'x-ia5-norwegian',
+    20127: 'us-ascii',
+    20277: 'x-ebcdic-dk-no',
+    20278: 'x-ebcdic-fi-se',
+    20280: 'x-ebcdic-it',
+    20284: 'x-ebcdic-es',
+    20285: 'x-ebcdic-gb',
+    20290: 'x-ebcdic-jp-kana',
+    20297: 'x-ebcdic-fr',
+    20420: 'x-ebcdic-ar1',
+    20423: 'x-ebcdic-gr',
+    20833: 'x-ebcdic-koreanextended',
+    20838: 'x-ebcdic-thai',
+    20866: 'koi8-r',
+    20932: 'euc-jp',
+    20936: 'x-cp20936',
+    20949: 'x-cp20949',
+    21866: 'koi8-u',
+    28591: 'iso-8859-1',
+    28592: 'iso-8859-2',
+    28593: 'iso-8859-3',
+    28594: 'iso-8859-4',
+    28595: 'iso-8859-5',
+    28596: 'iso-8859-6',
+    28597: 'iso-8859-7',
+    28598: 'iso-8859-8',
+    28599: 'iso-8859-9',
+    28603: 'iso-8859-13',
+    28605: 'iso-8859-15',
+    38598: 'iso-8859-8-i',
+    50220: 'iso-2022-jp',
+    50221: 'csiso2022jp',
+    50222: 'iso-2022-jp',
+    50225: 'iso-2022-kr',
+    50227: 'x-cp50227',
+    51932: 'euc-jp',
+    51936: 'euc-cn',
+    51949: 'euc-kr',
+    52936: 'hz-gb-2312',
+    54936: 'gb18030',
+    57002: 'x-iscii-de',
+    57003: 'x-iscii-be',
+    57004: 'x-iscii-ta',
+    57005: 'x-iscii-te',
+    57006: 'x-iscii-as',
+    57007: 'x-iscii-or',
+    57008: 'x-iscii-ka',
+    57009: 'x-iscii-ma',
+    57010: 'x-iscii-gu',
+    57011: 'x-iscii-pa',
+    65000: 'utf-7',
+    65001: 'utf-8'}
diff --git a/colorama/windows_437.py b/colorama/windows_437.py
new file mode 100644
index 00000000..398a9f8e
--- /dev/null
+++ b/colorama/windows_437.py
@@ -0,0 +1,108 @@
+# Copyright James Edington Administrator 2025. BSD 3-Clause license, see LICENSE file.
+
+"""Register the ``x-windows-437`` codec: cp437 with glyphs for most of 0x01-0x1f."""
+
+import codecs
+from encodings import cp437 as _psf_cp437, normalize_encoding
+
+
+# Overrides applied on top of cp437: byte value -> Unicode code point.
+# 0x00, 0x07, 0x08, 0x0a and 0x0d are absent, so they keep the mapping
+# inherited from cp437.  The encoding map is derived from this one table so
+# that the two directions cannot drift apart.
+# NOTE(review): 0x09 (TAB) and 0x1b (ESC) are remapped too, so input decoded
+# with this codec yields U+25CB / U+2190 for them -- confirm that is intended.
+_WINDOWS_437_GLYPHS = {
+    0x01: 0x263a,
+    0x02: 0x263b,
+    0x03: 0x2665,
+    0x04: 0x2666,
+    0x05: 0x2663,
+    0x06: 0x2660,
+    0x09: 0x25cb,
+    0x0b: 0x2642,
+    0x0c: 0x2640,
+    0x0e: 0x266b,
+    0x0f: 0x263c,
+    0x10: 0x25ba,
+    0x11: 0x25c4,
+    0x12: 0x2195,
+    0x13: 0x203c,
+    0x14: 0x00b6,
+    0x15: 0x00a7,
+    0x16: 0x25ac,
+    0x17: 0x21a8,
+    0x18: 0x2191,
+    0x19: 0x2193,
+    0x1a: 0x2192,
+    0x1b: 0x2190,
+    0x1c: 0x221f,
+    0x1d: 0x2194,
+    0x1e: 0x25b2,
+    0x1f: 0x25bc,
+}
+
+# dict() + update() rather than ``|`` so the module imports on Python < 3.9.
+_windows_437_decoding_map = dict(_psf_cp437.decoding_map)
+_windows_437_decoding_map.update(_WINDOWS_437_GLYPHS)
+
+_windows_437_encoding_map = dict(_psf_cp437.encoding_map)
+_windows_437_encoding_map.update(
+    (code_point, byte) for byte, code_point in _WINDOWS_437_GLYPHS.items()
+)
+
+
+# https://github.com/python/cpython/blob/v3.13.2/Lib/encodings/cp437.py#L9
+class _Windows437Codec(codecs.Codec):
+    """Stateless codec backed by the two charmaps above."""
+
+    def encode(self, input, errors='strict'):
+        return codecs.charmap_encode(input, errors, _windows_437_encoding_map)
+
+    def decode(self, input, errors='strict'):
+        return codecs.charmap_decode(input, errors, _windows_437_decoding_map)
+
+
+class _Windows437IncrementalEncoder(codecs.IncrementalEncoder):
+    """Charmap encoding keeps no state, so *final* is not consulted."""
+
+    def encode(self, input, final=False):
+        return codecs.charmap_encode(input, self.errors, _windows_437_encoding_map)[0]
+
+
+class _Windows437IncrementalDecoder(codecs.IncrementalDecoder):
+    """Charmap decoding keeps no state, so *final* is not consulted."""
+
+    def decode(self, input, final=False):
+        return codecs.charmap_decode(input, self.errors, _windows_437_decoding_map)[0]
+
+
+class _Windows437StreamWriter(_Windows437Codec, codecs.StreamWriter):
+    pass
+
+
+class _Windows437StreamReader(_Windows437Codec, codecs.StreamReader):
+    pass
+
+
+# https://github.com/python/cpython/blob/v3.13.2/Lib/encodings/cp437.py#L34
+_Windows437CodecInfo = codecs.CodecInfo(
+    name='x-windows-437',
+    encode=_Windows437Codec().encode,
+    decode=_Windows437Codec().decode,
+    incrementalencoder=_Windows437IncrementalEncoder,
+    incrementaldecoder=_Windows437IncrementalDecoder,
+    streamreader=_Windows437StreamReader,
+    streamwriter=_Windows437StreamWriter,
+)
+
+
+def _windows_437_search_function(encoding):
+    """Codec search function: answer only for the name ``x-windows-437``."""
+    if normalize_encoding(encoding) == 'x_windows_437':
+        return _Windows437CodecInfo
+    return None
+
+
+# Registered last, once everything the search function returns exists.
+codecs.register(_windows_437_search_function)