forked from kvichans/cuda_find_in_files4
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathencodings.py
82 lines (81 loc) · 4.37 KB
/
encodings.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
''' Data from Py-documentation (7.2.3. Standard Encodings)
Arabic/Chinese/Japanese/Korean/... codepages are skipped
'''
# Codec Aliases Languages/Purpose
ENCODINGS = [
['mbcs', 'ANSI, dbcs', 'Win only: Encode operand according to the ANSI codepage (CP_ACP)']
,['cp866', '866, IBM866', 'Russian']
,['cp1251', 'windows-1251', 'Bulgarian, Byelorussian, Macedonian, Russian, Serbian']
,['ascii', '646, us-ascii', 'English']
,['cp037', 'IBM037, IBM039', 'English']
,['cp424', 'EBCDIC-CP-HE, IBM424', 'Hebrew']
,['cp437', '437, IBM437', 'English']
,['cp500', 'EBCDIC-CP-BE, EBCDIC-CP-CH, IBM500', 'Western Europe']
,['cp737', '', 'Greek']
,['cp775', 'IBM775', 'Baltic languages']
,['cp850', '850, IBM850', 'Western Europe']
,['cp852', '852, IBM852', 'Central and Eastern Europe']
,['cp855', '855, IBM855', 'Bulgarian, Byelorussian, Macedonian, Russian, Serbian']
,['cp856', '', 'Hebrew']
,['cp857', '857, IBM857', 'Turkish']
,['cp858', '858, IBM858', 'Western Europe']
,['cp860', '860, IBM860', 'Portuguese']
,['cp861', '861, CP-IS, IBM861', 'Icelandic']
,['cp862', '862, IBM862', 'Hebrew']
,['cp863', '863, IBM863', 'Canadian']
,['cp865', '865, IBM865', 'Danish, Norwegian']
,['cp869', '869, CP-GR, IBM869', 'Greek']
,['cp875', '', 'Greek']
,['cp1026', 'ibm1026', 'Turkish']
,['cp1140', 'ibm1140', 'Western Europe']
,['cp1250', 'windows-1250', 'Central and Eastern Europe']
,['cp1252', 'windows-1252', 'Western Europe']
,['cp1253', 'windows-1253', 'Greek']
,['cp1254', 'windows-1254', 'Turkish']
,['cp1255', 'windows-1255', 'Hebrew']
,['cp1257', 'windows-1257', 'Baltic languages']
,['cp65001', '', 'Win only: Windows UTF-8 (CP_UTF8)']
,['latin_1', 'iso-8859-1, iso8859-1, 8859, cp819, latin, latin1, L1', 'West Europe']
,['iso8859_2', 'iso-8859-2, latin2, L2', 'Central and Eastern Europe']
,['iso8859_3', 'iso-8859-3, latin3, L3', 'Esperanto, Maltese']
,['iso8859_4', 'iso-8859-4, latin4, L4', 'Baltic languages']
,['iso8859_5', 'iso-8859-5, cyrillic', 'Bulgarian, Byelorussian, Macedonian, Russian, Serbian']
,['iso8859_7', 'iso-8859-7, greek, greek8','Greek']
,['iso8859_8', 'iso-8859-8, hebrew', 'Hebrew']
,['iso8859_9', 'iso-8859-9, latin5, L5', 'Turkish']
,['iso8859_10', 'iso-8859-10, latin6, L6', 'Nordic languages']
,['iso8859_13', 'iso-8859-13, latin7, L7', 'Baltic languages']
,['iso8859_14', 'iso-8859-14, latin8, L8', 'Celtic languages']
,['iso8859_15', 'iso-8859-15, latin9, L9', 'Western Europe']
,['iso8859_16', 'iso-8859-16, latin10, L10','South-Eastern Europe']
,['koi8_r', '', 'Russian']
,['koi8_u', '', 'Ukrainian']
,['mac_cyrillic','maccyrillic', 'Bulgarian, Byelorussian, Macedonian, Russian, Serbian']
,['mac_greek', 'macgreek', 'Greek']
,['mac_iceland','maciceland', 'Icelandic']
,['mac_latin2', 'maclatin2, maccentraleurope', 'Central and Eastern Europe']
,['mac_roman', 'macroman, macintosh', 'Western Europe']
,['mac_turkish','macturkish', 'Turkish']
,['ptcp154', 'csptcp154, pt154, cp154, cyrillic-asian', 'Kazakh']
,['utf_32', 'U32, utf32', 'all languages']
,['utf_32_be', 'UTF-32BE', 'all languages']
,['utf_32_le', 'UTF-32LE', 'all languages']
,['utf_16', 'U16, utf16', 'all languages']
,['utf_16_be', 'UTF-16BE', 'all languages']
,['utf_16_le', 'UTF-16LE', 'all languages']
,['utf_7', 'U7, unicode-1-1-utf-7','all languages']
,['utf_8', 'U8, UTF, utf8', 'all languages']
,['utf_8_sig', '', 'all languages']
]
def get_encoding_names():
# return ['{}\t{}'.format(
# codec[0]
# , ('({}) '.format(codec[1]) if codec[1]!='' else '')+codec[2]
# )
# for codec in ENCODINGS]
return ['{}\t{}'.format(
codec[0]+(' ({}) '.format(codec[1]) if codec[1]!='' else '')
, codec[2]
)
for codec in ENCODINGS]
#def get_encoding_names