-
Notifications
You must be signed in to change notification settings - Fork 0
/
generate_encodings.py
123 lines (103 loc) · 3.47 KB
/
generate_encodings.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import sys
from asyncio import open_connection, run
from pathlib import Path
from protocol.application import MySQL
from protocol.async_support import create_stream_reader, create_stream_writer
# Python codec names for MySQL character sets whose names differ from
# Python's; a character set not listed here is tried under its own name.
_PYTHON_CODEC_OVERRIDES = {
    'utf8mb4': 'utf8',
    'utf8mb3': 'utf8',
    # https://dev.mysql.com/doc/refman/8.0/en/charset-we-sets.html
    'latin1': 'cp1252',
    # https://docs.python.org/3.8/library/codecs.html#standard-encodings
    'koi8r': 'koi8_r',
    'koi8u': 'koi8_u',
    # https://en.wikipedia.org/wiki/UTF-16
    # NOTE(review): Python's 'utf16' codec emits a BOM and uses the native
    # byte order when encoding; presumably the server side is big-endian
    # without a BOM -- confirm whether 'utf-16-be' is the intended codec.
    'ucs2': 'utf16',
    'utf16le': 'utf-16-le',
}

# Yields one row per character set (named after the character set, carrying
# the id of its default collation) plus one row per collation.
_COLLATIONS_QUERY = """
SELECT *
FROM (
SELECT id AS id,
character_set_name AS name,
character_set_name AS parent
FROM collations
WHERE is_default = 'Yes'
UNION
SELECT id AS id,
collation_name AS name,
character_set_name AS parent
FROM collations
) t
ORDER BY id;
"""


async def collect_data(port: int):
    """Read the collation table of a local server and build charset maps.

    Connects to ``127.0.0.1:port`` as ``root`` against the
    ``information_schema`` database, runs the collation query and closes the
    socket again before returning, whether or not the query succeeded.

    Args:
        port: TCP port of the server to query.

    Returns:
        A ``(charsets, python_charsets)`` tuple of dicts:

        * ``charsets`` maps every character-set / collation name to its
          integer id.
        * ``python_charsets`` maps the same names to a Python codec name, but
          only for names whose codec Python can actually look up; the others
          are reported on stdout as ``Unsupported``.
    """
    reader, writer = await open_connection(
        host='127.0.0.1',
        port=port,
    )
    try:
        mysql = MySQL(
            create_stream_writer(writer, 2),
            create_stream_reader(reader, 2),
        )
        await mysql.connect(
            username='root',
            password='local',
            database='information_schema',
            charset='utf8mb4',
        )
        rs = await mysql.query(_COLLATIONS_QUERY)

        charsets = {}
        python_charsets = {}
        for result in rs.rows:
            col_id = result['id']
            col_name = result['name']
            if col_id is None:
                # int(None) would abort the whole run. NOTE(review): newer
                # MariaDB releases appear to list collations with a NULL id
                # -- confirm against the targeted server versions.
                print('Skipped (no id)', col_name)
                continue

            col_parent = result['parent']
            codec = _PYTHON_CODEC_OVERRIDES.get(col_parent, col_parent)
            charsets[col_name] = int(col_id)
            try:
                # Encoding a probe string is how codec availability is tested.
                'a'.encode(codec)
            except LookupError:
                print('Unsupported', col_name)
            else:
                python_charsets[col_name] = codec
        return charsets, python_charsets
    finally:
        # The original never closed the connection; release the socket here.
        writer.close()
        await writer.wait_closed()
async def main():
    """Collect charset data from both local servers and write charsets.py.

    Queries the server on port 3306 (announced as ``MYSQL``) and then the one
    on port 3307 (announced as ``MARIADB``); entries from the second server
    override same-named entries from the first. Four dict literals are then
    written to ``protocol/charsets.py`` next to the script path taken from
    ``sys.argv[0]``: ``CHARSETS``, ``CHARSETS_REVERSE``, ``PYTHON_CHARSETS``
    and ``PYTHON_CHARSETS_FROM_CODE``.
    """
    print('MYSQL')
    id_by_name, codec_by_name = await collect_data(3306)
    print('MARIADB')
    maria_ids, maria_codecs = await collect_data(3307)
    id_by_name.update(maria_ids)
    codec_by_name.update(maria_codecs)

    # id -> Python codec: the first name (in dict order) that has a codec wins.
    codec_by_id = {}
    for name, code in id_by_name.items():
        if name in codec_by_name:
            codec_by_id.setdefault(code, codec_by_name[name])

    # id -> name: the longest name sharing an id wins; ties keep the earlier.
    name_by_id = {}
    for name, code in id_by_name.items():
        current = name_by_id.get(code)
        if current is None or len(current) < len(name):
            name_by_id[code] = name

    target = Path(sys.argv[0]).parent / 'protocol' / 'charsets.py'
    with open(target, 'w') as out:
        out.write('CHARSETS = {\n')
        out.writelines(
            f" '{name}': {code},\n" for name, code in id_by_name.items()
        )
        out.write('}\n\n')

        out.write('CHARSETS_REVERSE = {\n')
        out.writelines(
            f" {code}: '{name}',\n" for code, name in name_by_id.items()
        )
        out.write('}\n\n')

        out.write('PYTHON_CHARSETS = {\n')
        out.writelines(
            f" '{name}': '{codec}',\n" for name, codec in codec_by_name.items()
        )
        out.write('}\n\n')

        out.write('PYTHON_CHARSETS_FROM_CODE = {\n')
        out.writelines(
            f" {code}: '{codec}',\n" for code, codec in codec_by_id.items()
        )
        out.write('}\n')
# Script entry point: when executed directly (not imported), drive the
# async main() coroutine to completion with asyncio's run().
if __name__ == '__main__':
    run(main())