diff --git a/MyQR/mylibs/constant.py b/MyQR/mylibs/constant.py index d7b7f3d..f6120fa 100644 --- a/MyQR/mylibs/constant.py +++ b/MyQR/mylibs/constant.py @@ -41,7 +41,7 @@ [(1, 19, 0, 0), (1, 16, 0, 0), (1, 13, 0, 0), (1, 9, 0, 0)], [(1, 34, 0, 0), (1, 28, 0, 0), (1, 22, 0, 0), (1, 16, 0, 0)], [(1, 55, 0, 0), (1, 44, 0, 0), (2, 17, 0, 0), (2, 13, 0, 0)], [(1, 80, 0, 0), (2, 32, 0, 0), (2, 24, 0, 0), (4, 9, 0, 0)], [(1, 108, 0, 0), (2, 43, 0, 0), (2, 15, 2, 16), (2, 11, 2, 12)], [(2, 68, 0, 0), (4, 27, 0, 0), (4, 19, 0, 0), (4, 15, 0, 0)], [(2, 78, 0, 0), (4, 31, 0, 0), (2, 14, 4, 15), (4, 13, 1, 14)], [(2, 97, 0, 0), (2, 38, 2, 39), (4, 18, 2, 19), (4, 14, 2, 15)], [(2, 116, 0, 0), (3, 36, 2, 37), (4, 16, 4, 17), (4, 12, 4, 13)], [(2, 68, 2, 69), (4, 43, 1, 44), (6, 19, 2, 20), (6, 15, 2, 16)], [(4, 81, 0, 0), (1, 50, 4, 51), (4, 22, 4, 23), (3, 12, 8, 13)], [(2, 92, 2, 93), (6, 36, 2, 37), (4, 20, 6, 21), (7, 14, 4, 15)], [(4, 107, 0, 0), (8, 37, 1, 38), (8, 20, 4, 21), (12, 11, 4, 12)], [(3, 115, 1, 116), (4, 40, 5, 41), (11, 16, 5, 17), (11, 12, 5, 13)], [(5, 87, 1, 88), (5, 41, 5, 42), (5, 24, 7, 25), (11, 12, 7, 13)], [(5, 98, 1, 99), (7, 45, 3, 46), (15, 19, 2, 20), (3, 15, 13, 16)], [(1, 107, 5, 108), (10, 46, 1, 47), (1, 22, 15, 23), (2, 14, 17, 15)], [(5, 120, 1, 121), (9, 43, 4, 44), (17, 22, 1, 23), (2, 14, 19, 15)], [(3, 113, 4, 114), (3, 44, 11, 45), (17, 21, 4, 22), (9, 13, 16, 14)], [(3, 107, 5, 108), (3, 41, 13, 42), (15, 24, 5, 25), (15, 15, 10, 16)], [(4, 116, 4, 117), (17, 42, 0, 0), (17, 22, 6, 23), (19, 16, 6, 17)], [(2, 111, 7, 112), (17, 46, 0, 0), (7, 24, 16, 25), (34, 13, 0, 0)], [(4, 121, 5, 122), (4, 47, 14, 48), (11, 24, 14, 25), (16, 15, 14, 16)], [(6, 117, 4, 118), (6, 45, 14, 46), (11, 24, 16, 25), (30, 16, 2, 17)], [(8, 106, 4, 107), (8, 47, 13, 48), (7, 24, 22, 25), (22, 15, 13, 16)], [(10, 114, 2, 115), (19, 46, 4, 47), (28, 22, 6, 23), (33, 16, 4, 17)], [(8, 122, 4, 123), (22, 45, 3, 46), (8, 23, 26, 24), (12, 15, 28, 16)], [(3, 
117, 10, 118), (3, 45, 23, 46), (4, 24, 31, 25), (11, 15, 31, 16)], [(7, 116, 7, 117), (21, 45, 7, 46), (1, 23, 37, 24), (19, 15, 26, 16)], [(5, 115, 10, 116), (19, 47, 10, 48), (15, 24, 25, 25), (23, 15, 25, 16)], [(13, 115, 3, 116), (2, 46, 29, 47), (42, 24, 1, 25), (23, 15, 28, 16)], [(17, 115, 0, 0), (10, 46, 23, 47), (10, 24, 35, 25), (19, 15, 35, 16)], [(17, 115, 1, 116), (14, 46, 21, 47), (29, 24, 19, 25), (11, 15, 46, 16)], [(13, 115, 6, 116), (14, 46, 23, 47), (44, 24, 7, 25), (59, 16, 1, 17)], [(12, 121, 7, 122), (12, 47, 26, 48), (39, 24, 14, 25), (22, 15, 41, 16)], [(6, 121, 14, 122), (6, 47, 34, 48), (46, 24, 10, 25), (2, 15, 64, 16)], [(17, 122, 4, 123), (29, 46, 14, 47), (49, 24, 10, 25), (24, 15, 46, 16)], [(4, 122, 18, 123), (13, 46, 32, 47), (48, 24, 14, 25), (42, 15, 32, 16)], [(20, 117, 4, 118), (40, 47, 7, 48), (43, 24, 22, 25), (10, 15, 67, 16)], [(19, 118, 6, 119), (18, 47, 31, 48), (34, 24, 34, 25), (20, 15, 61, 16)] ] -mode_indicator = {'numeric': '0001', 'alphanumeric': '0010', 'byte': '0100', 'kanji': '1000'} +mode_indicator = {'numeric': '0001', 'alphanumeric': '0010', 'byte': '0100', 'kanji': '1101'} diff --git a/MyQR/mylibs/data.py b/MyQR/mylibs/data.py index e03faba..e0ed4d7 100644 --- a/MyQR/mylibs/data.py +++ b/MyQR/mylibs/data.py @@ -1,21 +1,67 @@ # -*- coding: utf-8 -*- +import re from MyQR.mylibs.constant import char_cap, required_bytes, mindex, lindex, num_list, alphanum_list, grouping_list, mode_indicator - +RE_numpat = re.compile('[0-9]+') # one or more decimal digits +RE_alpnumpat = re.compile(r'[' + re.escape('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ $%*+-./:') + r']+') # one or more characters from the listed digit, upper-case letter and symbol set +RE_chinesepat = re.compile('[\u4E00-\u9FA5\uFB00-\uFFFD\u3000-\u303F]+') # one or more characters in U+4E00 to U+9FA5, U+FB00 to U+FFFD or U+3000 to U+303F +RE_bytespat = re.compile('[\x00-\xff]+') # one or more characters in U+0000 to U+00FF + +ModeNameList = ['numeric', 'alphanumeric', 'byte', 'kanji'] # order in which analyse() tries the patterns; the first match wins + +ModePattern = { # mode name to compiled pattern, looked up by analyse() + 'numeric':RE_numpat, + 'alphanumeric':RE_alpnumpat, + 'byte':RE_bytespat, + 'kanji':RE_chinesepat +} + # ecl: Error Correction Level(L,M,Q,H) def encode(ver, ecl, str): mode_encoding = { 
'numeric': numeric_encoding, 'alphanumeric': alphanumeric_encoding, 'byte': byte_encoding, - 'kanji': kanji_encoding + 'kanji': chinese_encoding } - - ver, mode = analyse(ver, ecl, str) + # Split the input into consecutive single-mode segments and encode each segment with its own mode. + tmpstr = str + code_list = [] + while tmpstr: + mode, span = analyse(tmpstr) + if not mode: + break + print('line 16: mode:', mode) # NOTE(review): debug output, emitted once per segment + code_list.append({ + 'str':tmpstr[span], + 'mode':mode, + 'mode_encoding':mode_encoding[mode](tmpstr[span])}) + tmpstr = tmpstr[span.stop:] # drop the segment that was just consumed - print('line 16: mode:', mode) + totalbits = get_required_bits(ver, code_list) + + tmpver = ver + while 8*required_bytes[tmpver-1][lindex[ecl]] < totalbits: # raise tmpver until 8*required_bytes at that version holds totalbits + tmpver += 1 + if tmpver > ver: + ver = tmpver - code = mode_indicator[mode] + get_cci(ver, mode, str) + mode_encoding[mode](str) + # The bit total depends on ver through get_cci(), so recompute it with the raised version and grow tmpver again if needed. + totalbits = get_required_bits(ver, code_list) + while 8*required_bytes[tmpver-1][lindex[ecl]] < totalbits: + tmpver += 1 + if tmpver > ver: + ver = tmpver + + code = '' + for code_item in code_list: + str, mode, mode_encoding = code_item['str'], code_item['mode'], code_item['mode_encoding'] + if mode == 'kanji': # kanji segments carry an extra '0001' right after the mode indicator + code += mode_indicator[mode] + '0001' + get_cci(ver, mode, str) + mode_encoding + else: + code += mode_indicator[mode] + get_cci(ver, mode, str) + mode_encoding + # code = mode_indicator[mode] + get_cci(ver, mode, str) + mode_encoding[mode](str) # Add a Terminator rqbits = 8 * required_bytes[ver-1][lindex[ecl]] @@ -44,22 +90,20 @@ def encode(ver, ecl, str): return ver, data_codewords -def analyse(ver, ecl, str): - if all(i in num_list for i in str): - mode = 'numeric' - elif all(i in alphanum_list for i in str): - mode = 'alphanumeric' - else: - mode = 'byte' - - m = mindex[mode] - l = len(str) - for i in range(40): - if char_cap[ecl][i][m] > l: - ver = i + 1 if i+1 > ver else ver +def analyse(data): + # Return (mode name, slice) for the first ModeNameList pattern that matches at the start of data. + if not data: + return None # NOTE(review): encode() unpacks two values from analyse(); it only calls it with non-empty text + mode = None + for mode_name in ModeNameList: + match = ModePattern[mode_name].match(data) + if match: + mode = mode_name break - 
return ver, mode + if not mode: + raise ValueError('Invalid data to match: %s' % data) + span = match.span() + return mode, slice(span[0], span[1]) def numeric_encoding(str): str_list = [str[i:i+3] for i in range(0,len(str),3)] @@ -96,9 +140,37 @@ def byte_encoding(str): code += c return code -def kanji_encoding(str): - pass - +def chinese_encoding(str): # Return the GB2312 byte pairs of str as concatenated 13-bit binary strings; raises ValueError on an out-of-range second byte. + gb2312 = str.encode('gb2312') # assumes every character encodes to exactly two bytes (the loop reads pairs) - TODO confirm + code = '' + span1 = (0xa1,0xaa) # first-byte range handled by the type 1 branch + span2 = (0xb0,0xfa) # first-byte range handled by the type 2 branch + spansecond = (0xa1,0xfe) # allowed second-byte range for both branches + for i in range(0, len(gb2312), 2): + first,second = gb2312[i], gb2312[i+1] + if span1[0] <= first <= span1[1]: + if not spansecond[0] <= second <= spansecond[1]: + raise ValueError('Invalid chinese character : %s' % str[i//2]) # str is the text parameter, so index it; i//2 is the character index because each character occupies two bytes + # type 1: subtract 0xa1 from both bytes + first = first - 0xa1 + second = second - 0xa1 + tmpcode = bin(first*0x60+second)[2:] # combined value as binary digits without the 0b prefix + tmpcode_len = len(tmpcode) + tmpcode = '0'*(13-tmpcode_len) + tmpcode # left-pad with zeros to 13 bits + code += tmpcode + + elif span2[0] <= first <= span2[1]: + if not spansecond[0] <= second <= spansecond[1]: + raise ValueError('Invalid chinese character : %s' % str[i//2]) # same indexing as in the type 1 branch + # type 2: subtract 0xa6 from the first byte and 0xa1 from the second + first = first - 0xa6 + second = second - 0xa1 + tmpcode = bin(first*0x60+second)[2:] + tmpcode_len = len(tmpcode) + tmpcode = '0'*(13-tmpcode_len) + tmpcode # left-pad with zeros to 13 bits + code += tmpcode + return code + # cci: character count indicator def get_cci(ver, mode, str): if 1 <= ver <= 9: @@ -112,6 +184,17 @@ def get_cci(ver, mode, str): cci = '0' * (cci_len - len(cci)) + cci return cci +def get_required_bits(ver, code_list): # Total bit length of all segments: mode indicator bits, character count indicator and encoded data. + totalbits = 0 + for i in code_list: + if i['mode'] == 'kanji': + totalbits += 8 # 4-bit mode indicator and the 4-bit '0001' that encode() adds for kanji + else: + totalbits += 4 # mode indicator only + totalbits += len(get_cci(ver, i['mode'], i['str'])) + totalbits += len(i['mode_encoding']) + return totalbits + if __name__ == '__main__': s = '123456789' v, datacode = encode(1, 'H', s) diff --git a/MyQR/myqr.py b/MyQR/myqr.py index 458bb4e..55e4988 100644 --- a/MyQR/myqr.py +++ b/MyQR/myqr.py @@ -25,7 +25,9 @@ def run(words, version=1, level='H', picture=None, colorized=False, contrast=1.0 # check every parameter - 
if not isinstance(words, str) or any(i not in supported_chars for i in words): + # A character is accepted when it is in supported_chars or falls in one of the CJK ranges + # U+4E00 to U+9FA5, U+FB00 to U+FFFD, U+3000 to U+303F; any other character rejects the whole input. + if not isinstance(words, str) or any(i not in supported_chars and not (0x4E00 <= ord(i) <= 0x9FA5 or 0xFB00 <= ord(i) <= 0xFFFD or 0x3000 <= ord(i) <= 0x303F) for i in words): raise ValueError('Wrong words! Make sure the characters are supported!') if not isinstance(version, int) or version not in range(1, 41): raise ValueError('Wrong version! Please choose a int-type value from 1 to 40!')