Skip to content

Commit 30a2f2d

Browse files
authored Feb 17, 2021
Merge pull request #20 from kusakata/master
Make skk2cdb.py compatible to Python 3
2 parents fc6b8dd + d71f513 commit 30a2f2d

File tree

1 file changed

+23
-22
lines changed

1 file changed

+23
-22
lines changed
 

‎skk2cdb.py

+23 -22
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
#!/usr/bin/env python
1+
#!/usr/bin/env python3
22
##
33
## skk2cdb.py - convertion tool for SKK dictionary.
44
## by Yusuke Shinyama
@@ -16,11 +16,12 @@
1616
import sys, os
1717
from struct import pack, unpack
1818
from array import array
19+
from functools import reduce
1920

2021

2122
# calc hash value with a given key
22-
def cdbhash(s, n=0L):
23-
return reduce(lambda h,c: ((h*33) ^ ord(c)) & 0xffffffffL, s, n+5381L)
23+
def cdbhash(s, n=0):
24+
return reduce(lambda h,c: ((h*33) ^ ord(c)) & 0xffffffff, s, n+5381)
2425

2526
if pack('=i',1) == pack('>i',1):
2627
# big endian
@@ -30,14 +31,14 @@ def decode(x):
3031
return a
3132
def encode(a):
3233
a.byteswap()
33-
return a.tostring()
34+
return a
3435
else:
3536
# little endian
3637
def decode(x):
3738
a = array('I', x)
3839
return a
3940
def encode(a):
40-
return a.tostring()
41+
return a
4142

4243

4344
## CDB
@@ -62,9 +63,9 @@ class CDBReader:
6263

6364
def __init__(self, cdbname, docache=1):
6465
self.name = cdbname
65-
self._fp = file(cdbname, 'rb')
66+
self._fp = open(cdbname, 'rb')
6667
hash0 = decode(self._fp.read(2048))
67-
self._hash0 = [ (hash0[i], hash0[i+1]) for i in xrange(0, 512, 2) ]
68+
self._hash0 = [ (hash0[i], hash0[i+1]) for i in range(0, 512, 2) ]
6869
self._hash1 = [ None ] * 256
6970
(self._eod,_) = self._hash0[0]
7071
self._docache = docache
@@ -93,7 +94,7 @@ def __getitem__(self, k):
9394
self._hash1[h1] = hs
9495
i = ((h >> 8) % ncells) * 2
9596
n = ncells*2
96-
for _ in xrange(ncells):
97+
for _ in range(ncells):
9798
p1 = hs[i+1]
9899
if p1 == 0: raise KeyError(k)
99100
if hs[i] == h:
@@ -122,7 +123,7 @@ def has_key(self, k):
122123
return False
123124

124125
def __contains__(self, k):
125-
return self.has_key(k)
126+
return k in self
126127

127128
def firstkey(self):
128129
self._keyiter = None
@@ -132,15 +133,15 @@ def nextkey(self):
132133
if not self._keyiter:
133134
self._keyiter = ( k for (k,v) in cdbiter(self._fp, self._eod) )
134135
try:
135-
return self._keyiter.next()
136+
return next(self._keyiter)
136137
except StopIteration:
137138
return None
138139

139140
def each(self):
140141
if not self._eachiter:
141142
self._eachiter = cdbiter(self._fp, self._eod)
142143
try:
143-
return self._eachiter.next()
144+
return next(self._eachiter)
144145
except StopIteration:
145146
return None
146147

@@ -159,9 +160,9 @@ def __init__(self, cdbname, tmpname):
159160
self.fn = cdbname
160161
self.fntmp = tmpname
161162
self.numentries = 0
162-
self._fp = file(tmpname, 'wb')
163+
self._fp = open(tmpname, 'wb')
163164
self._pos = 2048 # sizeof((h,p))*256
164-
self._bucket = [ array('I') for _ in xrange(256) ]
165+
self._bucket = [ array('I') for _ in range(256) ]
165166
return
166167

167168
def __len__(self):
@@ -178,8 +179,8 @@ def add(self, k, v):
178179
(klen, vlen) = (len(k), len(v))
179180
self._fp.seek(self._pos)
180181
self._fp.write(pack('<II', klen, vlen))
181-
self._fp.write(k)
182-
self._fp.write(v)
182+
self._fp.write(k.encode())
183+
self._fp.write(v.encode())
183184
h = cdbhash(k)
184185
b = self._bucket[h % 256]
185186
b.append(h)
@@ -197,7 +198,7 @@ def finish(self):
197198
if not b1: continue
198199
blen = len(b1)
199200
a = array('I', [0]*blen*2)
200-
for j in xrange(0, blen, 2):
201+
for j in range(0, blen, 2):
201202
(h,p) = (b1[j],b1[j+1])
202203
i = ((h >> 8) % blen)*2
203204
while a[i+1]: # is cell[i] already occupied?
@@ -238,7 +239,7 @@ def txt2cdb(self, lines):
238239

239240
# cdbdump
240241
def cdbdump(cdbname):
241-
fp = file(cdbname, 'rb')
242+
fp = open(cdbname, 'rb')
242243
(eor,) = unpack('<I', fp.read(4))
243244
return cdbiter(fp, eor)
244245

@@ -248,7 +249,7 @@ def cdbmerge(iters):
248249
q = []
249250
for it in iters:
250251
try:
251-
q.append((it.next(),it))
252+
q.append((next(it),it))
252253
except StopIteration:
253254
pass
254255
k0 = None
@@ -262,7 +263,7 @@ def cdbmerge(iters):
262263
vs.append(v)
263264
k0 = k
264265
try:
265-
q.append((it.next(),it))
266+
q.append((next(it),it))
266267
except StopIteration:
267268
continue
268269
if vs: yield (k0,vs)
@@ -280,7 +281,7 @@ def main(argv):
280281
import fileinput
281282
import os.path
282283
def usage():
283-
print 'usage: %s [-f] outfile [infile ...]' % argv[0]
284+
print('usage: %s [-f] outfile [infile ...]' % argv[0])
284285
return 100
285286
try:
286287
(opts, args) = getopt.getopt(argv[1:], 'dfo:')
@@ -292,11 +293,11 @@ def usage():
292293
if not args: return usage()
293294
outfile = args.pop(0)
294295
if not force and os.path.exists(outfile):
295-
print >>sys.stderr, 'file exists: %r' % outfile
296+
print('file exists: %r' % outfile, file=sys.stderr)
296297
return 1
297298
#
298299
maker = CDBMaker(outfile, outfile+'.tmp')
299-
for line in fileinput.input(args):
300+
for line in fileinput.input(args, openhook=fileinput.hook_encoded('euc-jp')):
300301
line = line.strip()
301302
if line.startswith(';'): continue
302303
try:

0 commit comments

Comments
 (0)
Please sign in to comment.