#!/usr/bin/env python
##
##  skk2cdb.py - conversion tool for SKK dictionary.
##  by Yusuke Shinyama
##  * public domain *
##
##  usage: skk2cdb.py [-f] outfile [infile ...]
##  options: -f: force overwriting.
##
##  example:
##    $ skk2cdb.py SKK-JISYO.L.cdb SKK-JISYO.L
##
##  NOTE(review): this file was recovered from a copy whose line breaks were
##  lost and in which three spans (each running from a '<' up to the next '>')
##  had been stripped.  Code marked "NOTE(review)" below was rebuilt from the
##  cdb file-format specification and from the names the surviving code still
##  references; please diff it against a pristine pycdb.py if one is available.
##

# taken from pycdb.py - Python implementation of cdb

import os
import re
import sys
from array import array
from struct import pack, unpack

# Size in bytes of the fixed header: 256 (position, slot-count) pairs.
_HEADER_SIZE = 2048

# Record header of the cdbmake text format: "+klen,vlen:".
_TXT_HEAD = re.compile(br'^\+(\d+),(\d+):')

# os.rename() refuses to overwrite an existing file on Windows, which would
# break the -f option there; os.replace() (Python 3.3+) overwrites everywhere.
_replace = getattr(os, 'replace', os.rename)


def _tobytes(x):
    """Return *x* as the byte string that is hashed and stored in the file.

    Byte strings pass through unchanged, text is encoded as UTF-8 and any
    other object is converted with str() first.
    """
    if isinstance(x, bytes):
        return x
    if isinstance(x, bytearray):
        return bytes(x)
    if not hasattr(x, 'encode'):
        x = str(x)
        if isinstance(x, bytes):
            # Python 2: str() already produced a byte string.
            return x
    return x.encode('utf-8')


# calc hash value with a given key
def cdbhash(s, n=0):
    """Return the 32-bit cdb hash of *s*, starting from ``n + 5381``.

    Each byte c updates the hash as ``h = ((h * 33) ^ c) & 0xffffffff``.
    For empty input the start value is returned as is.
    """
    h = n + 5381
    # bytearray() yields integers on both Python 2 and Python 3.
    for c in bytearray(_tobytes(s)):
        h = ((h * 33) ^ c) & 0xffffffff
    return h


def decode(x):
    """Convert little-endian 32-bit words in byte string *x* to an array('I').

    Raises ValueError if the length of *x* is not a multiple of four.
    """
    if len(x) % 4:
        raise ValueError('truncated data: %d bytes' % len(x))
    # An explicit '<' format makes this independent of the host byte order.
    return array('I', unpack('<%dI' % (len(x) // 4), x))


def encode(a):
    """Convert a sequence of unsigned 32-bit integers to little-endian bytes.

    Unlike a byteswap()-based implementation this never modifies *a*.
    """
    return pack('<%dI' % len(a), *a)


##  CDB
##

# cdbiter
def cdbiter(fp, eod):
    """Yield every (key, value) record stored in the open cdb file *fp*.

    Records start right after the 2048-byte header and end at offset *eod*.

    NOTE(review): everything after the first unpack() call was rebuilt from
    the record layout (klen, vlen, key bytes, value bytes).  The file is left
    open; closing it is the caller's job.
    """
    kloc = _HEADER_SIZE
    while kloc < eod:
        fp.seek(kloc)
        (klen, vlen) = unpack('<II', fp.read(8))
        k = fp.read(klen)
        v = fp.read(vlen)
        kloc += 8 + klen + vlen
        yield (k, v)
    return


# CDBReader
class CDBReader(object):
    """Read-only lookup in a cdb file.

    NOTE(review): only the probing loop of the lookup method survived in the
    damaged source.  The constructor, the cache and the helper methods are
    reconstructions; the original class may have had further methods.
    """

    def __init__(self, cdbname, docache=1):
        """Open *cdbname*; remember looked-up values when *docache* is true."""
        self.name = cdbname
        self._fp = open(cdbname, 'rb')
        header = self._fp.read(_HEADER_SIZE)
        if len(header) != _HEADER_SIZE:
            self._fp.close()
            raise ValueError('not a cdb file: %r' % (cdbname,))
        hash0 = decode(header)
        # 256 pairs of (offset of hash table, number of slots in it).
        self._hash0 = [(hash0[i], hash0[i + 1]) for i in range(0, 512, 2)]
        # Hash tables are loaded on first use.
        self._hash1 = [None] * 256
        # The first hash table starts where the records end.
        (self._eod, _) = self._hash0[0]
        self._docache = docache
        self._cache = {}
        return

    def __getitem__(self, k):
        """Return the value stored under key *k*; raise KeyError if absent."""
        k = _tobytes(k)
        if k in self._cache:
            return self._cache[k]
        h = cdbhash(k)
        h1 = h & 0xff
        (pos_bucket, ncells) = self._hash0[h1]
        if ncells == 0:
            raise KeyError(k)
        hs = self._hash1[h1]
        if hs is None:
            self._fp.seek(pos_bucket)
            hs = decode(self._fp.read(ncells * 8))
            self._hash1[h1] = hs
        # Each slot is two words (hash, record offset); probe linearly.
        i = ((h >> 8) % ncells) * 2
        n = ncells * 2
        for _ in range(ncells):
            p1 = hs[i + 1]
            if p1 == 0:
                # An empty slot ends the probe sequence.
                raise KeyError(k)
            if hs[i] == h:
                self._fp.seek(p1)
                (klen, vlen) = unpack('<II', self._fp.read(8))
                k1 = self._fp.read(klen)
                if k1 == k:
                    v1 = self._fp.read(vlen)
                    if self._docache:
                        self._cache[k] = v1
                    return v1
            i = (i + 2) % n
        raise KeyError(k)

    def get(self, k, failed=None):
        """Return the value for *k*, or *failed* when the key is absent."""
        try:
            return self[k]
        except KeyError:
            return failed

    def has_key(self, k):
        """Return True if key *k* is present."""
        try:
            self[k]
        except KeyError:
            return False
        return True

    def __contains__(self, k):
        return self.has_key(k)

    def close(self):
        """Close the underlying file."""
        self._fp.close()
        return


# CDBMaker
class CDBMaker(object):
    """Write a cdb file to a temporary name and move it into place at the end.

    NOTE(review): `finish` (from the slot-probing loop on) and `txt2cdb`
    survived in the damaged source; `__init__`, `__len__`, `add` and the first
    lines of `finish` are reconstructions.
    """

    def __init__(self, cdbname, tmpname):
        """Start writing to *tmpname*; `finish` renames it to *cdbname*."""
        self.fn = cdbname
        self.fntmp = tmpname
        self.numentries = 0
        self._fp = open(tmpname, 'wb')
        # Records start right after the header, which is written last.
        self._pos = _HEADER_SIZE
        # One flat array of (hash, record offset) pairs per low hash byte.
        self._bucket = [array('I') for _ in range(256)]
        return

    def __len__(self):
        return self.numentries

    def add(self, k, v):
        """Append the record (*k*, *v*) and return self."""
        (k, v) = (_tobytes(k), _tobytes(v))
        (klen, vlen) = (len(k), len(v))
        self._fp.seek(self._pos)
        self._fp.write(pack('<II', klen, vlen))
        self._fp.write(k)
        self._fp.write(v)
        h = cdbhash(k)
        b1 = self._bucket[h % 256]
        b1.append(h)
        b1.append(self._pos)
        self._pos += 8 + klen + vlen
        self.numentries += 1
        return self

    def finish(self):
        """Write the hash tables and the header, then install the file."""
        fp = self._fp
        try:
            fp.seek(self._pos)
            pos_hash = self._pos
            # write hashes
            for b1 in self._bucket:
                if not b1:
                    continue
                # len(b1) is twice the number of entries, so each table gets
                # two slots per entry; a slot is two words (hash, offset).
                blen = len(b1)
                a = array('I', [0]) * (blen * 2)
                for j in range(0, blen, 2):
                    (h, p) = (b1[j], b1[j + 1])
                    i = ((h >> 8) % blen) * 2
                    while a[i + 1]:  # is cell[i] already occupied?
                        i = (i + 2) % len(a)
                    a[i] = h
                    a[i + 1] = p
                fp.write(encode(a))
            # write header
            fp.seek(0)
            a = array('I')
            for b1 in self._bucket:
                a.append(pos_hash)
                a.append(len(b1))
                pos_hash += len(b1) * 8
            fp.write(encode(a))
        finally:
            # close (also on failure, so the handle is never leaked)
            fp.close()
        _replace(self.fntmp, self.fn)
        return

    # txt2cdb
    def txt2cdb(self, lines):
        """Add records given in cdbmake text format and return self.

        Each line looks like ``+klen,vlen:key->value``.  Reading stops at the
        first line that does not start with a ``+klen,vlen:`` header.
        Raises ValueError if ``->`` does not follow the key.
        """
        for line in lines:
            # klen and vlen count bytes, so slice bytes rather than text.
            data = _tobytes(line)
            m = _TXT_HEAD.match(data)
            if not m:
                break
            (klen, vlen) = (int(m.group(1)), int(m.group(2)))
            i = len(m.group(0))
            k = data[i:i + klen]
            i += klen
            if data[i:i + 2] != b'->':
                raise ValueError('invalid separator: %r' % (line,))
            i += 2
            v = data[i:i + vlen]
            self.add(k, v)
        return self


# cdbdump
def cdbdump(cdbname):
    """Write all records of *cdbname* to stdout in cdbmake text format.

    NOTE(review): only the first two statements survived; the output format
    was chosen to be what `CDBMaker.txt2cdb` reads back.
    """
    # Keys and values are raw bytes; bypass the text layer on Python 3.
    out = getattr(sys.stdout, 'buffer', sys.stdout)
    fp = open(cdbname, 'rb')
    try:
        (eor,) = unpack('<I', fp.read(4))
        for (k, v) in cdbiter(fp, eor):
            head = '+%d,%d:' % (len(k), len(v))
            out.write(head.encode('ascii') + k + b'->' + v + b'\n')
        out.write(b'\n')
    finally:
        fp.close()
    return


# main
def main(argv):
    """Convert SKK dictionary files into one cdb file.

    Returns None on success, 1 if the output file exists and -f was not
    given, and 100 on a usage error.

    NOTE(review): the option parsing before the "file exists" check was
    rebuilt from the usage text in the file header.
    """
    import fileinput
    import getopt

    def usage():
        sys.stderr.write('usage: %s [-f] outfile [infile ...]\n' % argv[0])
        return 100

    try:
        (opts, args) = getopt.getopt(argv[1:], 'f')
    except getopt.GetoptError:
        return usage()
    force = False
    for (opt, _) in opts:
        if opt == '-f':
            force = True
    if not args:
        return usage()
    outfile = args.pop(0)
    if not force and os.path.exists(outfile):
        sys.stderr.write('file exists: %r\n' % outfile)
        return 1
    #
    maker = CDBMaker(outfile, outfile + '.tmp')
    # Binary mode: SKK dictionaries are often EUC-JP, and the entries must be
    # stored byte-for-byte whatever their encoding is.
    lines = fileinput.FileInput(files=args, mode='rb')
    try:
        for line in lines:
            line = line.strip()
            if line.startswith(b';'):
                continue
            try:
                i = line.index(b' ')
            except ValueError:
                continue
            (k, v) = (line[:i], line[i + 1:])
            maker.add(k, v)
    finally:
        lines.close()
    maker.finish()
    return


if __name__ == '__main__':
    sys.exit(main(sys.argv))