bitstringを使ってみた
$ sqlite3 test.db
SQLite version 3.7.7.1 2011-06-28 17:39:05
Enter ".help" for instructions
Enter SQL statements terminated with a ";"
sqlite> CREATE TABLE Products(name text, price integer, locale text);
sqlite> CREATE TABLE People(name text, age integer);
sqlite> CREATE TABLE Tbl(one text, two text, three integer, four integer);
sqlite> .exit
のようにして作ったファイルを読み込むスクリプトをbitstringを使って書きました。使い方は
$ ./bitstring_test.py test.db
type=table, name=Products, tbl_name=Products, rootpage=2, sql=CREATE TABLE Products(name text, price integer, locale text)
type=table, name=People, tbl_name=People, rootpage=3, sql=CREATE TABLE People(name text, age integer)
type=table, name=Tbl, tbl_name=Tbl, rootpage=4, sql=CREATE TABLE Tbl(one text, two text, three integer, four integer)
です。
#!/usr/bin/env python2.6
"""Dump the rows of the schema table stored on page 1 of an SQLite file.

Usage: bitstring_test.py DATABASEFILE

Limitations: only page 1 is examined and it must be a table b-tree leaf
page, i.e. the schema must be small enough to fit on that single page
without interior pages or overflow pages.
"""
import sys

import bitstring

# Page 1 begins with the 100-byte database file header; the b-tree page
# header of page 1 starts right after it.
HEADER_OFFSET_PAGE1 = 100

# Flag bits of the b-tree page type byte (named after SQLite's PTF_*
# constants).  A table leaf page has INTKEY | LEAF_DATA | LEAF set.
INTKEY = 0x01
ZERO_DATA = 0x02
LEAF_DATA = 0x04
LEAF = 0x08

# Content size in bytes for record serial types 0..11.  Types 8 and 9 are
# the integer constants 0 and 1 and occupy no bytes; 10 and 11 are reserved.
SIZE = [0, 1, 2, 3, 4, 6, 8, 8, 0, 0, 0, 0]


def get2byte(fp):
    """Read a big-endian unsigned 16-bit integer at the current position."""
    return fp.read('uint:16')


def get_pagesize(fp):
    """Return the database page size stored at byte offset 16 of the file.

    The two bytes are combined as (first << 8) | (second << 16), so the
    stored value 0x0001 decodes to 65536, which does not fit in 16 bits.
    """
    fp.pos = 16 * 8
    first = fp.read('uint:8')
    second = fp.read('uint:8')
    return (first << 8) | (second << 16)


def get_cellsize(fp):
    """Return the number of cells on page 1 (page header bytes 3-4)."""
    fp.pos = (HEADER_OFFSET_PAGE1 + 3) * 8
    return get2byte(fp)


def get_pagetype(fp):
    """Return the page type flag byte of page 1 (page header byte 0)."""
    fp.pos = HEADER_OFFSET_PAGE1 * 8
    return fp.read('uint:8')


class Record(object):
    """One row of the schema table: type, name, tbl_name, rootpage, sql."""

    def __init__(self, type, name, tbl_name, rootpage, sql):
        self.type = type
        self.name = name
        self.tbl_name = tbl_name
        self.rootpage = rootpage
        self.sql = sql

    def __repr__(self):
        # %s rather than %d for rootpage so that a NULL (None) root page
        # can still be displayed; integers render identically either way.
        return "type=%s, name=%s, tbl_name=%s, rootpage=%s, sql=%s" \
            % (self.type, self.name, self.tbl_name, self.rootpage, self.sql)


def _read_int_column(fp, serial_type):
    """Read an integer column of the given serial type at the current position.

    Serial types 8 and 9 encode the constants 0 and 1 without any content
    bytes.  Any other zero-width type (e.g. NULL) yields None.
    """
    if serial_type == 8:
        return 0
    if serial_type == 9:
        return 1
    size = get_fieldsize(serial_type)
    if size == 0:
        return None
    return fp.read('int:%d' % (size * 8))


def find_record(fp, idx):
    """Decode the idx-th cell (0-based) of page 1 into a Record.

    Raises ValueError when the record header and the payload size stored
    in the cell disagree, e.g. because the row does not have exactly five
    columns.
    """
    cell_offset = find_cell_offset(fp, idx)
    fp.pos = cell_offset * 8

    # A table leaf cell is: payload size (varint), rowid (varint), payload.
    payload_size, payload_size_len = getVarint(fp)
    # The rowid is not checked against idx: rowids are not contiguous once
    # a schema entry has been dropped.
    _rowid, rowid_len = getVarint(fp)
    payload_start = cell_offset + payload_size_len + rowid_len

    # The payload starts with the record header: its own total size
    # (varint, counting itself) followed by one serial type per column.
    header_size, _ = getVarint(fp)
    serial_types = [getVarint(fp)[0] for _ in range(5)]
    sizes = [get_fieldsize(t) for t in serial_types]

    if header_size + sum(sizes) != payload_size:
        raise ValueError(
            "cell %d: record layout (%d bytes) does not match payload "
            "size (%d bytes)" % (idx, header_size + sum(sizes), payload_size))

    # Column contents follow the record header back to back.
    fp.pos = (payload_start + header_size) * 8
    rec_type = fp.read('bytes:%d' % sizes[0])
    name = fp.read('bytes:%d' % sizes[1])
    tbl_name = fp.read('bytes:%d' % sizes[2])
    rootpage = _read_int_column(fp, serial_types[3])
    sql = fp.read('bytes:%d' % sizes[4])
    return Record(rec_type, name, tbl_name, rootpage, sql)


def find_cell_offset(fp, idx):
    """Return the byte offset of the idx-th cell from the cell pointer array.

    The array starts after the 8-byte leaf page header and holds one
    2-byte offset per cell.  Offsets are relative to the start of the
    page; page 1 starts at file offset 0, so they are file offsets too.
    """
    mask = get_pagesize(fp) - 0x01
    pointer_array = (HEADER_OFFSET_PAGE1 + 8) * 8
    fp.pos = pointer_array + idx * 16
    return mask & get2byte(fp)


def getVarint(fp):
    """Read an SQLite varint; return (value, number_of_bytes_consumed).

    A varint is 1 to 9 bytes long.  Each of the first eight bytes carries
    7 payload bits with the high bit as a continuation flag; a ninth byte,
    if present, carries 8 payload bits.
    """
    value = 0
    for nbytes in range(1, 9):
        byte = fp.read('uint:8')
        value = (value << 7) | (byte & 0x7f)
        if not (byte & 0x80):
            return value, nbytes
    value = (value << 8) | fp.read('uint:8')
    return value, 9


def get_fieldsize(serial_type):
    """Return the content size in bytes for a record serial type.

    Types >= 12 are BLOB (even) or TEXT (odd) of length (type - 12) // 2
    respectively (type - 13) // 2; floor division covers both cases.
    """
    if serial_type >= 12:
        return (serial_type - 12) // 2
    return SIZE[serial_type]


def main(argv):
    """Print every schema record found on page 1 of the file argv[1]."""
    if len(argv) != 2:
        # Exit here; continuing would fail on the missing argv[1].
        sys.exit("usage: %s databasefile" % argv[0])
    fp = bitstring.ConstBitStream(filename=argv[1])
    if get_pagetype(fp) != (INTKEY | LEAF_DATA | LEAF):
        sys.exit("%s: page 1 is not a table leaf page" % argv[1])
    for idx in range(get_cellsize(fp)):
        print(find_record(fp, idx))


if __name__ == '__main__':
    main(sys.argv)