sqlite_masterの中身を読み込む(2)
前回のものはページタイプ(INTKEY|LEAF_DATA|LEAF)のときのみ読み込めるスクリプトでしたが、ページタイプが(INTKEY|LEAF_DATA)のときも読み込めるように変更しました。例えばtracのデータベースファイルを読み込んでみると、次のように表示されます。
rowid=1, type=table, name=system, tbl_name=system, rootpage=2, sql=CREATE TABLE system (
name text PRIMARY KEY,
value text
)
rowid=2, type=index, name=sqlite_autoindex_system_1, tbl_name=system, rootpage=3, sql=None
rowid=3, type=table, name=permission, tbl_name=permission, rootpage=4, sql=CREATE TABLE permission (
username text,
action text,
UNIQUE (username,action)
)
rowid=4, type=index, name=sqlite_autoindex_permission_1, tbl_name=permission, rootpage=5, sql=None
rowid=5, type=table, name=auth_cookie, tbl_name=auth_cookie, rootpage=6, sql=CREATE TABLE auth_cookie (
cookie text,
name text,
ipnr text,
time integer,
UNIQUE (cookie,ipnr,name)
)
rowid=6, type=index, name=sqlite_autoindex_auth_cookie_1, tbl_name=auth_cookie, rootpage=7, sql=None
rowid=7, type=table, name=session, tbl_name=session, rootpage=8, sql=CREATE TABLE session (
sid text,
authenticated integer,
last_visit integer,
UNIQUE (sid,authenticated)
)
rowid=8, type=index, name=sqlite_autoindex_session_1, tbl_name=session, rootpage=9, sql=None
...(続きます)
コードは次の通りです。
#!/usr/bin/env python2.6 import bitstring HEADER_OFFSET_PAGE1 = 100 INTKEY = 0x01 ZERO_DATA = 0x02 LEAF_DATA = 0x04 LEAF = 0x08 def get2byte(fp): return fp.read('uint:8') << 8 | fp.read('uint:8') def get4byte(fp): return fp.read('uint:8') << 24 | fp.read('uint:8') << 16 |\ fp.read('uint:8') << 8 | fp.read('uint:8') def get_pagesize(fp): fp.pos = 16*8 return fp.read('uint:8') << 8 | fp.read('uint:8') << 16 def get_cellsize(fp, iTab=0): if iTab == 0: fp.pos = (HEADER_OFFSET_PAGE1 + 3)*8 else: fp.pos = (get_pagesize(fp)*(iTab-1)+3)*8 return get2byte(fp) def get_pagetype(fp, iTab=0): if iTab == 0: fp.pos = (HEADER_OFFSET_PAGE1)*8 else: fp.pos = get_pagesize(fp)*(iTab-1)*8 return fp.read('uint:8') class Record(object): def __init__(self, intKey, type, name, tbl_name, rootpage, sql): self.intKey = intKey self.type = type self.name = name self.tbl_name = tbl_name self.rootpage = rootpage if sql == '': self.sql = None else: self.sql =sql def __repr__(self): return "rowid=%d, type=%s, name=%s, tbl_name=%s, rootpage=%d, sql=%s" \ % (self.intKey, self.type, self.name, self.tbl_name, self.rootpage, self.sql) def find_record(fp, iCell, iTab=0, ptr_size=0): cell_offset = find_cell_offset(fp, iCell, ptr_size, iTab) if iTab == 0: fp.pos = cell_offset*8 else: fp.pos = (get_pagesize(fp)*(iTab-1) + cell_offset)*8 n = 0 offsets = [] nPayload, tn = getVarint(fp) n += tn intKey, tn = getVarint(fp) cell_hdr_offset = n + tn toff, tn = getVarint(fp)# keylength offsets.append(toff) for i in range(5): serial_type, tn = getVarint(fp) n += tn offsets.append(get_fieldsize(serial_type)) if iTab == 0: fp.pos = (cell_offset + cell_hdr_offset + offsets[0])*8 else: fp.pos = (get_pagesize(fp)*(iTab-1) + cell_offset + cell_hdr_offset + offsets[0])*8 type = fp.read('bytes:%d' % offsets[1]) name = fp.read('bytes:%d' % offsets[2]) tbl_name = fp.read('bytes:%d' % offsets[3]) rootpage = fp.read('int:%d' % (offsets[4]*8)) sql = fp.read('bytes:%d' % (offsets[5])) tot = 0 for i in offsets: tot += i assert(tot == nPayload) return Record(intKey, type, name, tbl_name, rootpage, sql) def find_cell_offset(fp, idx, ptr_size, iTab): mask = get_pagesize(fp) - 0x01 if iTab == 0: celloffset = HEADER_OFFSET_PAGE1 + 8 + ptr_size else: celloffset = get_pagesize(fp)*(iTab-1) + 8 + ptr_size fp.pos = (celloffset + idx*2)*8 return mask & get2byte(fp) def getVarint(fp): v = fp.read('uint:8') if not (v & 0x80): return v, 1 p = fp.read('uint:8') if not (p & 0x80): v &= 0x7f v <<= 7 v |= p & 0x7f return v, 2 raise Exception('too long') SIZE = [0,1,2,3,4,6,8,8,0,0,0,0] def get_fieldsize(serial_type): if serial_type >= 12: return (serial_type-12)/2 else: return SIZE[serial_type]; import sys if __name__ == '__main__': if len(sys.argv) != 2: print "usage:%s dabasefile" % sys.argv[0] fp = bitstring.ConstBitStream(filename=sys.argv[1]) if(INTKEY|LEAF_DATA|LEAF == get_pagetype(fp)): for idx in range(get_cellsize(fp)): print find_record(fp, idx) else: assert(INTKEY|LEAF_DATA , get_pagetype(fp)) nCells = [] children = [] for idx in range(get_cellsize(fp)): fp.pos = find_cell_offset(fp, idx, 4, 0)*8 leaf_child = get4byte(fp) children.append(leaf_child) assert(INTKEY|LEAF_DATA|LEAF , get_pagetype(fp, leaf_child)) nCells.append(get_cellsize(fp, leaf_child)) # right-child fp.pos = (HEADER_OFFSET_PAGE1 + 8)*8 most_right_child = get4byte(fp) assert(INTKEY|LEAF_DATA|LEAF , get_pagetype(fp, most_right_child)) nCells.append(get_cellsize(fp, most_right_child)) children.append(most_right_child) for idx, iTab in enumerate(children): for iCell in range(nCells[idx]): print find_record(fp, iCell, iTab)