読者です 読者をやめる 読者になる 読者になる

sqlite_masterの中身を読み込む(2)

前回のものはページタイプ(INTKEY|LEAF_DATA|LEAF)のときのみ読み込めるスクリプトでしたが、ページタイプが(INTKEY|LEAF_DATA)のときも読み込めるように変更しました。例えばtracのデータベースファイルを読み込んでみると、次のように表示されます。


rowid=1, type=table, name=system, tbl_name=system, rootpage=2, sql=CREATE TABLE system (
name text PRIMARY KEY,
value text
)
rowid=2, type=index, name=sqlite_autoindex_system_1, tbl_name=system, rootpage=3, sql=None
rowid=3, type=table, name=permission, tbl_name=permission, rootpage=4, sql=CREATE TABLE permission (
username text,
action text,
UNIQUE (username,action)
)
rowid=4, type=index, name=sqlite_autoindex_permission_1, tbl_name=permission, rootpage=5, sql=None
rowid=5, type=table, name=auth_cookie, tbl_name=auth_cookie, rootpage=6, sql=CREATE TABLE auth_cookie (
cookie text,
name text,
ipnr text,
time integer,
UNIQUE (cookie,ipnr,name)
)
rowid=6, type=index, name=sqlite_autoindex_auth_cookie_1, tbl_name=auth_cookie, rootpage=7, sql=None
rowid=7, type=table, name=session, tbl_name=session, rootpage=8, sql=CREATE TABLE session (
sid text,
authenticated integer,
last_visit integer,
UNIQUE (sid,authenticated)
)
rowid=8, type=index, name=sqlite_autoindex_session_1, tbl_name=session, rootpage=9, sql=None
...(続きます)

コードは次の通りです。

#!/usr/bin/env python2.6
import bitstring

HEADER_OFFSET_PAGE1 = 100
INTKEY = 0x01
ZERO_DATA = 0x02
LEAF_DATA = 0x04
LEAF = 0x08
def get2byte(fp):
    return fp.read('uint:8') << 8 | fp.read('uint:8')
def get4byte(fp):
    return fp.read('uint:8') << 24 | fp.read('uint:8') << 16 |\
      fp.read('uint:8') << 8 | fp.read('uint:8')
def get_pagesize(fp):
        fp.pos = 16*8
        return fp.read('uint:8') << 8 | fp.read('uint:8') << 16
def get_cellsize(fp, iTab=0):
    if iTab == 0:
        fp.pos = (HEADER_OFFSET_PAGE1 + 3)*8
    else:
        fp.pos = (get_pagesize(fp)*(iTab-1)+3)*8
    return get2byte(fp)
def get_pagetype(fp, iTab=0):
    if iTab == 0:
        fp.pos = (HEADER_OFFSET_PAGE1)*8
    else:
        fp.pos = get_pagesize(fp)*(iTab-1)*8
    return fp.read('uint:8')
class Record(object):
    def __init__(self, intKey, type, name, tbl_name, rootpage, sql):
        self.intKey = intKey
        self.type = type
        self.name = name
        self.tbl_name = tbl_name
        self.rootpage = rootpage
        if sql == '':
            self.sql = None
        else:
            self.sql =sql
    def __repr__(self):
        return "rowid=%d, type=%s, name=%s, tbl_name=%s, rootpage=%d, sql=%s" \
        % (self.intKey, self.type, self.name, self.tbl_name, self.rootpage, self.sql)
def find_record(fp, iCell, iTab=0, ptr_size=0):
    cell_offset = find_cell_offset(fp, iCell, ptr_size, iTab)
    if iTab == 0:
        fp.pos = cell_offset*8
    else:
        fp.pos = (get_pagesize(fp)*(iTab-1) + cell_offset)*8
    n = 0
    offsets = []
    nPayload, tn =  getVarint(fp)
    n += tn
    intKey, tn = getVarint(fp)
    cell_hdr_offset = n + tn
    toff, tn = getVarint(fp)# keylength
    offsets.append(toff)
    for i in range(5):
        serial_type, tn = getVarint(fp)
        n += tn
        offsets.append(get_fieldsize(serial_type))
    if iTab == 0:
        fp.pos = (cell_offset + cell_hdr_offset + offsets[0])*8
    else:
        fp.pos = (get_pagesize(fp)*(iTab-1) + cell_offset + cell_hdr_offset + offsets[0])*8
    type = fp.read('bytes:%d' % offsets[1])
    name = fp.read('bytes:%d' % offsets[2])
    tbl_name = fp.read('bytes:%d' % offsets[3])
    rootpage = fp.read('int:%d' % (offsets[4]*8))
    sql = fp.read('bytes:%d' % (offsets[5]))
    tot = 0
    for i in offsets:
        tot += i
    assert(tot == nPayload)
    return Record(intKey, type, name, tbl_name, rootpage, sql)
def find_cell_offset(fp, idx, ptr_size, iTab):
    mask = get_pagesize(fp) - 0x01
    if iTab == 0:
        celloffset = HEADER_OFFSET_PAGE1 + 8 + ptr_size
    else:
        celloffset = get_pagesize(fp)*(iTab-1) + 8 + ptr_size
    fp.pos = (celloffset + idx*2)*8
    return mask & get2byte(fp)
def getVarint(fp):
    v = fp.read('uint:8')
    if not (v & 0x80):
        return v, 1
    p = fp.read('uint:8')
    if not (p & 0x80):
        v &= 0x7f
        v <<= 7
        v |= p & 0x7f
        return v, 2
    raise Exception('too long')
SIZE = [0,1,2,3,4,6,8,8,0,0,0,0]
def get_fieldsize(serial_type):
    if serial_type >= 12:
        return (serial_type-12)/2
    else:
        return SIZE[serial_type];
import sys
if __name__ == '__main__':
    if len(sys.argv) != 2:
        print "usage:%s dabasefile" % sys.argv[0]
    fp = bitstring.ConstBitStream(filename=sys.argv[1])
    if(INTKEY|LEAF_DATA|LEAF == get_pagetype(fp)):
        for idx in range(get_cellsize(fp)):
            print find_record(fp, idx)
    else:
        assert(INTKEY|LEAF_DATA , get_pagetype(fp))
        nCells = []
        children = []
        for idx in range(get_cellsize(fp)):
            fp.pos = find_cell_offset(fp, idx, 4, 0)*8
            leaf_child = get4byte(fp)
            children.append(leaf_child)
            assert(INTKEY|LEAF_DATA|LEAF , get_pagetype(fp, leaf_child))
            nCells.append(get_cellsize(fp, leaf_child))
        # right-child
        fp.pos = (HEADER_OFFSET_PAGE1 + 8)*8
        most_right_child = get4byte(fp)
        assert(INTKEY|LEAF_DATA|LEAF , get_pagetype(fp, most_right_child))
        nCells.append(get_cellsize(fp, most_right_child))
        children.append(most_right_child)
        for idx, iTab in enumerate(children):
            for iCell in range(nCells[idx]):
                print find_record(fp, iCell, iTab)