bitstringを使ってみた


$ sqlite3 test.db
SQLite version 3.7.7.1 2011-06-28 17:39:05
Enter ".help" for instructions
Enter SQL statements terminated with a ";"
sqlite> CREATE TABLE Products(name text, price integer, locale text);
sqlite> CREATE TABLE People(name text, age integer);
sqlite> CREATE TABLE Tbl(one text, two text, three integer, four integer);
sqlite> .exit

のようにして作ったファイルを読み込むスクリプトをbitstringを使って書きました。使い方は

$./bitstring_test.py test.db
type=table, name=Products, tbl_name=Products, rootpage=2, sql=CREATE TABLE Products(name text, price integer, locale text)
type=table, name=People, tbl_name=People, rootpage=3, sql=CREATE TABLE People(name text, age integer)
type=table, name=Tbl, tbl_name=Tbl, rootpage=4, sql=CREATE TABLE Tbl(one text, two text, three integer, four integer)

です。

#!/usr/bin/env python2.6
import bitstring

# Byte offset used as the base for every page-1 header read in this script
# (page type at +0, cell count at +3, cell pointer array at +8).
# Presumably the size of the SQLite database file header that precedes the
# b-tree page header on the first page -- confirm against the file-format docs.
HEADER_OFFSET_PAGE1 = 100
# Bit flags combined and compared with the page-type byte; the entry point
# requires that byte to equal INTKEY | LEAF_DATA | LEAF.
INTKEY = 0x01
# Not referenced anywhere in the visible code.
ZERO_DATA = 0x02
LEAF_DATA = 0x04
LEAF = 0x08
def get2byte(fp):
    """Read two bytes at the current position as a big-endian unsigned int.

    fp must offer ``read('uint:8')`` returning one unsigned byte per call
    (as a bitstring stream does).  The first byte read is the high byte.
    """
    high = fp.read('uint:8')
    low = fp.read('uint:8')
    return (high << 8) | low
def get_pagesize(fp):
    """Return the page size decoded from the two bytes at file offset 16.

    The stream position is moved to byte 16 (fp.pos is expressed in bits).
    The first byte is shifted left by 8 and the second by 16, so the byte
    pair 00 01 decodes to 65536 instead of 1.
    NOTE(review): this looks like the SQLite convention where a stored page
    size of 1 stands for 65536 -- confirm against the file-format docs.
    """
    fp.pos = 16 * 8
    first = fp.read('uint:8')
    second = fp.read('uint:8')
    return (first << 8) | (second << 16)
def get_cellsize(fp):
    """Return the 16-bit value stored 3 bytes into the page-1 header.

    Despite the name, the caller uses the result as the number of cells to
    iterate over, not as a size in bytes.  Moves the stream position.
    """
    count_field_offset = HEADER_OFFSET_PAGE1 + 3  # in bytes
    fp.pos = count_field_offset * 8               # fp.pos is in bits
    cell_count = get2byte(fp)
    return cell_count
def get_pagetype(fp):
    """Return the byte located at HEADER_OFFSET_PAGE1 (the page-type flags).

    The entry point compares this value with INTKEY | LEAF_DATA | LEAF.
    Moves the stream position.
    """
    fp.pos = 8 * HEADER_OFFSET_PAGE1  # byte offset -> bit offset
    flags = fp.read('uint:8')
    return flags
class Record(object):
    """Plain holder for the five fields decoded from one cell.

    Attributes: type, name, tbl_name, rootpage, sql.  ``rootpage`` is
    formatted with ``%d`` in the repr, so it must be numeric.
    """

    # Layout of the one-line textual form produced by __repr__.
    _TEMPLATE = "type=%s, name=%s, tbl_name=%s, rootpage=%d, sql=%s"

    def __init__(self, type, name, tbl_name, rootpage, sql):
        # `type` shadows the builtin, but it is part of the public signature.
        self.type, self.name, self.tbl_name = type, name, tbl_name
        self.rootpage, self.sql = rootpage, sql

    def __repr__(self):
        """Return all five fields as ``key=value`` pairs on one line."""
        fields = (self.type, self.name, self.tbl_name, self.rootpage, self.sql)
        return self._TEMPLATE % fields
def find_record(fp, idx):
    """Decode cell number *idx* of the first page into a Record.

    Bytes are consumed in this order, starting at the cell's offset:
      1. varint: payload size
      2. varint: integer key (asserted to equal idx + 1)
      3. varint: record header length, then five serial-type varints
      4. the five field bodies: type, name, tbl_name (bytes), rootpage
         (signed integer of the declared width), sql (bytes)

    Raises AssertionError when the key is not idx + 1, or when the header
    length plus the five field sizes does not add up to the payload size.

    NOTE(review): the idx + 1 check presumes keys run 1..N without gaps --
    confirm this holds for files from which rows have been deleted.
    """
    start = find_cell_offset(fp, idx)
    fp.pos = start * 8  # fp.pos is a bit position

    # Cell prefix: payload size followed by the integer key.
    payload_len, payload_len_bytes = getVarint(fp)
    rowid, rowid_bytes = getVarint(fp)
    assert(idx+1 == rowid)
    prefix_len = payload_len_bytes + rowid_bytes

    # sizes[0] is the record header length; sizes[1:] are the byte widths
    # of the five fields, derived from their serial types.
    header_len, _ = getVarint(fp)
    sizes = [header_len]
    for _ in range(5):
        serial_type, _ = getVarint(fp)
        sizes.append(get_fieldsize(serial_type))

    # Field bodies begin right after the record header.
    fp.pos = (start + prefix_len + sizes[0]) * 8
    type_len, name_len, tbl_len, rootpage_len, sql_len = sizes[1:]
    rec_type = fp.read('bytes:%d' % type_len)
    rec_name = fp.read('bytes:%d' % name_len)
    rec_tbl_name = fp.read('bytes:%d' % tbl_len)
    rec_rootpage = fp.read('int:%d' % (rootpage_len*8))
    rec_sql = fp.read('bytes:%d' % (sql_len))

    # Header plus bodies must account for the whole payload.
    assert(sum(sizes) == payload_len)
    return Record(rec_type, rec_name, rec_tbl_name, rec_rootpage, rec_sql)

def find_cell_offset(fp, idx):
    """Return the byte offset of cell *idx*, read from the cell pointer array.

    The array of 2-byte entries starts 8 bytes after HEADER_OFFSET_PAGE1.
    The entry is masked with (page size - 1) before being returned.
    Moves the stream position.
    """
    # Read the page size first: get_pagesize repositions the stream itself.
    offset_mask = get_pagesize(fp) - 0x01
    pointer_array_start = HEADER_OFFSET_PAGE1 + 8  # in bytes
    fp.pos = (pointer_array_start + 2 * idx) * 8   # fp.pos is in bits
    raw_pointer = get2byte(fp)
    return offset_mask & raw_pointer
def getVarint(fp):
    """Decode one SQLite variable-length integer at the current position.

    A varint occupies 1 to 9 bytes, most significant group first.  Each of
    the first eight bytes carries 7 payload bits and uses its high bit as a
    "more bytes follow" flag; a ninth byte, if reached, carries 8 payload
    bits.  Earlier versions of this function only handled 1- and 2-byte
    encodings and raised for anything longer; results for those short
    encodings are unchanged.

    fp must offer ``read('uint:8')`` returning one unsigned byte per call.

    Returns a tuple ``(value, nbytes)``: the decoded value as an unsigned
    integer (no two's-complement reinterpretation is applied) and the
    number of bytes consumed.
    """
    value = 0
    for nbytes in range(1, 9):
        byte = fp.read('uint:8')
        value = (value << 7) | (byte & 0x7f)
        if not (byte & 0x80):
            return value, nbytes
    # All eight continuation bits were set: the ninth byte is used in full.
    value = (value << 8) | fp.read('uint:8')
    return value, 9
# Body size in bytes for serial types 0..11, indexed by serial type.
SIZE = [0, 1, 2, 3, 4, 6, 8, 8, 0, 0, 0, 0]


def get_fieldsize(serial_type):
    """Return the number of body bytes a field of *serial_type* occupies.

    Serial types below 12 are looked up in SIZE.  Types 12 and above encode
    their own length as (serial_type - 12) // 2.  Floor division is used so
    the result is an int on both Python 2 and Python 3; plain ``/`` would
    produce a float (and a fractional size for odd types) on Python 3.
    """
    if serial_type >= 12:
        return (serial_type - 12) // 2
    return SIZE[serial_type]
import sys
if __name__ == '__main__':
    # Command-line entry point: print every record found on the first page
    # of the database file given as the single argument.
    if len(sys.argv) != 2:
        print("usage:%s databasefile" % sys.argv[0])
        # Stop here; falling through would crash on sys.argv[1] when no
        # argument was supplied.
        sys.exit(1)
    fp = bitstring.ConstBitStream(filename=sys.argv[1])
    # Only a first page whose type byte is exactly INTKEY|LEAF_DATA|LEAF is
    # handled by the parsing code.
    assert(INTKEY|LEAF_DATA|LEAF == get_pagetype(fp))
    for idx in range(get_cellsize(fp)):
        # Single-argument print(...) behaves the same as the print statement
        # on Python 2 and is also valid on Python 3.
        print(find_record(fp, idx))