読者です 読者をやめる 読者になる 読者になる

sqliteのデータベースファイルを読み込む

前回から大きく変わったところはないですが、sqlite_master以外のテーブルも読み込むように変更しました。以前のようにデータベースファイルだけ指定すると、sqlite_masterのレコードが標準出力されます。


$./select.py test.db
(1, 'table', 'Products', 'Products', 2, 'CREATE TABLE Products(name text, price integer)')
(2, 'table', 'People', 'People', 3, 'CREATE TABLE People(name text, age integer)')
(3, 'table', 'Tbl', 'Tbl', 4, 'CREATE TABLE Tbl(col1 text, col2 text, col3 integer, col4 integer)')

更にテーブル名を指定すると、指定したテーブルのレコードが標準出力されます。

$./select.py test.db Products
(1, 'apple', 40)
(2, 'orange', 30)

select.pyは以下の通りです。

#!/usr/bin/env python2.6
import bitstring
HEADER_OFFSET_PAGE1 = 100
#page type
INTKEY = 0x01
ZERO_DATA = 0x02
LEAF_DATA = 0x04
LEAF = 0x08
def get2byte(fp):
    return fp.read('uint:8') << 8 | fp.read('uint:8')
def get4byte(fp):
    return fp.read('uint:8') << 24 | fp.read('uint:8') << 16 |\
      fp.read('uint:8') << 8 | fp.read('uint:8')
def get_pagesize(fp):
    fp.pos = 16*8
    return fp.read('uint:8') << 8 | fp.read('uint:8') << 16
def set_value(fp, serial_type, values):
    size = get_fieldsize(serial_type)
    if serial_type == 0 or serial_type == 10 or serial_type == 11:
        values.append(None)
    elif 1 <= serial_type and serial_type <= 6:
        values.append(fp.read('int:%d' % (size*8)))
    else:
        values.append(fp.read('bytes:%d' % size))
    return size
def get_cellsize(fp, iTab=1):
    if iTab == 1:
        fp.pos = (HEADER_OFFSET_PAGE1 + 3)*8
    else:
        fp.pos = (get_pagesize(fp)*(iTab-1)+3)*8
    return get2byte(fp)
def get_pagetype(fp, iTab=1):
    if iTab == 1:
        fp.pos = (HEADER_OFFSET_PAGE1)*8
    else:
        fp.pos = get_pagesize(fp)*(iTab-1)*8
    return fp.read('uint:8')
def find_record(fp, iCell, iTab=1, ptr_size=0):
    cell_offset = find_cell_offset(fp, iCell, ptr_size, iTab)
    fp.pos = (get_pagesize(fp)*(iTab-1) + cell_offset)*8
    n = 0
    nPayload, tn =  getVarint(fp)
    n += tn
    rowid, tn = getVarint(fp)
    cell_hdr_offset = n + tn
    keyoff, tn = getVarint(fp)
    stypes = []
    tpos = fp.pos
    pos = (get_pagesize(fp)*(iTab-1) + cell_offset + cell_hdr_offset + keyoff)*8
    fp.pos = tpos
    while fp.pos != pos:
        serial_type, tn = getVarint(fp)
        n += tn
        stypes.append(serial_type)
    values = [rowid]
    fsizes = []
    for serial_type in stypes:
        fsizes.append(set_value(fp, serial_type, values))
    tot = keyoff
    for fsize in fsizes:
        tot += fsize
    assert(tot == nPayload)
    return tuple(values)
def find_cell_offset(fp, idx, ptr_size, iTab):
    mask = get_pagesize(fp) - 0x01
    if iTab == 1:
        celloffset = HEADER_OFFSET_PAGE1 + 8 + ptr_size
    else:
        celloffset = get_pagesize(fp)*(iTab-1) + 8 + ptr_size
    fp.pos = (celloffset + idx*2)*8
    return mask & get2byte(fp)
def getVarint(fp):
    p = []
    v = fp.read('uint:8')
    if not (v & 0x80):
        return v, 1
    p.append(fp.read('uint:8'))
    if not (p[0] & 0x80):
        v &= 0x7f
        v <<= 7
        v |= p[0]
        return v, 2
    p.append(fp.read('uint:8'))
    if not (p[1] & 0x80):
        v &= 0x7f
        v <<= 14
        v |= (p[0] & 0x7f) << 7 | p[1]
        return v, 3        
    raise Exception('too long')
SIZE = [0,1,2,3,4,6,8,8,0,0,0,0]
def get_fieldsize(serial_type):
    if serial_type >= 12:
        return (serial_type-12)/2
    else:
        return SIZE[serial_type];
TABLES = {'sqlite_master':1}
def tables_add(row):
    if row[1] == 'table':
        TABLES[row[2]] = row[4]
def printf(row):
    print row
def read_db(fp, iTab, funcp):
    if(INTKEY|LEAF_DATA|LEAF == get_pagetype(fp, iTab)):        
        for idx in range(get_cellsize(fp, iTab)):
            funcp(find_record(fp, idx, iTab))
    else:
        nCells = []
        children = []
        for idx in range(get_cellsize(fp, iTab)):
            fp.pos = (get_pagesize(fp)*(iTab-1)+find_cell_offset(fp, idx, 4, iTab))*8
            leaf_child = get4byte(fp)
            children.append(leaf_child)
            nCells.append(get_cellsize(fp, leaf_child))
        # right-child
        if iTab == 1:
            fp.pos = (HEADER_OFFSET_PAGE1 + 8)*8
        else:
            fp.pos = (get_pagesize(fp)*(iTab-1) + 8)*8
        right_child = get4byte(fp)
        nCells.append(get_cellsize(fp, right_child))
        children.append(right_child)
        for idx, iTab in enumerate(children):
            for iCell in range(nCells[idx]):
                funcp(find_record(fp, iCell, iTab)) 
def init_reader(fname):
    iTab = 1
    fp = bitstring.ConstBitStream(filename=fname)
    read_db(fp, iTab, tables_add)
    return fp
import sys
if __name__ == '__main__':
    argc = len(sys.argv)
    if argc < 2 or argc > 3:
        print "usage:%s <dabasefile> <table>?" % sys.argv[0]
        sys.exit(1)
    fp = init_reader(sys.argv[1])
    iTab = 1
    if argc == 3:
        iTab = TABLES[sys.argv[2]]
    read_db(fp, iTab, printf)