sqliteのデータベースファイルを読み込む
前回から大きく変わったところはないですが、sqlite_master以外のテーブルも読み込むように変更しました。以前のようにデータベースファイルだけ指定すると、sqlite_masterのレコードが標準出力されます。
$./select.py test.db
(1, 'table', 'Products', 'Products', 2, 'CREATE TABLE Products(name text, price integer)')
(2, 'table', 'People', 'People', 3, 'CREATE TABLE People(name text, age integer)')
(3, 'table', 'Tbl', 'Tbl', 4, 'CREATE TABLE Tbl(col1 text, col2 text, col3 integer, col4 integer)')
更にテーブル名を指定すると、指定したテーブルのレコードが標準出力されます。
$./select.py test.db Products
(1, 'apple', 40)
(2, 'orange', 30)
select.pyは以下の通りです。
#!/usr/bin/env python2.6 import bitstring HEADER_OFFSET_PAGE1 = 100 #page type INTKEY = 0x01 ZERO_DATA = 0x02 LEAF_DATA = 0x04 LEAF = 0x08 def get2byte(fp): return fp.read('uint:8') << 8 | fp.read('uint:8') def get4byte(fp): return fp.read('uint:8') << 24 | fp.read('uint:8') << 16 |\ fp.read('uint:8') << 8 | fp.read('uint:8') def get_pagesize(fp): fp.pos = 16*8 return fp.read('uint:8') << 8 | fp.read('uint:8') << 16 def set_value(fp, serial_type, values): size = get_fieldsize(serial_type) if serial_type == 0 or serial_type == 10 or serial_type == 11: values.append(None) elif 1 <= serial_type and serial_type <= 6: values.append(fp.read('int:%d' % (size*8))) else: values.append(fp.read('bytes:%d' % size)) return size def get_cellsize(fp, iTab=1): if iTab == 1: fp.pos = (HEADER_OFFSET_PAGE1 + 3)*8 else: fp.pos = (get_pagesize(fp)*(iTab-1)+3)*8 return get2byte(fp) def get_pagetype(fp, iTab=1): if iTab == 1: fp.pos = (HEADER_OFFSET_PAGE1)*8 else: fp.pos = get_pagesize(fp)*(iTab-1)*8 return fp.read('uint:8') def find_record(fp, iCell, iTab=1, ptr_size=0): cell_offset = find_cell_offset(fp, iCell, ptr_size, iTab) fp.pos = (get_pagesize(fp)*(iTab-1) + cell_offset)*8 n = 0 nPayload, tn = getVarint(fp) n += tn rowid, tn = getVarint(fp) cell_hdr_offset = n + tn keyoff, tn = getVarint(fp) stypes = [] tpos = fp.pos pos = (get_pagesize(fp)*(iTab-1) + cell_offset + cell_hdr_offset + keyoff)*8 fp.pos = tpos while fp.pos != pos: serial_type, tn = getVarint(fp) n += tn stypes.append(serial_type) values = [rowid] fsizes = [] for serial_type in stypes: fsizes.append(set_value(fp, serial_type, values)) tot = keyoff for fsize in fsizes: tot += fsize assert(tot == nPayload) return tuple(values) def find_cell_offset(fp, idx, ptr_size, iTab): mask = get_pagesize(fp) - 0x01 if iTab == 1: celloffset = HEADER_OFFSET_PAGE1 + 8 + ptr_size else: celloffset = get_pagesize(fp)*(iTab-1) + 8 + ptr_size fp.pos = (celloffset + idx*2)*8 return mask & get2byte(fp) def getVarint(fp): p = [] v = fp.read('uint:8') if not (v & 0x80): return v, 1 p.append(fp.read('uint:8')) if not (p[0] & 0x80): v &= 0x7f v <<= 7 v |= p[0] return v, 2 p.append(fp.read('uint:8')) if not (p[1] & 0x80): v &= 0x7f v <<= 14 v |= (p[0] & 0x7f) << 7 | p[1] return v, 3 raise Exception('too long') SIZE = [0,1,2,3,4,6,8,8,0,0,0,0] def get_fieldsize(serial_type): if serial_type >= 12: return (serial_type-12)/2 else: return SIZE[serial_type]; TABLES = {'sqlite_master':1} def tables_add(row): if row[1] == 'table': TABLES[row[2]] = row[4] def printf(row): print row def read_db(fp, iTab, funcp): if(INTKEY|LEAF_DATA|LEAF == get_pagetype(fp, iTab)): for idx in range(get_cellsize(fp, iTab)): funcp(find_record(fp, idx, iTab)) else: nCells = [] children = [] for idx in range(get_cellsize(fp, iTab)): fp.pos = (get_pagesize(fp)*(iTab-1)+find_cell_offset(fp, idx, 4, iTab))*8 leaf_child = get4byte(fp) children.append(leaf_child) nCells.append(get_cellsize(fp, leaf_child)) # right-child if iTab == 1: fp.pos = (HEADER_OFFSET_PAGE1 + 8)*8 else: fp.pos = (get_pagesize(fp)*(iTab-1) + 8)*8 right_child = get4byte(fp) nCells.append(get_cellsize(fp, right_child)) children.append(right_child) for idx, iTab in enumerate(children): for iCell in range(nCells[idx]): funcp(find_record(fp, iCell, iTab)) def init_reader(fname): iTab = 1 fp = bitstring.ConstBitStream(filename=fname) read_db(fp, iTab, tables_add) return fp import sys if __name__ == '__main__': argc = len(sys.argv) if argc < 2 or argc > 3: print "usage:%s <dabasefile> <table>?" % sys.argv[0] sys.exit(1) fp = init_reader(sys.argv[1]) iTab = 1 if argc == 3: iTab = TABLES[sys.argv[2]] read_db(fp, iTab, printf)