Hide
Bored? Check out the Recent Activity on Siafoo Join Siafoo Now or Learn More

Simple PostgreSQL index test code Atom Feed 0

In Brief This is a somewhat specialized script for testing the performance of various index configurations in a PostgreSQL database. It was written for the GeoIP data loading article (See Loading the GeoIP City database into PostgreSQL) but can probably be generalized or customized to test index configurations for some other database.... more
# 's
  1import datetime, csv, pickle, psycopg2, psycopg2.extensions, sys, time
2
3from subprocess import check_call
4from load_geoip import GeoIPDataLoader
5from numpy import *
6from pylab import *
7
8MAIN_DB = "dbname='sample_db' user='geoip' host='localhost'"
9
10# Load the random start_ip data
11random_starts = [int(b) for b in open('random_ips.txt').read().split('\n') if b.isdigit()]
12
13index_data = [
14 {'name':'No index',
15 'ix_name':'no indexes',
16 'load_time':0,
17 'index_time':0,
18 'test_time':0,
19 'index_size':0
20 },
21 {'name':'Start_IP Index',
22 'ix_name':'start',
23 'ddl':''' CREATE INDEX ix_start_ip ON blocks USING btree (start_ip) WITH (FILLFACTOR=100); ''',
24 },
25 {'name':'End_IP Index',
26 'ix_name':'end',
27 'ddl':''' CREATE INDEX ix_end_ip ON blocks USING btree (end_ip) WITH (FILLFACTOR=100); ''',
28 },
29 {'name':'Start-End Index',
30 'ix_name':'start_end',
31 'ddl':''' CREATE INDEX ix_start_end_ip ON blocks USING btree (start_ip, end_ip) WITH (FILLFACTOR=100); ''',
32 },
33 {'name':'End-Start Index',
34 'ix_name':'end_start',
35 'ddl':''' CREATE INDEX ix_end_start_ip ON blocks USING btree (end_ip, start_ip) WITH (FILLFACTOR=100); ''',
36 },
37 {'name':'All Indexes',
38 'ix_name':'all indexes',
39 'ddl':'''
40 CREATE INDEX ix_start_ip ON blocks USING btree (start_ip) WITH (FILLFACTOR=100);
41 CREATE INDEX ix_end_ip ON blocks USING btree (end_ip) WITH (FILLFACTOR=100);
42 CREATE INDEX ix_start_end_ip ON blocks USING btree (start_ip, end_ip) WITH (FILLFACTOR=100);
43 CREATE INDEX ix_end_start_ip ON blocks USING btree (end_ip, start_ip) WITH (FILLFACTOR=100);
44 '''
45 },
46 {'name':'Start and End Indexes',
47 'ix_name':'start+end',
48 'ddl':'''
49 CREATE INDEX ix_start_ip ON blocks USING btree (start_ip) WITH (FILLFACTOR=100);
50 CREATE INDEX ix_end_ip ON blocks USING btree (end_ip) WITH (FILLFACTOR=100);
51 '''
52 },
53 {'name':'Start-End and End-Start Indexes',
54 'ix_name':'start_end+end_start',
55 'ddl':'''
56 CREATE INDEX ix_start_end_ip ON blocks USING btree (start_ip, end_ip) WITH (FILLFACTOR=100);
57 CREATE INDEX ix_end_start_ip ON blocks USING btree (end_ip, start_ip) WITH (FILLFACTOR=100);
58 '''
59 },
60 {'name':'Start and End-Start Indexes',
61 'ix_name':'start+end_start',
62 'ddl':'''
63 CREATE INDEX ix_start_ip ON blocks USING btree (start_ip) WITH (FILLFACTOR=100);
64 CREATE INDEX ix_end_start_ip ON blocks USING btree (end_ip, start_ip) WITH (FILLFACTOR=100);
65 '''
66 },
67 {'name':'Start-End and End Indexes',
68 'ix_name':'start_end+end',
69 'ddl':'''
70 CREATE INDEX ix_start_end_ip ON blocks USING btree (start_ip, end_ip) WITH (FILLFACTOR=100);
71 CREATE INDEX ix_end_ip ON blocks USING btree (end_ip) WITH (FILLFACTOR=100);
72 '''
73 },
74 {'name':'Start and Start-End Indexes',
75 'ix_name':'start+start_end',
76 'ddl':'''
77 CREATE INDEX ix_start_ip ON blocks USING btree (start_ip) WITH (FILLFACTOR=100);
78 CREATE INDEX ix_start_end_ip ON blocks USING btree (start_ip, end_ip) WITH (FILLFACTOR=100);
79
80 '''
81 },
82 {'name':'End and End-Start Indexes',
83 'ix_name':'end+end_start',
84 'ddl':'''
85 CREATE INDEX ix_end_ip ON blocks USING btree (end_ip) WITH (FILLFACTOR=100);
86 CREATE INDEX ix_end_start_ip ON blocks USING btree (end_ip, start_ip) WITH (FILLFACTOR=100);
87 '''
88 },
89
90 {'name':'Start and End and Start-End',
91 'ix_name':'start_end+start_end',
92 'ddl':'''
93 CREATE INDEX ix_start_ip ON blocks USING btree (start_ip) WITH (FILLFACTOR=100);
94 CREATE INDEX ix_end_ip ON blocks USING btree (end_ip) WITH (FILLFACTOR=100);
95 CREATE INDEX ix_start_end_ip ON blocks USING btree (start_ip, end_ip) WITH (FILLFACTOR=100);
96 '''
97 },
98 {'name':'Start and End and End-Start',
99 'ix_name':'start+end+end_start',
100 'ddl':'''
101 CREATE INDEX ix_start_ip ON blocks USING btree (start_ip) WITH (FILLFACTOR=100);
102 CREATE INDEX ix_end_ip ON blocks USING btree (end_ip) WITH (FILLFACTOR=100);
103 CREATE INDEX ix_end_start_ip ON blocks USING btree (end_ip, start_ip) WITH (FILLFACTOR=100);
104 '''
105 },
106 {'name':'End and Start-End and End-Start',
107 'ix_name':'end+start_end+end_start',
108 'ddl':'''
109 CREATE INDEX ix_end_ip ON blocks USING btree (end_ip) WITH (FILLFACTOR=100);
110 CREATE INDEX ix_start_end_ip ON blocks USING btree (start_ip, end_ip) WITH (FILLFACTOR=100);
111 CREATE INDEX ix_end_start_ip ON blocks USING btree (end_ip, start_ip) WITH (FILLFACTOR=100);
112 '''
113 },
114
115]
116
117
118def drop_create(db_name):
119
120 # Connect to the main database so we can drop/create stuff
121 con = psycopg2.connect(MAIN_DB)
122 con.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
123 cur = con.cursor()
124
125 # Drop database
126 try:
127 cur.execute('DROP DATABASE %s;' % db_name)
128 except:
129 pass
130
131 # Create a database
132 cur.execute('''CREATE DATABASE %s WITH OWNER = geoip ENCODING = 'UTF8';''' % db_name)
133 # Close main database connection
134 con.close()
135
136def init_tests(out_file, data_file):
137
138 for index_def in index_data:
139# db_name = 'geoip_db_%s' % index_def['ix_name']
140 db_name = 'geoip_db_test'
141
142 test_db_dsn = "dbname='%s' user='geoip' host='localhost'" % db_name
143
144 drop_create(db_name)
145
146 print u'Testing \033[1;31m%s\033[1;m' % index_def['name']
147
148 # Load the data
149 loader = GeoIPDataLoader(test_db_dsn)
150
151 loader.create_tables()
152 start_time = time.time()
153 loader.load_data()
154 index_def['load_time'] = (time.time() - start_time)
155
156 # Create an index
157 if index_def.has_key('ddl'):
158# print 'Indexing'
159 start_time = time.time()
160 loader.db_execute(index_def['ddl'])
161 index_def['index_time'] = (time.time() - start_time)
162
163 # Vacuum and analyze
164 loader.vacuum()
165 # Close the connection
166 loader.close()
167
168 # Restart Server
169 retcode = check_call(['/etc/init.d/postgresql', 'restart'])
170 print 'Running Test'
171 # Run test code
172 index_def['test_time'], index_def['all_times'] = run_test(test_db_dsn)
173 # Record results
174 out_file.write('%(name)s\t%(load_time)f\t%(index_time)f\t%(test_time)f\n' % index_def)
175 print '\n'
176
177 pickle.dump(index_data, data_file)
178
179def run_test(db_name):
180
181 con = psycopg2.connect(db_name)
182 cur = con.cursor()
183
184 time_array = zeros(len(random_starts), dtype=float)
185
186 for ix, start in enumerate(random_starts):
187 start_sub_time = time.time()
188 cur.execute('SELECT location_id FROM blocks WHERE %i BETWEEN start_ip AND end_ip' % start)
189 cur.fetchall()
190 time_array[ix] = time.time() - start_sub_time
191
192 con.close()
193
194 return time_array.sum(), time_array
195
196def main(argv):
197
198 out_file = open('results_new.txt', 'wb')
199 out_file.write('Name\tLoad Time\t Indexing Time\t Test Time\n')
200
201 data_file = open('data_file.dat', 'wb')
202
203 #TODO: try / catch and drop the database
204 init_tests(out_file, data_file)
205
206if __name__ == "__main__":
207 main(sys.argv[1:])

This is a somewhat specialized script for testing the performance of various index configurations in a PostgreSQL database. It was written for the GeoIP data loading article (See Loading the GeoIP City database into PostgreSQL) but can probably be generalized or customized to test index configurations for some other database.

See Utility for loading GeoIP data into a PostgreSQL database for the data loading code.

A graph can be generated from the data using the following SciPy code:

# 's
 1import pickle
2from pylab import *
3from scipy import stats
4
5data = pickle.load(open('data_file_full_100.dat', 'r'))
6
7d_stderr = [round(stats.stderr(d['all_times']), 3) for d in data]
8d_mean = [round(stats.mean(d['all_times']), 3) for d in data]
9labels = [d['ix_name'] for d in data]
10
11locations = array(range(len(d_mean))) + 0.5
12barh(locations, d_mean, xerr=d_stderr, height=0.5)
13yticks(locations+0.25, labels)