Completely rewrote the table sampling code to work with multiple tables and cleaned it up significantly in the process.
This commit is contained in:
parent
e32d3dd0d0
commit
b55784c099
1 changed file with 24 additions and 40 deletions
|
@@ -23,49 +23,33 @@ def sample(size):
|
|||
conn = MySQLdb.connect(configuration.DB_HOSTNAME,configuration.DB_USERNAME,configuration.DB_PASSWORD,configuration.DB_NAME)
|
||||
curs = conn.cursor()
|
||||
|
||||
#find the maximum game_id in the DB
|
||||
curs.execute("SELECT MAX(game_id) FROM GAMES")
|
||||
curs.execute("SELECT DISTINCT `table` FROM _wsviews")
|
||||
results = curs.fetchall()
|
||||
max_id = results[0][0]
|
||||
|
||||
#look for an existing table with this sample size and drop it if it exists, then create a new one
|
||||
tblname = configuration.DB_TABLE_PREFIX+TBLSTRING+str(size)
|
||||
curs.execute("SELECT count(*) FROM information_schema.tables WHERE table_schema = 'corn' AND table_name = '"+tblname+"'")
|
||||
results = curs.fetchall()
|
||||
exists = results[0][0] == 1
|
||||
if exists:
|
||||
curs.execute("DROP TABLE IF EXISTS "+tblname)
|
||||
curs.execute("""
|
||||
CREATE TABLE `"""+tblname+"""` (
|
||||
`game_id` int(11) NOT NULL auto_increment,
|
||||
`timestamp` datetime NOT NULL,
|
||||
`user_id` char(32) NOT NULL,
|
||||
`serial` char(18) NOT NULL,
|
||||
`platform` char(8) default NULL,
|
||||
`version` char(14) default NULL,
|
||||
`campaign` char(40) default NULL,
|
||||
`difficulty` char(20) default NULL,
|
||||
`gold` int(11) default NULL,
|
||||
`turns` int(11) default NULL,
|
||||
`scenario` char(40) default NULL,
|
||||
`start_turn` int(11) default NULL,
|
||||
`time` int(11) default NULL,
|
||||
`result` enum('victory','defeat','quit') default NULL,
|
||||
`end_time` int(11) default NULL,
|
||||
`end_gold` int(11) default NULL,
|
||||
`end_turn` int(11) default NULL,
|
||||
PRIMARY KEY (`game_id`)
|
||||
) ENGINE=MyISAM AUTO_INCREMENT=2450740 DEFAULT CHARSET=utf8 """)
|
||||
|
||||
#randomly pick size number of entries from the main DB and put them into this sample
|
||||
choices = random.sample(range(1,max_id),size)
|
||||
for c in choices:
|
||||
curs.execute("SELECT * FROM GAMES WHERE `game_id`=%s",c)
|
||||
for tbl in results:
|
||||
tbl = tbl[0]
|
||||
|
||||
sample_tbl = "%s%s%s%d" % (configuration.DB_TABLE_PREFIX,TBLSTRING,tbl,size)
|
||||
curs.execute("DROP TABLE IF EXISTS %s" % (sample_tbl))
|
||||
curs.execute("CREATE TABLE %s LIKE %s" % (sample_tbl,tbl))
|
||||
|
||||
curs.execute("SELECT MAX(game_id) FROM %s" % (tbl,)) #we assume game_id is always the primary key
|
||||
results = curs.fetchall()
|
||||
if len(results) != 0:
|
||||
#print results[0]
|
||||
curs.execute("""INSERT INTO %s (game_id,timestamp,user_id,serial,platform,version,
|
||||
campaign,difficulty,gold,turns,scenario,start_turn,time,result,end_time,end_gold,end_turn) VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)""",results[0])
|
||||
max_id = int(results[0][0])
|
||||
|
||||
if max_id < size:
|
||||
size = max_id
|
||||
|
||||
choices = random.sample(range(0,max_id),size)
|
||||
inserts = 0
|
||||
for c in choices:
|
||||
curs.execute("SELECT * FROM %s WHERE game_id=%d" % (tbl,c))
|
||||
results = curs.fetchall()
|
||||
if len(results) != 0:
|
||||
query = "INSERT INTO %s SELECT * FROM %s WHERE game_id=%d" % (sample_tbl,tbl,c)
|
||||
curs.execute(query)
|
||||
conn.commit()
|
||||
conn.close()
|
||||
|
||||
sample(10000)
|
||||
sample(100000)
|
||||
|
|
Loading…
Add table
Reference in a new issue