forked from laironald/PythonBase
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathMySQL.py
506 lines (463 loc) · 20.4 KB
/
MySQL.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
def dbAdd(db):
"""
I tire of this code...
"""
return db!=None and "%s." % db or ""
class MySQL:
"""
The following extends sqlite3, a commonly used library for the Patent Team
- remember, in classes "self" refers to the class and is not really a paramater
- table is an optional parameter for many of these functions.
If the variable is not set, it defaults to the table set during initialization
"""
def __init__(self, cfg=None, table="main"):
"""
Initiate the database.
Specify the location of the database and the table.
- Automatically opens the connection
- tbl and table are the same!
"""
self.tbl = table
if cfg==None:
import SQLite
cfg = SQLite.MySQL_cfg(cfg={})
self.cfg = cfg
self.open()
## def optimize(self):
## """
## Some basic optimization stuff w/r http://web.utk.edu/~jplyon/sqlite/SQLite_optimization_FAQ.html
## """
## self.c.executescript("""
## PRAGMA cache_size=2000000;
## PRAGMA synchronous=OFF;
## PRAGMA temp_store=2;
## """)
def getTbl(self, table=None):
if table==None:
return self.tbl
else:
return table
def chgTbl(self, table):
"""
Not too complex. Allows you to change the default table
"""
self.tbl = table
## def commit(self):
## """
## Simply because .conn.commit() is too much work for my simple mind.
## This is often necessary after execute statements.
## Commit the data into the table/database.
## """
## self.conn.commit()
def open(self):
"""
Opens the connection.
"""
import MySQLdb
cfg = self.cfg
self.conn = MySQLdb.connect(host=cfg['host'], user=cfg['user'], passwd=cfg['passwd'], db=cfg['db'])
self.c = self.conn.cursor()
def close(self):
"""
Close the connection. (The database is locked as the sqlite3 API is used)
"""
self.c.close()
self.conn.close()
def fetch(self, table=None, field=None, limit=None, random=False, fetch=True):
"""
This replicates the common function where you return an array of values
associated with a SQLite table
"""
import re
table = self.getTbl(table)
field = (field==None) and "*" or ", ".join(field)
more = "%s %s" % (random and "ORDER BY rand()" or " ",
(limit==None) and " " or ("LIMIT %s" % limit))
query = "SELECT {field} FROM {table} {more}".format(field=field, table=table, more=more)
query = re.sub(" +", " ", query).strip()
if fetch:
self.c.execute(query)
return self.c.fetchall()
else:
self.c.execute(query)
## def attach(self, database, name="db"):
## """
## Attaches a database and defaults it as db.
## """
## try:
## self.c.execute("ATTACH DATABASE '%s' AS %s" % (database, name))
## except:
## self.c.execute("DETACH DATABASE %s" % (name))
## self.c.execute("ATTACH DATABASE '%s' AS %s" % (database, name))
## def detach(self, name="db"):
## """
## Detaches a database (default, db).
## """
## self.c.execute("DETACH DATABASE %s" % (name))
## def vacuum(self, name="db"):
## """
## Vacuum the database.
## (When indices and table are added and dropped, the database stays the same size.)
## """
## self.c.execute("vacuum")
## self.commit()
## def csvInput(self, fname):
## def decode(lst):
## import unicodedata
## try:
## return [x.decode("iso-8859-1") for x in lst]
## except:
## print lst
## return lst
## import csv
## f = open(fname, "rb")
## t = [decode(x) for x in csv.reader(f)]
## f.close()
## return t
def tables(self, lookup=None, db=None):
"""
Returns a list of table names that exist within the database.
If lookup is specified, does that table exist within the list of tables?
"""
db = (db == None and self.cfg['db'] or db)
self.c.execute("SHOW TABLES FROM %s" % db)
retList = [x[0] for x in self.c.fetchall()]
if lookup==None:
return retList
else:
return lookup in retList
def indexes(self, table=None, lookup=None, db=None, search=None):
"""
Returns a list of index names that exist within the database
If lookup is specified, does that index exist within the list of indexes?
If search is specified, returns a list of indices that match the criteria
"""
import re
table = self.getTbl(table)
self.c.execute("SHOW INDEXES FROM %s" % table)
retList = list(set([x[2] for x in self.c.fetchall()]))
if search!=None: #returns a range of numbers related to the search term
nums = [(lambda x:len(x)>0 and int(x[0]) or 1)(re.findall('[0-9]+', x)) for x in retList if x.find(search)>=0]
if len(nums)==0:
return [0, 0]
else:
return [min(nums), max(nums)]
elif lookup!=None:
return lookup.lower() in retList
else:
return retList
def count(self, table=None):
"""
I like these basic reports where I am curious about the size of the default table.
And for kicks- why not throw in the current time.
"""
import datetime
table = self.getTbl(table)
if self.tables(lookup=table):
self.c.execute("SELECT count(*) FROM %s" % table)
cnt = self.c.fetchone()[0]
print datetime.datetime.now(), cnt
return cnt
else:
print datetime.datetime.now()
return 0
def addSQL(self, data, db=None, table=None, header=False, field=True, insert=""):
"""
This serves three functions depending the type of data (flat CSV, pure data, existing table)
If data is a link to a database -- load the data into CSV
- If data = table name, use the data as a base
- If table doesn't exist, replicate ELSE Insert
- Else
- If data = filename (CSV) ... Generate table using quickSQL (header toggle is for this one)
- ElseIf data = data, sounds good...!
- Insert data
Field=True, defaults that field names must match 1-1
"""
import types
table = self.getTbl(table)
## if type(data)==types.StringType and not self.tables(db=db, lookup=table):
## data = self.csvInput(data)
## if insert!="":
## insert = "OR %s" % insert
if type(data)==types.StringType and self.tables(db=db, lookup=data):
if self.tables(db=db, lookup=table):
self.replicate(tableTo=table, table=data, db=db)
if field:
fieldTo = set(self.columns(table=table, output=False))
fieldFr = set(self.columns(table=data, db=db, output=False))
colList = ", ".join(list(fieldTo & fieldFr))
self.c.execute("INSERT %s INTO %s (%s) SELECT %s FROM %s%s" % (insert, table, colList, colList, dbAdd(db), data))
else:
self.c.execute("INSERT %s INTO %s SELECT * FROM %s%s" % (insert, table, dbAdd(db), data))
## #if table exists ... just add data into it
## elif self.tables(db=db, lookup=table):
## self.c.executemany("INSERT %s INTO %s VALUES (%s)" % (insert, table, ", ".join(["?"]*len(data[0]))), data[int(header):])
## else:
## self.quickSQL(data, table=table, header=header)
## #need to make this so the variables are more flexible
## def quickSQL(self, data, table=None, override=False, header=False, typescan=50, typeList=[]):
## import re, types
## table = self.getTbl(table)
## if override:
## self.c.execute("DROP TABLE IF EXISTS %s" % table)
## elif self.tables(db=None, lookup=table):
## return
##
## if header:
## headLst = []
## for x in data[0]:
## headLst.append(re.sub("[()!@#$%^&*'-]+", "", x).replace(" ", "_"))
## if headLst[-1] in headLst[:-1]:
## headLst[-1]+=str(headLst[:-1].count(headLst[-1])+1)
##
## tList = []
## for i,x in enumerate(data[1]):
## if str(typeList).upper().find("%s " % data[0][i].upper())<0:
## cType = {types.StringType:"VARCHAR", types.UnicodeType:"VARCHAR", types.IntType:"INTEGER", types.FloatType: "REAL"}[type(x)]
## if type(typescan)==types.IntType and cType=="VARCHAR":
## least = 2
## ints = 1
## for j in range(1, min(typescan+1, len(data))):
## if type(data[j][i])==types.StringType or type(data[j][i])==types.UnicodeType:
## if re.sub(r"[-,.]", "", data[j][i]).isdigit():
## if len(re.findall(r"[.]", data[j][i]))==0: pass
## elif len(re.findall(r"[.]", data[j][i]))==1: ints = 0
## else: least = 0; break
## else: least = 0; break
## cType = {0:"VARCHAR", 1:"INTEGER", 2:"REAL"}[max(least-ints, 0)]
## if header:
## tList.append("%s %s" % (headLst[i], cType))
## else:
## tList.append("v%d %s" % (i, cType))
## else:
## tList.extend([y for y in typeList if y.upper().find("%s " % data[0][i].upper())==0])
##
## self.c.execute("CREATE TABLE IF NOT EXISTS %s (%s)" % (table, ", ".join(tList)))
## if header==False:
## self.c.executemany("INSERT INTO %s VALUES (%s)" % (table, ", ".join(["?"]*len(data[0]))), data)
## else:
## self.c.executemany("INSERT INTO %s VALUES (%s)" % (table, ", ".join(["?"]*len(data[0]))), data[1:])
## self.conn.commit()
def columns(self, table=None, output=True, db=None, Type=False, lower=True):
table = self.getTbl(table)
self.c.execute("DESCRIBE %s" % table)
if output:
for x in self.c.fetchall():
print x
else:
if Type:
if lower:
return dict([[x[0].lower(), x[1]] for x in self.c.fetchall()])
else:
return dict([x[:2] for x in self.c.fetchall()])
else:
return [x[0] for x in self.c.fetchall()]
def rename(self, fromKey, toKey, table=None):
table = self.getTbl(table)
import types
if type(fromKey)!=types.ListType:
fromKey = [fromKey]
toKey = [toKey]
colType = self.columns(table=table, output=False, Type=True)
for i in range(0, len(fromKey)):
try:
self.c.execute("ALTER TABLE %s CHANGE COLUMN %s %s %s" % (table, fromKey[i], toKey[i], colType[fromKey[i].lower()]))
except:
x=0
def drop(self, keys, table=None):
import types
table = self.getTbl(table)
if type(keys)!=types.ListType:
keys = [keys]
for x in keys:
try:
self.c.execute("ALTER TABLE %s DROP COLUMN %s" % (table, x))
except:
y=0
def add(self, key, typ=None, src=None, table=None):
table = self.getTbl(table)
import types
if src == None:
if type(key)!=types.ListType:
key = [key]
typ = [typ]
for i in range(0, len(key)):
try:
self.c.execute("ALTER TABLE %s ADD COLUMN %s %s" % (table, key[i], typ[i]))
except:
x=0
else:
key = [type(x)==types.StringType and [x,x] or x for x in key]
colType = self.columns(table=src, output=False, Type=True)
for i,x in enumerate(key):
self.add(x[0], typ!=None and typ[i] or colType[x[1].lower()])
## def delete(self, table=None): #delete table
## table = self.getTbl(table)
## self.c.execute("DROP TABLE IF EXISTS %s" % table)
## self.conn.commit()
def replicate(self, tableTo=None, table=None, db=None):
"""
Replicates the basic structure of another table
"""
table = self.getTbl(table)
try:
self.c.execute("CREATE TABLE %s LIKE %s" % (tableTo, table))
except:
y=0
def baseIndex(self, table=None, idx=None, db=None, full=False):
"""
Boils down a Index to its most basic form.
Throw in an idx (string) to process that specific SQL.
full basically returns the full representation of an index
"""
import re
table = self.getTbl(table)
self.c.execute("SHOW INDEXES FROM %s" % table)
idxLst = {}
for x in self.c.fetchall():
if x[2] not in idxLst:
idxLst[x[2]] = [[], []]
idxLst[x[2]][0].append(x[4].lower())
idxLst[x[2]][1] = x[1]==0 and "unique " or ""
if idx==None:
sqls = [["create %sindex %s on %s (%s)" % (x[1][1], x[0], table, ", ".join(x[1][0])),] for x in idxLst.items()]
else:
sqls = [[idx,]]
if full:
return sqls
#simplify the list
idxLst = [re.sub(" +", " ", re.sub(", ", ",", re.sub(re.compile('INDEX .*? ON', re.I), 'INDEX ON', x[0]))).lower() for x in sqls if x[0]!=None]
#reorders the keys (so sequentiality matters!)
idxLst = [re.sub("[(].*?[)]", "(%s)" % ",".join(sorted(re.findall("[(](.*?)[)]", x)[0].split(','))), x) for x in idxLst]
if idx==None:
return idxLst
else:
return idxLst[0]
def index(self, keys, index=None, table=None, db=None, unique=False):
"""
Hey Amy! Look, documentation
Index is for index name
Indicates if Index is created with Index name or None
"""
import re
table = self.getTbl(table)
if index==None:
index = [(lambda x:len(x)>0 and int(x[0]) or 1)(re.findall('[0-9]+', x)) for x in self.indexes(db=db) if x.find('idx_idx')>=0]
index = len(index)==0 and "idx_idx" or "idx_idx%d" % (max(index)+1)
#only create indexes if its necessary! (it doens't already exist)
idxA = self.baseIndex(table=table)
idxSQL = "CREATE %sINDEX %s%s ON %s (%s)" % (unique and "UNIQUE " or "", dbAdd(db), index, table, ",".join(keys))
if self.baseIndex(idx=idxSQL, table=table, db=db) not in idxA:
self.c.execute(idxSQL)
return "%s%s" % (dbAdd(db), index)
else:
return None
#----- MERGE -----#
def merge(self, key, on, tableFrom, keyType=None, table=None):
"""
Matches the on variables from two tables and updates the key values
Example of usage: (its on the table perspective, so that's first)
On and Keys take an iterable with values of string or list:
ie.
key = ["ed", ["eric", "amy"]]
on = ["ron", ["ron1", "amy"]]
keyType = ['VARCHAR', 'VARCHAR'] #if nothing will just be blanks
All together:
.add('ed', 'VARCHAR')
.add('eric', 'VARCHAR')
c.executemany("UPDATE table SET ed=?, eric=? WHERE ron=? AND ron1=?",
c.execute("SELECT b.ed, b.amy, b.ron, b.amy
FROM table AS a INNER JOIN tableFrom AS b
ON a.ron=b.ron AND a.ron1=b.amy").fetchall())
"""
import types, datetime
table = self.getTbl(table)
key = [type(x)==types.StringType and [x,x] or x for x in key]
on = [type(x)==types.StringType and [x,x] or x for x in on]
self.add(key, typ=keyType, src=tableFrom)
## colType = self.columns(table=tableFrom, output=False, Type=True)
## for i,x in enumerate(key):
## self.add(x[0], keyType!=None and keyType[i] or colType[x[1].lower()])
def huggleMe(lst, sep=","):
return ("%s " % sep).join(["a.%s = b.%s" % tuple(x) for x in lst])
idxT = self.index(keys=[x[0] for x in on], table=table)
idxF = self.index(keys=[x[1] for x in on], table=tableFrom)
## print("""
## UPDATE %s a, %s b
## SET %s
## WHERE %s;
## """ % (table, tableFrom, huggleMe(key), huggleMe(on, sep="AND")))
self.c.execute("""
UPDATE %s a, %s b
SET %s
WHERE %s;
""" % (table, tableFrom, huggleMe(key), huggleMe(on, sep="AND")))
if idxT != None:
self.c.execute("DROP INDEX %s ON %s" % (idxT, table))
if idxF != None:
self.c.execute("DROP INDEX %s ON %s" % (idxF, tableFrom))
#----- OUTPUTS -----#
def sqlite_output(self, db=":memory:", table=None, tableFrom=None, sLite=None):
"""
output a new SQLite table
sLite is for an existing SQLite table
"""
import SQLite
table = self.getTbl(table)
if tableFrom==None:
tableFrom = table
if sLite==None:
s = SQLite.SQLite(db=db, table=table)
else:
s = sLite
s.chgTbl(table)
query = "CREATE TABLE {table} ({fields})".format(table=table, fields=", ".join(self.columns(table=table, output=False)))
s.c.execute(query)
for query in self.baseIndex(table=table, full=True):
s.c.execute(query[0])
self.fetch(table=table, fetch=False)
while True:
item = self.c.fetchone()
if not item:
break
s.c.execute("INSERT INTO {table} VALUES ({ques})".format(table=table, ques=", ".join(["?"]*len(item))), item)
s.conn.commit()
return s
## """
## EXPERIMENTAL FUNCTIONS
## """
## # IGRAPH / VISUALIZATION RELATED FUNCTIONS, very very preliminary
##
## def igraph(self, where, table=None,
## vx="Invnum_N", ed="Patent", order="AppYear",
## va=", Lastname||', '||Firstname AS Name, City||'-'||State||'-'||Country AS Loc, Assignee, AsgNum",
## ea=", a.AppYear AS AppYear", eg=', a.AppYear'):
## import math, datetime, senGraph
## table = self.getTbl(table)
## tab = senGraph.senTab()
## self.c.executescript("""
## DROP TABLE IF EXISTS G0;
## DROP TABLE IF EXISTS vx0;
## DROP TABLE IF EXISTS ed0;
## CREATE TEMP TABLE G0 AS
## SELECT * FROM %s WHERE %s ORDER BY %s;
## CREATE INDEX G_id ON G0 (%s);
## CREATE INDEX G_ed ON G0 (%s, %s);
## CREATE TEMPORARY TABLE vx0 AS
## SELECT %s, count(*) AS Patents %s FROM G0
## GROUP BY %s;
## CREATE INDEX vx_id ON vx0 (%s);
## CREATE TEMPORARY TABLE ed0 AS
## SELECT a.%s, b.%s, a.%s AS hId, b.%s AS tId, count(*) AS Weight %s
## FROM G0 AS a INNER JOIN G0 AS b
## ON a.%s=b.%s AND a.%s<b.%s
## GROUP BY a.%s, b.%s %s;
## """ % (table, where, order, ed, vx, ed, vx, va, vx, vx,
## vx, vx, vx, vx, ea, ed, ed, vx, vx, vx, vx, eg))
##
## tab.vList = self.c.execute("SELECT * FROM vx0").fetchall()
## tab.vlst = self.columns(table="vx0", output=False)[1:]
## tab.eList = self.c.execute("SELECT * FROM ed0").fetchall()
## tab.elst = self.columns(table="ed0", output=False)[2:]
## s = senGraph.senGraph(tab, "vertex")
## return s