-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlist2DB.py
61 lines (37 loc) · 1018 Bytes
/
list2DB.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# -*- coding: utf-8 -*-
# list2DB.py
import text2list
import ratesAndDB
import collections
import MySQLdb
'''
# build dictionary for the whole words
csdn = {}
zhihu = {}
buptbbs = {}
ustcbbs = {}
baiduQA = {}
othersites = {}
'''
# count how many times each word occurs in one txt file
def getRatesFromList(filename):
    """Count how often each word occurs in the word list read from a file.

    The word list is whatever ``text2list.readtxt(filename)`` returns; it is
    iterated once and every item is tallied.

    Args:
        filename: Passed unchanged to ``text2list.readtxt``.

    Returns:
        A plain ``dict`` mapping each distinct item of the word list to the
        number of times it appears. An empty word list gives ``{}``.
    """
    wordlist = text2list.readtxt(filename)
    # iter() forces Counter to tally the items one by one, exactly like a
    # ``for word in wordlist`` loop would (Counter treats a mapping or None
    # passed directly as a special case).
    counts = collections.Counter(iter(wordlist))
    # Hand back a plain dict so callers receive the same type as before.
    return dict(counts)
# Count word occurrences for each site's text file. Every file is read,
# in this order, before any database write happens.
csdn, zhihu, buptbbs, ustcbbs, baiduQA, othersites = map(
    getRatesFromList,
    (
        "CSDN.txt",
        "zhihu.txt",
        "buptbbs.txt",
        "ustcbbs.txt",
        "baiduQA.txt",
        "othersites.txt",
    ),
)

# Write each dictionary to its own table in database:postana.
for _rates, _table in (
    (csdn, "csdntable"),
    (zhihu, "zhihutable"),
    (buptbbs, "buptbbstable"),
    (ustcbbs, "ustcbbstable"),
    (baiduQA, "baiduQAtable"),
    (othersites, "othersitestable"),
):
    ratesAndDB.in2db(_rates, _table)
# Drop the loop temporaries so the module namespace keeps only the six
# per-site dictionaries.
del _rates, _table