-
Notifications
You must be signed in to change notification settings - Fork 3
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Migrate script 1 #28
Open
zhzhzoo
wants to merge
5
commits into
CAPU-ENG:master
Choose a base branch
from
zhzhzoo:migrate-script-1
base: master
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Migrate script 1 #28
Changes from 1 commit
Commits
Show all changes
5 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,179 @@ | ||
# -*- coding:utf-8 -*- | ||
import MySQLdb | ||
import config | ||
import uuid | ||
import cStringIO | ||
|
||
''' | ||
Q: 这个脚本都做了什么呀? | ||
A: 它把老的数据库转换成了新的数据库? | ||
|
||
Q: 它都做了哪些转换呀? | ||
A: 它转换了用户,版面,帖子,楼,楼中楼和站内信, | ||
然后链接的转换也是很大的。 | ||
|
||
Q: 用户怎么转的呀? | ||
A: 老的用户表叫 userinfo,新的叫 users。 | ||
没有什么特别要说的,除了我规定 gender 男生是 1 女生是 0。 | ||
|
||
Q: 版面怎么转的呀? | ||
A: 自己看吧~ | ||
|
||
Q: 帖子怎么转的呀? | ||
A: 新 tid 是按老 (bid, tid) 排序后重新编的。 | ||
这里有个坑,就是老的 timestamp(最后更新) 是 unix 时间戳, | ||
我直接用了 from_unixtime 把它转成 MySQL 中的 datetime, | ||
可能会损失精度或者需要考虑时区问题。 | ||
|
||
Q: 楼怎么转的呀? | ||
A: 新 pid 是按老 (bid, tid, pid) 排序后重新编的。 | ||
这里也有坑,坑 1 是两个时间的处理有上一条里说的问题。 | ||
坑 2 是帖子签名档原来存的是“引用”现在直接存内容了, | ||
坑 3 是 parse_type 现在取值 'html' 或 'plain'。 | ||
|
||
Q: 楼中楼怎么转的呀? | ||
A: 无可奉告! | ||
|
||
Q: 站内信怎么转的呀? | ||
A: 发送者不是 system 的认为是站内信。时间方面有前述的坑。 | ||
|
||
Q: 嗯好像大部分问题都问完了,然后就剩你说的很大那个, | ||
链接作了哪些替换? | ||
A: 哦这个呀,我们现在做的就是把以下四种格式的链接换成新链接: | ||
http://www.chexie.net/bbs/content/?bid=<bid>&tid=<tid>[&p=<p>] | ||
http://chexie.net/bbs/content/?bid=<bid>&tid=<tid>[&p=<p>] | ||
http://www.chexie.net/cgi-bin/bbs.pl?see=<see>&b=<b>[&p=<p>] | ||
http://chexie.net/cgi-bin/bbs.pl?see=<see>&b=<b>[&p=<p>] | ||
新链接格式是 /thread/<new_tid>/[page/<p>/] | ||
|
||
Q: 嗯好吧好像没什么问题了,中秋快乐 ^_^ | ||
A: 很惭愧,做了一些微小的工作,谢谢大家 Θ..Θ | ||
''' | ||
|
||
# Connect to the NEW database (config.DB_NAME). The old forum data is read
# through the fully qualified `capubbs.` schema on the same server.
db = MySQLdb.connect(host=config.DB_SERVER, passwd=config.DB_PASSWORD, db=config.DB_NAME, user=config.DB_USERNAME)

c = db.cursor()

# Users: capubbs.userinfo -> users. `sex = "男"` is a boolean expression, so
# gender becomes 1 when the old value is "男" (male) and 0 otherwise.
c.execute("""insert into users(username, password, gender, avatar, intro, sig1, sig2, sig3, hobby, qq, mail, registration_date, last_login_time, num_post, num_reply, num_water, num_sign, current_board, user_agent) select username, password, sex = "男", icon, intro, sig1, sig2, sig3, hobby, qq, mail, regdate, lastdate, post, reply, water, sign, nowboard, logininfo from capubbs.userinfo""")

# Boards: capubbs.boardinfo -> boards.
c.execute("""insert into boards(bid, name, invisible) select bid, bbstitle, hide from capubbs.boardinfo""")

# Threads: old threads are keyed by (bid, tid). Add a new_tid column to the OLD
# table and number the rows 1, 2, 3, ... with a user variable over a subquery
# ordered by (bid, tid).
# NOTE(review): this relies on MySQL evaluating @cur_tid in the subquery's
# ORDER BY order - confirm on the target server version.
c.execute("""alter table capubbs.threads add column new_tid integer""")
c.execute("""update capubbs.threads, (select bid, tid, @cur_tid := if(@cur_tid is null, 1, @cur_tid + 1) as new_tid from capubbs.threads order by bid, tid) as ord set threads.new_tid = ord.new_tid where threads.bid = ord.bid and threads.tid = ord.tid""")
# The author is resolved with an inner join (threads whose author has no row in
# users are not copied); the last replier uses a left join, so replyer_uid may
# be NULL. replied_at is converted from a unix timestamp with from_unixtime().
# NOTE(review): the target table is spelled `Threads` here while the other
# targets are lower-case - confirm against the new schema.
c.execute("""insert into Threads(tid, author_uid, bid, title, replyer_uid, num_click, num_reply, good, sticky, created_at, replied_at) select new_tid, author.uid, bid, title, replier.uid, click, threads.reply, threads.extr, top, postdate, from_unixtime(timestamp) from (capubbs.threads as threads join users as author on threads.author = author.username) left join users as replier on replyer = replier.username""")

# Posts: same renumbering scheme, ordered by (bid, tid, pid), stored in new_pid.
c.execute("""alter table capubbs.posts add column new_pid integer""")
c.execute("""update capubbs.posts, (select bid, tid, pid, @cur_pid := if(@cur_pid is null, 1, @cur_pid + 1) as new_pid from capubbs.posts order by bid, tid, pid) as ord set posts.new_pid = ord.new_pid where posts.bid = ord.bid and posts.tid = ord.tid and posts.pid = ord.pid""")
# posts.sig (1/2/3) selects which of the author's sig1/sig2/sig3 is copied into
# signature (NULL for any other value). parse_type is 'html' when ishtml is
# 'YES' and 'plain' otherwise. Both times are converted with from_unixtime().
c.execute("""insert into posts(pid, uid, bid, tid, title, content, created_at, updated_at, signature, ip, parse_type) select new_pid, author.uid, posts.bid, threads.new_tid, posts.title, text, from_unixtime(replytime), from_unixtime(updatetime), case posts.sig when 1 then author.sig1 when 2 then author.sig2 when 3 then author.sig3 end, ip, case ishtml when 'YES' then 'html' else 'plain' end from capubbs.posts join users as author on posts.author = author.username join capubbs.threads on threads.bid = posts.bid and threads.tid = posts.tid""")

# Comments: capubbs.lzl -> comments, attached to the renumbered post through
# lzl.fid = posts.fid; deleted is the negation of the old visible flag.
c.execute("""insert into comments(cid, pid, uid, content, time, deleted) select lzl.id, posts.new_pid, users.uid, lzl.text, from_unixtime(time), !lzl.visible from capubbs.lzl join capubbs.posts on lzl.fid = posts.fid join users on lzl.author = users.username""")

# Private messages: only rows whose sender is not 'system' are copied; both
# deleted flags start at 0.
c.execute("""insert into messages(mid, sender_uid, receiver_uid, content, time, is_read, sender_deleted, receiver_deleted) select id, sender.uid, receiver.uid, text, from_unixtime(time), hasread, 0, 0 from capubbs.messages join users as sender on messages.sender = sender.username join users as receiver on messages.receiver = receiver.username where sender != 'system'""")

# Disabled: migration of 'system' messages into notifications.
#c.execute("""insert into notifications(nid, uid, time, type, pid, is_read) select id, receiver.uid, from_unixtime(time), case messages.text when 'reply' then 1 when 'at' then 2 when 'replylzl' then 3 when 'replylzlreply' then 4 when 'quote' then 5 end, new_pid, hasread from capubbs.messages join users as receiver on messages.receiver = receiver.username join capubbs.posts on messages.rbid = posts.bid and messages.rtid = posts.tid where sender = 'system'""")

c.close()


# Cursors used by the link-rewriting pass below: c2 reads posts, c3 works on
# the scratch table and writes the rewritten post content.
c2 = db.cursor()
c3 = db.cursor()
|
||
def parse_args(s, pos, begin):
    """Parse a ``key=value&key=value`` query string embedded in *s*.

    Scanning starts at index *pos*. Keys and values may contain only
    alphanumeric characters and underscores. Pairs are separated by ``&`` or
    its HTML-escaped form ``&amp;``. Any other character terminates the query
    string.

    Args:
        s: The text being scanned.
        pos: Index of the first character of the query string.
        begin: Opaque value returned unchanged as the first tuple element
            (callers pass the index where the whole URL starts).

    Returns:
        A tuple ``(begin, end, args)``. ``args`` maps each key to its value
        (both strings). ``end`` is one past the terminating character; when
        the input ends before a terminator is seen, the terminator is taken to
        sit at ``len(s)``. ``end - 1`` is therefore always the index of the
        first character that does not belong to the query string.
    """
    args = {}
    key = ''
    value = ''
    in_value = False
    length = len(s)
    while True:
        if pos >= length:
            # End of input. Only a pair whose '=' was seen is recorded; a
            # dangling key is dropped, exactly as it is when any other
            # terminator interrupts a key.
            if in_value:
                args[key] = value
            # Keep the "one past the terminator" contract so that callers
            # computing ``end - 1`` do not re-emit the last URL character.
            pos = length + 1
            break
        ch = s[pos]
        pos += 1
        is_word = ch.isalnum() or ch == '_'
        if not in_value:
            if is_word:
                key += ch
            elif ch == '=':
                in_value = True
            else:
                # Terminator while reading a key: the partial key is dropped.
                break
        elif is_word:
            value += ch
        elif ch == '&':
            # Accept the HTML-escaped separator "&amp;" as well as a bare "&".
            if s[pos:pos + 4] == 'amp;':
                pos += 4
            args[key] = value
            key = ''
            value = ''
            in_value = False
        else:
            # Terminator while reading a value: record the final pair.
            args[key] = value
            break
    return (begin, pos, args)
|
||
def parse_url(s, pattern):
    """Find every occurrence of *pattern* in *s* and parse what follows it.

    Args:
        s: The text to search.
        pattern: The URL prefix to look for; the query string is expected to
            start immediately after it.

    Returns:
        None when *pattern* does not occur in *s*. Otherwise a list with one
        ``parse_args`` result per occurrence, in order of appearance, where
        the ``begin`` element is the index at which the occurrence starts.
    """
    start = s.find(pattern)
    if start < 0:
        return None
    prefix_len = len(pattern)
    found = []
    while start >= 0:
        found.append(parse_args(s, start + prefix_len, start))
        start = s.find(pattern, start + 1)
    return found
|
||
# Name of a scratch table for this run; the random UUID hex makes the name
# unique. tihuan() fills it with the (bid, tid) pairs found in old links and
# joins it against capubbs.threads to obtain the new thread ids.
tbl_name = "tbl_" + uuid.uuid4().hex
# n auto-increments, so "order by n" returns rows in insertion order.
c3.execute("""create table %s (bid integer, tid integer, n integer primary key auto_increment)""" % tbl_name)
|
||
def tihuan(pattern, proc):
    """Rewrite old-forum links beginning with *pattern* in all migrated posts.

    ("tihuan" is pinyin for "replace".) Every post whose content contains
    *pattern* is scanned. Each link found is turned into an old
    ``(bid, tid)`` pair by *proc*, the pairs are resolved to new thread ids
    through the scratch table ``tbl_name`` joined with ``capubbs.threads``,
    and the link is replaced by ``/thread/<new_tid>/`` (followed by
    ``page/<p>/`` when the link carried a non-empty ``p`` argument).

    Links for which *proc* returns a falsy value, and links whose old thread
    has no ``new_tid``, are left untouched.

    Args:
        pattern: URL prefix up to and including the ``?``.
        proc: Callable taking the parsed argument dict of one link and
            returning a ``(bid, tid)`` tuple, or a falsy value to skip it.

    Raises:
        RuntimeError: If the lookup query does not return exactly one row per
            link, in which case positional matching would corrupt posts.
    """
    # Let the driver quote the pattern instead of interpolating it into SQL.
    c2.execute("""select pid, content from posts where locate(%s, content) > 0 order by pid desc""", (pattern,))
    rows = c2.fetchall()

    actions = []   # (post row, [accepted links]) for posts that need work
    lookups = []   # (bid, tid) per accepted link, in the same order
    for row in rows:
        links = parse_url(row[1], pattern)
        if links is None:
            # SQL locate() and str.find() may disagree (e.g. on case);
            # nothing to rewrite in this post.
            continue
        accepted = []
        for link in links:
            key = proc(link[2])
            if key:
                lookups.append(key)
                accepted.append(link)
        if accepted:
            actions.append((row, accepted))
    if not lookups:
        return

    # The scratch table is shared by all calls; clear it so that the rows read
    # back below are exactly this call's lookups, in insertion order.
    c3.execute("""delete from """ + tbl_name)
    c3.executemany("""insert into """ + tbl_name + """(bid, tid) values (%s, %s)""", lookups)
    c3.execute("""select capubbs.threads.new_tid, capubbs.threads.tid, capubbs.threads.bid, n from %s left join capubbs.threads on %s.bid = capubbs.threads.bid and %s.tid = capubbs.threads.tid order by n""" % (tbl_name, tbl_name, tbl_name))
    new_tids = [r[0] for r in c3.fetchall()]
    if len(new_tids) != len(lookups):
        raise RuntimeError(
            "expected %d thread lookups for %r, got %d"
            % (len(lookups), pattern, len(new_tids)))

    updates = []
    rc = 0  # index of the current link within new_tids
    for row, accepted in actions:
        pid = row[0]
        content = row[1]
        pieces = []
        last = 0
        changed = False
        for begin, end, args in accepted:
            new_tid = new_tids[rc]
            rc += 1
            if new_tid is None:
                # Link to a thread that does not exist: keep it verbatim.
                continue
            pieces.append(content[last:begin])
            pieces.append('/thread/%s/' % new_tid)
            if args.get('p'):
                pieces.append('page/%s/' % args['p'])
            # end is one past the terminating character, which must be kept.
            last = end - 1
            changed = True
        if changed:
            pieces.append(content[last:])
            updates.append((''.join(pieces), pid))

    if updates:
        c3.executemany("""update posts set content = %s where pid = %s""", updates)
|
||
def decode26(s):
    """Decode a base-26 string of lowercase letters into a 1-based number.

    Each character contributes ``ord(ch) - ord('a')`` as one base-26 digit
    (so 'a' is 0 and 'z' is 25), most significant digit first, and 1 is added
    to the result: ``'a' -> 1``, ``'b' -> 2``, ``'ba' -> 27``. The empty
    string decodes to 1. Characters are not validated.

    Written as a plain loop rather than with the ``reduce`` builtin, which
    only exists in Python 2.
    """
    total = 0
    for ch in s:
        total = total * 26 + ord(ch) - ord('a')
    return total + 1
|
||
def _content_key(args):
    """Return (bid, tid) for a /bbs/content/ link, or None if either is absent."""
    if ('bid' in args) and ('tid' in args):
        return (args['bid'], args['tid'])
    return None


def _cgi_key(args):
    """Return (b, decoded see) for a /cgi-bin/bbs.pl link, or None.

    The link is skipped when ``b`` is missing or empty or ``see`` is missing.
    ``see`` holds the old tid in the letter encoding that decode26() reverses.
    """
    if ('b' in args) and (args['b'] != '') and ('see' in args):
        return (args['b'], decode26(args['see']))
    return None


# Rewrite the four old link formats (with and without the "www." host prefix).
try:
    for _prefix, _key in (
            ("""http://www.chexie.net/bbs/content/?""", _content_key),
            ("""http://chexie.net/bbs/content/?""", _content_key),
            ("""http://www.chexie.net/cgi-bin/bbs.pl?""", _cgi_key),
            ("""http://chexie.net/cgi-bin/bbs.pl?""", _cgi_key)):
        tihuan(_prefix, _key)
    # Commit explicitly rather than relying on the implicit commit that the
    # DROP TABLE below performs.
    db.commit()
except Exception:
    # Undo the uncommitted rewrites before the DROP TABLE in the finally
    # clause commits implicitly.
    db.rollback()
    raise
finally:
    # Always remove the scratch table and release the cursors and connection,
    # even when a rewrite pass fails.
    c3.execute("""drop table %s""" % tbl_name)
    c2.close()
    c3.close()
    db.close()
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
这些东西写在PR的comment里面啊,不是让你写在代码里:unamused:。
然后把内容整理清楚,不需要用对话模式~