-
Notifications
You must be signed in to change notification settings - Fork 6
/
so-import
executable file
·36 lines (24 loc) · 947 Bytes
/
so-import
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
#!/usr/bin/env python
import xml.parsers.expat
import pymongo
from pymongo import Connection
# Import script body: for every data file listed below, stream
# "<name>.xml" through expat and insert each element's attribute dict as
# one document into the MongoDB collection of the same name.

# Each of our data files; the same names are used as collection names.
files = ["badges", "comments", "posts", "users", "votes"]

connection = Connection()
# Name our database, can change this for Server Fault/etc..
db = connection['stackoverflow']

# NOTE: the loop variable is deliberately not called `file`, which would
# shadow the Python 2 builtin of the same name.
for dataset in files:
    # Single-argument print(...) produces identical output on Python 2
    # (parenthesised expression) and Python 3 (function call).
    print("init import: %s.xml" % dataset)
    collection = db[dataset]

    def start_element(name, attrs):
        """Insert one XML element's attributes as a MongoDB document.

        Registered as the expat StartElementHandler, so it is invoked for
        every opening tag in the file. `name` (the tag name) is unused;
        `attrs` is the element's attribute dict and is inserted as-is,
        even when it is empty.
        """
        # Use SO entry id instead of the autogenerated mongo one.
        if u'Id' in attrs:
            attrs['_id'] = attrs.pop(u'Id')
        # Attributes are already dicts of the right format, how convenient!
        collection.insert(attrs)

    parser = xml.parsers.expat.ParserCreate()
    parser.StartElementHandler = start_element
    # Binary mode: expat decodes the bytes itself. The `with` block makes
    # sure the file handle is closed even if parsing raises.
    with open(dataset + ".xml", "rb") as xml_file:
        parser.ParseFile(xml_file)
    # Report the collection that was just filled (previously this printed
    # the whole `files` list by mistake).
    print("imported %s %s" % (collection.count(), dataset))
    print("finished import: %s.xml" % dataset)