-
Notifications
You must be signed in to change notification settings - Fork 0
/
parseAnnotations.py
69 lines (61 loc) · 2.94 KB
/
parseAnnotations.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# -*- coding: utf-8 -*-
"""
Created on Fri Jan 17 15:16:28 2014
@author: proto
"""
import pyparsing as pyp
def syntaxDefinition():
    """
    Assemble the pyparsing grammar for a documentation header block.

    The header is zero or more ``#@tag: value`` entries enclosed between two
    ``###`` markers. A value is either one single-quoted string or a
    bracketed, delimited list of single-quoted strings. Quoted strings may
    contain alphanumerics and the characters ``._@ :/``. Markers, the ``#@``
    prefix, the colon, quotes and brackets are suppressed from the results.

    Returns the complete parser element, named 'bnglDocumentation'; every
    entry is grouped and named 'documentationElement'.
    """
    def hidden(text):
        # Match a fixed string but keep it out of the parse results.
        return pyp.Suppress(pyp.Literal(text))

    marker = hidden('###')
    tagName = pyp.Word(pyp.alphanums + "_")
    quoted = hidden("'") + pyp.Word(pyp.alphanums + "._@ :/") + hidden("'")

    # '#@' introduces the tag name of an entry.
    tagPrefix = hidden('#@') + tagName.setResultsName('tags')

    # Bracketed list of quoted strings, grouped so it arrives as one token.
    quotedItems = pyp.delimitedList(quoted.setResultsName('definitionElement'))
    bracketedList = (
        hidden('[')
        + pyp.Group(quotedItems).setResultsName('definitionList')
        + hidden(']')
    )

    singleValue = quoted.setResultsName('definitionElement')
    entryBody = tagPrefix + hidden(':') + (singleValue | bracketedList)
    entry = pyp.Group(entryBody).setResultsName('documentationElement')

    header = marker + pyp.ZeroOrMore(entry) + marker
    return header.setResultsName('bnglDocumentation')
def bitSyntaxDefinition():
    """
    Assemble the pyparsing grammar for stand-alone documentation entries.

    Each entry is a ``###`` marker followed by ``@tag: value``, where the
    value is either one single-quoted string or a bracketed, delimited list
    of single-quoted strings. Quoted strings may contain alphanumerics and
    the characters ``._@ :/``. The marker, ``@``, colon, quotes and brackets
    are suppressed from the results.

    Returns a parser element that matches one or more such entries. The
    entry body is named 'documentationElement' but is not grouped.
    """
    def hidden(text):
        # Match a fixed string but keep it out of the parse results.
        return pyp.Suppress(pyp.Literal(text))

    marker = hidden('###')
    tagName = pyp.Word(pyp.alphanums + "_")
    quoted = hidden("'") + pyp.Word(pyp.alphanums + "._@ :/") + hidden("'")

    # '@' introduces the tag name of an entry.
    tagPrefix = hidden('@') + tagName.setResultsName('tags')

    # Bracketed list of quoted strings, grouped so it arrives as one token.
    quotedItems = pyp.delimitedList(quoted.setResultsName('definitionElement'))
    bracketedList = (
        hidden('[')
        + pyp.Group(quotedItems).setResultsName('definitionList')
        + hidden(']')
    )

    singleValue = quoted.setResultsName('definitionElement')
    entryBody = (tagPrefix + hidden(':') + (singleValue | bracketedList))
    namedBody = entryBody.setResultsName('documentationElement')

    # The marker is already suppressed; it is wrapped once more here exactly
    # as the grammar has always been built.
    entry = pyp.Suppress(marker) + namedBody
    return pyp.OneOrMore(entry)
def dict2DatabaseFormat(tagsDict):
    """
    Rearrange a parsed tag dictionary into the layout used for storage.

    Parameters:
        tagsDict: mapping of tag name to value, where a value is either a
            string or an iterable of items.

    Returns a new dict with:
        'structuredTags': list collecting the items of every non-string value
        'notes': the value of a string-valued tag, or [] if there is none
        'author': the 'creatorName' value (only present if that tag exists)
    The 'creatorEmail' tag is dropped.
    """
    # Python 2 has two text types (str and unicode) sharing the basestring
    # base class; Python 3 only has str and defines neither of the others.
    try:
        textTypes = basestring
    except NameError:
        textTypes = str

    finalDict = {'structuredTags': [], 'notes': []}
    for element in tagsDict:
        value = tagsDict[element]
        if element == 'creatorName':
            finalDict['author'] = value
        elif element != 'creatorEmail':
            if isinstance(value, textTypes):
                # NOTE(review): this replaces the initial empty list with a
                # plain string, and a later string-valued tag overwrites an
                # earlier one. Kept as-is because callers may rely on it.
                finalDict['notes'] = value
            else:
                finalDict['structuredTags'].extend(value)
    return finalDict
def parseAnnotations(bnglString):
    """
    Extract the documentation header tags from a BNGL string.

    Parameters:
        bnglString: text that is expected to start with a documentation
            header as described by syntaxDefinition().

    Returns a dict mapping each tag name to its value: a string for a single
    quoted value, or the parsed group of strings for a bracketed list.
    If the header cannot be parsed, 'Parsing Error' is printed and an empty
    dict is returned.
    """
    docSyntax = syntaxDefinition()
    try:
        tokens = docSyntax.parseString(bnglString)
    except pyp.ParseException:
        # Parenthesised single-argument form prints identically on
        # Python 2 and Python 3.
        print('Parsing Error')
        return {}

    tagsDict = {}
    for element in tokens:
        # Each element is one grouped entry: [tag name, value].
        tagsDict[element[0]] = element[1]
    return tagsDict
if __name__ == "__main__":
    # Script entry point: parse the annotations of a sample BNGL file and
    # show both the raw tag dictionary and its storage-format version.
    with open('output48.bngl') as f:
        lines = f.read()
    tagsDict = parseAnnotations(lines)
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print(tagsDict)
    print(dict2DatabaseFormat(tagsDict))