Skip to content
Open
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .idea/vcs.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 0 additions & 9 deletions Gopkg.lock

This file was deleted.

22 changes: 0 additions & 22 deletions Gopkg.toml

This file was deleted.

21 changes: 21 additions & 0 deletions Thoughts.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
A place to convey my thoughts.

Feb 4th 2019\
DiffParser is mostly "complete", functionally speaking, but some of the regexes are hard-coded and untested.\
Before moving to ASTParser I will do a small check. Then once both AST and Diff are complete, I will do another sweep to refactor the code.\
- I tried allowing users to input a flag they wanted to log information on, but letting users manipulate the regex seems to open up the program a bit too much; instead, that information should be kept internal.\
- Is there any way I can group the list of "if" statements? \
- I think I have misunderstood what functionCalls is asking for.\
- Commas are optional for regions\
- Replaced [^n]* with .*
- For some reason it catches `196480` in `* (0x007d0000-0x00800000) starting at offset 196480 (0x2ff80).` as a function call.\

Feb 16th 2019\
DiffParser FunctionCall is still incorrect, but I have moved on to ASTParser.\
ASTParser seems relatively simple, because we are only looking for declared variables.
- Recursive traversal of AST should return a node instead of variable_declaration
- Should variable_declaration be in ast_result? Would a tuple suffice?

Feb 21st 2019\
DiffParser is complete, but functionCall captures "8" as a function call when parsing an assembly offset such as 8(%rdi).
ASTParser is complete, not sure if the tree traversal is too hardcoded.
34 changes: 0 additions & 34 deletions astResult.go

This file was deleted.

55 changes: 55 additions & 0 deletions ast_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import json
from ast_result import ASTResult

# *Goal*
# Parse an AST to return all the declared variables in the following format
# {int}{myInt}
# {string}{myString}
# {Foo}{myFooObject}


class ASTParser:
    """Extract the declared variables from a JSON-encoded AST."""

    def __init__(self):
        print("ASTParser created")

    def parse(self, file):
        """Load a JSON AST from *file* and return its variable declarations.

        *file* is handed straight to ``json.load``. The decoded document must
        have a top-level ``'Root'`` entry; every ``'VariableDeclaration'``
        node found beneath it is converted by ``nodeToVar`` and stored on the
        returned ``ASTResult`` as ``variableDeclarations``.
        """
        document = json.load(file)

        # traverseSearch reports its matches by appending to this list.
        declaration_nodes = []
        traverseSearch(document['Root'], 'VariableDeclaration', declaration_nodes)

        result = ASTResult()
        result.variableDeclarations = nodeToVar(declaration_nodes)
        return result

def nodeToVar(varNodes):
    """Convert variable-declaration nodes into ``(type, name)`` tuples.

    For each node the type text is looked up under ``'PredefinedType'`` and
    the name text under ``'VariableDeclarator'``. If the node contains an
    ``'ArrayCreationExpression'`` anywhere below it, ``"[]"`` is appended to
    the type text.

    Returns a list with one tuple per input node, in input order.
    """
    def _as_pair(declaration):
        array_hits = []
        traverseSearch(declaration, 'ArrayCreationExpression', array_hits)
        type_text = findVal(declaration, 'PredefinedType')
        name_text = findVal(declaration, 'VariableDeclarator')
        # NOTE(review): findVal yields None when nothing matches, so an array
        # declaration without a 'PredefinedType' node fails on the line below.
        if len(array_hits) > 0:
            type_text = type_text + "[]"
        return (type_text, name_text)

    return [_as_pair(declaration) for declaration in varNodes]

def traverseSearch(root, lookfor, resultList):
    """Collect the descendants of *root* whose ``'Type'`` equals *lookfor*.

    Matches are appended to *resultList* in depth-first, left-to-right order.
    The caller sees them because the list object itself is mutated; nothing
    is returned.

    *root* itself is never tested, only the nodes below it, and the search
    does not descend into a node that matched. Nodes with a missing or null
    ``'Children'`` entry are treated as leaves, and nodes without a
    ``'Type'`` entry never match.
    """
    # An explicit stack replaces recursion so that very deep trees cannot hit
    # the interpreter's recursion limit. Children are pushed in reverse so
    # they are popped, and therefore visited, in their original order.
    pending = list(reversed(root.get('Children') or []))
    while pending:
        node = pending.pop()
        if node.get('Type') == lookfor:
            resultList.append(node)
        else:
            pending.extend(reversed(node.get('Children') or []))

def findVal(varNode, lookFor):
    """Return the ``'ValueText'`` of the first *lookFor* node under *varNode*.

    The value is read from the first child of the first matching node
    reported by ``traverseSearch``. Used with ``'VariableDeclarator'`` to get
    a variable's name and with ``'PredefinedType'`` to get its type.

    Returns ``None`` when no node of type *lookFor* exists.
    """
    matches = []
    traverseSearch(varNode, lookFor, matches)
    if not matches:
        return None
    first_match = matches[0]
    return first_match['Children'][0]['ValueText']

8 changes: 8 additions & 0 deletions ast_result.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
class ASTResult:
    """Holds the variable declarations produced by parsing an AST."""

    def __init__(self):
        # Indexable pairs: element 0 is the type text, element 1 the name.
        self.variableDeclarations = []

    def toText(self):
        """Write each declaration to ``astResult.txt`` as a ``{type}{name}`` line.

        The file is opened in write mode relative to the current working
        directory, so any existing content is replaced.
        """
        rendered = (
            "{" + declaration[0] + "}{" + declaration[1] + "}\n"
            for declaration in self.variableDeclarations
        )
        with open('astResult.txt', 'w') as sink:
            sink.writelines(rendered)
50 changes: 0 additions & 50 deletions diffResult.go

This file was deleted.

52 changes: 52 additions & 0 deletions diff_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import re
from diff_result import DiffResult

# *Goal*
# Parse a diff file in the most efficient way possible.
# Keep these in mind: speed, maintainability, evolvability, etc.
# Compute the following
# - List of files in the diffs
# - number of regions
# - number of lines added
# - number of lines deleted
# - list of function calls seen in the diffs and their number of calls


class DiffParser:
    """Collect summary statistics from the text of a diff.

    ``parse`` fills a ``DiffResult`` with the files named in the diff, the
    number of regions (hunks), the number of added and deleted lines, and a
    tally of the function calls seen.
    """

    # Patterns are compiled once here rather than rebuilt for every line.
    # "diff --<flag> <paths...>" introduces a new file section.
    _FILE_HEADER = re.compile(r'^diff --[^\s]* (.*)')
    # "@@ -start[,count] +start[,count] @@" opens a region; commas are optional.
    _REGION = re.compile(r'^@@ -\d+(,\d+)? \+\d+(,\d+)? @@.*')
    # A name directly followed by "(" and preceded by whitespace or ".".
    # The first character must not be a digit, so assembly offsets such as
    # "8(%rdi)" are not reported as calls.
    _FUNCTION_CALL = re.compile(r'(?<=[\s.])([^\W\d]\w*)(?=\()')

    def __init__(self):
        print("DiffParser created")

    def parse(self, file):
        """Scan *file* line by line and return the populated ``DiffResult``.

        *file* is any iterable of text lines, such as an open text file.

        Added and deleted lines are counted only between a region header and
        the next file header. The ``---``/``+++`` file-name lines and any
        other header lines all come before a file's first region, so they are
        never counted, however many header lines there are.

        Every call on a line is tallied, not just the first one. Function
        calls are searched for on every line, header lines included.
        """
        diff_res = DiffResult()

        in_hunk = False
        for line in file:
            header = self._FILE_HEADER.search(line)
            if header:
                diff_res.files.extend(header.group(1).split(" "))
                # A new file section starts: its header lines are not content.
                in_hunk = False
            elif self._REGION.search(line):
                diff_res.regions += 1
                in_hunk = True
            elif in_hunk:
                if line.startswith("+"):
                    diff_res.lineAdded += 1
                elif line.startswith("-"):
                    diff_res.lineDeleted += 1

            for call in self._FUNCTION_CALL.finditer(line):
                diff_res.functionCalls[call.group(1)] += 1
        return diff_res

30 changes: 30 additions & 0 deletions diff_result.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from collections import defaultdict
class DiffResult:
    """Accumulates the statistics gathered while parsing a diff."""

    def __init__(self):
        self.files = []          # file paths named in the diff
        self.regions = 0         # number of regions seen
        self.lineAdded = 0       # number of added lines
        self.lineDeleted = 0     # number of deleted lines
        # Function name -> number of calls; unseen names start at 0.
        self.functionCalls = defaultdict(int)

    def toText(self):
        """Write a plain-text report of the statistics to ``diffResult.txt``.

        The file is opened in write mode relative to the current working
        directory, so any existing content is replaced.
        """
        with open('diffResult.txt', 'w') as sink:
            sink.write("Files: \n")
            sink.writelines(" - " + path + "\n" for path in self.files)

            sink.write(f"Regions: {self.regions}\n")
            sink.write(f"Lines Added: {self.lineAdded}\n")
            sink.write(f"Lines Deleted: {self.lineDeleted}\n")

            sink.write("Function Calls: \n")
            sink.writelines(
                " - " + name + ": " + str(count) + "\n"
                for name, count in self.functionCalls.items()
            )






Loading