-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 50d0054
Showing
2,548 changed files
with
3,852,196 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,238 @@ | ||
## R | ||
|
||
# History files | ||
.Rhistory | ||
.Rapp.history | ||
|
||
# Session Data files | ||
.RData | ||
|
||
# User-specific files | ||
.Ruserdata | ||
|
||
# Example code in package build process | ||
*-Ex.R | ||
|
||
# Output files from R CMD build | ||
/*.tar.gz | ||
|
||
# Output files from R CMD check | ||
/*.Rcheck/ | ||
|
||
# RStudio files | ||
.Rproj.user/ | ||
|
||
# produced vignettes | ||
vignettes/*.html | ||
vignettes/*.pdf | ||
|
||
# OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 | ||
.httr-oauth | ||
|
||
# knitr and R markdown default cache directories | ||
*_cache/ | ||
/cache/ | ||
|
||
# Temporary files created by R markdown | ||
*.utf8.md | ||
*.knit.md | ||
|
||
# R Environment Variables | ||
.Renviron | ||
|
||
# pkgdown site | ||
docs/ | ||
|
||
# translation temp files | ||
po/*~ | ||
|
||
|
||
## Python | ||
|
||
# Byte-compiled / optimized / DLL files | ||
__pycache__/ | ||
*.py[cod] | ||
*$py.class | ||
|
||
# C extensions | ||
*.so | ||
|
||
# Distribution / packaging | ||
.Python | ||
build/ | ||
develop-eggs/ | ||
dist/ | ||
downloads/ | ||
eggs/ | ||
.eggs/ | ||
lib/ | ||
lib64/ | ||
parts/ | ||
sdist/ | ||
var/ | ||
wheels/ | ||
share/python-wheels/ | ||
*.egg-info/ | ||
.installed.cfg | ||
*.egg | ||
MANIFEST | ||
|
||
# PyInstaller | ||
# Usually these files are written by a python script from a template | ||
# before PyInstaller builds the exe, so as to inject date/other infos into it. | ||
*.manifest | ||
*.spec | ||
|
||
# Installer logs | ||
pip-log.txt | ||
pip-delete-this-directory.txt | ||
|
||
# Unit test / coverage reports | ||
htmlcov/ | ||
.tox/ | ||
.nox/ | ||
.coverage | ||
.coverage.* | ||
.cache | ||
nosetests.xml | ||
coverage.xml | ||
*.cover | ||
*.py,cover | ||
.hypothesis/ | ||
.pytest_cache/ | ||
cover/ | ||
|
||
# Translations | ||
*.mo | ||
*.pot | ||
|
||
# Django stuff: | ||
*.log | ||
local_settings.py | ||
db.sqlite3 | ||
db.sqlite3-journal | ||
|
||
# Flask stuff: | ||
instance/ | ||
.webassets-cache | ||
|
||
# Scrapy stuff: | ||
.scrapy | ||
|
||
# Sphinx documentation | ||
docs/_build/ | ||
|
||
# PyBuilder | ||
.pybuilder/ | ||
target/ | ||
|
||
# Jupyter Notebook | ||
.ipynb_checkpoints | ||
|
||
# IPython | ||
profile_default/ | ||
ipython_config.py | ||
|
||
# pyenv | ||
# For a library or package, you might want to ignore these files since the code is | ||
# intended to run in multiple environments; otherwise, check them in: | ||
# .python-version | ||
|
||
# pipenv | ||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. | ||
# However, in case of collaboration, if having platform-specific dependencies or dependencies | ||
# having no cross-platform support, pipenv may install dependencies that don't work, or not | ||
# install all needed dependencies. | ||
#Pipfile.lock | ||
|
||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow | ||
__pypackages__/ | ||
|
||
# Celery stuff | ||
celerybeat-schedule | ||
celerybeat.pid | ||
|
||
# SageMath parsed files | ||
*.sage.py | ||
|
||
# Environments | ||
.env | ||
.venv | ||
env/ | ||
venv/ | ||
ENV/ | ||
env.bak/ | ||
venv.bak/ | ||
|
||
# Spyder project settings | ||
.spyderproject | ||
.spyproject | ||
|
||
# Rope project settings | ||
.ropeproject | ||
|
||
# mkdocs documentation | ||
/site | ||
|
||
# mypy | ||
.mypy_cache/ | ||
.dmypy.json | ||
dmypy.json | ||
|
||
# Pyre type checker | ||
.pyre/ | ||
|
||
# pytype static type analyzer | ||
.pytype/ | ||
|
||
# Cython debug symbols | ||
cython_debug/ | ||
|
||
|
||
## Go | ||
|
||
# Binaries for programs and plugins | ||
*.exe | ||
*.exe~ | ||
*.dll | ||
*.so | ||
*.dylib | ||
|
||
# Test binary, built with `go test -c` | ||
*.test | ||
|
||
# Output of the go coverage tool, specifically when used with LiteIDE | ||
*.out | ||
|
||
# Dependency directories (comment out the line below to keep them under version control) | ||
vendor/ | ||
|
||
|
||
## macOS | ||
|
||
# General | ||
.DS_Store | ||
.AppleDouble | ||
.LSOverride | ||
|
||
# Icon must end with two \r | ||
Icon | ||
|
||
# Thumbnails | ||
._* | ||
|
||
# Files that might appear in the root of a volume | ||
.DocumentRevisions-V100 | ||
.fseventsd | ||
.Spotlight-V100 | ||
.TemporaryItems | ||
.Trashes | ||
.VolumeIcon.icns | ||
.com.apple.timemachine.donotpresent | ||
|
||
# Directories potentially created on remote AFP share | ||
.AppleDB | ||
.AppleDesktop | ||
Network Trash Folder | ||
Temporary Items | ||
.apdisk |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
# Predicting Unstable Software Benchmarks Using Static Source Code Features | ||
|
||
## Replication package | ||
|
||
This replication package can be used to interactively replicate the study we performed | ||
in our paper with the title *Predicting Unstable Software Benchmarks Using Static Source Code Features*. | ||
It is organized as follows. | ||
|
||
### Approach | ||
[`approach/`](approach/) contains all data and scripts of our approach, including feature extraction and combination, variability computation, and generating the resulting files for the machine learning part in [`study/`](study/). | ||
The approach's [README](approach/README.md) contains detailed information. | ||
|
||
|
||
### Study | ||
|
||
[`study/`](study/) contains the majority of the data and scripts to completely reproduce the study we conducted to evaluate our approach. | ||
|
||
[`r_analyses/`](r_analyses/) contains the data and scripts for running the individual feature analysis of RQ 2 as well as the scripts for creating the scatter plot of Figure 2. | ||
|
||
[`resources/variabilities_5_iterations.csv`](study/resources/variabilities_5_iterations.csv), | ||
[`resources/variabilities_10_iterations.csv`](study/resources/variabilities_10_iterations.csv), | ||
[`resources/variabilities_20_iterations.csv`](study/resources/variabilities_20_iterations.csv), | ||
and | ||
[`resources/variabilities_30_iterations.csv`](study/resources/variabilities_30_iterations.csv) are the files containing the data we collected by running the benchmarks, with a number of iterations of `5`, `10`, `20`, and `30`, respectively. | ||
|
||
## Classification (RQ 1) | ||
* [`classification.py`](study/classification.py) is a *Python* script to run all the experiments to train the machine learning models and evaluate their performance. | ||
* [`resources/classification_results.csv.xz`](study/resources/classification_results.csv.xz) is the outcome of the previous step, containing all the computed metrics for all the combinations of machine learning algorithm, number of iterations, threshold, and fold. | ||
* [`classification_study.ipynb`](study/classification_study.ipynb) is a *Jupyter Notebook* we used to study the prediction performance of our approach. | ||
|
||
## Feature Importance (RQ 2) | ||
|
||
### Individual Features | ||
* [`feature_importance.py`](study/feature_importance.py) is a *Python* script to run the permutation feature importance of individual features. | ||
* [`resources/feature_importance_mcc_results.csv`](study/resources/feature_importance_mcc_results.csv) is the outcome of the previous step, containing all MCC feature importances for each variability measure (RCIW Maritz-Jarrett, RCIW bootstrap, and RMAD) and each fold. | ||
We also provide feature importances for other prediction performance metrics, i.e., AUC and F-measure. | ||
* [`individual_feature_importance.R`](r_analyses/individual_feature_importance.R) is an R script to run the individual feature analyses and plot Figure 10. | ||
To plot the figure, run the R function `run`. | ||
To get statistics, run the R function `run_individual_features_stats`. | ||
|
||
### Feature Categories | ||
* [`group_importance.py`](study/group_importance.py) is a *Python* script to run the feature importance for feature categories. | ||
* [`resources/group_importance_mcc_results.csv`](study/resources/group_importance_mcc_results.csv) is the outcome of the previous step, containing all feature importances for each feature category, variability measure (RCIW Maritz-Jarrett, RCIW bootstrap, and RMAD), and fold. | ||
* [`group_importance_study.ipynb`](study/group_importance_study.ipynb) is a *Jupyter Notebook* we used to study the feature importance of feature categories. | ||
|
||
For Jupyter Notebooks, we also provide the compiled *HTML* file with included output. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
run `python downloadProjects.py` | ||
|
||
This is the downloader script for the Go projects. | ||
|
||
Usage: | ||
1. get project_commit.csv from BenchmarkVariabilities project | ||
2. run downloader and specify a path to download projects (this will automatically create GOPATH while downloading) | ||
$ python downloadProjects.py "place_to_download" | ||
you can use ./ if you want to download projects in the same folder | ||
3. it will download projects and will give an output as "project_commit_place.csv" for the FeatureExtraction to iterate projects and extract source code features. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
# Downloads projects from Github | ||
|
||
import subprocess, csv, os, sys | ||
|
||
# Names of the CSV files read from / written to the current working directory.
INPUT_CSV = "project_commit.csv"
OUTPUT_CSV = "project_commit_place.csv"


def _run_quietly(command, **kwargs):
    """Run *command* with its output discarded; raise on a non-zero exit.

    ``check=True`` is what makes ``subprocess.run`` raise
    ``subprocess.CalledProcessError``; without it a failing command is
    indistinguishable from a successful one.
    """
    subprocess.run(
        command,
        check=True,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        **kwargs,
    )


def download_projects(download_folder, input_csv=INPUT_CSV, output_csv=OUTPUT_CSV):
    """Download every project listed in *input_csv* and pin it to its commit.

    Each input row is ``owner/project;commit`` (``;``-delimited). For every
    row the project is fetched with ``go get`` into its own GOPATH
    (``<download_folder>/<owner>/<project>``) and ``git checkout <commit>`` is
    run inside the fetched sources. Rows whose checkout succeeds are written
    to *output_csv* as ``owner/project;commit;project_folder``.

    Args:
        download_folder: Directory to download the projects into. It is made
            absolute because the go tool rejects relative GOPATH entries.
        input_csv: Path of the ``;``-delimited input file.
        output_csv: Path of the ``;``-delimited file to (over)write.

    Returns:
        The list of ``github.com/owner/project`` URLs that could not be
        checked out at the requested commit.

    Raises:
        FileNotFoundError: If *input_csv* does not exist, or if the ``go``
            executable is not installed.
    """
    download_root = os.path.abspath(download_folder)
    failed_projects = []

    # Both files are closed on exit, even when a project fails unexpectedly.
    with open(input_csv, "r", encoding="utf-8", newline="") as store, \
            open(output_csv, "w", encoding="utf-8", newline="") as newfile:
        reader = csv.reader(store, delimiter=";")
        writer = csv.writer(newfile, delimiter=";")

        for counter, line in enumerate(reader, start=1):
            # Blank lines come back as [] from csv.reader; rows without a
            # commit column or without "owner/project" cannot be processed.
            slug_parts = line[0].split("/") if len(line) >= 2 else []
            if len(slug_parts) < 2:
                print(f"Skipping malformed row {counter}: {line}")
                continue

            name, project = slug_parts[0], slug_parts[1]
            commit = line[1]
            gopath = os.path.join(download_root, name, project)
            os.makedirs(gopath, exist_ok=True)
            url = f"github.com/{name}/{project}"
            print(url)

            try:
                # GOPATH is passed only to the child process instead of
                # mutating this process's environment.
                _run_quietly(
                    ["go", "get", url],
                    env=dict(os.environ, GOPATH=gopath),
                )
            except subprocess.CalledProcessError:
                # Not fatal on its own: the checkout below decides whether
                # the project is usable.
                print(f"Failed to go get {url}")

            project_folder = os.path.join(gopath, "src", *url.split("/"))
            try:
                _run_quietly(["git", "checkout", commit], cwd=project_folder)
            except subprocess.CalledProcessError as e:
                print(f"Failed to checkout to {commit} on {url} "
                      f"(git exit status {e.returncode})")
                failed_projects.append(url)
            except NotADirectoryError as n:
                print(n)
                print("This directory doesn't exist: " + project_folder)
                failed_projects.append(url)
            except FileNotFoundError as f:
                # Raised when project_folder is missing (nothing was fetched)
                # or when the git executable cannot be found.
                print(f)
                failed_projects.append(url)
            else:
                writer.writerow([line[0], line[1], project_folder])
            print(counter)

    return failed_projects


if __name__ == "__main__":
    # Give here the folder to download the projects into
    if len(sys.argv) < 2:
        sys.exit('Usage: python downloadProjects.py "place_to_download"')

    failed = download_projects(sys.argv[1])
    print("Failed projects: ")
    print(failed)
Oops, something went wrong.