Skip to content

Commit

Permalink
fixes issue eellak#34
Browse files Browse the repository at this point in the history
  • Loading branch information
shivanshuraj1333 committed Mar 12, 2019
1 parent 2447821 commit 0606f0f
Show file tree
Hide file tree
Showing 3 changed files with 184 additions and 76 deletions.
50 changes: 50 additions & 0 deletions src/migrations/versions/ad83c3873756_.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
"""Add product and product_component_conn tables.
Revision ID: ad83c3873756
Revises: 0a9673b6f14a
Create Date: 2019-03-11 19:11:57.157719
"""
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = 'ad83c3873756'
down_revision = '0a9673b6f14a'
branch_labels = None
depends_on = None


def upgrade():
    """Create the ``product`` and ``product_component_conn`` tables.

    ``product`` is created first because ``product_component_conn`` carries
    a foreign key to ``product.id``.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    op.create_table(
        'product',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('name', sa.String(length=128), nullable=False),
        sa.Column('version', sa.String(length=128), nullable=False),
        sa.Column('license_id', sa.Integer(), nullable=True),
        sa.Column('owner', sa.String(length=128), nullable=True),
        sa.Column('approver', sa.String(length=128), nullable=True),
        sa.Column('approval_date', sa.DateTime(), nullable=True),
        sa.ForeignKeyConstraint(['license_id'], ['license.id']),
        sa.PrimaryKeyConstraint('id'),
        sa.UniqueConstraint('name'),
    )
    # Association table: one row per (product, component) pair.
    op.create_table(
        'product_component_conn',
        sa.Column('product_id', sa.Integer(), nullable=False),
        sa.Column('component_id', sa.Integer(), nullable=False),
        sa.Column('relation', sa.String(length=128), nullable=True),
        sa.Column('modification', sa.Boolean(), nullable=True),
        sa.Column('delivery', sa.String(length=128), nullable=True),
        sa.ForeignKeyConstraint(['component_id'], ['component.id']),
        sa.ForeignKeyConstraint(['product_id'], ['product.id']),
        sa.PrimaryKeyConstraint('product_id', 'component_id'),
    )
    # ### end Alembic commands ###


def downgrade():
    """Drop the tables created by ``upgrade``.

    The association table goes first so that no foreign key still points
    at ``product`` when it is dropped.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    for table_name in ('product_component_conn', 'product'):
        op.drop_table(table_name)
    # ### end Alembic commands ###
207 changes: 131 additions & 76 deletions src/populate_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,115 +11,169 @@
from datetime import datetime
from specification import *
from models import *
from tqdm import tqdm
import sys


def populate_license(directory):
    """Load ``license-info.csv`` from *directory* into the License table.

    Columns are read positionally: full name, identifier, FSF status,
    OSI status, license category, license text.  One ``License`` is added
    and committed per CSV row while a progress bar counts processed rows.

    Args:
        directory: Folder, relative to the current working directory, that
            contains ``license-info.csv``.
    """
    path = os.path.join(os.getcwd(), directory, 'license-info.csv')

    # Read the file once.  Knowing the row count up front gives the progress
    # bar an exact total without a second pass over the file.
    with open(path, 'r', encoding='utf-8') as input_file:
        rows = list(csv.reader(input_file, delimiter=','))

    for row in tqdm(rows, unit='row'):
        full_name = row[0]
        identifier = row[1]
        # The flag columns hold a fixed label when set; anything else is False.
        fsf_free_libre = row[2] == 'FSF Libre'
        osi_approved = row[3] == 'OSI Approved'
        license_category = row[4]
        license_text = row[5]
        license_row = License(full_name, identifier, fsf_free_libre,
                              osi_approved, license_category, license_text)
        db.session.add(license_row)
        # Committed per row, as before, so earlier rows persist if a later
        # row fails.
        db.session.commit()


def populate_component(directory):
    """Load ``component-info.csv`` from *directory* into the Component table.

    Columns are read positionally: name, version, created-by, publication
    date (``YYYY-MM-DD`` or empty), origin, source URL, license expression,
    external link.  One ``Component`` is added and committed per CSV row
    while a single progress bar counts processed rows.

    Args:
        directory: Folder, relative to the current working directory, that
            contains ``component-info.csv``.

    Raises:
        ValueError: If a non-empty publication date is not ``YYYY-MM-DD``.
    """
    path = os.path.join(os.getcwd(), directory, 'component-info.csv')

    # Read the file once.  Knowing the row count up front gives the progress
    # bar an exact total without a second pass over the file.
    with open(path, 'r', encoding='utf-8') as input_file:
        rows = list(csv.reader(input_file, delimiter=','))

    # Exactly one bar: wrapping the iterable is enough, no outer tqdm context.
    for row in tqdm(rows, unit='row'):
        name = row[0]
        version = row[1]
        created_by = row[2]
        # An empty date cell is stored as NULL rather than parsed.
        pub_date = datetime.strptime(row[3], '%Y-%m-%d') if row[3] else None
        origin = row[4]
        source_url = row[5]
        license_expression = row[6]
        ext_link = row[7]
        component = Component(name, version, created_by, pub_date,
                              origin, source_url, license_expression,
                              ext_link)
        db.session.add(component)
        # Committed per row, as before, so earlier rows persist if a later
        # row fails.
        db.session.commit()


def populate_component_conn(directory):
    """Link components to each other from ``component-relationship.csv``.

    Each row is read positionally as (first component name, relationship,
    second component name).  Rows whose relationship is not listed in
    ``valid_relationship`` are skipped; for the others both components are
    looked up by name and the second is appended to the first one's
    relationship collection.

    Args:
        directory: Folder, relative to the current working directory, that
            contains ``component-relationship.csv``.
    """
    path = os.path.join(os.getcwd(), directory, 'component-relationship.csv')

    # Read the file once.  Knowing the row count up front gives the progress
    # bar an exact total without a second pass over the file.
    with open(path, 'r', encoding='utf-8') as input_file:
        rows = list(csv.reader(input_file, delimiter=','))

    # The bar advances for every row, including the skipped ones.
    for row in tqdm(rows, unit='row'):
        if row[1] not in valid_relationship:
            continue
        source = Component.query.filter_by(name=row[0]).first()
        target = Component.query.filter_by(name=row[2]).first()
        # NOTE(review): the pre-change code used `components` here; confirm
        # the relationship attribute name against the Component model.
        source.component.append(target)
        db.session.commit()

def populate_product(directory):
    """Load ``product-info.csv`` from *directory* into the Product table.

    Columns are read positionally: name, version, owner, approver, approval
    date (``YYYY-MM-DD`` or empty), license identifier.  The license is
    looked up by identifier and attached to the product; if no license
    matches, the product's license is set to ``None``.  One ``Product`` is
    added and committed per CSV row while a single progress bar counts
    processed rows.

    Args:
        directory: Folder, relative to the current working directory, that
            contains ``product-info.csv``.

    Raises:
        ValueError: If a non-empty approval date is not ``YYYY-MM-DD``.
    """
    path = os.path.join(os.getcwd(), directory, 'product-info.csv')

    # Read the file once.  Knowing the row count up front gives the progress
    # bar an exact total without a second pass over the file.
    with open(path, 'r', encoding='utf-8') as input_file:
        rows = list(csv.reader(input_file, delimiter=','))

    # Exactly one bar: wrapping the iterable is enough, no outer tqdm context.
    for row in tqdm(rows, unit='row'):
        name = row[0]
        version = row[1]
        owner = row[2]
        approver = row[3]
        # An empty date cell is stored as NULL, the same way
        # populate_component treats an empty publication date.
        approval_date = (
            datetime.strptime(row[4], '%Y-%m-%d') if row[4] else None
        )
        license_identifier = row[5]
        license_row = License.query.filter_by(
            identifier=license_identifier).first()
        product = Product(name, version, owner, approver, approval_date)
        product.license = license_row
        db.session.add(product)
        # Committed per row, as before, so earlier rows persist if a later
        # row fails.
        db.session.commit()

def populate_product_component_conn(directory):
    """Link products to components from ``product-component-relationship.csv``.

    Each row is read positionally as (``<product name>-<version>``,
    relationship, ``<component name>-<version>``, modification flag,
    delivery).  Rows whose relationship is not listed in
    ``valid_relationship`` are skipped.  For the others the product and the
    component are looked up by name and version and one
    ``Product_Component_conn`` is added and committed.

    Args:
        directory: Folder, relative to the current working directory, that
            contains ``product-component-relationship.csv``.
    """
    path = os.path.join(os.getcwd(), directory,
                        'product-component-relationship.csv')

    # Read the file once.  Knowing the row count up front gives the progress
    # bar an exact total without a second pass over the file.
    with open(path, 'r', encoding='utf-8') as input_file:
        rows = list(csv.reader(input_file, delimiter=','))

    # The bar advances for every row, including the skipped ones.
    for row in tqdm(rows, unit='row'):
        relation = row[1]
        if relation not in valid_relationship:
            continue
        # NOTE(review): only the first two '-'-separated parts are used, so
        # a name that itself contains '-' will not match; confirm the
        # dataset never has one.
        product_info = row[0].split('-')
        product = Product.query.filter_by(
            name=product_info[0], version=product_info[1]).first()
        component_info = row[2].split('-')
        component = Component.query.filter_by(
            name=component_info[0], version=component_info[1]).first()
        # Only the exact label 'MODIFIED' counts as modified.
        modification = row[3] == 'MODIFIED'
        delivery = row[4]
        link = Product_Component_conn(
            product, component, relation, modification, delivery)
        db.session.add(link)
        # Committed per row, as before, so earlier rows persist if a later
        # row fails.
        db.session.commit()


if __name__ == '__main__':
Expand All @@ -130,6 +184,7 @@ def populate_product_component_conn(directory):
db.create_all()

directory = 'dataset'
print("-> 5 processes will run.")
populate_license(directory)
populate_component(directory)
populate_component_conn(directory)
Expand Down
3 changes: 3 additions & 0 deletions src/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,6 @@ pyasn1-modules==0.2.2
python-ldap==3.1.0
pyldap==3.0.0.post1
flask-simpleldap==1.2.0
system-service==0.3
systemd-python==234
tqdm==4.31.1

0 comments on commit 0606f0f

Please sign in to comment.